//===- SIInstrInfo.cpp - SI Instruction Information ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// SI Implementation of TargetInstrInfo.
//
//===----------------------------------------------------------------------===//

#include "SIInstrInfo.h"
#include "AMDGPU.h"
#include "AMDGPUInstrInfo.h"
#include "GCNHazardRecognizer.h"
#include "GCNSubtarget.h"
#include "SIMachineFunctionInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/MC/MCContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;

#define DEBUG_TYPE "si-instr-info"

#define GET_INSTRINFO_CTOR_DTOR
#include "AMDGPUGenInstrInfo.inc"

namespace llvm {
namespace AMDGPU {
#define GET_D16ImageDimIntrinsics_IMPL
#define GET_ImageDimIntrinsicTable_IMPL
#define GET_RsrcIntrinsics_IMPL
#include "AMDGPUGenSearchableTables.inc"
}
}


// Must be at least 4 to be able to branch over minimum unconditional branch
// code. This is only for making it possible to write reasonably small tests for
// long branches.
static cl::opt<unsigned>
BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16),
                 cl::desc("Restrict range of branch instructions (DEBUG)"));

static cl::opt<bool> Fix16BitCopies(
  "amdgpu-fix-16-bit-physreg-copies",
  cl::desc("Fix copies between 32 and 16 bit registers by extending to 32 bit"),
  cl::init(true),
  cl::ReallyHidden);

SIInstrInfo::SIInstrInfo(const GCNSubtarget &ST)
  : AMDGPUGenInstrInfo(AMDGPU::ADJCALLSTACKUP, AMDGPU::ADJCALLSTACKDOWN),
    RI(ST), ST(ST) {
  SchedModel.init(&ST);
}

//===----------------------------------------------------------------------===//
// TargetInstrInfo callbacks
//===----------------------------------------------------------------------===//

static unsigned getNumOperandsNoGlue(SDNode *Node) {
  unsigned N = Node->getNumOperands();
  while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)
    --N;
  return N;
}

/// Returns true if both nodes have the same value for the given
/// operand \p OpName, or if both nodes do not have this operand.
static bool nodesHaveSameOperandValue(SDNode *N0, SDNode* N1, unsigned OpName) {
  unsigned Opc0 = N0->getMachineOpcode();
  unsigned Opc1 = N1->getMachineOpcode();

  int Op0Idx = AMDGPU::getNamedOperandIdx(Opc0, OpName);
  int Op1Idx = AMDGPU::getNamedOperandIdx(Opc1, OpName);

  if (Op0Idx == -1 && Op1Idx == -1)
    return true;


  if ((Op0Idx == -1 && Op1Idx != -1) ||
      (Op1Idx == -1 && Op0Idx != -1))
    return false;

  // getNamedOperandIdx returns the index for the MachineInstr's operands,
  // which includes the result as the first operand. We are indexing into the
  // MachineSDNode's operands, so we need to skip the result operand to get
  // the real index.
  --Op0Idx;
  --Op1Idx;

  return N0->getOperand(Op0Idx) == N1->getOperand(Op1Idx);
}

static bool canRemat(const MachineInstr &MI) {

  if (SIInstrInfo::isVOP1(MI) || SIInstrInfo::isVOP2(MI) ||
      SIInstrInfo::isVOP3(MI) || SIInstrInfo::isSDWA(MI) ||
      SIInstrInfo::isSALU(MI))
    return true;

  if (SIInstrInfo::isSMRD(MI)) {
    return !MI.memoperands_empty() &&
           llvm::all_of(MI.memoperands(), [](const MachineMemOperand *MMO) {
             return MMO->isLoad() && MMO->isInvariant();
           });
  }

  return false;
}

bool SIInstrInfo::isReallyTriviallyReMaterializable(
    const MachineInstr &MI) const {

  if (canRemat(MI)) {
    // Normally a VALU use of exec would block rematerialization, but an
    // implicit exec read is OK in this case since all VALU instructions have
    // one. We really want all of the generic logic for this except for this.

    // Another potential implicit use is the mode register. The core logic of
    // the RA will not attempt rematerialization if mode is set anywhere
    // in the function; otherwise it is safe since mode is not changed.

    // This differs from the generic method, which does not allow
    // rematerialization if there are virtual register uses. We allow this,
    // and therefore this method includes SOP instructions as well.
    if (!MI.hasImplicitDef() &&
        MI.getNumImplicitOperands() == MI.getDesc().implicit_uses().size() &&
        !MI.mayRaiseFPException())
      return true;
  }

  return TargetInstrInfo::isReallyTriviallyReMaterializable(MI);
}

// Returns true if the scalar result of a VALU instruction depends on exec.
static bool resultDependsOnExec(const MachineInstr &MI) {
  // Ignore comparisons which are only used masked with exec.
  // This allows some hoisting/sinking of VALU comparisons.
  if (MI.isCompare()) {
    const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
    Register DstReg = MI.getOperand(0).getReg();
    if (!DstReg.isVirtual())
      return true;
    for (MachineInstr &Use : MRI.use_nodbg_instructions(DstReg)) {
      switch (Use.getOpcode()) {
      case AMDGPU::S_AND_SAVEEXEC_B32:
      case AMDGPU::S_AND_SAVEEXEC_B64:
        break;
      case AMDGPU::S_AND_B32:
      case AMDGPU::S_AND_B64:
        if (!Use.readsRegister(AMDGPU::EXEC))
          return true;
        break;
      default:
        return true;
      }
    }
    return false;
  }

  switch (MI.getOpcode()) {
  default:
    break;
  case AMDGPU::V_READFIRSTLANE_B32:
    return true;
  }

  return false;
}

bool SIInstrInfo::isIgnorableUse(const MachineOperand &MO) const {
  // Any implicit use of exec by VALU is not a real register read.
  return MO.getReg() == AMDGPU::EXEC && MO.isImplicit() &&
         isVALU(*MO.getParent()) && !resultDependsOnExec(*MO.getParent());
}

bool SIInstrInfo::isSafeToSink(MachineInstr &MI,
                               MachineBasicBlock *SuccToSinkTo,
                               MachineCycleInfo *CI) const {
  // Allow sinking if MI edits lane mask (divergent i1 in sgpr).
  if (MI.getOpcode() == AMDGPU::SI_IF_BREAK)
    return true;

  MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
  // Check if sinking MI would create a temporal divergent use.
  for (auto Op : MI.uses()) {
    if (Op.isReg() && Op.getReg().isVirtual() &&
        RI.isSGPRClass(MRI.getRegClass(Op.getReg()))) {
      MachineInstr *SgprDef = MRI.getVRegDef(Op.getReg());

      // SgprDef defined inside cycle
      MachineCycle *FromCycle = CI->getCycle(SgprDef->getParent());
      if (FromCycle == nullptr)
        continue;

      MachineCycle *ToCycle = CI->getCycle(SuccToSinkTo);
      // Check if there is a FromCycle that contains SgprDef's basic block but
      // does not contain SuccToSinkTo and also has a divergent exit condition.
      while (FromCycle && !FromCycle->contains(ToCycle)) {
        // After structurize-cfg, there should be exactly one cycle exit.
        SmallVector<MachineBasicBlock *, 1> ExitBlocks;
        FromCycle->getExitBlocks(ExitBlocks);
        assert(ExitBlocks.size() == 1);
        assert(ExitBlocks[0]->getSinglePredecessor());

        // FromCycle has divergent exit condition.
        if (hasDivergentBranch(ExitBlocks[0]->getSinglePredecessor())) {
          return false;
        }

        FromCycle = FromCycle->getParentCycle();
      }
    }
  }

  return true;
}

bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1,
                                          int64_t &Offset0,
                                          int64_t &Offset1) const {
  if (!Load0->isMachineOpcode() || !Load1->isMachineOpcode())
    return false;

  unsigned Opc0 = Load0->getMachineOpcode();
  unsigned Opc1 = Load1->getMachineOpcode();

  // Make sure both are actually loads.
  if (!get(Opc0).mayLoad() || !get(Opc1).mayLoad())
    return false;

  if (isDS(Opc0) && isDS(Opc1)) {

    // FIXME: Handle this case:
    if (getNumOperandsNoGlue(Load0) != getNumOperandsNoGlue(Load1))
      return false;

    // Check base reg.
    if (Load0->getOperand(0) != Load1->getOperand(0))
      return false;

    // Skip read2 / write2 variants for simplicity.
    // TODO: We should report true if the used offsets are adjacent (excluding
    // st64 versions).
    int Offset0Idx = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
    int Offset1Idx = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);
    if (Offset0Idx == -1 || Offset1Idx == -1)
      return false;

    // XXX - be careful of dataless loads
    // getNamedOperandIdx returns the index for MachineInstrs. Since they
    // include the output in the operand list, but SDNodes don't, we need to
    // subtract the index by one.
    Offset0Idx -= get(Opc0).NumDefs;
    Offset1Idx -= get(Opc1).NumDefs;
    Offset0 = cast<ConstantSDNode>(Load0->getOperand(Offset0Idx))->getZExtValue();
    Offset1 = cast<ConstantSDNode>(Load1->getOperand(Offset1Idx))->getZExtValue();
    return true;
  }

  if (isSMRD(Opc0) && isSMRD(Opc1)) {
    // Skip time and cache invalidation instructions.
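    // (Such instructions, e.g. S_MEMTIME or the cache invalidates, have no
    // sbase operand, which is why the check below filters them out.)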
    if (!AMDGPU::hasNamedOperand(Opc0, AMDGPU::OpName::sbase) ||
        !AMDGPU::hasNamedOperand(Opc1, AMDGPU::OpName::sbase))
      return false;

    unsigned NumOps = getNumOperandsNoGlue(Load0);
    if (NumOps != getNumOperandsNoGlue(Load1))
      return false;

    // Check base reg.
    if (Load0->getOperand(0) != Load1->getOperand(0))
      return false;

    // Match register offsets, if both register and immediate offsets are
    // present.
    assert(NumOps == 4 || NumOps == 5);
    if (NumOps == 5 && Load0->getOperand(1) != Load1->getOperand(1))
      return false;

    const ConstantSDNode *Load0Offset =
        dyn_cast<ConstantSDNode>(Load0->getOperand(NumOps - 3));
    const ConstantSDNode *Load1Offset =
        dyn_cast<ConstantSDNode>(Load1->getOperand(NumOps - 3));

    if (!Load0Offset || !Load1Offset)
      return false;

    Offset0 = Load0Offset->getZExtValue();
    Offset1 = Load1Offset->getZExtValue();
    return true;
  }

  // MUBUF and MTBUF can access the same addresses.
  if ((isMUBUF(Opc0) || isMTBUF(Opc0)) && (isMUBUF(Opc1) || isMTBUF(Opc1))) {

    // MUBUF and MTBUF have vaddr at different indices.
    if (!nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::soffset) ||
        !nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::vaddr) ||
        !nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::srsrc))
      return false;

    int OffIdx0 = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
    int OffIdx1 = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);

    if (OffIdx0 == -1 || OffIdx1 == -1)
      return false;

    // getNamedOperandIdx returns the index for MachineInstrs. Since they
    // include the output in the operand list, but SDNodes don't, we need to
    // subtract the index by one.
    OffIdx0 -= get(Opc0).NumDefs;
    OffIdx1 -= get(Opc1).NumDefs;

    SDValue Off0 = Load0->getOperand(OffIdx0);
    SDValue Off1 = Load1->getOperand(OffIdx1);

    // The offset might be a FrameIndexSDNode.
    if (!isa<ConstantSDNode>(Off0) || !isa<ConstantSDNode>(Off1))
      return false;

    Offset0 = cast<ConstantSDNode>(Off0)->getZExtValue();
    Offset1 = cast<ConstantSDNode>(Off1)->getZExtValue();
    return true;
  }

  return false;
}

static bool isStride64(unsigned Opc) {
  switch (Opc) {
  case AMDGPU::DS_READ2ST64_B32:
  case AMDGPU::DS_READ2ST64_B64:
  case AMDGPU::DS_WRITE2ST64_B32:
  case AMDGPU::DS_WRITE2ST64_B64:
    return true;
  default:
    return false;
  }
}

bool SIInstrInfo::getMemOperandsWithOffsetWidth(
    const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
    int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
    const TargetRegisterInfo *TRI) const {
  if (!LdSt.mayLoadOrStore())
    return false;

  unsigned Opc = LdSt.getOpcode();
  OffsetIsScalable = false;
  const MachineOperand *BaseOp, *OffsetOp;
  int DataOpIdx;

  if (isDS(LdSt)) {
    BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::addr);
    OffsetOp = getNamedOperand(LdSt, AMDGPU::OpName::offset);
    if (OffsetOp) {
      // Normal, single offset LDS instruction.
      if (!BaseOp) {
        // DS_CONSUME/DS_APPEND use M0 for the base address.
        // TODO: find the implicit use operand for M0 and use that as BaseOp?
        return false;
      }
      BaseOps.push_back(BaseOp);
      Offset = OffsetOp->getImm();
      // Get appropriate operand, and compute width accordingly.
      DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
      if (DataOpIdx == -1)
        DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
      Width = getOpSize(LdSt, DataOpIdx);
    } else {
      // The 2 offset instructions use offset0 and offset1 instead. We can treat
      // these as a load with a single offset if the 2 offsets are consecutive.
      // We will use this for some partially aligned loads.
      const MachineOperand *Offset0Op =
          getNamedOperand(LdSt, AMDGPU::OpName::offset0);
      const MachineOperand *Offset1Op =
          getNamedOperand(LdSt, AMDGPU::OpName::offset1);

      unsigned Offset0 = Offset0Op->getImm() & 0xff;
      unsigned Offset1 = Offset1Op->getImm() & 0xff;
      if (Offset0 + 1 != Offset1)
        return false;

      // Each of these offsets is in element sized units, so we need to convert
      // to bytes of the individual reads.

      unsigned EltSize;
      if (LdSt.mayLoad())
        EltSize = TRI->getRegSizeInBits(*getOpRegClass(LdSt, 0)) / 16;
      else {
        assert(LdSt.mayStore());
        int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
        EltSize = TRI->getRegSizeInBits(*getOpRegClass(LdSt, Data0Idx)) / 8;
      }

      if (isStride64(Opc))
        EltSize *= 64;

      BaseOps.push_back(BaseOp);
      Offset = EltSize * Offset0;
      // Get appropriate operand(s), and compute width accordingly.
      DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
      if (DataOpIdx == -1) {
        DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
        Width = getOpSize(LdSt, DataOpIdx);
        DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data1);
        Width += getOpSize(LdSt, DataOpIdx);
      } else {
        Width = getOpSize(LdSt, DataOpIdx);
      }
    }
    return true;
  }

  if (isMUBUF(LdSt) || isMTBUF(LdSt)) {
    const MachineOperand *RSrc = getNamedOperand(LdSt, AMDGPU::OpName::srsrc);
    if (!RSrc) // e.g. BUFFER_WBINVL1_VOL
      return false;
    BaseOps.push_back(RSrc);
    BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::vaddr);
    if (BaseOp && !BaseOp->isFI())
      BaseOps.push_back(BaseOp);
    const MachineOperand *OffsetImm =
        getNamedOperand(LdSt, AMDGPU::OpName::offset);
    Offset = OffsetImm->getImm();
    const MachineOperand *SOffset =
        getNamedOperand(LdSt, AMDGPU::OpName::soffset);
    if (SOffset) {
      if (SOffset->isReg())
        BaseOps.push_back(SOffset);
      else
        Offset += SOffset->getImm();
    }
    // Get appropriate operand, and compute width accordingly.
    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
    if (DataOpIdx == -1)
      DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
    if (DataOpIdx == -1) // LDS DMA
      return false;
    Width = getOpSize(LdSt, DataOpIdx);
    return true;
  }

  if (isMIMG(LdSt)) {
    int SRsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
    BaseOps.push_back(&LdSt.getOperand(SRsrcIdx));
    int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
    if (VAddr0Idx >= 0) {
      // GFX10 possible NSA encoding.
      for (int I = VAddr0Idx; I < SRsrcIdx; ++I)
        BaseOps.push_back(&LdSt.getOperand(I));
    } else {
      BaseOps.push_back(getNamedOperand(LdSt, AMDGPU::OpName::vaddr));
    }
    Offset = 0;
    // Get appropriate operand, and compute width accordingly.
    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
    Width = getOpSize(LdSt, DataOpIdx);
    return true;
  }

  if (isSMRD(LdSt)) {
    BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::sbase);
    if (!BaseOp) // e.g. S_MEMTIME
      return false;
    BaseOps.push_back(BaseOp);
    OffsetOp = getNamedOperand(LdSt, AMDGPU::OpName::offset);
    Offset = OffsetOp ? OffsetOp->getImm() : 0;
    // Get appropriate operand, and compute width accordingly.
    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
    if (DataOpIdx == -1)
      return false;
    Width = getOpSize(LdSt, DataOpIdx);
    return true;
  }

  if (isFLAT(LdSt)) {
    // Instructions have either vaddr or saddr or both or none.
    BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::vaddr);
    if (BaseOp)
      BaseOps.push_back(BaseOp);
    BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::saddr);
    if (BaseOp)
      BaseOps.push_back(BaseOp);
    Offset = getNamedOperand(LdSt, AMDGPU::OpName::offset)->getImm();
    // Get appropriate operand, and compute width accordingly.
    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
    if (DataOpIdx == -1)
      DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
    if (DataOpIdx == -1) // LDS DMA
      return false;
    Width = getOpSize(LdSt, DataOpIdx);
    return true;
  }

  return false;
}

static bool memOpsHaveSameBasePtr(const MachineInstr &MI1,
                                  ArrayRef<const MachineOperand *> BaseOps1,
                                  const MachineInstr &MI2,
                                  ArrayRef<const MachineOperand *> BaseOps2) {
  // Only examine the first "base" operand of each instruction, on the
  // assumption that it represents the real base address of the memory access.
  // Other operands are typically offsets or indices from this base address.
  if (BaseOps1.front()->isIdenticalTo(*BaseOps2.front()))
    return true;

  if (!MI1.hasOneMemOperand() || !MI2.hasOneMemOperand())
    return false;

  auto MO1 = *MI1.memoperands_begin();
  auto MO2 = *MI2.memoperands_begin();
  if (MO1->getAddrSpace() != MO2->getAddrSpace())
    return false;

  auto Base1 = MO1->getValue();
  auto Base2 = MO2->getValue();
  if (!Base1 || !Base2)
    return false;
  Base1 = getUnderlyingObject(Base1);
  Base2 = getUnderlyingObject(Base2);

  if (isa<UndefValue>(Base1) || isa<UndefValue>(Base2))
    return false;

  return Base1 == Base2;
}

bool SIInstrInfo::shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
                                      int64_t Offset1, bool OffsetIsScalable1,
                                      ArrayRef<const MachineOperand *> BaseOps2,
                                      int64_t Offset2, bool OffsetIsScalable2,
                                      unsigned ClusterSize,
                                      unsigned NumBytes) const {
  // If the mem ops (to be clustered) do not have the same base ptr, then they
  // should not be clustered
  if (!BaseOps1.empty() && !BaseOps2.empty()) {
    const MachineInstr &FirstLdSt = *BaseOps1.front()->getParent();
    const MachineInstr &SecondLdSt = *BaseOps2.front()->getParent();
    if (!memOpsHaveSameBasePtr(FirstLdSt, BaseOps1, SecondLdSt, BaseOps2))
      return false;
  } else if (!BaseOps1.empty() || !BaseOps2.empty()) {
    // If only one base op is empty, they do not have the same base ptr
    return false;
  }

  // To avoid register pressure, the number of DWORDs loaded together by all
  // clustered mem ops should, on average, not exceed 8. This is an
  // empirical value based on certain observations and performance related
  // experiments.
  // The good thing about this heuristic is that it avoids clustering too many
  // sub-word loads, and also avoids clustering wide loads. Below is a
  // brief summary of how the heuristic behaves for various `LoadSize`.
  // (1) 1 <= LoadSize <= 4: cluster at max 8 mem ops
  // (2) 5 <= LoadSize <= 8: cluster at max 4 mem ops
  // (3) 9 <= LoadSize <= 12: cluster at max 2 mem ops
  // (4) 13 <= LoadSize <= 16: cluster at max 2 mem ops
  // (5) LoadSize >= 17: do not cluster
  const unsigned LoadSize = NumBytes / ClusterSize;
  const unsigned NumDWORDs = ((LoadSize + 3) / 4) * ClusterSize;
  return NumDWORDs <= 8;
}

// FIXME: This behaves strangely. If, for example, you have 32 loads + stores,
// the first 16 loads will be interleaved with the stores, and the next 16 will
// be clustered as expected. It should really split into two batches of 16.
//
// Loads are clustered until this returns false, rather than trying to schedule
// groups of stores. This also means we have to deal with saying different
// address space loads should be clustered, and ones which might cause bank
// conflicts.
//
// This might be deprecated so it might not be worth that much effort to fix.
bool SIInstrInfo::shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1,
                                          int64_t Offset0, int64_t Offset1,
                                          unsigned NumLoads) const {
  assert(Offset1 > Offset0 &&
         "Second offset should be larger than first offset!");
  // If we have fewer than 16 loads in a row, and the offsets are within 64
  // bytes, then schedule together.

  // A cacheline is 64 bytes (for global memory).
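  // For example, two loads at offsets 0 and 48 are within 64 bytes of each
  // other and will be scheduled together, provided no more than 16 loads are
  // in the run.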
  return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
}

static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator MI,
                              const DebugLoc &DL, MCRegister DestReg,
                              MCRegister SrcReg, bool KillSrc,
                              const char *Msg = "illegal VGPR to SGPR copy") {
  MachineFunction *MF = MBB.getParent();
  DiagnosticInfoUnsupported IllegalCopy(MF->getFunction(), Msg, DL, DS_Error);
  LLVMContext &C = MF->getFunction().getContext();
  C.diagnose(IllegalCopy);

  BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_ILLEGAL_COPY), DestReg)
      .addReg(SrcReg, getKillRegState(KillSrc));
}

/// Handle copying from SGPR to AGPR, or from AGPR to AGPR on GFX908. It is not
/// possible to have a direct copy in these cases on GFX908, so an intermediate
/// VGPR copy is required.
static void indirectCopyToAGPR(const SIInstrInfo &TII,
                               MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator MI,
                               const DebugLoc &DL, MCRegister DestReg,
                               MCRegister SrcReg, bool KillSrc,
                               RegScavenger &RS, bool RegsOverlap,
                               Register ImpDefSuperReg = Register(),
                               Register ImpUseSuperReg = Register()) {
  assert((TII.getSubtarget().hasMAIInsts() &&
          !TII.getSubtarget().hasGFX90AInsts()) &&
         "Expected GFX908 subtarget.");

  assert((AMDGPU::SReg_32RegClass.contains(SrcReg) ||
          AMDGPU::AGPR_32RegClass.contains(SrcReg)) &&
         "Source register of the copy should be either an SGPR or an AGPR.");

  assert(AMDGPU::AGPR_32RegClass.contains(DestReg) &&
         "Destination register of the copy should be an AGPR.");

  const SIRegisterInfo &RI = TII.getRegisterInfo();

  // First try to find defining accvgpr_write to avoid temporary registers.
  // In the case of copies of overlapping AGPRs, we conservatively do not
  // reuse previous accvgpr_writes. Otherwise, we may incorrectly pick up
  // an accvgpr_write used for this same copy due to implicit-defs
  if (!RegsOverlap) {
    for (auto Def = MI, E = MBB.begin(); Def != E; ) {
      --Def;

      if (!Def->modifiesRegister(SrcReg, &RI))
        continue;

      if (Def->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
          Def->getOperand(0).getReg() != SrcReg)
        break;

      MachineOperand &DefOp = Def->getOperand(1);
      assert(DefOp.isReg() || DefOp.isImm());

      if (DefOp.isReg()) {
        bool SafeToPropagate = true;
        // Check that register source operand is not clobbered before MI.
        // Immediate operands are always safe to propagate.
        for (auto I = Def; I != MI && SafeToPropagate; ++I)
          if (I->modifiesRegister(DefOp.getReg(), &RI))
            SafeToPropagate = false;

        if (!SafeToPropagate)
          break;

        DefOp.setIsKill(false);
      }

      MachineInstrBuilder Builder =
          BuildMI(MBB, MI, DL, TII.get(AMDGPU::V_ACCVGPR_WRITE_B32_e64), DestReg)
              .add(DefOp);
      if (ImpDefSuperReg)
        Builder.addReg(ImpDefSuperReg, RegState::Define | RegState::Implicit);

      if (ImpUseSuperReg) {
        Builder.addReg(ImpUseSuperReg,
                       getKillRegState(KillSrc) | RegState::Implicit);
      }

      return;
    }
  }

  RS.enterBasicBlockEnd(MBB);
  RS.backward(std::next(MI));

  // Ideally we want to have three registers for a long reg_sequence copy
  // to hide 2 waitstates between v_mov_b32 and accvgpr_write.
  unsigned MaxVGPRs = RI.getRegPressureLimit(&AMDGPU::VGPR_32RegClass,
                                             *MBB.getParent());

  // Registers in the sequence are allocated contiguously so we can just
  // use register number to pick one of three round-robin temps.
  unsigned RegNo = (DestReg - AMDGPU::AGPR0) % 3;
  Register Tmp =
      MBB.getParent()->getInfo<SIMachineFunctionInfo>()->getVGPRForAGPRCopy();
  assert(MBB.getParent()->getRegInfo().isReserved(Tmp) &&
         "VGPR used for an intermediate copy should have been reserved.");

  // Only loop through if there are any free registers left. We don't want to
  // spill.
  while (RegNo--) {
    Register Tmp2 = RS.scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass, MI,
                                                 /* RestoreAfter */ false, 0,
                                                 /* AllowSpill */ false);
    if (!Tmp2 || RI.getHWRegIndex(Tmp2) >= MaxVGPRs)
      break;
    Tmp = Tmp2;
    RS.setRegUsed(Tmp);
  }

  // Insert copy to temporary VGPR.
  unsigned TmpCopyOp = AMDGPU::V_MOV_B32_e32;
  if (AMDGPU::AGPR_32RegClass.contains(SrcReg)) {
    TmpCopyOp = AMDGPU::V_ACCVGPR_READ_B32_e64;
  } else {
    assert(AMDGPU::SReg_32RegClass.contains(SrcReg));
  }

  MachineInstrBuilder UseBuilder = BuildMI(MBB, MI, DL, TII.get(TmpCopyOp), Tmp)
                                       .addReg(SrcReg, getKillRegState(KillSrc));
  if (ImpUseSuperReg) {
    UseBuilder.addReg(ImpUseSuperReg,
                      getKillRegState(KillSrc) | RegState::Implicit);
  }

  MachineInstrBuilder DefBuilder
    = BuildMI(MBB, MI, DL, TII.get(AMDGPU::V_ACCVGPR_WRITE_B32_e64), DestReg)
          .addReg(Tmp, RegState::Kill);

  if (ImpDefSuperReg)
    DefBuilder.addReg(ImpDefSuperReg, RegState::Define | RegState::Implicit);
}

static void expandSGPRCopy(const SIInstrInfo &TII, MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MI, const DebugLoc &DL,
                           MCRegister DestReg, MCRegister SrcReg, bool KillSrc,
                           const TargetRegisterClass *RC, bool Forward) {
  const SIRegisterInfo &RI = TII.getRegisterInfo();
  ArrayRef<int16_t> BaseIndices = RI.getRegSplitParts(RC, 4);
  MachineBasicBlock::iterator I = MI;
  MachineInstr *FirstMI = nullptr, *LastMI = nullptr;

  for (unsigned Idx = 0; Idx < BaseIndices.size(); ++Idx) {
    int16_t SubIdx = BaseIndices[Idx];
    Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
    Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
    assert(DestSubReg && SrcSubReg && "Failed to find subregs!");
    unsigned Opcode = AMDGPU::S_MOV_B32;

    // Is SGPR aligned? If so try to combine with next.
    bool AlignedDest = ((DestSubReg - AMDGPU::SGPR0) % 2) == 0;
    bool AlignedSrc = ((SrcSubReg - AMDGPU::SGPR0) % 2) == 0;
    if (AlignedDest && AlignedSrc && (Idx + 1 < BaseIndices.size())) {
      // Can use SGPR64 copy
      unsigned Channel = RI.getChannelFromSubReg(SubIdx);
      SubIdx = RI.getSubRegFromChannel(Channel, 2);
      DestSubReg = RI.getSubReg(DestReg, SubIdx);
      SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
      assert(DestSubReg && SrcSubReg && "Failed to find subregs!");
      Opcode = AMDGPU::S_MOV_B64;
      Idx++;
    }

    LastMI = BuildMI(MBB, I, DL, TII.get(Opcode), DestSubReg)
                 .addReg(SrcSubReg)
                 .addReg(SrcReg, RegState::Implicit);

    if (!FirstMI)
      FirstMI = LastMI;

    if (!Forward)
      I--;
  }

  assert(FirstMI && LastMI);
  if (!Forward)
    std::swap(FirstMI, LastMI);

  FirstMI->addOperand(
      MachineOperand::CreateReg(DestReg, true /*IsDef*/, true /*IsImp*/));

  if (KillSrc)
    LastMI->addRegisterKilled(SrcReg, &RI);
}

void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator MI,
                              const DebugLoc &DL, MCRegister DestReg,
                              MCRegister SrcReg, bool KillSrc) const {
  const TargetRegisterClass *RC = RI.getPhysRegBaseClass(DestReg);
  unsigned Size = RI.getRegSizeInBits(*RC);
  const TargetRegisterClass *SrcRC = RI.getPhysRegBaseClass(SrcReg);
  unsigned SrcSize = RI.getRegSizeInBits(*SrcRC);

  // The rest of copyPhysReg assumes Src and Dst are the same size.
  // TODO-GFX11_16BIT: If all true 16-bit instruction patterns are completed,
  // can we remove Fix16BitCopies and this code block?
  if (Fix16BitCopies) {
    if (((Size == 16) != (SrcSize == 16))) {
      // Non-VGPR Src and Dst will later be expanded back to 32 bits.
      assert(ST.hasTrue16BitInsts());
      MCRegister &RegToFix = (Size == 32) ? DestReg : SrcReg;
      MCRegister SubReg = RI.getSubReg(RegToFix, AMDGPU::lo16);
      RegToFix = SubReg;

      if (DestReg == SrcReg) {
        // Identity copy. Insert empty bundle since ExpandPostRA expects an
        // instruction here.
        BuildMI(MBB, MI, DL, get(AMDGPU::BUNDLE));
        return;
      }
      RC = RI.getPhysRegBaseClass(DestReg);
      Size = RI.getRegSizeInBits(*RC);
      SrcRC = RI.getPhysRegBaseClass(SrcReg);
      SrcSize = RI.getRegSizeInBits(*SrcRC);
    }
  }

  if (RC == &AMDGPU::VGPR_32RegClass) {
    assert(AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
           AMDGPU::SReg_32RegClass.contains(SrcReg) ||
           AMDGPU::AGPR_32RegClass.contains(SrcReg));
    unsigned Opc = AMDGPU::AGPR_32RegClass.contains(SrcReg) ?
                     AMDGPU::V_ACCVGPR_READ_B32_e64 : AMDGPU::V_MOV_B32_e32;
    BuildMI(MBB, MI, DL, get(Opc), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  if (RC == &AMDGPU::SReg_32_XM0RegClass ||
      RC == &AMDGPU::SReg_32RegClass) {
    if (SrcReg == AMDGPU::SCC) {
      BuildMI(MBB, MI, DL, get(AMDGPU::S_CSELECT_B32), DestReg)
          .addImm(1)
          .addImm(0);
      return;
    }

    if (DestReg == AMDGPU::VCC_LO) {
      if (AMDGPU::SReg_32RegClass.contains(SrcReg)) {
        BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), AMDGPU::VCC_LO)
            .addReg(SrcReg, getKillRegState(KillSrc));
      } else {
        // FIXME: Hack until VReg_1 removed.
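        // The i1 value lives in a VGPR; materialize it into VCC_LO by
        // comparing the VGPR against zero (the e32 compare writes VCC
        // implicitly).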
        assert(AMDGPU::VGPR_32RegClass.contains(SrcReg));
        BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_U32_e32))
            .addImm(0)
            .addReg(SrcReg, getKillRegState(KillSrc));
      }

      return;
    }

    if (!AMDGPU::SReg_32RegClass.contains(SrcReg)) {
      reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc);
      return;
    }

    BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  if (RC == &AMDGPU::SReg_64RegClass) {
    if (SrcReg == AMDGPU::SCC) {
      BuildMI(MBB, MI, DL, get(AMDGPU::S_CSELECT_B64), DestReg)
          .addImm(1)
          .addImm(0);
      return;
    }

    if (DestReg == AMDGPU::VCC) {
      if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {
        BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), AMDGPU::VCC)
            .addReg(SrcReg, getKillRegState(KillSrc));
      } else {
        // FIXME: Hack until VReg_1 removed.
        assert(AMDGPU::VGPR_32RegClass.contains(SrcReg));
        BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_U32_e32))
            .addImm(0)
            .addReg(SrcReg, getKillRegState(KillSrc));
      }

      return;
    }

    if (!AMDGPU::SReg_64RegClass.contains(SrcReg)) {
      reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc);
      return;
    }

    BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  if (DestReg == AMDGPU::SCC) {
    // Copying 64-bit or 32-bit sources to SCC barely makes sense,
    // but SelectionDAG emits such copies for i1 sources.
    if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {
      // This copy can only be produced by patterns
      // with explicit SCC, which are known to be enabled
      // only for subtargets with S_CMP_LG_U64 present.
      assert(ST.hasScalarCompareEq64());
      BuildMI(MBB, MI, DL, get(AMDGPU::S_CMP_LG_U64))
          .addReg(SrcReg, getKillRegState(KillSrc))
          .addImm(0);
    } else {
      assert(AMDGPU::SReg_32RegClass.contains(SrcReg));
      BuildMI(MBB, MI, DL, get(AMDGPU::S_CMP_LG_U32))
          .addReg(SrcReg, getKillRegState(KillSrc))
          .addImm(0);
    }

    return;
  }

  if (RC == &AMDGPU::AGPR_32RegClass) {
    if (AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
        (ST.hasGFX90AInsts() && AMDGPU::SReg_32RegClass.contains(SrcReg))) {
      BuildMI(MBB, MI, DL, get(AMDGPU::V_ACCVGPR_WRITE_B32_e64), DestReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
      return;
    }

    if (AMDGPU::AGPR_32RegClass.contains(SrcReg) && ST.hasGFX90AInsts()) {
      BuildMI(MBB, MI, DL, get(AMDGPU::V_ACCVGPR_MOV_B32), DestReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
      return;
    }

    // FIXME: Pass should maintain scavenger to avoid scan through the block on
    // every AGPR spill.
    RegScavenger RS;
    const bool Overlap = RI.regsOverlap(SrcReg, DestReg);
    indirectCopyToAGPR(*this, MBB, MI, DL, DestReg, SrcReg, KillSrc, RS, Overlap);
    return;
  }

  if (Size == 16) {
    assert(AMDGPU::VGPR_16RegClass.contains(SrcReg) ||
           AMDGPU::SReg_LO16RegClass.contains(SrcReg) ||
           AMDGPU::AGPR_LO16RegClass.contains(SrcReg));

    bool IsSGPRDst = AMDGPU::SReg_LO16RegClass.contains(DestReg);
    bool IsSGPRSrc = AMDGPU::SReg_LO16RegClass.contains(SrcReg);
    bool IsAGPRDst = AMDGPU::AGPR_LO16RegClass.contains(DestReg);
    bool IsAGPRSrc = AMDGPU::AGPR_LO16RegClass.contains(SrcReg);
    bool DstLow = AMDGPU::VGPR_LO16RegClass.contains(DestReg) ||
                  AMDGPU::SReg_LO16RegClass.contains(DestReg) ||
                  AMDGPU::AGPR_LO16RegClass.contains(DestReg);
    bool SrcLow = AMDGPU::VGPR_LO16RegClass.contains(SrcReg) ||
                  AMDGPU::SReg_LO16RegClass.contains(SrcReg) ||
                  AMDGPU::AGPR_LO16RegClass.contains(SrcReg);
    MCRegister NewDestReg = RI.get32BitRegister(DestReg);
    MCRegister NewSrcReg = RI.get32BitRegister(SrcReg);

    if (IsSGPRDst) {
      if (!IsSGPRSrc) {
        reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc);
        return;
      }
9685ffd83dbSDimitry Andric 9695ffd83dbSDimitry Andric BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), NewDestReg) 9705ffd83dbSDimitry Andric .addReg(NewSrcReg, getKillRegState(KillSrc)); 9715ffd83dbSDimitry Andric return; 9725ffd83dbSDimitry Andric } 9735ffd83dbSDimitry Andric 9745ffd83dbSDimitry Andric if (IsAGPRDst || IsAGPRSrc) { 9755ffd83dbSDimitry Andric if (!DstLow || !SrcLow) { 9765ffd83dbSDimitry Andric reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc, 9775ffd83dbSDimitry Andric "Cannot use hi16 subreg with an AGPR!"); 9785ffd83dbSDimitry Andric } 9795ffd83dbSDimitry Andric 9805ffd83dbSDimitry Andric copyPhysReg(MBB, MI, DL, NewDestReg, NewSrcReg, KillSrc); 9815ffd83dbSDimitry Andric return; 9825ffd83dbSDimitry Andric } 9835ffd83dbSDimitry Andric 984*5f757f3fSDimitry Andric if (ST.hasTrue16BitInsts()) { 985*5f757f3fSDimitry Andric if (IsSGPRSrc) { 986*5f757f3fSDimitry Andric assert(SrcLow); 987*5f757f3fSDimitry Andric SrcReg = NewSrcReg; 988*5f757f3fSDimitry Andric } 989*5f757f3fSDimitry Andric // Use the smaller instruction encoding if possible. 990*5f757f3fSDimitry Andric if (AMDGPU::VGPR_16_Lo128RegClass.contains(DestReg) && 991*5f757f3fSDimitry Andric (IsSGPRSrc || AMDGPU::VGPR_16_Lo128RegClass.contains(SrcReg))) { 992*5f757f3fSDimitry Andric BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B16_t16_e32), DestReg) 993*5f757f3fSDimitry Andric .addReg(SrcReg); 994*5f757f3fSDimitry Andric } else { 995*5f757f3fSDimitry Andric BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B16_t16_e64), DestReg) 996*5f757f3fSDimitry Andric .addImm(0) // src0_modifiers 997*5f757f3fSDimitry Andric .addReg(SrcReg) 998*5f757f3fSDimitry Andric .addImm(0); // op_sel 999*5f757f3fSDimitry Andric } 1000*5f757f3fSDimitry Andric return; 1001*5f757f3fSDimitry Andric } 1002*5f757f3fSDimitry Andric 10035ffd83dbSDimitry Andric if (IsSGPRSrc && !ST.hasSDWAScalar()) { 10045ffd83dbSDimitry Andric if (!DstLow || !SrcLow) { 10055ffd83dbSDimitry Andric reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc, 10065ffd83dbSDimitry Andric "Cannot use hi16 subreg on VI!"); 10075ffd83dbSDimitry Andric } 10085ffd83dbSDimitry Andric 10095ffd83dbSDimitry Andric BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), NewDestReg) 10105ffd83dbSDimitry Andric .addReg(NewSrcReg, getKillRegState(KillSrc)); 10115ffd83dbSDimitry Andric return; 10125ffd83dbSDimitry Andric } 10135ffd83dbSDimitry Andric 10145ffd83dbSDimitry Andric auto MIB = BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_sdwa), NewDestReg) 10155ffd83dbSDimitry Andric .addImm(0) // src0_modifiers 10165ffd83dbSDimitry Andric .addReg(NewSrcReg) 10175ffd83dbSDimitry Andric .addImm(0) // clamp 10185ffd83dbSDimitry Andric .addImm(DstLow ? AMDGPU::SDWA::SdwaSel::WORD_0 10195ffd83dbSDimitry Andric : AMDGPU::SDWA::SdwaSel::WORD_1) 10205ffd83dbSDimitry Andric .addImm(AMDGPU::SDWA::DstUnused::UNUSED_PRESERVE) 10215ffd83dbSDimitry Andric .addImm(SrcLow ? AMDGPU::SDWA::SdwaSel::WORD_0 10225ffd83dbSDimitry Andric : AMDGPU::SDWA::SdwaSel::WORD_1) 10235ffd83dbSDimitry Andric .addReg(NewDestReg, RegState::Implicit | RegState::Undef); 10245ffd83dbSDimitry Andric // First implicit operand is $exec. 
10255ffd83dbSDimitry Andric MIB->tieOperands(0, MIB->getNumOperands() - 1); 10265ffd83dbSDimitry Andric return; 10275ffd83dbSDimitry Andric } 10285ffd83dbSDimitry Andric 1029fe6060f1SDimitry Andric if (RC == RI.getVGPR64Class() && (SrcRC == RC || RI.isSGPRClass(SrcRC))) { 103081ad6265SDimitry Andric if (ST.hasMovB64()) { 103181ad6265SDimitry Andric BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B64_e32), DestReg) 103281ad6265SDimitry Andric .addReg(SrcReg, getKillRegState(KillSrc)); 103381ad6265SDimitry Andric return; 103481ad6265SDimitry Andric } 1035*5f757f3fSDimitry Andric if (ST.hasPkMovB32()) { 1036fe6060f1SDimitry Andric BuildMI(MBB, MI, DL, get(AMDGPU::V_PK_MOV_B32), DestReg) 1037fe6060f1SDimitry Andric .addImm(SISrcMods::OP_SEL_1) 1038fe6060f1SDimitry Andric .addReg(SrcReg) 1039fe6060f1SDimitry Andric .addImm(SISrcMods::OP_SEL_0 | SISrcMods::OP_SEL_1) 1040fe6060f1SDimitry Andric .addReg(SrcReg) 1041fe6060f1SDimitry Andric .addImm(0) // op_sel_lo 1042fe6060f1SDimitry Andric .addImm(0) // op_sel_hi 1043fe6060f1SDimitry Andric .addImm(0) // neg_lo 1044fe6060f1SDimitry Andric .addImm(0) // neg_hi 1045fe6060f1SDimitry Andric .addImm(0) // clamp 1046fe6060f1SDimitry Andric .addReg(SrcReg, getKillRegState(KillSrc) | RegState::Implicit); 1047fe6060f1SDimitry Andric return; 1048fe6060f1SDimitry Andric } 1049fe6060f1SDimitry Andric } 1050fe6060f1SDimitry Andric 1051e8d8bef9SDimitry Andric const bool Forward = RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg); 10520b57cec5SDimitry Andric if (RI.isSGPRClass(RC)) { 1053fe6060f1SDimitry Andric if (!RI.isSGPRClass(SrcRC)) { 10540b57cec5SDimitry Andric reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc); 10550b57cec5SDimitry Andric return; 10560b57cec5SDimitry Andric } 105781ad6265SDimitry Andric const bool CanKillSuperReg = KillSrc && !RI.regsOverlap(SrcReg, DestReg); 105881ad6265SDimitry Andric expandSGPRCopy(*this, MBB, MI, DL, DestReg, SrcReg, CanKillSuperReg, RC, 105981ad6265SDimitry Andric Forward); 1060e8d8bef9SDimitry Andric return; 10610b57cec5SDimitry Andric } 10620b57cec5SDimitry Andric 1063fe6060f1SDimitry Andric unsigned EltSize = 4; 1064e8d8bef9SDimitry Andric unsigned Opcode = AMDGPU::V_MOV_B32_e32; 10654824e7fdSDimitry Andric if (RI.isAGPRClass(RC)) { 10660eae32dcSDimitry Andric if (ST.hasGFX90AInsts() && RI.isAGPRClass(SrcRC)) 10670eae32dcSDimitry Andric Opcode = AMDGPU::V_ACCVGPR_MOV_B32; 106881ad6265SDimitry Andric else if (RI.hasVGPRs(SrcRC) || 106981ad6265SDimitry Andric (ST.hasGFX90AInsts() && RI.isSGPRClass(SrcRC))) 10700eae32dcSDimitry Andric Opcode = AMDGPU::V_ACCVGPR_WRITE_B32_e64; 10710eae32dcSDimitry Andric else 10720eae32dcSDimitry Andric Opcode = AMDGPU::INSTRUCTION_LIST_END; 10734824e7fdSDimitry Andric } else if (RI.hasVGPRs(RC) && RI.isAGPRClass(SrcRC)) { 1074e8d8bef9SDimitry Andric Opcode = AMDGPU::V_ACCVGPR_READ_B32_e64; 1075fe6060f1SDimitry Andric } else if ((Size % 64 == 0) && RI.hasVGPRs(RC) && 1076fe6060f1SDimitry Andric (RI.isProperlyAlignedRC(*RC) && 1077fe6060f1SDimitry Andric (SrcRC == RC || RI.isSGPRClass(SrcRC)))) { 1078fe6060f1SDimitry Andric // TODO: In 96-bit case, could do a 64-bit mov and then a 32-bit mov. 
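// In this 64-bit-aligned VGPR case, prefer whole 8-byte elements: V_MOV_B64_e32
// when the subtarget supports it, otherwise V_PK_MOV_B32 when packed moves are
// available; if neither is present, the default 4-byte V_MOV_B32_e32 copy is kept.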
107981ad6265SDimitry Andric if (ST.hasMovB64()) { 108081ad6265SDimitry Andric Opcode = AMDGPU::V_MOV_B64_e32; 108181ad6265SDimitry Andric EltSize = 8; 1082*5f757f3fSDimitry Andric } else if (ST.hasPkMovB32()) { 1083fe6060f1SDimitry Andric Opcode = AMDGPU::V_PK_MOV_B32; 1084fe6060f1SDimitry Andric EltSize = 8; 1085fe6060f1SDimitry Andric } 1086e8d8bef9SDimitry Andric } 1087e8d8bef9SDimitry Andric 1088e8d8bef9SDimitry Andric // For the cases where we need an intermediate instruction/temporary register 1089e8d8bef9SDimitry Andric // (destination is an AGPR), we need a scavenger. 1090e8d8bef9SDimitry Andric // 1091e8d8bef9SDimitry Andric // FIXME: The pass should maintain this for us so we don't have to re-scan the 1092e8d8bef9SDimitry Andric // whole block for every handled copy. 1093e8d8bef9SDimitry Andric std::unique_ptr<RegScavenger> RS; 1094e8d8bef9SDimitry Andric if (Opcode == AMDGPU::INSTRUCTION_LIST_END) 1095e8d8bef9SDimitry Andric RS.reset(new RegScavenger()); 1096e8d8bef9SDimitry Andric 1097fe6060f1SDimitry Andric ArrayRef<int16_t> SubIndices = RI.getRegSplitParts(RC, EltSize); 1098e8d8bef9SDimitry Andric 1099e8d8bef9SDimitry Andric // If there is an overlap, we can't kill the super-register on the last 1100e8d8bef9SDimitry Andric // instruction, since it will also kill the components made live by this def. 1101bdd1243dSDimitry Andric const bool Overlap = RI.regsOverlap(SrcReg, DestReg); 1102bdd1243dSDimitry Andric const bool CanKillSuperReg = KillSrc && !Overlap; 11030b57cec5SDimitry Andric 11040b57cec5SDimitry Andric for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) { 11050b57cec5SDimitry Andric unsigned SubIdx; 11060b57cec5SDimitry Andric if (Forward) 11070b57cec5SDimitry Andric SubIdx = SubIndices[Idx]; 11080b57cec5SDimitry Andric else 11090b57cec5SDimitry Andric SubIdx = SubIndices[SubIndices.size() - Idx - 1]; 1110*5f757f3fSDimitry Andric Register DestSubReg = RI.getSubReg(DestReg, SubIdx); 1111*5f757f3fSDimitry Andric Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx); 1112*5f757f3fSDimitry Andric assert(DestSubReg && SrcSubReg && "Failed to find subregs!"); 11130b57cec5SDimitry Andric 1114bdd1243dSDimitry Andric bool IsFirstSubreg = Idx == 0; 1115e8d8bef9SDimitry Andric bool UseKill = CanKillSuperReg && Idx == SubIndices.size() - 1; 11160b57cec5SDimitry Andric 1117e8d8bef9SDimitry Andric if (Opcode == AMDGPU::INSTRUCTION_LIST_END) { 1118bdd1243dSDimitry Andric Register ImpDefSuper = IsFirstSubreg ? 
Register(DestReg) : Register(); 1119e8d8bef9SDimitry Andric Register ImpUseSuper = SrcReg; 1120*5f757f3fSDimitry Andric indirectCopyToAGPR(*this, MBB, MI, DL, DestSubReg, SrcSubReg, UseKill, 1121*5f757f3fSDimitry Andric *RS, Overlap, ImpDefSuper, ImpUseSuper); 1122fe6060f1SDimitry Andric } else if (Opcode == AMDGPU::V_PK_MOV_B32) { 1123fe6060f1SDimitry Andric MachineInstrBuilder MIB = 1124*5f757f3fSDimitry Andric BuildMI(MBB, MI, DL, get(AMDGPU::V_PK_MOV_B32), DestSubReg) 1125fe6060f1SDimitry Andric .addImm(SISrcMods::OP_SEL_1) 1126fe6060f1SDimitry Andric .addReg(SrcSubReg) 1127fe6060f1SDimitry Andric .addImm(SISrcMods::OP_SEL_0 | SISrcMods::OP_SEL_1) 1128fe6060f1SDimitry Andric .addReg(SrcSubReg) 1129fe6060f1SDimitry Andric .addImm(0) // op_sel_lo 1130fe6060f1SDimitry Andric .addImm(0) // op_sel_hi 1131fe6060f1SDimitry Andric .addImm(0) // neg_lo 1132fe6060f1SDimitry Andric .addImm(0) // neg_hi 1133fe6060f1SDimitry Andric .addImm(0) // clamp 1134fe6060f1SDimitry Andric .addReg(SrcReg, getKillRegState(UseKill) | RegState::Implicit); 1135bdd1243dSDimitry Andric if (IsFirstSubreg) 1136fe6060f1SDimitry Andric MIB.addReg(DestReg, RegState::Define | RegState::Implicit); 1137e8d8bef9SDimitry Andric } else { 1138e8d8bef9SDimitry Andric MachineInstrBuilder Builder = 1139*5f757f3fSDimitry Andric BuildMI(MBB, MI, DL, get(Opcode), DestSubReg).addReg(SrcSubReg); 1140bdd1243dSDimitry Andric if (IsFirstSubreg) 11410b57cec5SDimitry Andric Builder.addReg(DestReg, RegState::Define | RegState::Implicit); 11420b57cec5SDimitry Andric 11430b57cec5SDimitry Andric Builder.addReg(SrcReg, getKillRegState(UseKill) | RegState::Implicit); 11440b57cec5SDimitry Andric } 11450b57cec5SDimitry Andric } 1146e8d8bef9SDimitry Andric } 11470b57cec5SDimitry Andric 11480b57cec5SDimitry Andric int SIInstrInfo::commuteOpcode(unsigned Opcode) const { 11490b57cec5SDimitry Andric int NewOpc; 11500b57cec5SDimitry Andric 11510b57cec5SDimitry Andric // Try to map original to commuted opcode 11520b57cec5SDimitry Andric NewOpc = AMDGPU::getCommuteRev(Opcode); 11530b57cec5SDimitry Andric if (NewOpc != -1) 11540b57cec5SDimitry Andric // Check if the commuted (REV) opcode exists on the target. 11550b57cec5SDimitry Andric return pseudoToMCOpcode(NewOpc) != -1 ? NewOpc : -1; 11560b57cec5SDimitry Andric 11570b57cec5SDimitry Andric // Try to map commuted to original opcode 11580b57cec5SDimitry Andric NewOpc = AMDGPU::getCommuteOrig(Opcode); 11590b57cec5SDimitry Andric if (NewOpc != -1) 11600b57cec5SDimitry Andric // Check if the original (non-REV) opcode exists on the target. 11610b57cec5SDimitry Andric return pseudoToMCOpcode(NewOpc) != -1 ? 
NewOpc : -1; 11620b57cec5SDimitry Andric 11630b57cec5SDimitry Andric return Opcode; 11640b57cec5SDimitry Andric } 11650b57cec5SDimitry Andric 11660b57cec5SDimitry Andric void SIInstrInfo::materializeImmediate(MachineBasicBlock &MBB, 11670b57cec5SDimitry Andric MachineBasicBlock::iterator MI, 1168bdd1243dSDimitry Andric const DebugLoc &DL, Register DestReg, 11690b57cec5SDimitry Andric int64_t Value) const { 11700b57cec5SDimitry Andric MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 11710b57cec5SDimitry Andric const TargetRegisterClass *RegClass = MRI.getRegClass(DestReg); 11720b57cec5SDimitry Andric if (RegClass == &AMDGPU::SReg_32RegClass || 11730b57cec5SDimitry Andric RegClass == &AMDGPU::SGPR_32RegClass || 11740b57cec5SDimitry Andric RegClass == &AMDGPU::SReg_32_XM0RegClass || 11750b57cec5SDimitry Andric RegClass == &AMDGPU::SReg_32_XM0_XEXECRegClass) { 11760b57cec5SDimitry Andric BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg) 11770b57cec5SDimitry Andric .addImm(Value); 11780b57cec5SDimitry Andric return; 11790b57cec5SDimitry Andric } 11800b57cec5SDimitry Andric 11810b57cec5SDimitry Andric if (RegClass == &AMDGPU::SReg_64RegClass || 11820b57cec5SDimitry Andric RegClass == &AMDGPU::SGPR_64RegClass || 11830b57cec5SDimitry Andric RegClass == &AMDGPU::SReg_64_XEXECRegClass) { 11840b57cec5SDimitry Andric BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg) 11850b57cec5SDimitry Andric .addImm(Value); 11860b57cec5SDimitry Andric return; 11870b57cec5SDimitry Andric } 11880b57cec5SDimitry Andric 11890b57cec5SDimitry Andric if (RegClass == &AMDGPU::VGPR_32RegClass) { 11900b57cec5SDimitry Andric BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg) 11910b57cec5SDimitry Andric .addImm(Value); 11920b57cec5SDimitry Andric return; 11930b57cec5SDimitry Andric } 1194fe6060f1SDimitry Andric if (RegClass->hasSuperClassEq(&AMDGPU::VReg_64RegClass)) { 11950b57cec5SDimitry Andric BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B64_PSEUDO), DestReg) 11960b57cec5SDimitry Andric .addImm(Value); 11970b57cec5SDimitry Andric return; 11980b57cec5SDimitry Andric } 11990b57cec5SDimitry Andric 12000b57cec5SDimitry Andric unsigned EltSize = 4; 12010b57cec5SDimitry Andric unsigned Opcode = AMDGPU::V_MOV_B32_e32; 12020b57cec5SDimitry Andric if (RI.isSGPRClass(RegClass)) { 12030b57cec5SDimitry Andric if (RI.getRegSizeInBits(*RegClass) > 32) { 12040b57cec5SDimitry Andric Opcode = AMDGPU::S_MOV_B64; 12050b57cec5SDimitry Andric EltSize = 8; 12060b57cec5SDimitry Andric } else { 12070b57cec5SDimitry Andric Opcode = AMDGPU::S_MOV_B32; 12080b57cec5SDimitry Andric EltSize = 4; 12090b57cec5SDimitry Andric } 12100b57cec5SDimitry Andric } 12110b57cec5SDimitry Andric 12120b57cec5SDimitry Andric ArrayRef<int16_t> SubIndices = RI.getRegSplitParts(RegClass, EltSize); 12130b57cec5SDimitry Andric for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) { 12140b57cec5SDimitry Andric int64_t IdxValue = Idx == 0 ? 
Value : 0; 12150b57cec5SDimitry Andric 12160b57cec5SDimitry Andric MachineInstrBuilder Builder = BuildMI(MBB, MI, DL, 12175ffd83dbSDimitry Andric get(Opcode), RI.getSubReg(DestReg, SubIndices[Idx])); 12180b57cec5SDimitry Andric Builder.addImm(IdxValue); 12190b57cec5SDimitry Andric } 12200b57cec5SDimitry Andric } 12210b57cec5SDimitry Andric 12220b57cec5SDimitry Andric const TargetRegisterClass * 12230b57cec5SDimitry Andric SIInstrInfo::getPreferredSelectRegClass(unsigned Size) const { 12240b57cec5SDimitry Andric return &AMDGPU::VGPR_32RegClass; 12250b57cec5SDimitry Andric } 12260b57cec5SDimitry Andric 12270b57cec5SDimitry Andric void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB, 12280b57cec5SDimitry Andric MachineBasicBlock::iterator I, 12295ffd83dbSDimitry Andric const DebugLoc &DL, Register DstReg, 12300b57cec5SDimitry Andric ArrayRef<MachineOperand> Cond, 12315ffd83dbSDimitry Andric Register TrueReg, 12325ffd83dbSDimitry Andric Register FalseReg) const { 12330b57cec5SDimitry Andric MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 12340b57cec5SDimitry Andric const TargetRegisterClass *BoolXExecRC = 12350b57cec5SDimitry Andric RI.getRegClass(AMDGPU::SReg_1_XEXECRegClassID); 12360b57cec5SDimitry Andric assert(MRI.getRegClass(DstReg) == &AMDGPU::VGPR_32RegClass && 12370b57cec5SDimitry Andric "Not a VGPR32 reg"); 12380b57cec5SDimitry Andric 12390b57cec5SDimitry Andric if (Cond.size() == 1) { 12408bcb0991SDimitry Andric Register SReg = MRI.createVirtualRegister(BoolXExecRC); 12410b57cec5SDimitry Andric BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg) 12420b57cec5SDimitry Andric .add(Cond[0]); 12430b57cec5SDimitry Andric BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) 12440b57cec5SDimitry Andric .addImm(0) 12450b57cec5SDimitry Andric .addReg(FalseReg) 12460b57cec5SDimitry Andric .addImm(0) 12470b57cec5SDimitry Andric .addReg(TrueReg) 12480b57cec5SDimitry Andric .addReg(SReg); 12490b57cec5SDimitry Andric } else if (Cond.size() == 2) { 12500b57cec5SDimitry Andric assert(Cond[0].isImm() && "Cond[0] is not an immediate"); 12510b57cec5SDimitry Andric switch (Cond[0].getImm()) { 12520b57cec5SDimitry Andric case SIInstrInfo::SCC_TRUE: { 12538bcb0991SDimitry Andric Register SReg = MRI.createVirtualRegister(BoolXExecRC); 12540b57cec5SDimitry Andric BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_CSELECT_B32 12550b57cec5SDimitry Andric : AMDGPU::S_CSELECT_B64), SReg) 1256480093f4SDimitry Andric .addImm(1) 12570b57cec5SDimitry Andric .addImm(0); 12580b57cec5SDimitry Andric BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) 12590b57cec5SDimitry Andric .addImm(0) 12600b57cec5SDimitry Andric .addReg(FalseReg) 12610b57cec5SDimitry Andric .addImm(0) 12620b57cec5SDimitry Andric .addReg(TrueReg) 12630b57cec5SDimitry Andric .addReg(SReg); 12640b57cec5SDimitry Andric break; 12650b57cec5SDimitry Andric } 12660b57cec5SDimitry Andric case SIInstrInfo::SCC_FALSE: { 12678bcb0991SDimitry Andric Register SReg = MRI.createVirtualRegister(BoolXExecRC); 12680b57cec5SDimitry Andric BuildMI(MBB, I, DL, get(ST.isWave32() ? 
AMDGPU::S_CSELECT_B32 12690b57cec5SDimitry Andric : AMDGPU::S_CSELECT_B64), SReg) 12700b57cec5SDimitry Andric .addImm(0) 1271480093f4SDimitry Andric .addImm(1); 12720b57cec5SDimitry Andric BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) 12730b57cec5SDimitry Andric .addImm(0) 12740b57cec5SDimitry Andric .addReg(FalseReg) 12750b57cec5SDimitry Andric .addImm(0) 12760b57cec5SDimitry Andric .addReg(TrueReg) 12770b57cec5SDimitry Andric .addReg(SReg); 12780b57cec5SDimitry Andric break; 12790b57cec5SDimitry Andric } 12800b57cec5SDimitry Andric case SIInstrInfo::VCCNZ: { 12810b57cec5SDimitry Andric MachineOperand RegOp = Cond[1]; 12820b57cec5SDimitry Andric RegOp.setImplicit(false); 12838bcb0991SDimitry Andric Register SReg = MRI.createVirtualRegister(BoolXExecRC); 12840b57cec5SDimitry Andric BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg) 12850b57cec5SDimitry Andric .add(RegOp); 12860b57cec5SDimitry Andric BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) 12870b57cec5SDimitry Andric .addImm(0) 12880b57cec5SDimitry Andric .addReg(FalseReg) 12890b57cec5SDimitry Andric .addImm(0) 12900b57cec5SDimitry Andric .addReg(TrueReg) 12910b57cec5SDimitry Andric .addReg(SReg); 12920b57cec5SDimitry Andric break; 12930b57cec5SDimitry Andric } 12940b57cec5SDimitry Andric case SIInstrInfo::VCCZ: { 12950b57cec5SDimitry Andric MachineOperand RegOp = Cond[1]; 12960b57cec5SDimitry Andric RegOp.setImplicit(false); 12978bcb0991SDimitry Andric Register SReg = MRI.createVirtualRegister(BoolXExecRC); 12980b57cec5SDimitry Andric BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg) 12990b57cec5SDimitry Andric .add(RegOp); 13000b57cec5SDimitry Andric BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) 13010b57cec5SDimitry Andric .addImm(0) 13020b57cec5SDimitry Andric .addReg(TrueReg) 13030b57cec5SDimitry Andric .addImm(0) 13040b57cec5SDimitry Andric .addReg(FalseReg) 13050b57cec5SDimitry Andric .addReg(SReg); 13060b57cec5SDimitry Andric break; 13070b57cec5SDimitry Andric } 13080b57cec5SDimitry Andric case SIInstrInfo::EXECNZ: { 13098bcb0991SDimitry Andric Register SReg = MRI.createVirtualRegister(BoolXExecRC); 13108bcb0991SDimitry Andric Register SReg2 = MRI.createVirtualRegister(RI.getBoolRC()); 13110b57cec5SDimitry Andric BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32 13120b57cec5SDimitry Andric : AMDGPU::S_OR_SAVEEXEC_B64), SReg2) 13130b57cec5SDimitry Andric .addImm(0); 13140b57cec5SDimitry Andric BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_CSELECT_B32 13150b57cec5SDimitry Andric : AMDGPU::S_CSELECT_B64), SReg) 1316480093f4SDimitry Andric .addImm(1) 13170b57cec5SDimitry Andric .addImm(0); 13180b57cec5SDimitry Andric BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) 13190b57cec5SDimitry Andric .addImm(0) 13200b57cec5SDimitry Andric .addReg(FalseReg) 13210b57cec5SDimitry Andric .addImm(0) 13220b57cec5SDimitry Andric .addReg(TrueReg) 13230b57cec5SDimitry Andric .addReg(SReg); 13240b57cec5SDimitry Andric break; 13250b57cec5SDimitry Andric } 13260b57cec5SDimitry Andric case SIInstrInfo::EXECZ: { 13278bcb0991SDimitry Andric Register SReg = MRI.createVirtualRegister(BoolXExecRC); 13288bcb0991SDimitry Andric Register SReg2 = MRI.createVirtualRegister(RI.getBoolRC()); 13290b57cec5SDimitry Andric BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32 13300b57cec5SDimitry Andric : AMDGPU::S_OR_SAVEEXEC_B64), SReg2) 13310b57cec5SDimitry Andric .addImm(0); 13320b57cec5SDimitry Andric BuildMI(MBB, I, DL, get(ST.isWave32() ? 
AMDGPU::S_CSELECT_B32 13330b57cec5SDimitry Andric : AMDGPU::S_CSELECT_B64), SReg) 13340b57cec5SDimitry Andric .addImm(0) 1335480093f4SDimitry Andric .addImm(1); 13360b57cec5SDimitry Andric BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) 13370b57cec5SDimitry Andric .addImm(0) 13380b57cec5SDimitry Andric .addReg(FalseReg) 13390b57cec5SDimitry Andric .addImm(0) 13400b57cec5SDimitry Andric .addReg(TrueReg) 13410b57cec5SDimitry Andric .addReg(SReg); 13420b57cec5SDimitry Andric llvm_unreachable("Unhandled branch predicate EXECZ"); 13430b57cec5SDimitry Andric break; 13440b57cec5SDimitry Andric } 13450b57cec5SDimitry Andric default: 13460b57cec5SDimitry Andric llvm_unreachable("invalid branch predicate"); 13470b57cec5SDimitry Andric } 13480b57cec5SDimitry Andric } else { 13490b57cec5SDimitry Andric llvm_unreachable("Can only handle Cond size 1 or 2"); 13500b57cec5SDimitry Andric } 13510b57cec5SDimitry Andric } 13520b57cec5SDimitry Andric 13535ffd83dbSDimitry Andric Register SIInstrInfo::insertEQ(MachineBasicBlock *MBB, 13540b57cec5SDimitry Andric MachineBasicBlock::iterator I, 13550b57cec5SDimitry Andric const DebugLoc &DL, 13565ffd83dbSDimitry Andric Register SrcReg, int Value) const { 13570b57cec5SDimitry Andric MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); 13588bcb0991SDimitry Andric Register Reg = MRI.createVirtualRegister(RI.getBoolRC()); 13590b57cec5SDimitry Andric BuildMI(*MBB, I, DL, get(AMDGPU::V_CMP_EQ_I32_e64), Reg) 13600b57cec5SDimitry Andric .addImm(Value) 13610b57cec5SDimitry Andric .addReg(SrcReg); 13620b57cec5SDimitry Andric 13630b57cec5SDimitry Andric return Reg; 13640b57cec5SDimitry Andric } 13650b57cec5SDimitry Andric 13665ffd83dbSDimitry Andric Register SIInstrInfo::insertNE(MachineBasicBlock *MBB, 13670b57cec5SDimitry Andric MachineBasicBlock::iterator I, 13680b57cec5SDimitry Andric const DebugLoc &DL, 13695ffd83dbSDimitry Andric Register SrcReg, int Value) const { 13700b57cec5SDimitry Andric MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); 13718bcb0991SDimitry Andric Register Reg = MRI.createVirtualRegister(RI.getBoolRC()); 13720b57cec5SDimitry Andric BuildMI(*MBB, I, DL, get(AMDGPU::V_CMP_NE_I32_e64), Reg) 13730b57cec5SDimitry Andric .addImm(Value) 13740b57cec5SDimitry Andric .addReg(SrcReg); 13750b57cec5SDimitry Andric 13760b57cec5SDimitry Andric return Reg; 13770b57cec5SDimitry Andric } 13780b57cec5SDimitry Andric 13790b57cec5SDimitry Andric unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const { 13800b57cec5SDimitry Andric 13814824e7fdSDimitry Andric if (RI.isAGPRClass(DstRC)) 13820b57cec5SDimitry Andric return AMDGPU::COPY; 1383*5f757f3fSDimitry Andric if (RI.getRegSizeInBits(*DstRC) == 16) { 1384*5f757f3fSDimitry Andric // Assume hi bits are unneeded. Only _e64 true16 instructions are legal 1385*5f757f3fSDimitry Andric // before RA. 1386*5f757f3fSDimitry Andric return RI.isSGPRClass(DstRC) ? AMDGPU::COPY : AMDGPU::V_MOV_B16_t16_e64; 1387*5f757f3fSDimitry Andric } else if (RI.getRegSizeInBits(*DstRC) == 32) { 13880b57cec5SDimitry Andric return RI.isSGPRClass(DstRC) ? 
AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32; 13890b57cec5SDimitry Andric } else if (RI.getRegSizeInBits(*DstRC) == 64 && RI.isSGPRClass(DstRC)) { 13900b57cec5SDimitry Andric return AMDGPU::S_MOV_B64; 13910b57cec5SDimitry Andric } else if (RI.getRegSizeInBits(*DstRC) == 64 && !RI.isSGPRClass(DstRC)) { 13920b57cec5SDimitry Andric return AMDGPU::V_MOV_B64_PSEUDO; 13930b57cec5SDimitry Andric } 13940b57cec5SDimitry Andric return AMDGPU::COPY; 13950b57cec5SDimitry Andric } 13960b57cec5SDimitry Andric 1397e8d8bef9SDimitry Andric const MCInstrDesc & 1398e8d8bef9SDimitry Andric SIInstrInfo::getIndirectGPRIDXPseudo(unsigned VecSize, 1399e8d8bef9SDimitry Andric bool IsIndirectSrc) const { 1400e8d8bef9SDimitry Andric if (IsIndirectSrc) { 14015ffd83dbSDimitry Andric if (VecSize <= 32) // 4 bytes 1402e8d8bef9SDimitry Andric return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1); 14035ffd83dbSDimitry Andric if (VecSize <= 64) // 8 bytes 1404e8d8bef9SDimitry Andric return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2); 14055ffd83dbSDimitry Andric if (VecSize <= 96) // 12 bytes 1406e8d8bef9SDimitry Andric return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3); 14075ffd83dbSDimitry Andric if (VecSize <= 128) // 16 bytes 1408e8d8bef9SDimitry Andric return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4); 14095ffd83dbSDimitry Andric if (VecSize <= 160) // 20 bytes 1410e8d8bef9SDimitry Andric return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5); 14115ffd83dbSDimitry Andric if (VecSize <= 256) // 32 bytes 1412e8d8bef9SDimitry Andric return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8); 1413bdd1243dSDimitry Andric if (VecSize <= 288) // 36 bytes 1414bdd1243dSDimitry Andric return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9); 1415bdd1243dSDimitry Andric if (VecSize <= 320) // 40 bytes 1416bdd1243dSDimitry Andric return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10); 1417bdd1243dSDimitry Andric if (VecSize <= 352) // 44 bytes 1418bdd1243dSDimitry Andric return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11); 1419bdd1243dSDimitry Andric if (VecSize <= 384) // 48 bytes 1420bdd1243dSDimitry Andric return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12); 14215ffd83dbSDimitry Andric if (VecSize <= 512) // 64 bytes 1422e8d8bef9SDimitry Andric return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16); 14235ffd83dbSDimitry Andric if (VecSize <= 1024) // 128 bytes 1424e8d8bef9SDimitry Andric return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32); 14255ffd83dbSDimitry Andric 1426e8d8bef9SDimitry Andric llvm_unreachable("unsupported size for IndirectRegReadGPRIDX pseudos"); 14275ffd83dbSDimitry Andric } 14285ffd83dbSDimitry Andric 14295ffd83dbSDimitry Andric if (VecSize <= 32) // 4 bytes 1430e8d8bef9SDimitry Andric return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1); 14315ffd83dbSDimitry Andric if (VecSize <= 64) // 8 bytes 1432e8d8bef9SDimitry Andric return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2); 14335ffd83dbSDimitry Andric if (VecSize <= 96) // 12 bytes 1434e8d8bef9SDimitry Andric return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3); 14355ffd83dbSDimitry Andric if (VecSize <= 128) // 16 bytes 1436e8d8bef9SDimitry Andric return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4); 14375ffd83dbSDimitry Andric if (VecSize <= 160) // 20 bytes 1438e8d8bef9SDimitry Andric return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5); 14395ffd83dbSDimitry Andric if (VecSize <= 256) // 32 bytes 1440e8d8bef9SDimitry Andric return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8); 1441bdd1243dSDimitry Andric if (VecSize 
<= 288) // 36 bytes 1442bdd1243dSDimitry Andric return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9); 1443bdd1243dSDimitry Andric if (VecSize <= 320) // 40 bytes 1444bdd1243dSDimitry Andric return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10); 1445bdd1243dSDimitry Andric if (VecSize <= 352) // 44 bytes 1446bdd1243dSDimitry Andric return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11); 1447bdd1243dSDimitry Andric if (VecSize <= 384) // 48 bytes 1448bdd1243dSDimitry Andric return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12); 14495ffd83dbSDimitry Andric if (VecSize <= 512) // 64 bytes 1450e8d8bef9SDimitry Andric return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16); 14515ffd83dbSDimitry Andric if (VecSize <= 1024) // 128 bytes 1452e8d8bef9SDimitry Andric return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32); 14535ffd83dbSDimitry Andric 1454e8d8bef9SDimitry Andric llvm_unreachable("unsupported size for IndirectRegWriteGPRIDX pseudos"); 14555ffd83dbSDimitry Andric } 14565ffd83dbSDimitry Andric 1457e8d8bef9SDimitry Andric static unsigned getIndirectVGPRWriteMovRelPseudoOpc(unsigned VecSize) { 1458e8d8bef9SDimitry Andric if (VecSize <= 32) // 4 bytes 1459e8d8bef9SDimitry Andric return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1; 14605ffd83dbSDimitry Andric if (VecSize <= 64) // 8 bytes 1461e8d8bef9SDimitry Andric return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2; 1462e8d8bef9SDimitry Andric if (VecSize <= 96) // 12 bytes 1463e8d8bef9SDimitry Andric return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3; 14645ffd83dbSDimitry Andric if (VecSize <= 128) // 16 bytes 1465e8d8bef9SDimitry Andric return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4; 1466e8d8bef9SDimitry Andric if (VecSize <= 160) // 20 bytes 1467e8d8bef9SDimitry Andric return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5; 14685ffd83dbSDimitry Andric if (VecSize <= 256) // 32 bytes 1469e8d8bef9SDimitry Andric return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8; 1470bdd1243dSDimitry Andric if (VecSize <= 288) // 36 bytes 1471bdd1243dSDimitry Andric return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9; 1472bdd1243dSDimitry Andric if (VecSize <= 320) // 40 bytes 1473bdd1243dSDimitry Andric return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10; 1474bdd1243dSDimitry Andric if (VecSize <= 352) // 44 bytes 1475bdd1243dSDimitry Andric return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11; 1476bdd1243dSDimitry Andric if (VecSize <= 384) // 48 bytes 1477bdd1243dSDimitry Andric return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12; 14785ffd83dbSDimitry Andric if (VecSize <= 512) // 64 bytes 1479e8d8bef9SDimitry Andric return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16; 14805ffd83dbSDimitry Andric if (VecSize <= 1024) // 128 bytes 1481e8d8bef9SDimitry Andric return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32; 14825ffd83dbSDimitry Andric 14835ffd83dbSDimitry Andric llvm_unreachable("unsupported size for IndirectRegWrite pseudos"); 14845ffd83dbSDimitry Andric } 14855ffd83dbSDimitry Andric 1486e8d8bef9SDimitry Andric static unsigned getIndirectSGPRWriteMovRelPseudo32(unsigned VecSize) { 1487e8d8bef9SDimitry Andric if (VecSize <= 32) // 4 bytes 1488e8d8bef9SDimitry Andric return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1; 1489e8d8bef9SDimitry Andric if (VecSize <= 64) // 8 bytes 1490e8d8bef9SDimitry Andric return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2; 1491e8d8bef9SDimitry Andric if (VecSize <= 96) // 12 bytes 1492e8d8bef9SDimitry Andric return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3; 1493e8d8bef9SDimitry Andric if (VecSize <= 128) // 16 bytes 
1494e8d8bef9SDimitry Andric return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4; 1495e8d8bef9SDimitry Andric if (VecSize <= 160) // 20 bytes 1496e8d8bef9SDimitry Andric return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5; 1497e8d8bef9SDimitry Andric if (VecSize <= 256) // 32 bytes 1498e8d8bef9SDimitry Andric return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8; 149906c3fb27SDimitry Andric if (VecSize <= 288) // 36 bytes 150006c3fb27SDimitry Andric return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9; 150106c3fb27SDimitry Andric if (VecSize <= 320) // 40 bytes 150206c3fb27SDimitry Andric return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10; 150306c3fb27SDimitry Andric if (VecSize <= 352) // 44 bytes 150406c3fb27SDimitry Andric return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11; 150506c3fb27SDimitry Andric if (VecSize <= 384) // 48 bytes 150606c3fb27SDimitry Andric return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12; 1507e8d8bef9SDimitry Andric if (VecSize <= 512) // 64 bytes 1508e8d8bef9SDimitry Andric return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16; 1509e8d8bef9SDimitry Andric if (VecSize <= 1024) // 128 bytes 1510e8d8bef9SDimitry Andric return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32; 1511e8d8bef9SDimitry Andric 1512e8d8bef9SDimitry Andric llvm_unreachable("unsupported size for IndirectRegWrite pseudos"); 1513e8d8bef9SDimitry Andric } 1514e8d8bef9SDimitry Andric 1515e8d8bef9SDimitry Andric static unsigned getIndirectSGPRWriteMovRelPseudo64(unsigned VecSize) { 1516e8d8bef9SDimitry Andric if (VecSize <= 64) // 8 bytes 1517e8d8bef9SDimitry Andric return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1; 1518e8d8bef9SDimitry Andric if (VecSize <= 128) // 16 bytes 1519e8d8bef9SDimitry Andric return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2; 1520e8d8bef9SDimitry Andric if (VecSize <= 256) // 32 bytes 1521e8d8bef9SDimitry Andric return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4; 1522e8d8bef9SDimitry Andric if (VecSize <= 512) // 64 bytes 1523e8d8bef9SDimitry Andric return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8; 1524e8d8bef9SDimitry Andric if (VecSize <= 1024) // 128 bytes 1525e8d8bef9SDimitry Andric return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16; 1526e8d8bef9SDimitry Andric 1527e8d8bef9SDimitry Andric llvm_unreachable("unsupported size for IndirectRegWrite pseudos"); 1528e8d8bef9SDimitry Andric } 1529e8d8bef9SDimitry Andric 1530e8d8bef9SDimitry Andric const MCInstrDesc & 1531e8d8bef9SDimitry Andric SIInstrInfo::getIndirectRegWriteMovRelPseudo(unsigned VecSize, unsigned EltSize, 1532e8d8bef9SDimitry Andric bool IsSGPR) const { 15335ffd83dbSDimitry Andric if (IsSGPR) { 15345ffd83dbSDimitry Andric switch (EltSize) { 15355ffd83dbSDimitry Andric case 32: 1536e8d8bef9SDimitry Andric return get(getIndirectSGPRWriteMovRelPseudo32(VecSize)); 15375ffd83dbSDimitry Andric case 64: 1538e8d8bef9SDimitry Andric return get(getIndirectSGPRWriteMovRelPseudo64(VecSize)); 15395ffd83dbSDimitry Andric default: 15405ffd83dbSDimitry Andric llvm_unreachable("invalid reg indexing elt size"); 15415ffd83dbSDimitry Andric } 15425ffd83dbSDimitry Andric } 15435ffd83dbSDimitry Andric 15445ffd83dbSDimitry Andric assert(EltSize == 32 && "invalid reg indexing elt size"); 1545e8d8bef9SDimitry Andric return get(getIndirectVGPRWriteMovRelPseudoOpc(VecSize)); 15465ffd83dbSDimitry Andric } 15475ffd83dbSDimitry Andric 15480b57cec5SDimitry Andric static unsigned getSGPRSpillSaveOpcode(unsigned Size) { 15490b57cec5SDimitry Andric switch (Size) { 15500b57cec5SDimitry Andric case 4: 15510b57cec5SDimitry Andric return AMDGPU::SI_SPILL_S32_SAVE; 
15520b57cec5SDimitry Andric case 8: 15530b57cec5SDimitry Andric return AMDGPU::SI_SPILL_S64_SAVE; 15540b57cec5SDimitry Andric case 12: 15550b57cec5SDimitry Andric return AMDGPU::SI_SPILL_S96_SAVE; 15560b57cec5SDimitry Andric case 16: 15570b57cec5SDimitry Andric return AMDGPU::SI_SPILL_S128_SAVE; 15580b57cec5SDimitry Andric case 20: 15590b57cec5SDimitry Andric return AMDGPU::SI_SPILL_S160_SAVE; 15605ffd83dbSDimitry Andric case 24: 15615ffd83dbSDimitry Andric return AMDGPU::SI_SPILL_S192_SAVE; 1562fe6060f1SDimitry Andric case 28: 1563fe6060f1SDimitry Andric return AMDGPU::SI_SPILL_S224_SAVE; 15640b57cec5SDimitry Andric case 32: 15650b57cec5SDimitry Andric return AMDGPU::SI_SPILL_S256_SAVE; 1566bdd1243dSDimitry Andric case 36: 1567bdd1243dSDimitry Andric return AMDGPU::SI_SPILL_S288_SAVE; 1568bdd1243dSDimitry Andric case 40: 1569bdd1243dSDimitry Andric return AMDGPU::SI_SPILL_S320_SAVE; 1570bdd1243dSDimitry Andric case 44: 1571bdd1243dSDimitry Andric return AMDGPU::SI_SPILL_S352_SAVE; 1572bdd1243dSDimitry Andric case 48: 1573bdd1243dSDimitry Andric return AMDGPU::SI_SPILL_S384_SAVE; 15740b57cec5SDimitry Andric case 64: 15750b57cec5SDimitry Andric return AMDGPU::SI_SPILL_S512_SAVE; 15760b57cec5SDimitry Andric case 128: 15770b57cec5SDimitry Andric return AMDGPU::SI_SPILL_S1024_SAVE; 15780b57cec5SDimitry Andric default: 15790b57cec5SDimitry Andric llvm_unreachable("unknown register size"); 15800b57cec5SDimitry Andric } 15810b57cec5SDimitry Andric } 15820b57cec5SDimitry Andric 15830b57cec5SDimitry Andric static unsigned getVGPRSpillSaveOpcode(unsigned Size) { 15840b57cec5SDimitry Andric switch (Size) { 15850b57cec5SDimitry Andric case 4: 15860b57cec5SDimitry Andric return AMDGPU::SI_SPILL_V32_SAVE; 15870b57cec5SDimitry Andric case 8: 15880b57cec5SDimitry Andric return AMDGPU::SI_SPILL_V64_SAVE; 15890b57cec5SDimitry Andric case 12: 15900b57cec5SDimitry Andric return AMDGPU::SI_SPILL_V96_SAVE; 15910b57cec5SDimitry Andric case 16: 15920b57cec5SDimitry Andric return AMDGPU::SI_SPILL_V128_SAVE; 15930b57cec5SDimitry Andric case 20: 15940b57cec5SDimitry Andric return AMDGPU::SI_SPILL_V160_SAVE; 15955ffd83dbSDimitry Andric case 24: 15965ffd83dbSDimitry Andric return AMDGPU::SI_SPILL_V192_SAVE; 1597fe6060f1SDimitry Andric case 28: 1598fe6060f1SDimitry Andric return AMDGPU::SI_SPILL_V224_SAVE; 15990b57cec5SDimitry Andric case 32: 16000b57cec5SDimitry Andric return AMDGPU::SI_SPILL_V256_SAVE; 1601bdd1243dSDimitry Andric case 36: 1602bdd1243dSDimitry Andric return AMDGPU::SI_SPILL_V288_SAVE; 1603bdd1243dSDimitry Andric case 40: 1604bdd1243dSDimitry Andric return AMDGPU::SI_SPILL_V320_SAVE; 1605bdd1243dSDimitry Andric case 44: 1606bdd1243dSDimitry Andric return AMDGPU::SI_SPILL_V352_SAVE; 1607bdd1243dSDimitry Andric case 48: 1608bdd1243dSDimitry Andric return AMDGPU::SI_SPILL_V384_SAVE; 16090b57cec5SDimitry Andric case 64: 16100b57cec5SDimitry Andric return AMDGPU::SI_SPILL_V512_SAVE; 16110b57cec5SDimitry Andric case 128: 16120b57cec5SDimitry Andric return AMDGPU::SI_SPILL_V1024_SAVE; 16130b57cec5SDimitry Andric default: 16140b57cec5SDimitry Andric llvm_unreachable("unknown register size"); 16150b57cec5SDimitry Andric } 16160b57cec5SDimitry Andric } 16170b57cec5SDimitry Andric 16180b57cec5SDimitry Andric static unsigned getAGPRSpillSaveOpcode(unsigned Size) { 16190b57cec5SDimitry Andric switch (Size) { 16200b57cec5SDimitry Andric case 4: 16210b57cec5SDimitry Andric return AMDGPU::SI_SPILL_A32_SAVE; 16220b57cec5SDimitry Andric case 8: 16230b57cec5SDimitry Andric return AMDGPU::SI_SPILL_A64_SAVE; 
1624e8d8bef9SDimitry Andric case 12: 1625e8d8bef9SDimitry Andric return AMDGPU::SI_SPILL_A96_SAVE; 16260b57cec5SDimitry Andric case 16: 16270b57cec5SDimitry Andric return AMDGPU::SI_SPILL_A128_SAVE; 1628e8d8bef9SDimitry Andric case 20: 1629e8d8bef9SDimitry Andric return AMDGPU::SI_SPILL_A160_SAVE; 1630e8d8bef9SDimitry Andric case 24: 1631e8d8bef9SDimitry Andric return AMDGPU::SI_SPILL_A192_SAVE; 1632fe6060f1SDimitry Andric case 28: 1633fe6060f1SDimitry Andric return AMDGPU::SI_SPILL_A224_SAVE; 1634e8d8bef9SDimitry Andric case 32: 1635e8d8bef9SDimitry Andric return AMDGPU::SI_SPILL_A256_SAVE; 1636bdd1243dSDimitry Andric case 36: 1637bdd1243dSDimitry Andric return AMDGPU::SI_SPILL_A288_SAVE; 1638bdd1243dSDimitry Andric case 40: 1639bdd1243dSDimitry Andric return AMDGPU::SI_SPILL_A320_SAVE; 1640bdd1243dSDimitry Andric case 44: 1641bdd1243dSDimitry Andric return AMDGPU::SI_SPILL_A352_SAVE; 1642bdd1243dSDimitry Andric case 48: 1643bdd1243dSDimitry Andric return AMDGPU::SI_SPILL_A384_SAVE; 16440b57cec5SDimitry Andric case 64: 16450b57cec5SDimitry Andric return AMDGPU::SI_SPILL_A512_SAVE; 16460b57cec5SDimitry Andric case 128: 16470b57cec5SDimitry Andric return AMDGPU::SI_SPILL_A1024_SAVE; 16480b57cec5SDimitry Andric default: 16490b57cec5SDimitry Andric llvm_unreachable("unknown register size"); 16500b57cec5SDimitry Andric } 16510b57cec5SDimitry Andric } 16520b57cec5SDimitry Andric 16530eae32dcSDimitry Andric static unsigned getAVSpillSaveOpcode(unsigned Size) { 16540eae32dcSDimitry Andric switch (Size) { 16550eae32dcSDimitry Andric case 4: 16560eae32dcSDimitry Andric return AMDGPU::SI_SPILL_AV32_SAVE; 16570eae32dcSDimitry Andric case 8: 16580eae32dcSDimitry Andric return AMDGPU::SI_SPILL_AV64_SAVE; 16590eae32dcSDimitry Andric case 12: 16600eae32dcSDimitry Andric return AMDGPU::SI_SPILL_AV96_SAVE; 16610eae32dcSDimitry Andric case 16: 16620eae32dcSDimitry Andric return AMDGPU::SI_SPILL_AV128_SAVE; 16630eae32dcSDimitry Andric case 20: 16640eae32dcSDimitry Andric return AMDGPU::SI_SPILL_AV160_SAVE; 16650eae32dcSDimitry Andric case 24: 16660eae32dcSDimitry Andric return AMDGPU::SI_SPILL_AV192_SAVE; 16670eae32dcSDimitry Andric case 28: 16680eae32dcSDimitry Andric return AMDGPU::SI_SPILL_AV224_SAVE; 16690eae32dcSDimitry Andric case 32: 16700eae32dcSDimitry Andric return AMDGPU::SI_SPILL_AV256_SAVE; 1671bdd1243dSDimitry Andric case 36: 1672bdd1243dSDimitry Andric return AMDGPU::SI_SPILL_AV288_SAVE; 1673bdd1243dSDimitry Andric case 40: 1674bdd1243dSDimitry Andric return AMDGPU::SI_SPILL_AV320_SAVE; 1675bdd1243dSDimitry Andric case 44: 1676bdd1243dSDimitry Andric return AMDGPU::SI_SPILL_AV352_SAVE; 1677bdd1243dSDimitry Andric case 48: 1678bdd1243dSDimitry Andric return AMDGPU::SI_SPILL_AV384_SAVE; 16790eae32dcSDimitry Andric case 64: 16800eae32dcSDimitry Andric return AMDGPU::SI_SPILL_AV512_SAVE; 16810eae32dcSDimitry Andric case 128: 16820eae32dcSDimitry Andric return AMDGPU::SI_SPILL_AV1024_SAVE; 16830eae32dcSDimitry Andric default: 16840eae32dcSDimitry Andric llvm_unreachable("unknown register size"); 16850eae32dcSDimitry Andric } 16860eae32dcSDimitry Andric } 16870eae32dcSDimitry Andric 1688*5f757f3fSDimitry Andric static unsigned getWWMRegSpillSaveOpcode(unsigned Size, 1689*5f757f3fSDimitry Andric bool IsVectorSuperClass) { 169006c3fb27SDimitry Andric // Currently, there is only 32-bit WWM register spills needed. 
169106c3fb27SDimitry Andric if (Size != 4) 169206c3fb27SDimitry Andric llvm_unreachable("unknown wwm register spill size"); 169306c3fb27SDimitry Andric 1694*5f757f3fSDimitry Andric if (IsVectorSuperClass) 1695*5f757f3fSDimitry Andric return AMDGPU::SI_SPILL_WWM_AV32_SAVE; 1696*5f757f3fSDimitry Andric 169706c3fb27SDimitry Andric return AMDGPU::SI_SPILL_WWM_V32_SAVE; 169806c3fb27SDimitry Andric } 169906c3fb27SDimitry Andric 170006c3fb27SDimitry Andric static unsigned getVectorRegSpillSaveOpcode(Register Reg, 170106c3fb27SDimitry Andric const TargetRegisterClass *RC, 170206c3fb27SDimitry Andric unsigned Size, 170306c3fb27SDimitry Andric const SIRegisterInfo &TRI, 170406c3fb27SDimitry Andric const SIMachineFunctionInfo &MFI) { 1705*5f757f3fSDimitry Andric bool IsVectorSuperClass = TRI.isVectorSuperClass(RC); 1706*5f757f3fSDimitry Andric 170706c3fb27SDimitry Andric // Choose the right opcode if spilling a WWM register. 170806c3fb27SDimitry Andric if (MFI.checkFlag(Reg, AMDGPU::VirtRegFlag::WWM_REG)) 1709*5f757f3fSDimitry Andric return getWWMRegSpillSaveOpcode(Size, IsVectorSuperClass); 171006c3fb27SDimitry Andric 1711*5f757f3fSDimitry Andric if (IsVectorSuperClass) 171206c3fb27SDimitry Andric return getAVSpillSaveOpcode(Size); 171306c3fb27SDimitry Andric 171406c3fb27SDimitry Andric return TRI.isAGPRClass(RC) ? getAGPRSpillSaveOpcode(Size) 171506c3fb27SDimitry Andric : getVGPRSpillSaveOpcode(Size); 171606c3fb27SDimitry Andric } 171706c3fb27SDimitry Andric 1718bdd1243dSDimitry Andric void SIInstrInfo::storeRegToStackSlot( 1719bdd1243dSDimitry Andric MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, 1720bdd1243dSDimitry Andric bool isKill, int FrameIndex, const TargetRegisterClass *RC, 1721bdd1243dSDimitry Andric const TargetRegisterInfo *TRI, Register VReg) const { 17220b57cec5SDimitry Andric MachineFunction *MF = MBB.getParent(); 17230b57cec5SDimitry Andric SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); 17240b57cec5SDimitry Andric MachineFrameInfo &FrameInfo = MF->getFrameInfo(); 17250b57cec5SDimitry Andric const DebugLoc &DL = MBB.findDebugLoc(MI); 17260b57cec5SDimitry Andric 17270b57cec5SDimitry Andric MachinePointerInfo PtrInfo 17280b57cec5SDimitry Andric = MachinePointerInfo::getFixedStack(*MF, FrameIndex); 17295ffd83dbSDimitry Andric MachineMemOperand *MMO = MF->getMachineMemOperand( 17305ffd83dbSDimitry Andric PtrInfo, MachineMemOperand::MOStore, FrameInfo.getObjectSize(FrameIndex), 17315ffd83dbSDimitry Andric FrameInfo.getObjectAlign(FrameIndex)); 17320b57cec5SDimitry Andric unsigned SpillSize = TRI->getSpillSize(*RC); 17330b57cec5SDimitry Andric 17344824e7fdSDimitry Andric MachineRegisterInfo &MRI = MF->getRegInfo(); 17350b57cec5SDimitry Andric if (RI.isSGPRClass(RC)) { 17360b57cec5SDimitry Andric MFI->setHasSpilledSGPRs(); 1737480093f4SDimitry Andric assert(SrcReg != AMDGPU::M0 && "m0 should not be spilled"); 17385ffd83dbSDimitry Andric assert(SrcReg != AMDGPU::EXEC_LO && SrcReg != AMDGPU::EXEC_HI && 17395ffd83dbSDimitry Andric SrcReg != AMDGPU::EXEC && "exec should not be spilled"); 17400b57cec5SDimitry Andric 17410b57cec5SDimitry Andric // We are only allowed to create one new instruction when spilling 17420b57cec5SDimitry Andric // registers, so we need to use pseudo instruction for spilling SGPRs. 
17430b57cec5SDimitry Andric const MCInstrDesc &OpDesc = get(getSGPRSpillSaveOpcode(SpillSize)); 17440b57cec5SDimitry Andric 17450b57cec5SDimitry Andric // The SGPR spill/restore instructions only work on number sgprs, so we need 17460b57cec5SDimitry Andric // to make sure we are using the correct register class. 1747e8d8bef9SDimitry Andric if (SrcReg.isVirtual() && SpillSize == 4) { 17485ffd83dbSDimitry Andric MRI.constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0_XEXECRegClass); 17490b57cec5SDimitry Andric } 17500b57cec5SDimitry Andric 17518bcb0991SDimitry Andric BuildMI(MBB, MI, DL, OpDesc) 17520b57cec5SDimitry Andric .addReg(SrcReg, getKillRegState(isKill)) // data 17530b57cec5SDimitry Andric .addFrameIndex(FrameIndex) // addr 17540b57cec5SDimitry Andric .addMemOperand(MMO) 17550b57cec5SDimitry Andric .addReg(MFI->getStackPtrOffsetReg(), RegState::Implicit); 1756e8d8bef9SDimitry Andric 17570b57cec5SDimitry Andric if (RI.spillSGPRToVGPR()) 17580b57cec5SDimitry Andric FrameInfo.setStackID(FrameIndex, TargetStackID::SGPRSpill); 17590b57cec5SDimitry Andric return; 17600b57cec5SDimitry Andric } 17610b57cec5SDimitry Andric 176206c3fb27SDimitry Andric unsigned Opcode = getVectorRegSpillSaveOpcode(VReg ? VReg : SrcReg, RC, 176306c3fb27SDimitry Andric SpillSize, RI, *MFI); 17640b57cec5SDimitry Andric MFI->setHasSpilledVGPRs(); 17650b57cec5SDimitry Andric 1766e8d8bef9SDimitry Andric BuildMI(MBB, MI, DL, get(Opcode)) 1767e8d8bef9SDimitry Andric .addReg(SrcReg, getKillRegState(isKill)) // data 17680b57cec5SDimitry Andric .addFrameIndex(FrameIndex) // addr 17690b57cec5SDimitry Andric .addReg(MFI->getStackPtrOffsetReg()) // scratch_offset 17700b57cec5SDimitry Andric .addImm(0) // offset 17710b57cec5SDimitry Andric .addMemOperand(MMO); 17720b57cec5SDimitry Andric } 17730b57cec5SDimitry Andric 17740b57cec5SDimitry Andric static unsigned getSGPRSpillRestoreOpcode(unsigned Size) { 17750b57cec5SDimitry Andric switch (Size) { 17760b57cec5SDimitry Andric case 4: 17770b57cec5SDimitry Andric return AMDGPU::SI_SPILL_S32_RESTORE; 17780b57cec5SDimitry Andric case 8: 17790b57cec5SDimitry Andric return AMDGPU::SI_SPILL_S64_RESTORE; 17800b57cec5SDimitry Andric case 12: 17810b57cec5SDimitry Andric return AMDGPU::SI_SPILL_S96_RESTORE; 17820b57cec5SDimitry Andric case 16: 17830b57cec5SDimitry Andric return AMDGPU::SI_SPILL_S128_RESTORE; 17840b57cec5SDimitry Andric case 20: 17850b57cec5SDimitry Andric return AMDGPU::SI_SPILL_S160_RESTORE; 17865ffd83dbSDimitry Andric case 24: 17875ffd83dbSDimitry Andric return AMDGPU::SI_SPILL_S192_RESTORE; 1788fe6060f1SDimitry Andric case 28: 1789fe6060f1SDimitry Andric return AMDGPU::SI_SPILL_S224_RESTORE; 17900b57cec5SDimitry Andric case 32: 17910b57cec5SDimitry Andric return AMDGPU::SI_SPILL_S256_RESTORE; 1792bdd1243dSDimitry Andric case 36: 1793bdd1243dSDimitry Andric return AMDGPU::SI_SPILL_S288_RESTORE; 1794bdd1243dSDimitry Andric case 40: 1795bdd1243dSDimitry Andric return AMDGPU::SI_SPILL_S320_RESTORE; 1796bdd1243dSDimitry Andric case 44: 1797bdd1243dSDimitry Andric return AMDGPU::SI_SPILL_S352_RESTORE; 1798bdd1243dSDimitry Andric case 48: 1799bdd1243dSDimitry Andric return AMDGPU::SI_SPILL_S384_RESTORE; 18000b57cec5SDimitry Andric case 64: 18010b57cec5SDimitry Andric return AMDGPU::SI_SPILL_S512_RESTORE; 18020b57cec5SDimitry Andric case 128: 18030b57cec5SDimitry Andric return AMDGPU::SI_SPILL_S1024_RESTORE; 18040b57cec5SDimitry Andric default: 18050b57cec5SDimitry Andric llvm_unreachable("unknown register size"); 18060b57cec5SDimitry Andric } 18070b57cec5SDimitry Andric } 
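// Like getSGPRSpillRestoreOpcode above, the remaining *SpillRestoreOpcode helpers
// mirror their *SpillSaveOpcode counterparts, mapping a spill size in bytes
// (4 through 128, i.e. 32-bit through 1024-bit registers) to the matching
// SI_SPILL_*_RESTORE pseudo.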
18080b57cec5SDimitry Andric 18090b57cec5SDimitry Andric static unsigned getVGPRSpillRestoreOpcode(unsigned Size) { 18100b57cec5SDimitry Andric switch (Size) { 18110b57cec5SDimitry Andric case 4: 18120b57cec5SDimitry Andric return AMDGPU::SI_SPILL_V32_RESTORE; 18130b57cec5SDimitry Andric case 8: 18140b57cec5SDimitry Andric return AMDGPU::SI_SPILL_V64_RESTORE; 18150b57cec5SDimitry Andric case 12: 18160b57cec5SDimitry Andric return AMDGPU::SI_SPILL_V96_RESTORE; 18170b57cec5SDimitry Andric case 16: 18180b57cec5SDimitry Andric return AMDGPU::SI_SPILL_V128_RESTORE; 18190b57cec5SDimitry Andric case 20: 18200b57cec5SDimitry Andric return AMDGPU::SI_SPILL_V160_RESTORE; 18215ffd83dbSDimitry Andric case 24: 18225ffd83dbSDimitry Andric return AMDGPU::SI_SPILL_V192_RESTORE; 1823fe6060f1SDimitry Andric case 28: 1824fe6060f1SDimitry Andric return AMDGPU::SI_SPILL_V224_RESTORE; 18250b57cec5SDimitry Andric case 32: 18260b57cec5SDimitry Andric return AMDGPU::SI_SPILL_V256_RESTORE; 1827bdd1243dSDimitry Andric case 36: 1828bdd1243dSDimitry Andric return AMDGPU::SI_SPILL_V288_RESTORE; 1829bdd1243dSDimitry Andric case 40: 1830bdd1243dSDimitry Andric return AMDGPU::SI_SPILL_V320_RESTORE; 1831bdd1243dSDimitry Andric case 44: 1832bdd1243dSDimitry Andric return AMDGPU::SI_SPILL_V352_RESTORE; 1833bdd1243dSDimitry Andric case 48: 1834bdd1243dSDimitry Andric return AMDGPU::SI_SPILL_V384_RESTORE; 18350b57cec5SDimitry Andric case 64: 18360b57cec5SDimitry Andric return AMDGPU::SI_SPILL_V512_RESTORE; 18370b57cec5SDimitry Andric case 128: 18380b57cec5SDimitry Andric return AMDGPU::SI_SPILL_V1024_RESTORE; 18390b57cec5SDimitry Andric default: 18400b57cec5SDimitry Andric llvm_unreachable("unknown register size"); 18410b57cec5SDimitry Andric } 18420b57cec5SDimitry Andric } 18430b57cec5SDimitry Andric 18440b57cec5SDimitry Andric static unsigned getAGPRSpillRestoreOpcode(unsigned Size) { 18450b57cec5SDimitry Andric switch (Size) { 18460b57cec5SDimitry Andric case 4: 18470b57cec5SDimitry Andric return AMDGPU::SI_SPILL_A32_RESTORE; 18480b57cec5SDimitry Andric case 8: 18490b57cec5SDimitry Andric return AMDGPU::SI_SPILL_A64_RESTORE; 1850e8d8bef9SDimitry Andric case 12: 1851e8d8bef9SDimitry Andric return AMDGPU::SI_SPILL_A96_RESTORE; 18520b57cec5SDimitry Andric case 16: 18530b57cec5SDimitry Andric return AMDGPU::SI_SPILL_A128_RESTORE; 1854e8d8bef9SDimitry Andric case 20: 1855e8d8bef9SDimitry Andric return AMDGPU::SI_SPILL_A160_RESTORE; 1856e8d8bef9SDimitry Andric case 24: 1857e8d8bef9SDimitry Andric return AMDGPU::SI_SPILL_A192_RESTORE; 1858fe6060f1SDimitry Andric case 28: 1859fe6060f1SDimitry Andric return AMDGPU::SI_SPILL_A224_RESTORE; 1860e8d8bef9SDimitry Andric case 32: 1861e8d8bef9SDimitry Andric return AMDGPU::SI_SPILL_A256_RESTORE; 1862bdd1243dSDimitry Andric case 36: 1863bdd1243dSDimitry Andric return AMDGPU::SI_SPILL_A288_RESTORE; 1864bdd1243dSDimitry Andric case 40: 1865bdd1243dSDimitry Andric return AMDGPU::SI_SPILL_A320_RESTORE; 1866bdd1243dSDimitry Andric case 44: 1867bdd1243dSDimitry Andric return AMDGPU::SI_SPILL_A352_RESTORE; 1868bdd1243dSDimitry Andric case 48: 1869bdd1243dSDimitry Andric return AMDGPU::SI_SPILL_A384_RESTORE; 18700b57cec5SDimitry Andric case 64: 18710b57cec5SDimitry Andric return AMDGPU::SI_SPILL_A512_RESTORE; 18720b57cec5SDimitry Andric case 128: 18730b57cec5SDimitry Andric return AMDGPU::SI_SPILL_A1024_RESTORE; 18740b57cec5SDimitry Andric default: 18750b57cec5SDimitry Andric llvm_unreachable("unknown register size"); 18760b57cec5SDimitry Andric } 18770b57cec5SDimitry Andric } 18780b57cec5SDimitry 
Andric 18790eae32dcSDimitry Andric static unsigned getAVSpillRestoreOpcode(unsigned Size) { 18800eae32dcSDimitry Andric switch (Size) { 18810eae32dcSDimitry Andric case 4: 18820eae32dcSDimitry Andric return AMDGPU::SI_SPILL_AV32_RESTORE; 18830eae32dcSDimitry Andric case 8: 18840eae32dcSDimitry Andric return AMDGPU::SI_SPILL_AV64_RESTORE; 18850eae32dcSDimitry Andric case 12: 18860eae32dcSDimitry Andric return AMDGPU::SI_SPILL_AV96_RESTORE; 18870eae32dcSDimitry Andric case 16: 18880eae32dcSDimitry Andric return AMDGPU::SI_SPILL_AV128_RESTORE; 18890eae32dcSDimitry Andric case 20: 18900eae32dcSDimitry Andric return AMDGPU::SI_SPILL_AV160_RESTORE; 18910eae32dcSDimitry Andric case 24: 18920eae32dcSDimitry Andric return AMDGPU::SI_SPILL_AV192_RESTORE; 18930eae32dcSDimitry Andric case 28: 18940eae32dcSDimitry Andric return AMDGPU::SI_SPILL_AV224_RESTORE; 18950eae32dcSDimitry Andric case 32: 18960eae32dcSDimitry Andric return AMDGPU::SI_SPILL_AV256_RESTORE; 1897bdd1243dSDimitry Andric case 36: 1898bdd1243dSDimitry Andric return AMDGPU::SI_SPILL_AV288_RESTORE; 1899bdd1243dSDimitry Andric case 40: 1900bdd1243dSDimitry Andric return AMDGPU::SI_SPILL_AV320_RESTORE; 1901bdd1243dSDimitry Andric case 44: 1902bdd1243dSDimitry Andric return AMDGPU::SI_SPILL_AV352_RESTORE; 1903bdd1243dSDimitry Andric case 48: 1904bdd1243dSDimitry Andric return AMDGPU::SI_SPILL_AV384_RESTORE; 19050eae32dcSDimitry Andric case 64: 19060eae32dcSDimitry Andric return AMDGPU::SI_SPILL_AV512_RESTORE; 19070eae32dcSDimitry Andric case 128: 19080eae32dcSDimitry Andric return AMDGPU::SI_SPILL_AV1024_RESTORE; 19090eae32dcSDimitry Andric default: 19100eae32dcSDimitry Andric llvm_unreachable("unknown register size"); 19110eae32dcSDimitry Andric } 19120eae32dcSDimitry Andric } 19130eae32dcSDimitry Andric 1914*5f757f3fSDimitry Andric static unsigned getWWMRegSpillRestoreOpcode(unsigned Size, 1915*5f757f3fSDimitry Andric bool IsVectorSuperClass) { 191606c3fb27SDimitry Andric // Currently, there is only 32-bit WWM register spills needed. 191706c3fb27SDimitry Andric if (Size != 4) 191806c3fb27SDimitry Andric llvm_unreachable("unknown wwm register spill size"); 191906c3fb27SDimitry Andric 1920*5f757f3fSDimitry Andric if (IsVectorSuperClass) 1921*5f757f3fSDimitry Andric return AMDGPU::SI_SPILL_WWM_AV32_RESTORE; 1922*5f757f3fSDimitry Andric 192306c3fb27SDimitry Andric return AMDGPU::SI_SPILL_WWM_V32_RESTORE; 192406c3fb27SDimitry Andric } 192506c3fb27SDimitry Andric 192606c3fb27SDimitry Andric static unsigned 192706c3fb27SDimitry Andric getVectorRegSpillRestoreOpcode(Register Reg, const TargetRegisterClass *RC, 192806c3fb27SDimitry Andric unsigned Size, const SIRegisterInfo &TRI, 192906c3fb27SDimitry Andric const SIMachineFunctionInfo &MFI) { 1930*5f757f3fSDimitry Andric bool IsVectorSuperClass = TRI.isVectorSuperClass(RC); 1931*5f757f3fSDimitry Andric 193206c3fb27SDimitry Andric // Choose the right opcode if restoring a WWM register. 193306c3fb27SDimitry Andric if (MFI.checkFlag(Reg, AMDGPU::VirtRegFlag::WWM_REG)) 1934*5f757f3fSDimitry Andric return getWWMRegSpillRestoreOpcode(Size, IsVectorSuperClass); 193506c3fb27SDimitry Andric 1936*5f757f3fSDimitry Andric if (IsVectorSuperClass) 193706c3fb27SDimitry Andric return getAVSpillRestoreOpcode(Size); 193806c3fb27SDimitry Andric 193906c3fb27SDimitry Andric return TRI.isAGPRClass(RC) ? 
getAGPRSpillRestoreOpcode(Size) 194006c3fb27SDimitry Andric : getVGPRSpillRestoreOpcode(Size); 194106c3fb27SDimitry Andric } 194206c3fb27SDimitry Andric 19430b57cec5SDimitry Andric void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, 19440b57cec5SDimitry Andric MachineBasicBlock::iterator MI, 19455ffd83dbSDimitry Andric Register DestReg, int FrameIndex, 19460b57cec5SDimitry Andric const TargetRegisterClass *RC, 1947bdd1243dSDimitry Andric const TargetRegisterInfo *TRI, 1948bdd1243dSDimitry Andric Register VReg) const { 19490b57cec5SDimitry Andric MachineFunction *MF = MBB.getParent(); 19500b57cec5SDimitry Andric SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); 19510b57cec5SDimitry Andric MachineFrameInfo &FrameInfo = MF->getFrameInfo(); 19520b57cec5SDimitry Andric const DebugLoc &DL = MBB.findDebugLoc(MI); 19530b57cec5SDimitry Andric unsigned SpillSize = TRI->getSpillSize(*RC); 19540b57cec5SDimitry Andric 19550b57cec5SDimitry Andric MachinePointerInfo PtrInfo 19560b57cec5SDimitry Andric = MachinePointerInfo::getFixedStack(*MF, FrameIndex); 19570b57cec5SDimitry Andric 19580b57cec5SDimitry Andric MachineMemOperand *MMO = MF->getMachineMemOperand( 19595ffd83dbSDimitry Andric PtrInfo, MachineMemOperand::MOLoad, FrameInfo.getObjectSize(FrameIndex), 19605ffd83dbSDimitry Andric FrameInfo.getObjectAlign(FrameIndex)); 19610b57cec5SDimitry Andric 19620b57cec5SDimitry Andric if (RI.isSGPRClass(RC)) { 19630b57cec5SDimitry Andric MFI->setHasSpilledSGPRs(); 1964480093f4SDimitry Andric assert(DestReg != AMDGPU::M0 && "m0 should not be reloaded into"); 19655ffd83dbSDimitry Andric assert(DestReg != AMDGPU::EXEC_LO && DestReg != AMDGPU::EXEC_HI && 19665ffd83dbSDimitry Andric DestReg != AMDGPU::EXEC && "exec should not be spilled"); 19670b57cec5SDimitry Andric 19680b57cec5SDimitry Andric // FIXME: Maybe this should not include a memoperand because it will be 19690b57cec5SDimitry Andric // lowered to non-memory instructions. 19700b57cec5SDimitry Andric const MCInstrDesc &OpDesc = get(getSGPRSpillRestoreOpcode(SpillSize)); 19715ffd83dbSDimitry Andric if (DestReg.isVirtual() && SpillSize == 4) { 19720b57cec5SDimitry Andric MachineRegisterInfo &MRI = MF->getRegInfo(); 19735ffd83dbSDimitry Andric MRI.constrainRegClass(DestReg, &AMDGPU::SReg_32_XM0_XEXECRegClass); 19740b57cec5SDimitry Andric } 19750b57cec5SDimitry Andric 19760b57cec5SDimitry Andric if (RI.spillSGPRToVGPR()) 19770b57cec5SDimitry Andric FrameInfo.setStackID(FrameIndex, TargetStackID::SGPRSpill); 19788bcb0991SDimitry Andric BuildMI(MBB, MI, DL, OpDesc, DestReg) 19790b57cec5SDimitry Andric .addFrameIndex(FrameIndex) // addr 19800b57cec5SDimitry Andric .addMemOperand(MMO) 19810b57cec5SDimitry Andric .addReg(MFI->getStackPtrOffsetReg(), RegState::Implicit); 1982e8d8bef9SDimitry Andric 19830b57cec5SDimitry Andric return; 19840b57cec5SDimitry Andric } 19850b57cec5SDimitry Andric 198606c3fb27SDimitry Andric unsigned Opcode = getVectorRegSpillRestoreOpcode(VReg ? 
VReg : DestReg, RC,
198706c3fb27SDimitry Andric SpillSize, RI, *MFI);
1988e8d8bef9SDimitry Andric BuildMI(MBB, MI, DL, get(Opcode), DestReg)
1989e8d8bef9SDimitry Andric .addFrameIndex(FrameIndex) // vaddr
19900b57cec5SDimitry Andric .addReg(MFI->getStackPtrOffsetReg()) // scratch_offset
19910b57cec5SDimitry Andric .addImm(0) // offset
19920b57cec5SDimitry Andric .addMemOperand(MMO);
19930b57cec5SDimitry Andric }
19940b57cec5SDimitry Andric
19950b57cec5SDimitry Andric void SIInstrInfo::insertNoop(MachineBasicBlock &MBB,
19960b57cec5SDimitry Andric MachineBasicBlock::iterator MI) const {
1997e8d8bef9SDimitry Andric insertNoops(MBB, MI, 1);
1998e8d8bef9SDimitry Andric }
1999e8d8bef9SDimitry Andric
2000e8d8bef9SDimitry Andric void SIInstrInfo::insertNoops(MachineBasicBlock &MBB,
2001e8d8bef9SDimitry Andric MachineBasicBlock::iterator MI,
2002e8d8bef9SDimitry Andric unsigned Quantity) const {
2003e8d8bef9SDimitry Andric DebugLoc DL = MBB.findDebugLoc(MI);
2004e8d8bef9SDimitry Andric while (Quantity > 0) {
2005e8d8bef9SDimitry Andric unsigned Arg = std::min(Quantity, 8u);
2006e8d8bef9SDimitry Andric Quantity -= Arg;
2007e8d8bef9SDimitry Andric BuildMI(MBB, MI, DL, get(AMDGPU::S_NOP)).addImm(Arg - 1);
2008e8d8bef9SDimitry Andric }
20090b57cec5SDimitry Andric }
20100b57cec5SDimitry Andric
20110b57cec5SDimitry Andric void SIInstrInfo::insertReturn(MachineBasicBlock &MBB) const {
20120b57cec5SDimitry Andric auto MF = MBB.getParent();
20130b57cec5SDimitry Andric SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();
20140b57cec5SDimitry Andric
20150b57cec5SDimitry Andric assert(Info->isEntryFunction());
20160b57cec5SDimitry Andric
20170b57cec5SDimitry Andric if (MBB.succ_empty()) {
20180b57cec5SDimitry Andric bool HasNoTerminator = MBB.getFirstTerminator() == MBB.end();
20190b57cec5SDimitry Andric if (HasNoTerminator) {
20200b57cec5SDimitry Andric if (Info->returnsVoid()) {
20210b57cec5SDimitry Andric BuildMI(MBB, MBB.end(), DebugLoc(), get(AMDGPU::S_ENDPGM)).addImm(0);
20220b57cec5SDimitry Andric } else {
20230b57cec5SDimitry Andric BuildMI(MBB, MBB.end(), DebugLoc(), get(AMDGPU::SI_RETURN_TO_EPILOG));
20240b57cec5SDimitry Andric }
20250b57cec5SDimitry Andric }
20260b57cec5SDimitry Andric }
20270b57cec5SDimitry Andric }
20280b57cec5SDimitry Andric
20290b57cec5SDimitry Andric unsigned SIInstrInfo::getNumWaitStates(const MachineInstr &MI) {
20300b57cec5SDimitry Andric switch (MI.getOpcode()) {
2031349cc55cSDimitry Andric default:
2032349cc55cSDimitry Andric if (MI.isMetaInstruction())
2033349cc55cSDimitry Andric return 0;
2034349cc55cSDimitry Andric return 1; // FIXME: Do wait states equal cycles?
20350b57cec5SDimitry Andric
20360b57cec5SDimitry Andric case AMDGPU::S_NOP:
20370b57cec5SDimitry Andric return MI.getOperand(0).getImm() + 1;
2038349cc55cSDimitry Andric // SI_RETURN_TO_EPILOG is a fallthrough to code outside of the function. The
2039349cc55cSDimitry Andric // hazard, even if one exists, won't really be visible. Should we handle it?
20400b57cec5SDimitry Andric } 20410b57cec5SDimitry Andric } 20420b57cec5SDimitry Andric 20430b57cec5SDimitry Andric bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { 2044fe6060f1SDimitry Andric const SIRegisterInfo *TRI = ST.getRegisterInfo(); 20450b57cec5SDimitry Andric MachineBasicBlock &MBB = *MI.getParent(); 20460b57cec5SDimitry Andric DebugLoc DL = MBB.findDebugLoc(MI); 20470b57cec5SDimitry Andric switch (MI.getOpcode()) { 20480b57cec5SDimitry Andric default: return TargetInstrInfo::expandPostRAPseudo(MI); 20490b57cec5SDimitry Andric case AMDGPU::S_MOV_B64_term: 20500b57cec5SDimitry Andric // This is only a terminator to get the correct spill code placement during 20510b57cec5SDimitry Andric // register allocation. 20520b57cec5SDimitry Andric MI.setDesc(get(AMDGPU::S_MOV_B64)); 20530b57cec5SDimitry Andric break; 20540b57cec5SDimitry Andric 20550b57cec5SDimitry Andric case AMDGPU::S_MOV_B32_term: 20560b57cec5SDimitry Andric // This is only a terminator to get the correct spill code placement during 20570b57cec5SDimitry Andric // register allocation. 20580b57cec5SDimitry Andric MI.setDesc(get(AMDGPU::S_MOV_B32)); 20590b57cec5SDimitry Andric break; 20600b57cec5SDimitry Andric 20610b57cec5SDimitry Andric case AMDGPU::S_XOR_B64_term: 20620b57cec5SDimitry Andric // This is only a terminator to get the correct spill code placement during 20630b57cec5SDimitry Andric // register allocation. 20640b57cec5SDimitry Andric MI.setDesc(get(AMDGPU::S_XOR_B64)); 20650b57cec5SDimitry Andric break; 20660b57cec5SDimitry Andric 20670b57cec5SDimitry Andric case AMDGPU::S_XOR_B32_term: 20680b57cec5SDimitry Andric // This is only a terminator to get the correct spill code placement during 20690b57cec5SDimitry Andric // register allocation. 20700b57cec5SDimitry Andric MI.setDesc(get(AMDGPU::S_XOR_B32)); 20710b57cec5SDimitry Andric break; 2072e8d8bef9SDimitry Andric case AMDGPU::S_OR_B64_term: 2073e8d8bef9SDimitry Andric // This is only a terminator to get the correct spill code placement during 2074e8d8bef9SDimitry Andric // register allocation. 2075e8d8bef9SDimitry Andric MI.setDesc(get(AMDGPU::S_OR_B64)); 2076e8d8bef9SDimitry Andric break; 20770b57cec5SDimitry Andric case AMDGPU::S_OR_B32_term: 20780b57cec5SDimitry Andric // This is only a terminator to get the correct spill code placement during 20790b57cec5SDimitry Andric // register allocation. 20800b57cec5SDimitry Andric MI.setDesc(get(AMDGPU::S_OR_B32)); 20810b57cec5SDimitry Andric break; 20820b57cec5SDimitry Andric 20830b57cec5SDimitry Andric case AMDGPU::S_ANDN2_B64_term: 20840b57cec5SDimitry Andric // This is only a terminator to get the correct spill code placement during 20850b57cec5SDimitry Andric // register allocation. 20860b57cec5SDimitry Andric MI.setDesc(get(AMDGPU::S_ANDN2_B64)); 20870b57cec5SDimitry Andric break; 20880b57cec5SDimitry Andric 20890b57cec5SDimitry Andric case AMDGPU::S_ANDN2_B32_term: 20900b57cec5SDimitry Andric // This is only a terminator to get the correct spill code placement during 20910b57cec5SDimitry Andric // register allocation. 20920b57cec5SDimitry Andric MI.setDesc(get(AMDGPU::S_ANDN2_B32)); 20930b57cec5SDimitry Andric break; 20940b57cec5SDimitry Andric 2095fe6060f1SDimitry Andric case AMDGPU::S_AND_B64_term: 2096fe6060f1SDimitry Andric // This is only a terminator to get the correct spill code placement during 2097fe6060f1SDimitry Andric // register allocation. 
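// The *_term cases above and below all follow the same pattern: keep the
// pseudo's operands and swap only the opcode for the real instruction. A
// minimal sketch of that pattern, with a hypothetical helper name that is
// not part of this file:
//   static void lowerExecTermPseudo(const SIInstrInfo &TII, MachineInstr &MI,
//                                   unsigned RealOpc) {
//     MI.setDesc(TII.get(RealOpc)); // operands are left untouched
//   }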
2098fe6060f1SDimitry Andric MI.setDesc(get(AMDGPU::S_AND_B64)); 2099fe6060f1SDimitry Andric break; 2100fe6060f1SDimitry Andric 2101fe6060f1SDimitry Andric case AMDGPU::S_AND_B32_term: 2102fe6060f1SDimitry Andric // This is only a terminator to get the correct spill code placement during 2103fe6060f1SDimitry Andric // register allocation. 2104fe6060f1SDimitry Andric MI.setDesc(get(AMDGPU::S_AND_B32)); 2105fe6060f1SDimitry Andric break; 2106fe6060f1SDimitry Andric 210706c3fb27SDimitry Andric case AMDGPU::S_AND_SAVEEXEC_B64_term: 210806c3fb27SDimitry Andric // This is only a terminator to get the correct spill code placement during 210906c3fb27SDimitry Andric // register allocation. 211006c3fb27SDimitry Andric MI.setDesc(get(AMDGPU::S_AND_SAVEEXEC_B64)); 211106c3fb27SDimitry Andric break; 211206c3fb27SDimitry Andric 211306c3fb27SDimitry Andric case AMDGPU::S_AND_SAVEEXEC_B32_term: 211406c3fb27SDimitry Andric // This is only a terminator to get the correct spill code placement during 211506c3fb27SDimitry Andric // register allocation. 211606c3fb27SDimitry Andric MI.setDesc(get(AMDGPU::S_AND_SAVEEXEC_B32)); 211706c3fb27SDimitry Andric break; 211806c3fb27SDimitry Andric 2119*5f757f3fSDimitry Andric case AMDGPU::SI_SPILL_S32_TO_VGPR: 2120*5f757f3fSDimitry Andric MI.setDesc(get(AMDGPU::V_WRITELANE_B32)); 2121*5f757f3fSDimitry Andric break; 2122*5f757f3fSDimitry Andric 2123*5f757f3fSDimitry Andric case AMDGPU::SI_RESTORE_S32_FROM_VGPR: 2124*5f757f3fSDimitry Andric MI.setDesc(get(AMDGPU::V_READLANE_B32)); 2125*5f757f3fSDimitry Andric break; 2126*5f757f3fSDimitry Andric 21270b57cec5SDimitry Andric case AMDGPU::V_MOV_B64_PSEUDO: { 21288bcb0991SDimitry Andric Register Dst = MI.getOperand(0).getReg(); 21298bcb0991SDimitry Andric Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0); 21308bcb0991SDimitry Andric Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1); 21310b57cec5SDimitry Andric 21320b57cec5SDimitry Andric const MachineOperand &SrcOp = MI.getOperand(1); 21330b57cec5SDimitry Andric // FIXME: Will this work for 64-bit floating point immediates? 
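// Worked example (illustrative value) of the 64-bit split performed below
// when the immediate cannot be emitted as a single move: for
// Imm = 0x0000000100000002, Lo = Imm.getLoBits(32) = 0x00000002 is written
// to DstLo and Hi = Imm.getHiBits(32) = 0x00000001 to DstHi with two
// V_MOV_B32_e32 instructions.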
21340b57cec5SDimitry Andric assert(!SrcOp.isFPImm()); 213581ad6265SDimitry Andric if (ST.hasMovB64()) { 213681ad6265SDimitry Andric MI.setDesc(get(AMDGPU::V_MOV_B64_e32)); 2137bdd1243dSDimitry Andric if (SrcOp.isReg() || isInlineConstant(MI, 1) || 2138bdd1243dSDimitry Andric isUInt<32>(SrcOp.getImm())) 213981ad6265SDimitry Andric break; 214081ad6265SDimitry Andric } 21410b57cec5SDimitry Andric if (SrcOp.isImm()) { 21420b57cec5SDimitry Andric APInt Imm(64, SrcOp.getImm()); 2143fe6060f1SDimitry Andric APInt Lo(32, Imm.getLoBits(32).getZExtValue()); 2144fe6060f1SDimitry Andric APInt Hi(32, Imm.getHiBits(32).getZExtValue()); 2145*5f757f3fSDimitry Andric if (ST.hasPkMovB32() && Lo == Hi && isInlineConstant(Lo)) { 2146fe6060f1SDimitry Andric BuildMI(MBB, MI, DL, get(AMDGPU::V_PK_MOV_B32), Dst) 2147fe6060f1SDimitry Andric .addImm(SISrcMods::OP_SEL_1) 2148fe6060f1SDimitry Andric .addImm(Lo.getSExtValue()) 2149fe6060f1SDimitry Andric .addImm(SISrcMods::OP_SEL_1) 2150fe6060f1SDimitry Andric .addImm(Lo.getSExtValue()) 2151fe6060f1SDimitry Andric .addImm(0) // op_sel_lo 2152fe6060f1SDimitry Andric .addImm(0) // op_sel_hi 2153fe6060f1SDimitry Andric .addImm(0) // neg_lo 2154fe6060f1SDimitry Andric .addImm(0) // neg_hi 2155fe6060f1SDimitry Andric .addImm(0); // clamp 2156fe6060f1SDimitry Andric } else { 21570b57cec5SDimitry Andric BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo) 2158fe6060f1SDimitry Andric .addImm(Lo.getSExtValue()) 21590b57cec5SDimitry Andric .addReg(Dst, RegState::Implicit | RegState::Define); 21600b57cec5SDimitry Andric BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi) 2161fe6060f1SDimitry Andric .addImm(Hi.getSExtValue()) 21620b57cec5SDimitry Andric .addReg(Dst, RegState::Implicit | RegState::Define); 2163fe6060f1SDimitry Andric } 21640b57cec5SDimitry Andric } else { 21650b57cec5SDimitry Andric assert(SrcOp.isReg()); 2166*5f757f3fSDimitry Andric if (ST.hasPkMovB32() && 2167fe6060f1SDimitry Andric !RI.isAGPR(MBB.getParent()->getRegInfo(), SrcOp.getReg())) { 2168fe6060f1SDimitry Andric BuildMI(MBB, MI, DL, get(AMDGPU::V_PK_MOV_B32), Dst) 2169fe6060f1SDimitry Andric .addImm(SISrcMods::OP_SEL_1) // src0_mod 2170fe6060f1SDimitry Andric .addReg(SrcOp.getReg()) 2171fe6060f1SDimitry Andric .addImm(SISrcMods::OP_SEL_0 | SISrcMods::OP_SEL_1) // src1_mod 2172fe6060f1SDimitry Andric .addReg(SrcOp.getReg()) 2173fe6060f1SDimitry Andric .addImm(0) // op_sel_lo 2174fe6060f1SDimitry Andric .addImm(0) // op_sel_hi 2175fe6060f1SDimitry Andric .addImm(0) // neg_lo 2176fe6060f1SDimitry Andric .addImm(0) // neg_hi 2177fe6060f1SDimitry Andric .addImm(0); // clamp 2178fe6060f1SDimitry Andric } else { 21790b57cec5SDimitry Andric BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo) 21800b57cec5SDimitry Andric .addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub0)) 21810b57cec5SDimitry Andric .addReg(Dst, RegState::Implicit | RegState::Define); 21820b57cec5SDimitry Andric BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi) 21830b57cec5SDimitry Andric .addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub1)) 21840b57cec5SDimitry Andric .addReg(Dst, RegState::Implicit | RegState::Define); 21850b57cec5SDimitry Andric } 2186fe6060f1SDimitry Andric } 21870b57cec5SDimitry Andric MI.eraseFromParent(); 21880b57cec5SDimitry Andric break; 21890b57cec5SDimitry Andric } 21908bcb0991SDimitry Andric case AMDGPU::V_MOV_B64_DPP_PSEUDO: { 21918bcb0991SDimitry Andric expandMovDPP64(MI); 21928bcb0991SDimitry Andric break; 21938bcb0991SDimitry Andric } 2194fe6060f1SDimitry Andric case AMDGPU::S_MOV_B64_IMM_PSEUDO: { 
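// Sketch of the decision below, with an illustrative value: immediates whose
// high 32 bits are zero (Imm.isIntN(32)) or that are inline constants are
// kept as a single S_MOV_B64; a value such as 0x123456789ABCDEF0 is neither,
// so it is split into two S_MOV_B32 writes of sub0 and sub1.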
2195fe6060f1SDimitry Andric const MachineOperand &SrcOp = MI.getOperand(1); 2196fe6060f1SDimitry Andric assert(!SrcOp.isFPImm()); 2197fe6060f1SDimitry Andric APInt Imm(64, SrcOp.getImm()); 2198fe6060f1SDimitry Andric if (Imm.isIntN(32) || isInlineConstant(Imm)) { 2199fe6060f1SDimitry Andric MI.setDesc(get(AMDGPU::S_MOV_B64)); 2200fe6060f1SDimitry Andric break; 2201fe6060f1SDimitry Andric } 2202fe6060f1SDimitry Andric 2203fe6060f1SDimitry Andric Register Dst = MI.getOperand(0).getReg(); 2204fe6060f1SDimitry Andric Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0); 2205fe6060f1SDimitry Andric Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1); 2206fe6060f1SDimitry Andric 2207fe6060f1SDimitry Andric APInt Lo(32, Imm.getLoBits(32).getZExtValue()); 2208fe6060f1SDimitry Andric APInt Hi(32, Imm.getHiBits(32).getZExtValue()); 2209fe6060f1SDimitry Andric BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DstLo) 2210fe6060f1SDimitry Andric .addImm(Lo.getSExtValue()) 2211fe6060f1SDimitry Andric .addReg(Dst, RegState::Implicit | RegState::Define); 2212fe6060f1SDimitry Andric BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DstHi) 2213fe6060f1SDimitry Andric .addImm(Hi.getSExtValue()) 2214fe6060f1SDimitry Andric .addReg(Dst, RegState::Implicit | RegState::Define); 2215fe6060f1SDimitry Andric MI.eraseFromParent(); 2216fe6060f1SDimitry Andric break; 2217fe6060f1SDimitry Andric } 22180b57cec5SDimitry Andric case AMDGPU::V_SET_INACTIVE_B32: { 22190b57cec5SDimitry Andric unsigned NotOpc = ST.isWave32() ? AMDGPU::S_NOT_B32 : AMDGPU::S_NOT_B64; 22200b57cec5SDimitry Andric unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC; 222181ad6265SDimitry Andric // FIXME: We may possibly optimize the COPY once we find ways to make LLVM 222281ad6265SDimitry Andric // optimizations (mainly Register Coalescer) aware of WWM register liveness. 222381ad6265SDimitry Andric BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), MI.getOperand(0).getReg()) 222481ad6265SDimitry Andric .add(MI.getOperand(1)); 2225fe6060f1SDimitry Andric auto FirstNot = BuildMI(MBB, MI, DL, get(NotOpc), Exec).addReg(Exec); 2226fe6060f1SDimitry Andric FirstNot->addRegisterDead(AMDGPU::SCC, TRI); // SCC is overwritten 22270b57cec5SDimitry Andric BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), MI.getOperand(0).getReg()) 22280b57cec5SDimitry Andric .add(MI.getOperand(2)); 22290b57cec5SDimitry Andric BuildMI(MBB, MI, DL, get(NotOpc), Exec) 22300b57cec5SDimitry Andric .addReg(Exec); 22310b57cec5SDimitry Andric MI.eraseFromParent(); 22320b57cec5SDimitry Andric break; 22330b57cec5SDimitry Andric } 22340b57cec5SDimitry Andric case AMDGPU::V_SET_INACTIVE_B64: { 22350b57cec5SDimitry Andric unsigned NotOpc = ST.isWave32() ? AMDGPU::S_NOT_B32 : AMDGPU::S_NOT_B64; 22360b57cec5SDimitry Andric unsigned Exec = ST.isWave32() ? 
AMDGPU::EXEC_LO : AMDGPU::EXEC; 223781ad6265SDimitry Andric MachineInstr *Copy = BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B64_PSEUDO), 223881ad6265SDimitry Andric MI.getOperand(0).getReg()) 223981ad6265SDimitry Andric .add(MI.getOperand(1)); 224081ad6265SDimitry Andric expandPostRAPseudo(*Copy); 2241fe6060f1SDimitry Andric auto FirstNot = BuildMI(MBB, MI, DL, get(NotOpc), Exec).addReg(Exec); 2242fe6060f1SDimitry Andric FirstNot->addRegisterDead(AMDGPU::SCC, TRI); // SCC is overwritten 224381ad6265SDimitry Andric Copy = BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B64_PSEUDO), 22440b57cec5SDimitry Andric MI.getOperand(0).getReg()) 22450b57cec5SDimitry Andric .add(MI.getOperand(2)); 22460b57cec5SDimitry Andric expandPostRAPseudo(*Copy); 22470b57cec5SDimitry Andric BuildMI(MBB, MI, DL, get(NotOpc), Exec) 22480b57cec5SDimitry Andric .addReg(Exec); 22490b57cec5SDimitry Andric MI.eraseFromParent(); 22500b57cec5SDimitry Andric break; 22510b57cec5SDimitry Andric } 2252e8d8bef9SDimitry Andric case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1: 2253e8d8bef9SDimitry Andric case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2: 2254e8d8bef9SDimitry Andric case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3: 2255e8d8bef9SDimitry Andric case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4: 2256e8d8bef9SDimitry Andric case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5: 2257e8d8bef9SDimitry Andric case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8: 2258bdd1243dSDimitry Andric case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9: 2259bdd1243dSDimitry Andric case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10: 2260bdd1243dSDimitry Andric case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11: 2261bdd1243dSDimitry Andric case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12: 2262e8d8bef9SDimitry Andric case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16: 2263e8d8bef9SDimitry Andric case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32: 2264e8d8bef9SDimitry Andric case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1: 2265e8d8bef9SDimitry Andric case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2: 2266e8d8bef9SDimitry Andric case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3: 2267e8d8bef9SDimitry Andric case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4: 2268e8d8bef9SDimitry Andric case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5: 2269e8d8bef9SDimitry Andric case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8: 227006c3fb27SDimitry Andric case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9: 227106c3fb27SDimitry Andric case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10: 227206c3fb27SDimitry Andric case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11: 227306c3fb27SDimitry Andric case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12: 2274e8d8bef9SDimitry Andric case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16: 2275e8d8bef9SDimitry Andric case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32: 2276e8d8bef9SDimitry Andric case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1: 2277e8d8bef9SDimitry Andric case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2: 2278e8d8bef9SDimitry Andric case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4: 2279e8d8bef9SDimitry Andric case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8: 2280e8d8bef9SDimitry Andric case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16: { 22815ffd83dbSDimitry Andric const TargetRegisterClass *EltRC = getOpRegClass(MI, 2); 22825ffd83dbSDimitry Andric 22835ffd83dbSDimitry Andric unsigned Opc; 22845ffd83dbSDimitry Andric if (RI.hasVGPRs(EltRC)) { 2285e8d8bef9SDimitry Andric Opc = AMDGPU::V_MOVRELD_B32_e32; 22865ffd83dbSDimitry Andric } else { 2287e8d8bef9SDimitry Andric Opc = 
RI.getRegSizeInBits(*EltRC) == 64 ? AMDGPU::S_MOVRELD_B64 2288e8d8bef9SDimitry Andric : AMDGPU::S_MOVRELD_B32; 22895ffd83dbSDimitry Andric } 22905ffd83dbSDimitry Andric 22915ffd83dbSDimitry Andric const MCInstrDesc &OpDesc = get(Opc); 22928bcb0991SDimitry Andric Register VecReg = MI.getOperand(0).getReg(); 22930b57cec5SDimitry Andric bool IsUndef = MI.getOperand(1).isUndef(); 22945ffd83dbSDimitry Andric unsigned SubReg = MI.getOperand(3).getImm(); 22950b57cec5SDimitry Andric assert(VecReg == MI.getOperand(1).getReg()); 22960b57cec5SDimitry Andric 22975ffd83dbSDimitry Andric MachineInstrBuilder MIB = 22985ffd83dbSDimitry Andric BuildMI(MBB, MI, DL, OpDesc) 22990b57cec5SDimitry Andric .addReg(RI.getSubReg(VecReg, SubReg), RegState::Undef) 23000b57cec5SDimitry Andric .add(MI.getOperand(2)) 23010b57cec5SDimitry Andric .addReg(VecReg, RegState::ImplicitDefine) 23025ffd83dbSDimitry Andric .addReg(VecReg, RegState::Implicit | (IsUndef ? RegState::Undef : 0)); 23030b57cec5SDimitry Andric 23040b57cec5SDimitry Andric const int ImpDefIdx = 2305bdd1243dSDimitry Andric OpDesc.getNumOperands() + OpDesc.implicit_uses().size(); 23060b57cec5SDimitry Andric const int ImpUseIdx = ImpDefIdx + 1; 23075ffd83dbSDimitry Andric MIB->tieOperands(ImpDefIdx, ImpUseIdx); 23080b57cec5SDimitry Andric MI.eraseFromParent(); 23090b57cec5SDimitry Andric break; 23100b57cec5SDimitry Andric } 2311e8d8bef9SDimitry Andric case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1: 2312e8d8bef9SDimitry Andric case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2: 2313e8d8bef9SDimitry Andric case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3: 2314e8d8bef9SDimitry Andric case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4: 2315e8d8bef9SDimitry Andric case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5: 2316e8d8bef9SDimitry Andric case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8: 2317bdd1243dSDimitry Andric case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9: 2318bdd1243dSDimitry Andric case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10: 2319bdd1243dSDimitry Andric case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11: 2320bdd1243dSDimitry Andric case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12: 2321e8d8bef9SDimitry Andric case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16: 2322e8d8bef9SDimitry Andric case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32: { 2323e8d8bef9SDimitry Andric assert(ST.useVGPRIndexMode()); 2324e8d8bef9SDimitry Andric Register VecReg = MI.getOperand(0).getReg(); 2325e8d8bef9SDimitry Andric bool IsUndef = MI.getOperand(1).isUndef(); 2326e8d8bef9SDimitry Andric Register Idx = MI.getOperand(3).getReg(); 2327e8d8bef9SDimitry Andric Register SubReg = MI.getOperand(4).getImm(); 2328e8d8bef9SDimitry Andric 2329e8d8bef9SDimitry Andric MachineInstr *SetOn = BuildMI(MBB, MI, DL, get(AMDGPU::S_SET_GPR_IDX_ON)) 2330e8d8bef9SDimitry Andric .addReg(Idx) 2331e8d8bef9SDimitry Andric .addImm(AMDGPU::VGPRIndexMode::DST_ENABLE); 2332e8d8bef9SDimitry Andric SetOn->getOperand(3).setIsUndef(); 2333e8d8bef9SDimitry Andric 2334349cc55cSDimitry Andric const MCInstrDesc &OpDesc = get(AMDGPU::V_MOV_B32_indirect_write); 2335e8d8bef9SDimitry Andric MachineInstrBuilder MIB = 2336e8d8bef9SDimitry Andric BuildMI(MBB, MI, DL, OpDesc) 2337e8d8bef9SDimitry Andric .addReg(RI.getSubReg(VecReg, SubReg), RegState::Undef) 2338e8d8bef9SDimitry Andric .add(MI.getOperand(2)) 2339e8d8bef9SDimitry Andric .addReg(VecReg, RegState::ImplicitDefine) 2340e8d8bef9SDimitry Andric .addReg(VecReg, 2341e8d8bef9SDimitry Andric RegState::Implicit | (IsUndef ? 
RegState::Undef : 0)); 2342e8d8bef9SDimitry Andric 2343bdd1243dSDimitry Andric const int ImpDefIdx = 2344bdd1243dSDimitry Andric OpDesc.getNumOperands() + OpDesc.implicit_uses().size(); 2345e8d8bef9SDimitry Andric const int ImpUseIdx = ImpDefIdx + 1; 2346e8d8bef9SDimitry Andric MIB->tieOperands(ImpDefIdx, ImpUseIdx); 2347e8d8bef9SDimitry Andric 2348e8d8bef9SDimitry Andric MachineInstr *SetOff = BuildMI(MBB, MI, DL, get(AMDGPU::S_SET_GPR_IDX_OFF)); 2349e8d8bef9SDimitry Andric 2350e8d8bef9SDimitry Andric finalizeBundle(MBB, SetOn->getIterator(), std::next(SetOff->getIterator())); 2351e8d8bef9SDimitry Andric 2352e8d8bef9SDimitry Andric MI.eraseFromParent(); 2353e8d8bef9SDimitry Andric break; 2354e8d8bef9SDimitry Andric } 2355e8d8bef9SDimitry Andric case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1: 2356e8d8bef9SDimitry Andric case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2: 2357e8d8bef9SDimitry Andric case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3: 2358e8d8bef9SDimitry Andric case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4: 2359e8d8bef9SDimitry Andric case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5: 2360e8d8bef9SDimitry Andric case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8: 2361bdd1243dSDimitry Andric case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9: 2362bdd1243dSDimitry Andric case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10: 2363bdd1243dSDimitry Andric case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11: 2364bdd1243dSDimitry Andric case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12: 2365e8d8bef9SDimitry Andric case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16: 2366e8d8bef9SDimitry Andric case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32: { 2367e8d8bef9SDimitry Andric assert(ST.useVGPRIndexMode()); 2368e8d8bef9SDimitry Andric Register Dst = MI.getOperand(0).getReg(); 2369e8d8bef9SDimitry Andric Register VecReg = MI.getOperand(1).getReg(); 2370e8d8bef9SDimitry Andric bool IsUndef = MI.getOperand(1).isUndef(); 2371e8d8bef9SDimitry Andric Register Idx = MI.getOperand(2).getReg(); 2372e8d8bef9SDimitry Andric Register SubReg = MI.getOperand(3).getImm(); 2373e8d8bef9SDimitry Andric 2374e8d8bef9SDimitry Andric MachineInstr *SetOn = BuildMI(MBB, MI, DL, get(AMDGPU::S_SET_GPR_IDX_ON)) 2375e8d8bef9SDimitry Andric .addReg(Idx) 2376e8d8bef9SDimitry Andric .addImm(AMDGPU::VGPRIndexMode::SRC0_ENABLE); 2377e8d8bef9SDimitry Andric SetOn->getOperand(3).setIsUndef(); 2378e8d8bef9SDimitry Andric 2379349cc55cSDimitry Andric BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_indirect_read)) 2380e8d8bef9SDimitry Andric .addDef(Dst) 2381e8d8bef9SDimitry Andric .addReg(RI.getSubReg(VecReg, SubReg), RegState::Undef) 2382349cc55cSDimitry Andric .addReg(VecReg, RegState::Implicit | (IsUndef ? 
RegState::Undef : 0)); 2383e8d8bef9SDimitry Andric 2384e8d8bef9SDimitry Andric MachineInstr *SetOff = BuildMI(MBB, MI, DL, get(AMDGPU::S_SET_GPR_IDX_OFF)); 2385e8d8bef9SDimitry Andric 2386e8d8bef9SDimitry Andric finalizeBundle(MBB, SetOn->getIterator(), std::next(SetOff->getIterator())); 2387e8d8bef9SDimitry Andric 2388e8d8bef9SDimitry Andric MI.eraseFromParent(); 2389e8d8bef9SDimitry Andric break; 2390e8d8bef9SDimitry Andric } 23910b57cec5SDimitry Andric case AMDGPU::SI_PC_ADD_REL_OFFSET: { 23920b57cec5SDimitry Andric MachineFunction &MF = *MBB.getParent(); 23938bcb0991SDimitry Andric Register Reg = MI.getOperand(0).getReg(); 23948bcb0991SDimitry Andric Register RegLo = RI.getSubReg(Reg, AMDGPU::sub0); 23958bcb0991SDimitry Andric Register RegHi = RI.getSubReg(Reg, AMDGPU::sub1); 2396*5f757f3fSDimitry Andric MachineOperand OpLo = MI.getOperand(1); 2397*5f757f3fSDimitry Andric MachineOperand OpHi = MI.getOperand(2); 23980b57cec5SDimitry Andric 23990b57cec5SDimitry Andric // Create a bundle so these instructions won't be re-ordered by the 24000b57cec5SDimitry Andric // post-RA scheduler. 24010b57cec5SDimitry Andric MIBundleBuilder Bundler(MBB, MI); 24020b57cec5SDimitry Andric Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_GETPC_B64), Reg)); 24030b57cec5SDimitry Andric 2404*5f757f3fSDimitry Andric // What we want here is an offset from the value returned by s_getpc (which 2405*5f757f3fSDimitry Andric // is the address of the s_add_u32 instruction) to the global variable, but 2406*5f757f3fSDimitry Andric // since the encoding of $symbol starts 4 bytes after the start of the 2407*5f757f3fSDimitry Andric // s_add_u32 instruction, we end up with an offset that is 4 bytes too 2408*5f757f3fSDimitry Andric // small. This requires us to add 4 to the global variable offset in order 2409*5f757f3fSDimitry Andric // to compute the correct address. Similarly for the s_addc_u32 instruction, 2410*5f757f3fSDimitry Andric // the encoding of $symbol starts 12 bytes after the start of the s_add_u32 2411*5f757f3fSDimitry Andric // instruction. 24120b57cec5SDimitry Andric 2413*5f757f3fSDimitry Andric if (OpLo.isGlobal()) 2414*5f757f3fSDimitry Andric OpLo.setOffset(OpLo.getOffset() + 4); 2415*5f757f3fSDimitry Andric Bundler.append( 2416*5f757f3fSDimitry Andric BuildMI(MF, DL, get(AMDGPU::S_ADD_U32), RegLo).addReg(RegLo).add(OpLo)); 24170b57cec5SDimitry Andric 2418*5f757f3fSDimitry Andric if (OpHi.isGlobal()) 2419*5f757f3fSDimitry Andric OpHi.setOffset(OpHi.getOffset() + 12); 2420*5f757f3fSDimitry Andric Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_ADDC_U32), RegHi) 2421*5f757f3fSDimitry Andric .addReg(RegHi) 2422*5f757f3fSDimitry Andric .add(OpHi)); 2423*5f757f3fSDimitry Andric 24240b57cec5SDimitry Andric finalizeBundle(MBB, Bundler.begin()); 24250b57cec5SDimitry Andric 24260b57cec5SDimitry Andric MI.eraseFromParent(); 24270b57cec5SDimitry Andric break; 24280b57cec5SDimitry Andric } 2429fe6060f1SDimitry Andric case AMDGPU::ENTER_STRICT_WWM: { 24300b57cec5SDimitry Andric // This only gets its own opcode so that SIPreAllocateWWMRegs can tell when 2431fe6060f1SDimitry Andric // Whole Wave Mode is entered. 24320b57cec5SDimitry Andric MI.setDesc(get(ST.isWave32() ? 
AMDGPU::S_OR_SAVEEXEC_B32
24330b57cec5SDimitry Andric : AMDGPU::S_OR_SAVEEXEC_B64));
24340b57cec5SDimitry Andric break;
24350b57cec5SDimitry Andric }
2436fe6060f1SDimitry Andric case AMDGPU::ENTER_STRICT_WQM: {
24370b57cec5SDimitry Andric // This only gets its own opcode so that SIPreAllocateWWMRegs can tell when
2438fe6060f1SDimitry Andric // STRICT_WQM is entered.
2439fe6060f1SDimitry Andric const unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
2440fe6060f1SDimitry Andric const unsigned WQMOp = ST.isWave32() ? AMDGPU::S_WQM_B32 : AMDGPU::S_WQM_B64;
2441fe6060f1SDimitry Andric const unsigned MovOp = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
2442fe6060f1SDimitry Andric BuildMI(MBB, MI, DL, get(MovOp), MI.getOperand(0).getReg()).addReg(Exec);
2443fe6060f1SDimitry Andric BuildMI(MBB, MI, DL, get(WQMOp), Exec).addReg(Exec);
2444fe6060f1SDimitry Andric
2445fe6060f1SDimitry Andric MI.eraseFromParent();
2446fe6060f1SDimitry Andric break;
2447fe6060f1SDimitry Andric }
2448fe6060f1SDimitry Andric case AMDGPU::EXIT_STRICT_WWM:
2449fe6060f1SDimitry Andric case AMDGPU::EXIT_STRICT_WQM: {
2450fe6060f1SDimitry Andric // This only gets its own opcode so that SIPreAllocateWWMRegs can tell when
2451fe6060f1SDimitry Andric // WWM/STRICT_WQM is exited.
24520b57cec5SDimitry Andric MI.setDesc(get(ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64));
24530b57cec5SDimitry Andric break;
24540b57cec5SDimitry Andric }
2455bdd1243dSDimitry Andric case AMDGPU::ENTER_PSEUDO_WM:
2456bdd1243dSDimitry Andric case AMDGPU::EXIT_PSEUDO_WM: {
2457bdd1243dSDimitry Andric // These do nothing.
2458bdd1243dSDimitry Andric MI.eraseFromParent();
2459bdd1243dSDimitry Andric break;
2460bdd1243dSDimitry Andric }
246181ad6265SDimitry Andric case AMDGPU::SI_RETURN: {
246281ad6265SDimitry Andric const MachineFunction *MF = MBB.getParent();
246381ad6265SDimitry Andric const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
246481ad6265SDimitry Andric const SIRegisterInfo *TRI = ST.getRegisterInfo();
246581ad6265SDimitry Andric // Hiding the return address use with SI_RETURN may lead to extra kills in
246681ad6265SDimitry Andric // the function and missing live-ins. We are fine in practice because callee
246781ad6265SDimitry Andric // saved register handling ensures the register value is restored before
246881ad6265SDimitry Andric // RET, but we need the undef flag here to appease the MachineVerifier
246981ad6265SDimitry Andric // liveness checks.
247081ad6265SDimitry Andric MachineInstrBuilder MIB =
247181ad6265SDimitry Andric BuildMI(MBB, MI, DL, get(AMDGPU::S_SETPC_B64_return))
247281ad6265SDimitry Andric .addReg(TRI->getReturnAddressReg(*MF), RegState::Undef);
247381ad6265SDimitry Andric
247481ad6265SDimitry Andric MIB.copyImplicitOps(MI);
247581ad6265SDimitry Andric MI.eraseFromParent();
247681ad6265SDimitry Andric break;
247781ad6265SDimitry Andric }
24780b57cec5SDimitry Andric }
24790b57cec5SDimitry Andric return true;
24800b57cec5SDimitry Andric }
24810b57cec5SDimitry Andric
2482*5f757f3fSDimitry Andric void SIInstrInfo::reMaterialize(MachineBasicBlock &MBB,
2483*5f757f3fSDimitry Andric MachineBasicBlock::iterator I, Register DestReg,
2484*5f757f3fSDimitry Andric unsigned SubIdx, const MachineInstr &Orig,
2485*5f757f3fSDimitry Andric const TargetRegisterInfo &RI) const {
2486*5f757f3fSDimitry Andric
2487*5f757f3fSDimitry Andric // Try shrinking the instruction to remat only the part needed for current
2488*5f757f3fSDimitry Andric // context.
2489*5f757f3fSDimitry Andric // TODO: Handle more cases. 2490*5f757f3fSDimitry Andric unsigned Opcode = Orig.getOpcode(); 2491*5f757f3fSDimitry Andric switch (Opcode) { 2492*5f757f3fSDimitry Andric case AMDGPU::S_LOAD_DWORDX16_IMM: 2493*5f757f3fSDimitry Andric case AMDGPU::S_LOAD_DWORDX8_IMM: { 2494*5f757f3fSDimitry Andric if (SubIdx != 0) 2495*5f757f3fSDimitry Andric break; 2496*5f757f3fSDimitry Andric 2497*5f757f3fSDimitry Andric if (I == MBB.end()) 2498*5f757f3fSDimitry Andric break; 2499*5f757f3fSDimitry Andric 2500*5f757f3fSDimitry Andric if (I->isBundled()) 2501*5f757f3fSDimitry Andric break; 2502*5f757f3fSDimitry Andric 2503*5f757f3fSDimitry Andric // Look for a single use of the register that is also a subreg. 2504*5f757f3fSDimitry Andric Register RegToFind = Orig.getOperand(0).getReg(); 2505*5f757f3fSDimitry Andric MachineOperand *UseMO = nullptr; 2506*5f757f3fSDimitry Andric for (auto &CandMO : I->operands()) { 2507*5f757f3fSDimitry Andric if (!CandMO.isReg() || CandMO.getReg() != RegToFind || CandMO.isDef()) 2508*5f757f3fSDimitry Andric continue; 2509*5f757f3fSDimitry Andric if (UseMO) { 2510*5f757f3fSDimitry Andric UseMO = nullptr; 2511*5f757f3fSDimitry Andric break; 2512*5f757f3fSDimitry Andric } 2513*5f757f3fSDimitry Andric UseMO = &CandMO; 2514*5f757f3fSDimitry Andric } 2515*5f757f3fSDimitry Andric if (!UseMO || UseMO->getSubReg() == AMDGPU::NoSubRegister) 2516*5f757f3fSDimitry Andric break; 2517*5f757f3fSDimitry Andric 2518*5f757f3fSDimitry Andric unsigned Offset = RI.getSubRegIdxOffset(UseMO->getSubReg()); 2519*5f757f3fSDimitry Andric unsigned SubregSize = RI.getSubRegIdxSize(UseMO->getSubReg()); 2520*5f757f3fSDimitry Andric 2521*5f757f3fSDimitry Andric MachineFunction *MF = MBB.getParent(); 2522*5f757f3fSDimitry Andric MachineRegisterInfo &MRI = MF->getRegInfo(); 2523*5f757f3fSDimitry Andric assert(MRI.use_nodbg_empty(DestReg) && "DestReg should have no users yet."); 2524*5f757f3fSDimitry Andric 2525*5f757f3fSDimitry Andric unsigned NewOpcode = -1; 2526*5f757f3fSDimitry Andric if (SubregSize == 256) 2527*5f757f3fSDimitry Andric NewOpcode = AMDGPU::S_LOAD_DWORDX8_IMM; 2528*5f757f3fSDimitry Andric else if (SubregSize == 128) 2529*5f757f3fSDimitry Andric NewOpcode = AMDGPU::S_LOAD_DWORDX4_IMM; 2530*5f757f3fSDimitry Andric else 2531*5f757f3fSDimitry Andric break; 2532*5f757f3fSDimitry Andric 2533*5f757f3fSDimitry Andric const MCInstrDesc &TID = get(NewOpcode); 2534*5f757f3fSDimitry Andric const TargetRegisterClass *NewRC = 2535*5f757f3fSDimitry Andric RI.getAllocatableClass(getRegClass(TID, 0, &RI, *MF)); 2536*5f757f3fSDimitry Andric MRI.setRegClass(DestReg, NewRC); 2537*5f757f3fSDimitry Andric 2538*5f757f3fSDimitry Andric UseMO->setReg(DestReg); 2539*5f757f3fSDimitry Andric UseMO->setSubReg(AMDGPU::NoSubRegister); 2540*5f757f3fSDimitry Andric 2541*5f757f3fSDimitry Andric // Use a smaller load with the desired size, possibly with updated offset. 
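// Worked example (assumed sizes): if the only use of an S_LOAD_DWORDX16_IMM
// result reads a 256-bit subregister starting at bit offset 256, NewOpcode
// becomes S_LOAD_DWORDX8_IMM and the immediate offset below grows by
// Offset / 8 = 256 / 8 = 32 bytes.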
2542*5f757f3fSDimitry Andric MachineInstr *MI = MF->CloneMachineInstr(&Orig); 2543*5f757f3fSDimitry Andric MI->setDesc(TID); 2544*5f757f3fSDimitry Andric MI->getOperand(0).setReg(DestReg); 2545*5f757f3fSDimitry Andric MI->getOperand(0).setSubReg(AMDGPU::NoSubRegister); 2546*5f757f3fSDimitry Andric if (Offset) { 2547*5f757f3fSDimitry Andric MachineOperand *OffsetMO = getNamedOperand(*MI, AMDGPU::OpName::offset); 2548*5f757f3fSDimitry Andric int64_t FinalOffset = OffsetMO->getImm() + Offset / 8; 2549*5f757f3fSDimitry Andric OffsetMO->setImm(FinalOffset); 2550*5f757f3fSDimitry Andric } 2551*5f757f3fSDimitry Andric SmallVector<MachineMemOperand *> NewMMOs; 2552*5f757f3fSDimitry Andric for (const MachineMemOperand *MemOp : Orig.memoperands()) 2553*5f757f3fSDimitry Andric NewMMOs.push_back(MF->getMachineMemOperand(MemOp, MemOp->getPointerInfo(), 2554*5f757f3fSDimitry Andric SubregSize / 8)); 2555*5f757f3fSDimitry Andric MI->setMemRefs(*MF, NewMMOs); 2556*5f757f3fSDimitry Andric 2557*5f757f3fSDimitry Andric MBB.insert(I, MI); 2558*5f757f3fSDimitry Andric return; 2559*5f757f3fSDimitry Andric } 2560*5f757f3fSDimitry Andric 2561*5f757f3fSDimitry Andric default: 2562*5f757f3fSDimitry Andric break; 2563*5f757f3fSDimitry Andric } 2564*5f757f3fSDimitry Andric 2565*5f757f3fSDimitry Andric TargetInstrInfo::reMaterialize(MBB, I, DestReg, SubIdx, Orig, RI); 2566*5f757f3fSDimitry Andric } 2567*5f757f3fSDimitry Andric 25688bcb0991SDimitry Andric std::pair<MachineInstr*, MachineInstr*> 25698bcb0991SDimitry Andric SIInstrInfo::expandMovDPP64(MachineInstr &MI) const { 25708bcb0991SDimitry Andric assert (MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO); 25718bcb0991SDimitry Andric 257281ad6265SDimitry Andric if (ST.hasMovB64() && 2573*5f757f3fSDimitry Andric AMDGPU::isLegalDPALU_DPPControl( 257481ad6265SDimitry Andric getNamedOperand(MI, AMDGPU::OpName::dpp_ctrl)->getImm())) { 257581ad6265SDimitry Andric MI.setDesc(get(AMDGPU::V_MOV_B64_dpp)); 2576bdd1243dSDimitry Andric return std::pair(&MI, nullptr); 257781ad6265SDimitry Andric } 257881ad6265SDimitry Andric 25798bcb0991SDimitry Andric MachineBasicBlock &MBB = *MI.getParent(); 25808bcb0991SDimitry Andric DebugLoc DL = MBB.findDebugLoc(MI); 25818bcb0991SDimitry Andric MachineFunction *MF = MBB.getParent(); 25828bcb0991SDimitry Andric MachineRegisterInfo &MRI = MF->getRegInfo(); 25838bcb0991SDimitry Andric Register Dst = MI.getOperand(0).getReg(); 25848bcb0991SDimitry Andric unsigned Part = 0; 25858bcb0991SDimitry Andric MachineInstr *Split[2]; 25868bcb0991SDimitry Andric 25878bcb0991SDimitry Andric for (auto Sub : { AMDGPU::sub0, AMDGPU::sub1 }) { 25888bcb0991SDimitry Andric auto MovDPP = BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_dpp)); 25898bcb0991SDimitry Andric if (Dst.isPhysical()) { 25908bcb0991SDimitry Andric MovDPP.addDef(RI.getSubReg(Dst, Sub)); 25918bcb0991SDimitry Andric } else { 25928bcb0991SDimitry Andric assert(MRI.isSSA()); 25938bcb0991SDimitry Andric auto Tmp = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 25948bcb0991SDimitry Andric MovDPP.addDef(Tmp); 25958bcb0991SDimitry Andric } 25968bcb0991SDimitry Andric 25978bcb0991SDimitry Andric for (unsigned I = 1; I <= 2; ++I) { // old and src operands. 
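// Illustrative shape of the expansion this loop builds (operands assumed):
// a V_MOV_B64_DPP_PSEUDO over a virtual 64-bit register becomes two
// V_MOV_B32_dpp instructions, one per 32-bit half (sub0, then sub1),
// followed by a REG_SEQUENCE that reassembles the halves.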
25988bcb0991SDimitry Andric const MachineOperand &SrcOp = MI.getOperand(I); 25998bcb0991SDimitry Andric assert(!SrcOp.isFPImm()); 26008bcb0991SDimitry Andric if (SrcOp.isImm()) { 26018bcb0991SDimitry Andric APInt Imm(64, SrcOp.getImm()); 26028bcb0991SDimitry Andric Imm.ashrInPlace(Part * 32); 26038bcb0991SDimitry Andric MovDPP.addImm(Imm.getLoBits(32).getZExtValue()); 26048bcb0991SDimitry Andric } else { 26058bcb0991SDimitry Andric assert(SrcOp.isReg()); 26068bcb0991SDimitry Andric Register Src = SrcOp.getReg(); 26078bcb0991SDimitry Andric if (Src.isPhysical()) 26088bcb0991SDimitry Andric MovDPP.addReg(RI.getSubReg(Src, Sub)); 26098bcb0991SDimitry Andric else 26108bcb0991SDimitry Andric MovDPP.addReg(Src, SrcOp.isUndef() ? RegState::Undef : 0, Sub); 26118bcb0991SDimitry Andric } 26128bcb0991SDimitry Andric } 26138bcb0991SDimitry Andric 2614bdd1243dSDimitry Andric for (const MachineOperand &MO : llvm::drop_begin(MI.explicit_operands(), 3)) 2615bdd1243dSDimitry Andric MovDPP.addImm(MO.getImm()); 26168bcb0991SDimitry Andric 26178bcb0991SDimitry Andric Split[Part] = MovDPP; 26188bcb0991SDimitry Andric ++Part; 26198bcb0991SDimitry Andric } 26208bcb0991SDimitry Andric 26218bcb0991SDimitry Andric if (Dst.isVirtual()) 26228bcb0991SDimitry Andric BuildMI(MBB, MI, DL, get(AMDGPU::REG_SEQUENCE), Dst) 26238bcb0991SDimitry Andric .addReg(Split[0]->getOperand(0).getReg()) 26248bcb0991SDimitry Andric .addImm(AMDGPU::sub0) 26258bcb0991SDimitry Andric .addReg(Split[1]->getOperand(0).getReg()) 26268bcb0991SDimitry Andric .addImm(AMDGPU::sub1); 26278bcb0991SDimitry Andric 26288bcb0991SDimitry Andric MI.eraseFromParent(); 2629bdd1243dSDimitry Andric return std::pair(Split[0], Split[1]); 26308bcb0991SDimitry Andric } 26318bcb0991SDimitry Andric 2632*5f757f3fSDimitry Andric std::optional<DestSourcePair> 2633*5f757f3fSDimitry Andric SIInstrInfo::isCopyInstrImpl(const MachineInstr &MI) const { 2634*5f757f3fSDimitry Andric if (MI.getOpcode() == AMDGPU::WWM_COPY) 2635*5f757f3fSDimitry Andric return DestSourcePair{MI.getOperand(0), MI.getOperand(1)}; 2636*5f757f3fSDimitry Andric 2637*5f757f3fSDimitry Andric return std::nullopt; 2638*5f757f3fSDimitry Andric } 2639*5f757f3fSDimitry Andric 26400b57cec5SDimitry Andric bool SIInstrInfo::swapSourceModifiers(MachineInstr &MI, 26410b57cec5SDimitry Andric MachineOperand &Src0, 26420b57cec5SDimitry Andric unsigned Src0OpName, 26430b57cec5SDimitry Andric MachineOperand &Src1, 26440b57cec5SDimitry Andric unsigned Src1OpName) const { 26450b57cec5SDimitry Andric MachineOperand *Src0Mods = getNamedOperand(MI, Src0OpName); 26460b57cec5SDimitry Andric if (!Src0Mods) 26470b57cec5SDimitry Andric return false; 26480b57cec5SDimitry Andric 26490b57cec5SDimitry Andric MachineOperand *Src1Mods = getNamedOperand(MI, Src1OpName); 26500b57cec5SDimitry Andric assert(Src1Mods && 26510b57cec5SDimitry Andric "All commutable instructions have both src0 and src1 modifiers"); 26520b57cec5SDimitry Andric 26530b57cec5SDimitry Andric int Src0ModsVal = Src0Mods->getImm(); 26540b57cec5SDimitry Andric int Src1ModsVal = Src1Mods->getImm(); 26550b57cec5SDimitry Andric 26560b57cec5SDimitry Andric Src1Mods->setImm(Src0ModsVal); 26570b57cec5SDimitry Andric Src0Mods->setImm(Src1ModsVal); 26580b57cec5SDimitry Andric return true; 26590b57cec5SDimitry Andric } 26600b57cec5SDimitry Andric 26610b57cec5SDimitry Andric static MachineInstr *swapRegAndNonRegOperand(MachineInstr &MI, 26620b57cec5SDimitry Andric MachineOperand &RegOp, 26630b57cec5SDimitry Andric MachineOperand &NonRegOp) { 26648bcb0991SDimitry Andric 
Register Reg = RegOp.getReg(); 26650b57cec5SDimitry Andric unsigned SubReg = RegOp.getSubReg(); 26660b57cec5SDimitry Andric bool IsKill = RegOp.isKill(); 26670b57cec5SDimitry Andric bool IsDead = RegOp.isDead(); 26680b57cec5SDimitry Andric bool IsUndef = RegOp.isUndef(); 26690b57cec5SDimitry Andric bool IsDebug = RegOp.isDebug(); 26700b57cec5SDimitry Andric 26710b57cec5SDimitry Andric if (NonRegOp.isImm()) 26720b57cec5SDimitry Andric RegOp.ChangeToImmediate(NonRegOp.getImm()); 26730b57cec5SDimitry Andric else if (NonRegOp.isFI()) 26740b57cec5SDimitry Andric RegOp.ChangeToFrameIndex(NonRegOp.getIndex()); 26755ffd83dbSDimitry Andric else if (NonRegOp.isGlobal()) { 26765ffd83dbSDimitry Andric RegOp.ChangeToGA(NonRegOp.getGlobal(), NonRegOp.getOffset(), 26775ffd83dbSDimitry Andric NonRegOp.getTargetFlags()); 26785ffd83dbSDimitry Andric } else 26790b57cec5SDimitry Andric return nullptr; 26800b57cec5SDimitry Andric 26815ffd83dbSDimitry Andric // Make sure we don't reinterpret a subreg index in the target flags. 26825ffd83dbSDimitry Andric RegOp.setTargetFlags(NonRegOp.getTargetFlags()); 26835ffd83dbSDimitry Andric 26840b57cec5SDimitry Andric NonRegOp.ChangeToRegister(Reg, false, false, IsKill, IsDead, IsUndef, IsDebug); 26850b57cec5SDimitry Andric NonRegOp.setSubReg(SubReg); 26860b57cec5SDimitry Andric 26870b57cec5SDimitry Andric return &MI; 26880b57cec5SDimitry Andric } 26890b57cec5SDimitry Andric 26900b57cec5SDimitry Andric MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI, 26910b57cec5SDimitry Andric unsigned Src0Idx, 26920b57cec5SDimitry Andric unsigned Src1Idx) const { 26930b57cec5SDimitry Andric assert(!NewMI && "this should never be used"); 26940b57cec5SDimitry Andric 26950b57cec5SDimitry Andric unsigned Opc = MI.getOpcode(); 26960b57cec5SDimitry Andric int CommutedOpcode = commuteOpcode(Opc); 26970b57cec5SDimitry Andric if (CommutedOpcode == -1) 26980b57cec5SDimitry Andric return nullptr; 26990b57cec5SDimitry Andric 2700*5f757f3fSDimitry Andric if (Src0Idx > Src1Idx) 2701*5f757f3fSDimitry Andric std::swap(Src0Idx, Src1Idx); 2702*5f757f3fSDimitry Andric 27030b57cec5SDimitry Andric assert(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0) == 27040b57cec5SDimitry Andric static_cast<int>(Src0Idx) && 27050b57cec5SDimitry Andric AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1) == 27060b57cec5SDimitry Andric static_cast<int>(Src1Idx) && 27070b57cec5SDimitry Andric "inconsistency with findCommutedOpIndices"); 27080b57cec5SDimitry Andric 27090b57cec5SDimitry Andric MachineOperand &Src0 = MI.getOperand(Src0Idx); 27100b57cec5SDimitry Andric MachineOperand &Src1 = MI.getOperand(Src1Idx); 27110b57cec5SDimitry Andric 27120b57cec5SDimitry Andric MachineInstr *CommutedMI = nullptr; 27130b57cec5SDimitry Andric if (Src0.isReg() && Src1.isReg()) { 27140b57cec5SDimitry Andric if (isOperandLegal(MI, Src1Idx, &Src0)) { 27150b57cec5SDimitry Andric // Be sure to copy the source modifiers to the right place. 27160b57cec5SDimitry Andric CommutedMI 27170b57cec5SDimitry Andric = TargetInstrInfo::commuteInstructionImpl(MI, NewMI, Src0Idx, Src1Idx); 27180b57cec5SDimitry Andric } 27190b57cec5SDimitry Andric 27200b57cec5SDimitry Andric } else if (Src0.isReg() && !Src1.isReg()) { 27210b57cec5SDimitry Andric // src0 should always be able to support any operand type, so no need to 27220b57cec5SDimitry Andric // check operand legality. 
27230b57cec5SDimitry Andric CommutedMI = swapRegAndNonRegOperand(MI, Src0, Src1); 27240b57cec5SDimitry Andric } else if (!Src0.isReg() && Src1.isReg()) { 27250b57cec5SDimitry Andric if (isOperandLegal(MI, Src1Idx, &Src0)) 27260b57cec5SDimitry Andric CommutedMI = swapRegAndNonRegOperand(MI, Src1, Src0); 27270b57cec5SDimitry Andric } else { 27280b57cec5SDimitry Andric // FIXME: Found two non registers to commute. This does happen. 27290b57cec5SDimitry Andric return nullptr; 27300b57cec5SDimitry Andric } 27310b57cec5SDimitry Andric 27320b57cec5SDimitry Andric if (CommutedMI) { 27330b57cec5SDimitry Andric swapSourceModifiers(MI, Src0, AMDGPU::OpName::src0_modifiers, 27340b57cec5SDimitry Andric Src1, AMDGPU::OpName::src1_modifiers); 27350b57cec5SDimitry Andric 27360b57cec5SDimitry Andric CommutedMI->setDesc(get(CommutedOpcode)); 27370b57cec5SDimitry Andric } 27380b57cec5SDimitry Andric 27390b57cec5SDimitry Andric return CommutedMI; 27400b57cec5SDimitry Andric } 27410b57cec5SDimitry Andric 27420b57cec5SDimitry Andric // This needs to be implemented because the source modifiers may be inserted 27430b57cec5SDimitry Andric // between the true commutable operands, and the base 27440b57cec5SDimitry Andric // TargetInstrInfo::commuteInstruction uses it. 27458bcb0991SDimitry Andric bool SIInstrInfo::findCommutedOpIndices(const MachineInstr &MI, 27468bcb0991SDimitry Andric unsigned &SrcOpIdx0, 27470b57cec5SDimitry Andric unsigned &SrcOpIdx1) const { 27480b57cec5SDimitry Andric return findCommutedOpIndices(MI.getDesc(), SrcOpIdx0, SrcOpIdx1); 27490b57cec5SDimitry Andric } 27500b57cec5SDimitry Andric 2751bdd1243dSDimitry Andric bool SIInstrInfo::findCommutedOpIndices(const MCInstrDesc &Desc, 2752bdd1243dSDimitry Andric unsigned &SrcOpIdx0, 27530b57cec5SDimitry Andric unsigned &SrcOpIdx1) const { 27540b57cec5SDimitry Andric if (!Desc.isCommutable()) 27550b57cec5SDimitry Andric return false; 27560b57cec5SDimitry Andric 27570b57cec5SDimitry Andric unsigned Opc = Desc.getOpcode(); 27580b57cec5SDimitry Andric int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 27590b57cec5SDimitry Andric if (Src0Idx == -1) 27600b57cec5SDimitry Andric return false; 27610b57cec5SDimitry Andric 27620b57cec5SDimitry Andric int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1); 27630b57cec5SDimitry Andric if (Src1Idx == -1) 27640b57cec5SDimitry Andric return false; 27650b57cec5SDimitry Andric 27660b57cec5SDimitry Andric return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx); 27670b57cec5SDimitry Andric } 27680b57cec5SDimitry Andric 27690b57cec5SDimitry Andric bool SIInstrInfo::isBranchOffsetInRange(unsigned BranchOp, 27700b57cec5SDimitry Andric int64_t BrOffset) const { 27710b57cec5SDimitry Andric // BranchRelaxation should never have to check s_setpc_b64 because its dest 27720b57cec5SDimitry Andric // block is unanalyzable. 27730b57cec5SDimitry Andric assert(BranchOp != AMDGPU::S_SETPC_B64); 27740b57cec5SDimitry Andric 27750b57cec5SDimitry Andric // Convert to dwords. 27760b57cec5SDimitry Andric BrOffset /= 4; 27770b57cec5SDimitry Andric 27780b57cec5SDimitry Andric // The branch instructions do PC += signext(SIMM16 * 4) + 4, so the offset is 27790b57cec5SDimitry Andric // from the next instruction. 
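// Worked example (assumed numbers, default 16-bit branch offset field): a
// destination 0x20000 bytes away gives 0x20000 / 4 - 1 = 32767 dwords, which
// still fits a signed 16-bit immediate; 0x20004 bytes away gives 32768 and
// would require relaxation to a long branch.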
27800b57cec5SDimitry Andric BrOffset -= 1; 27810b57cec5SDimitry Andric 27820b57cec5SDimitry Andric return isIntN(BranchOffsetBits, BrOffset); 27830b57cec5SDimitry Andric } 27840b57cec5SDimitry Andric 2785*5f757f3fSDimitry Andric MachineBasicBlock * 2786*5f757f3fSDimitry Andric SIInstrInfo::getBranchDestBlock(const MachineInstr &MI) const { 27870b57cec5SDimitry Andric return MI.getOperand(0).getMBB(); 27880b57cec5SDimitry Andric } 27890b57cec5SDimitry Andric 2790bdd1243dSDimitry Andric bool SIInstrInfo::hasDivergentBranch(const MachineBasicBlock *MBB) const { 2791bdd1243dSDimitry Andric for (const MachineInstr &MI : MBB->terminators()) { 2792bdd1243dSDimitry Andric if (MI.getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO || 2793bdd1243dSDimitry Andric MI.getOpcode() == AMDGPU::SI_IF || MI.getOpcode() == AMDGPU::SI_ELSE || 2794bdd1243dSDimitry Andric MI.getOpcode() == AMDGPU::SI_LOOP) 2795bdd1243dSDimitry Andric return true; 2796bdd1243dSDimitry Andric } 2797bdd1243dSDimitry Andric return false; 2798bdd1243dSDimitry Andric } 2799bdd1243dSDimitry Andric 2800349cc55cSDimitry Andric void SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB, 28010b57cec5SDimitry Andric MachineBasicBlock &DestBB, 2802349cc55cSDimitry Andric MachineBasicBlock &RestoreBB, 2803349cc55cSDimitry Andric const DebugLoc &DL, int64_t BrOffset, 28040b57cec5SDimitry Andric RegScavenger *RS) const { 28050b57cec5SDimitry Andric assert(RS && "RegScavenger required for long branching"); 28060b57cec5SDimitry Andric assert(MBB.empty() && 28070b57cec5SDimitry Andric "new block should be inserted for expanding unconditional branch"); 28080b57cec5SDimitry Andric assert(MBB.pred_size() == 1); 2809349cc55cSDimitry Andric assert(RestoreBB.empty() && 2810349cc55cSDimitry Andric "restore block should be inserted for restoring clobbered registers"); 28110b57cec5SDimitry Andric 28120b57cec5SDimitry Andric MachineFunction *MF = MBB.getParent(); 28130b57cec5SDimitry Andric MachineRegisterInfo &MRI = MF->getRegInfo(); 281406c3fb27SDimitry Andric const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); 28150b57cec5SDimitry Andric 28160b57cec5SDimitry Andric // FIXME: Virtual register workaround for RegScavenger not working with empty 28170b57cec5SDimitry Andric // blocks. 28188bcb0991SDimitry Andric Register PCReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); 28190b57cec5SDimitry Andric 28200b57cec5SDimitry Andric auto I = MBB.end(); 28210b57cec5SDimitry Andric 28220b57cec5SDimitry Andric // We need to compute the offset relative to the instruction immediately after 28230b57cec5SDimitry Andric // s_getpc_b64. Insert pc arithmetic code before last terminator. 
28240b57cec5SDimitry Andric MachineInstr *GetPC = BuildMI(MBB, I, DL, get(AMDGPU::S_GETPC_B64), PCReg); 28250b57cec5SDimitry Andric 2826fe6060f1SDimitry Andric auto &MCCtx = MF->getContext(); 2827fe6060f1SDimitry Andric MCSymbol *PostGetPCLabel = 2828fe6060f1SDimitry Andric MCCtx.createTempSymbol("post_getpc", /*AlwaysAddSuffix=*/true); 2829fe6060f1SDimitry Andric GetPC->setPostInstrSymbol(*MF, PostGetPCLabel); 2830fe6060f1SDimitry Andric 2831fe6060f1SDimitry Andric MCSymbol *OffsetLo = 2832fe6060f1SDimitry Andric MCCtx.createTempSymbol("offset_lo", /*AlwaysAddSuffix=*/true); 2833fe6060f1SDimitry Andric MCSymbol *OffsetHi = 2834fe6060f1SDimitry Andric MCCtx.createTempSymbol("offset_hi", /*AlwaysAddSuffix=*/true); 28350b57cec5SDimitry Andric BuildMI(MBB, I, DL, get(AMDGPU::S_ADD_U32)) 28360b57cec5SDimitry Andric .addReg(PCReg, RegState::Define, AMDGPU::sub0) 28370b57cec5SDimitry Andric .addReg(PCReg, 0, AMDGPU::sub0) 2838fe6060f1SDimitry Andric .addSym(OffsetLo, MO_FAR_BRANCH_OFFSET); 28390b57cec5SDimitry Andric BuildMI(MBB, I, DL, get(AMDGPU::S_ADDC_U32)) 28400b57cec5SDimitry Andric .addReg(PCReg, RegState::Define, AMDGPU::sub1) 28410b57cec5SDimitry Andric .addReg(PCReg, 0, AMDGPU::sub1) 2842fe6060f1SDimitry Andric .addSym(OffsetHi, MO_FAR_BRANCH_OFFSET); 28430b57cec5SDimitry Andric 28440b57cec5SDimitry Andric // Insert the indirect branch after the other terminator. 28450b57cec5SDimitry Andric BuildMI(&MBB, DL, get(AMDGPU::S_SETPC_B64)) 28460b57cec5SDimitry Andric .addReg(PCReg); 28470b57cec5SDimitry Andric 28480b57cec5SDimitry Andric // If a spill is needed for the pc register pair, we need to insert a spill 28490b57cec5SDimitry Andric // restore block right before the destination block, and insert a short branch 28500b57cec5SDimitry Andric // into the old destination block's fallthrough predecessor. 28510b57cec5SDimitry Andric // e.g.: 28520b57cec5SDimitry Andric // 28530b57cec5SDimitry Andric // s_cbranch_scc0 skip_long_branch: 28540b57cec5SDimitry Andric // 28550b57cec5SDimitry Andric // long_branch_bb: 28560b57cec5SDimitry Andric // spill s[8:9] 28570b57cec5SDimitry Andric // s_getpc_b64 s[8:9] 28580b57cec5SDimitry Andric // s_add_u32 s8, s8, restore_bb 28590b57cec5SDimitry Andric // s_addc_u32 s9, s9, 0 28600b57cec5SDimitry Andric // s_setpc_b64 s[8:9] 28610b57cec5SDimitry Andric // 28620b57cec5SDimitry Andric // skip_long_branch: 28630b57cec5SDimitry Andric // foo; 28640b57cec5SDimitry Andric // 28650b57cec5SDimitry Andric // ..... 
28660b57cec5SDimitry Andric // 28670b57cec5SDimitry Andric // dest_bb_fallthrough_predecessor: 28680b57cec5SDimitry Andric // bar; 28690b57cec5SDimitry Andric // s_branch dest_bb 28700b57cec5SDimitry Andric // 28710b57cec5SDimitry Andric // restore_bb: 28720b57cec5SDimitry Andric // restore s[8:9] 28730b57cec5SDimitry Andric // fallthrough dest_bb 28740b57cec5SDimitry Andric /// 28750b57cec5SDimitry Andric // dest_bb: 28760b57cec5SDimitry Andric // buzz; 28770b57cec5SDimitry Andric 287806c3fb27SDimitry Andric Register LongBranchReservedReg = MFI->getLongBranchReservedReg(); 287906c3fb27SDimitry Andric Register Scav; 288006c3fb27SDimitry Andric 288106c3fb27SDimitry Andric // If we've previously reserved a register for long branches 288206c3fb27SDimitry Andric // avoid running the scavenger and just use those registers 288306c3fb27SDimitry Andric if (LongBranchReservedReg) { 288406c3fb27SDimitry Andric RS->enterBasicBlock(MBB); 288506c3fb27SDimitry Andric Scav = LongBranchReservedReg; 288606c3fb27SDimitry Andric } else { 28870b57cec5SDimitry Andric RS->enterBasicBlockEnd(MBB); 288806c3fb27SDimitry Andric Scav = RS->scavengeRegisterBackwards( 2889349cc55cSDimitry Andric AMDGPU::SReg_64RegClass, MachineBasicBlock::iterator(GetPC), 2890349cc55cSDimitry Andric /* RestoreAfter */ false, 0, /* AllowSpill */ false); 289106c3fb27SDimitry Andric } 2892349cc55cSDimitry Andric if (Scav) { 2893349cc55cSDimitry Andric RS->setRegUsed(Scav); 28940b57cec5SDimitry Andric MRI.replaceRegWith(PCReg, Scav); 28950b57cec5SDimitry Andric MRI.clearVirtRegs(); 2896349cc55cSDimitry Andric } else { 2897349cc55cSDimitry Andric // As SGPR needs VGPR to be spilled, we reuse the slot of temporary VGPR for 2898349cc55cSDimitry Andric // SGPR spill. 2899349cc55cSDimitry Andric const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>(); 2900349cc55cSDimitry Andric const SIRegisterInfo *TRI = ST.getRegisterInfo(); 2901349cc55cSDimitry Andric TRI->spillEmergencySGPR(GetPC, RestoreBB, AMDGPU::SGPR0_SGPR1, RS); 2902349cc55cSDimitry Andric MRI.replaceRegWith(PCReg, AMDGPU::SGPR0_SGPR1); 2903349cc55cSDimitry Andric MRI.clearVirtRegs(); 2904349cc55cSDimitry Andric } 29050b57cec5SDimitry Andric 2906349cc55cSDimitry Andric MCSymbol *DestLabel = Scav ? DestBB.getSymbol() : RestoreBB.getSymbol(); 2907fe6060f1SDimitry Andric // Now, the distance could be defined. 2908fe6060f1SDimitry Andric auto *Offset = MCBinaryExpr::createSub( 2909349cc55cSDimitry Andric MCSymbolRefExpr::create(DestLabel, MCCtx), 2910fe6060f1SDimitry Andric MCSymbolRefExpr::create(PostGetPCLabel, MCCtx), MCCtx); 2911fe6060f1SDimitry Andric // Add offset assignments. 
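// Worked example (assumed layout): if DestLabel resolves 0x1000 bytes after
// PostGetPCLabel, then Offset = 0x1000, OffsetLo is assigned
// 0x1000 & 0xFFFFFFFF = 0x1000, and OffsetHi is assigned 0x1000 >> 32 = 0,
// feeding the s_add_u32 / s_addc_u32 pair emitted above.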
2912fe6060f1SDimitry Andric auto *Mask = MCConstantExpr::create(0xFFFFFFFFULL, MCCtx); 2913fe6060f1SDimitry Andric OffsetLo->setVariableValue(MCBinaryExpr::createAnd(Offset, Mask, MCCtx)); 2914fe6060f1SDimitry Andric auto *ShAmt = MCConstantExpr::create(32, MCCtx); 2915fe6060f1SDimitry Andric OffsetHi->setVariableValue(MCBinaryExpr::createAShr(Offset, ShAmt, MCCtx)); 29160b57cec5SDimitry Andric } 29170b57cec5SDimitry Andric 29180b57cec5SDimitry Andric unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate Cond) { 29190b57cec5SDimitry Andric switch (Cond) { 29200b57cec5SDimitry Andric case SIInstrInfo::SCC_TRUE: 29210b57cec5SDimitry Andric return AMDGPU::S_CBRANCH_SCC1; 29220b57cec5SDimitry Andric case SIInstrInfo::SCC_FALSE: 29230b57cec5SDimitry Andric return AMDGPU::S_CBRANCH_SCC0; 29240b57cec5SDimitry Andric case SIInstrInfo::VCCNZ: 29250b57cec5SDimitry Andric return AMDGPU::S_CBRANCH_VCCNZ; 29260b57cec5SDimitry Andric case SIInstrInfo::VCCZ: 29270b57cec5SDimitry Andric return AMDGPU::S_CBRANCH_VCCZ; 29280b57cec5SDimitry Andric case SIInstrInfo::EXECNZ: 29290b57cec5SDimitry Andric return AMDGPU::S_CBRANCH_EXECNZ; 29300b57cec5SDimitry Andric case SIInstrInfo::EXECZ: 29310b57cec5SDimitry Andric return AMDGPU::S_CBRANCH_EXECZ; 29320b57cec5SDimitry Andric default: 29330b57cec5SDimitry Andric llvm_unreachable("invalid branch predicate"); 29340b57cec5SDimitry Andric } 29350b57cec5SDimitry Andric } 29360b57cec5SDimitry Andric 29370b57cec5SDimitry Andric SIInstrInfo::BranchPredicate SIInstrInfo::getBranchPredicate(unsigned Opcode) { 29380b57cec5SDimitry Andric switch (Opcode) { 29390b57cec5SDimitry Andric case AMDGPU::S_CBRANCH_SCC0: 29400b57cec5SDimitry Andric return SCC_FALSE; 29410b57cec5SDimitry Andric case AMDGPU::S_CBRANCH_SCC1: 29420b57cec5SDimitry Andric return SCC_TRUE; 29430b57cec5SDimitry Andric case AMDGPU::S_CBRANCH_VCCNZ: 29440b57cec5SDimitry Andric return VCCNZ; 29450b57cec5SDimitry Andric case AMDGPU::S_CBRANCH_VCCZ: 29460b57cec5SDimitry Andric return VCCZ; 29470b57cec5SDimitry Andric case AMDGPU::S_CBRANCH_EXECNZ: 29480b57cec5SDimitry Andric return EXECNZ; 29490b57cec5SDimitry Andric case AMDGPU::S_CBRANCH_EXECZ: 29500b57cec5SDimitry Andric return EXECZ; 29510b57cec5SDimitry Andric default: 29520b57cec5SDimitry Andric return INVALID_BR; 29530b57cec5SDimitry Andric } 29540b57cec5SDimitry Andric } 29550b57cec5SDimitry Andric 29560b57cec5SDimitry Andric bool SIInstrInfo::analyzeBranchImpl(MachineBasicBlock &MBB, 29570b57cec5SDimitry Andric MachineBasicBlock::iterator I, 29580b57cec5SDimitry Andric MachineBasicBlock *&TBB, 29590b57cec5SDimitry Andric MachineBasicBlock *&FBB, 29600b57cec5SDimitry Andric SmallVectorImpl<MachineOperand> &Cond, 29610b57cec5SDimitry Andric bool AllowModify) const { 29620b57cec5SDimitry Andric if (I->getOpcode() == AMDGPU::S_BRANCH) { 29630b57cec5SDimitry Andric // Unconditional Branch 29640b57cec5SDimitry Andric TBB = I->getOperand(0).getMBB(); 29650b57cec5SDimitry Andric return false; 29660b57cec5SDimitry Andric } 29670b57cec5SDimitry Andric 29680b57cec5SDimitry Andric MachineBasicBlock *CondBB = nullptr; 29690b57cec5SDimitry Andric 29700b57cec5SDimitry Andric if (I->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) { 29710b57cec5SDimitry Andric CondBB = I->getOperand(1).getMBB(); 29720b57cec5SDimitry Andric Cond.push_back(I->getOperand(0)); 29730b57cec5SDimitry Andric } else { 29740b57cec5SDimitry Andric BranchPredicate Pred = getBranchPredicate(I->getOpcode()); 29750b57cec5SDimitry Andric if (Pred == INVALID_BR) 29760b57cec5SDimitry 
Andric return true; 29770b57cec5SDimitry Andric 29780b57cec5SDimitry Andric CondBB = I->getOperand(0).getMBB(); 29790b57cec5SDimitry Andric Cond.push_back(MachineOperand::CreateImm(Pred)); 29800b57cec5SDimitry Andric Cond.push_back(I->getOperand(1)); // Save the branch register. 29810b57cec5SDimitry Andric } 29820b57cec5SDimitry Andric ++I; 29830b57cec5SDimitry Andric 29840b57cec5SDimitry Andric if (I == MBB.end()) { 29850b57cec5SDimitry Andric // Conditional branch followed by fall-through. 29860b57cec5SDimitry Andric TBB = CondBB; 29870b57cec5SDimitry Andric return false; 29880b57cec5SDimitry Andric } 29890b57cec5SDimitry Andric 29900b57cec5SDimitry Andric if (I->getOpcode() == AMDGPU::S_BRANCH) { 29910b57cec5SDimitry Andric TBB = CondBB; 29920b57cec5SDimitry Andric FBB = I->getOperand(0).getMBB(); 29930b57cec5SDimitry Andric return false; 29940b57cec5SDimitry Andric } 29950b57cec5SDimitry Andric 29960b57cec5SDimitry Andric return true; 29970b57cec5SDimitry Andric } 29980b57cec5SDimitry Andric 29990b57cec5SDimitry Andric bool SIInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, 30000b57cec5SDimitry Andric MachineBasicBlock *&FBB, 30010b57cec5SDimitry Andric SmallVectorImpl<MachineOperand> &Cond, 30020b57cec5SDimitry Andric bool AllowModify) const { 30030b57cec5SDimitry Andric MachineBasicBlock::iterator I = MBB.getFirstTerminator(); 30040b57cec5SDimitry Andric auto E = MBB.end(); 30050b57cec5SDimitry Andric if (I == E) 30060b57cec5SDimitry Andric return false; 30070b57cec5SDimitry Andric 30080b57cec5SDimitry Andric // Skip over the instructions that are artificially terminators for special 30090b57cec5SDimitry Andric // exec management. 3010fe6060f1SDimitry Andric while (I != E && !I->isBranch() && !I->isReturn()) { 30110b57cec5SDimitry Andric switch (I->getOpcode()) { 30120b57cec5SDimitry Andric case AMDGPU::S_MOV_B64_term: 30130b57cec5SDimitry Andric case AMDGPU::S_XOR_B64_term: 3014e8d8bef9SDimitry Andric case AMDGPU::S_OR_B64_term: 30150b57cec5SDimitry Andric case AMDGPU::S_ANDN2_B64_term: 3016fe6060f1SDimitry Andric case AMDGPU::S_AND_B64_term: 301706c3fb27SDimitry Andric case AMDGPU::S_AND_SAVEEXEC_B64_term: 30180b57cec5SDimitry Andric case AMDGPU::S_MOV_B32_term: 30190b57cec5SDimitry Andric case AMDGPU::S_XOR_B32_term: 30200b57cec5SDimitry Andric case AMDGPU::S_OR_B32_term: 30210b57cec5SDimitry Andric case AMDGPU::S_ANDN2_B32_term: 3022fe6060f1SDimitry Andric case AMDGPU::S_AND_B32_term: 302306c3fb27SDimitry Andric case AMDGPU::S_AND_SAVEEXEC_B32_term: 30240b57cec5SDimitry Andric break; 30250b57cec5SDimitry Andric case AMDGPU::SI_IF: 30260b57cec5SDimitry Andric case AMDGPU::SI_ELSE: 30270b57cec5SDimitry Andric case AMDGPU::SI_KILL_I1_TERMINATOR: 30280b57cec5SDimitry Andric case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR: 30290b57cec5SDimitry Andric // FIXME: It's messy that these need to be considered here at all. 
30300b57cec5SDimitry Andric return true; 30310b57cec5SDimitry Andric default: 30320b57cec5SDimitry Andric llvm_unreachable("unexpected non-branch terminator inst"); 30330b57cec5SDimitry Andric } 30340b57cec5SDimitry Andric 30350b57cec5SDimitry Andric ++I; 30360b57cec5SDimitry Andric } 30370b57cec5SDimitry Andric 30380b57cec5SDimitry Andric if (I == E) 30390b57cec5SDimitry Andric return false; 30400b57cec5SDimitry Andric 30410b57cec5SDimitry Andric return analyzeBranchImpl(MBB, I, TBB, FBB, Cond, AllowModify); 30420b57cec5SDimitry Andric } 30430b57cec5SDimitry Andric 30440b57cec5SDimitry Andric unsigned SIInstrInfo::removeBranch(MachineBasicBlock &MBB, 30450b57cec5SDimitry Andric int *BytesRemoved) const { 30460b57cec5SDimitry Andric unsigned Count = 0; 30470b57cec5SDimitry Andric unsigned RemovedSize = 0; 3048349cc55cSDimitry Andric for (MachineInstr &MI : llvm::make_early_inc_range(MBB.terminators())) { 3049349cc55cSDimitry Andric // Skip over artificial terminators when removing instructions. 3050349cc55cSDimitry Andric if (MI.isBranch() || MI.isReturn()) { 3051349cc55cSDimitry Andric RemovedSize += getInstSizeInBytes(MI); 3052349cc55cSDimitry Andric MI.eraseFromParent(); 30530b57cec5SDimitry Andric ++Count; 3054349cc55cSDimitry Andric } 30550b57cec5SDimitry Andric } 30560b57cec5SDimitry Andric 30570b57cec5SDimitry Andric if (BytesRemoved) 30580b57cec5SDimitry Andric *BytesRemoved = RemovedSize; 30590b57cec5SDimitry Andric 30600b57cec5SDimitry Andric return Count; 30610b57cec5SDimitry Andric } 30620b57cec5SDimitry Andric 30630b57cec5SDimitry Andric // Copy the flags onto the implicit condition register operand. 30640b57cec5SDimitry Andric static void preserveCondRegFlags(MachineOperand &CondReg, 30650b57cec5SDimitry Andric const MachineOperand &OrigCond) { 30660b57cec5SDimitry Andric CondReg.setIsUndef(OrigCond.isUndef()); 30670b57cec5SDimitry Andric CondReg.setIsKill(OrigCond.isKill()); 30680b57cec5SDimitry Andric } 30690b57cec5SDimitry Andric 30700b57cec5SDimitry Andric unsigned SIInstrInfo::insertBranch(MachineBasicBlock &MBB, 30710b57cec5SDimitry Andric MachineBasicBlock *TBB, 30720b57cec5SDimitry Andric MachineBasicBlock *FBB, 30730b57cec5SDimitry Andric ArrayRef<MachineOperand> Cond, 30740b57cec5SDimitry Andric const DebugLoc &DL, 30750b57cec5SDimitry Andric int *BytesAdded) const { 30760b57cec5SDimitry Andric if (!FBB && Cond.empty()) { 30770b57cec5SDimitry Andric BuildMI(&MBB, DL, get(AMDGPU::S_BRANCH)) 30780b57cec5SDimitry Andric .addMBB(TBB); 30790b57cec5SDimitry Andric if (BytesAdded) 3080e8d8bef9SDimitry Andric *BytesAdded = ST.hasOffset3fBug() ? 
8 : 4; 30810b57cec5SDimitry Andric return 1; 30820b57cec5SDimitry Andric } 30830b57cec5SDimitry Andric 30840b57cec5SDimitry Andric if(Cond.size() == 1 && Cond[0].isReg()) { 30850b57cec5SDimitry Andric BuildMI(&MBB, DL, get(AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO)) 30860b57cec5SDimitry Andric .add(Cond[0]) 30870b57cec5SDimitry Andric .addMBB(TBB); 30880b57cec5SDimitry Andric return 1; 30890b57cec5SDimitry Andric } 30900b57cec5SDimitry Andric 30910b57cec5SDimitry Andric assert(TBB && Cond[0].isImm()); 30920b57cec5SDimitry Andric 30930b57cec5SDimitry Andric unsigned Opcode 30940b57cec5SDimitry Andric = getBranchOpcode(static_cast<BranchPredicate>(Cond[0].getImm())); 30950b57cec5SDimitry Andric 30960b57cec5SDimitry Andric if (!FBB) { 30970b57cec5SDimitry Andric MachineInstr *CondBr = 30980b57cec5SDimitry Andric BuildMI(&MBB, DL, get(Opcode)) 30990b57cec5SDimitry Andric .addMBB(TBB); 31000b57cec5SDimitry Andric 31010b57cec5SDimitry Andric // Copy the flags onto the implicit condition register operand. 31020b57cec5SDimitry Andric preserveCondRegFlags(CondBr->getOperand(1), Cond[1]); 31035ffd83dbSDimitry Andric fixImplicitOperands(*CondBr); 31040b57cec5SDimitry Andric 31050b57cec5SDimitry Andric if (BytesAdded) 3106e8d8bef9SDimitry Andric *BytesAdded = ST.hasOffset3fBug() ? 8 : 4; 31070b57cec5SDimitry Andric return 1; 31080b57cec5SDimitry Andric } 31090b57cec5SDimitry Andric 31100b57cec5SDimitry Andric assert(TBB && FBB); 31110b57cec5SDimitry Andric 31120b57cec5SDimitry Andric MachineInstr *CondBr = 31130b57cec5SDimitry Andric BuildMI(&MBB, DL, get(Opcode)) 31140b57cec5SDimitry Andric .addMBB(TBB); 3115fe6060f1SDimitry Andric fixImplicitOperands(*CondBr); 31160b57cec5SDimitry Andric BuildMI(&MBB, DL, get(AMDGPU::S_BRANCH)) 31170b57cec5SDimitry Andric .addMBB(FBB); 31180b57cec5SDimitry Andric 31190b57cec5SDimitry Andric MachineOperand &CondReg = CondBr->getOperand(1); 31200b57cec5SDimitry Andric CondReg.setIsUndef(Cond[1].isUndef()); 31210b57cec5SDimitry Andric CondReg.setIsKill(Cond[1].isKill()); 31220b57cec5SDimitry Andric 31230b57cec5SDimitry Andric if (BytesAdded) 3124e8d8bef9SDimitry Andric *BytesAdded = ST.hasOffset3fBug() ? 
16 : 8; 31250b57cec5SDimitry Andric 31260b57cec5SDimitry Andric return 2; 31270b57cec5SDimitry Andric } 31280b57cec5SDimitry Andric 31290b57cec5SDimitry Andric bool SIInstrInfo::reverseBranchCondition( 31300b57cec5SDimitry Andric SmallVectorImpl<MachineOperand> &Cond) const { 31310b57cec5SDimitry Andric if (Cond.size() != 2) { 31320b57cec5SDimitry Andric return true; 31330b57cec5SDimitry Andric } 31340b57cec5SDimitry Andric 31350b57cec5SDimitry Andric if (Cond[0].isImm()) { 31360b57cec5SDimitry Andric Cond[0].setImm(-Cond[0].getImm()); 31370b57cec5SDimitry Andric return false; 31380b57cec5SDimitry Andric } 31390b57cec5SDimitry Andric 31400b57cec5SDimitry Andric return true; 31410b57cec5SDimitry Andric } 31420b57cec5SDimitry Andric 31430b57cec5SDimitry Andric bool SIInstrInfo::canInsertSelect(const MachineBasicBlock &MBB, 31440b57cec5SDimitry Andric ArrayRef<MachineOperand> Cond, 31455ffd83dbSDimitry Andric Register DstReg, Register TrueReg, 31465ffd83dbSDimitry Andric Register FalseReg, int &CondCycles, 31470b57cec5SDimitry Andric int &TrueCycles, int &FalseCycles) const { 31480b57cec5SDimitry Andric switch (Cond[0].getImm()) { 31490b57cec5SDimitry Andric case VCCNZ: 31500b57cec5SDimitry Andric case VCCZ: { 31510b57cec5SDimitry Andric const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 31520b57cec5SDimitry Andric const TargetRegisterClass *RC = MRI.getRegClass(TrueReg); 3153e8d8bef9SDimitry Andric if (MRI.getRegClass(FalseReg) != RC) 3154e8d8bef9SDimitry Andric return false; 31550b57cec5SDimitry Andric 315606c3fb27SDimitry Andric int NumInsts = AMDGPU::getRegBitWidth(*RC) / 32; 31570b57cec5SDimitry Andric CondCycles = TrueCycles = FalseCycles = NumInsts; // ??? 31580b57cec5SDimitry Andric 31590b57cec5SDimitry Andric // Limit to equal cost for branch vs. N v_cndmask_b32s. 31600b57cec5SDimitry Andric return RI.hasVGPRs(RC) && NumInsts <= 6; 31610b57cec5SDimitry Andric } 31620b57cec5SDimitry Andric case SCC_TRUE: 31630b57cec5SDimitry Andric case SCC_FALSE: { 31640b57cec5SDimitry Andric // FIXME: We could insert for VGPRs if we could replace the original compare 31650b57cec5SDimitry Andric // with a vector one. 31660b57cec5SDimitry Andric const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 31670b57cec5SDimitry Andric const TargetRegisterClass *RC = MRI.getRegClass(TrueReg); 3168e8d8bef9SDimitry Andric if (MRI.getRegClass(FalseReg) != RC) 3169e8d8bef9SDimitry Andric return false; 31700b57cec5SDimitry Andric 317106c3fb27SDimitry Andric int NumInsts = AMDGPU::getRegBitWidth(*RC) / 32; 31720b57cec5SDimitry Andric 31730b57cec5SDimitry Andric // Multiples of 8 can do s_cselect_b64 31740b57cec5SDimitry Andric if (NumInsts % 2 == 0) 31750b57cec5SDimitry Andric NumInsts /= 2; 31760b57cec5SDimitry Andric 31770b57cec5SDimitry Andric CondCycles = TrueCycles = FalseCycles = NumInsts; // ??? 
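// Worked example (illustrative, assuming a plain 128-bit SGPR class): the
// select starts as 128 / 32 = 4 pieces; 4 is even, so the count is halved to
// 2, i.e. the expectation is two s_cselect_b64s rather than four
// s_cselect_b32s.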
31780b57cec5SDimitry Andric return RI.isSGPRClass(RC); 31790b57cec5SDimitry Andric } 31800b57cec5SDimitry Andric default: 31810b57cec5SDimitry Andric return false; 31820b57cec5SDimitry Andric } 31830b57cec5SDimitry Andric } 31840b57cec5SDimitry Andric 31850b57cec5SDimitry Andric void SIInstrInfo::insertSelect(MachineBasicBlock &MBB, 31860b57cec5SDimitry Andric MachineBasicBlock::iterator I, const DebugLoc &DL, 31875ffd83dbSDimitry Andric Register DstReg, ArrayRef<MachineOperand> Cond, 31885ffd83dbSDimitry Andric Register TrueReg, Register FalseReg) const { 31890b57cec5SDimitry Andric BranchPredicate Pred = static_cast<BranchPredicate>(Cond[0].getImm()); 31900b57cec5SDimitry Andric if (Pred == VCCZ || Pred == SCC_FALSE) { 31910b57cec5SDimitry Andric Pred = static_cast<BranchPredicate>(-Pred); 31920b57cec5SDimitry Andric std::swap(TrueReg, FalseReg); 31930b57cec5SDimitry Andric } 31940b57cec5SDimitry Andric 31950b57cec5SDimitry Andric MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 31960b57cec5SDimitry Andric const TargetRegisterClass *DstRC = MRI.getRegClass(DstReg); 31970b57cec5SDimitry Andric unsigned DstSize = RI.getRegSizeInBits(*DstRC); 31980b57cec5SDimitry Andric 31990b57cec5SDimitry Andric if (DstSize == 32) { 32005ffd83dbSDimitry Andric MachineInstr *Select; 32015ffd83dbSDimitry Andric if (Pred == SCC_TRUE) { 32025ffd83dbSDimitry Andric Select = BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B32), DstReg) 32035ffd83dbSDimitry Andric .addReg(TrueReg) 32045ffd83dbSDimitry Andric .addReg(FalseReg); 32055ffd83dbSDimitry Andric } else { 32060b57cec5SDimitry Andric // Instruction's operands are backwards from what is expected. 32075ffd83dbSDimitry Andric Select = BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e32), DstReg) 32080b57cec5SDimitry Andric .addReg(FalseReg) 32090b57cec5SDimitry Andric .addReg(TrueReg); 32105ffd83dbSDimitry Andric } 32110b57cec5SDimitry Andric 32120b57cec5SDimitry Andric preserveCondRegFlags(Select->getOperand(3), Cond[1]); 32130b57cec5SDimitry Andric return; 32140b57cec5SDimitry Andric } 32150b57cec5SDimitry Andric 32160b57cec5SDimitry Andric if (DstSize == 64 && Pred == SCC_TRUE) { 32170b57cec5SDimitry Andric MachineInstr *Select = 32180b57cec5SDimitry Andric BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), DstReg) 32195ffd83dbSDimitry Andric .addReg(TrueReg) 32205ffd83dbSDimitry Andric .addReg(FalseReg); 32210b57cec5SDimitry Andric 32220b57cec5SDimitry Andric preserveCondRegFlags(Select->getOperand(3), Cond[1]); 32230b57cec5SDimitry Andric return; 32240b57cec5SDimitry Andric } 32250b57cec5SDimitry Andric 32260b57cec5SDimitry Andric static const int16_t Sub0_15[] = { 32270b57cec5SDimitry Andric AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, 32280b57cec5SDimitry Andric AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, 32290b57cec5SDimitry Andric AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11, 32300b57cec5SDimitry Andric AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15, 32310b57cec5SDimitry Andric }; 32320b57cec5SDimitry Andric 32330b57cec5SDimitry Andric static const int16_t Sub0_15_64[] = { 32340b57cec5SDimitry Andric AMDGPU::sub0_sub1, AMDGPU::sub2_sub3, 32350b57cec5SDimitry Andric AMDGPU::sub4_sub5, AMDGPU::sub6_sub7, 32360b57cec5SDimitry Andric AMDGPU::sub8_sub9, AMDGPU::sub10_sub11, 32370b57cec5SDimitry Andric AMDGPU::sub12_sub13, AMDGPU::sub14_sub15, 32380b57cec5SDimitry Andric }; 32390b57cec5SDimitry Andric 32400b57cec5SDimitry Andric unsigned SelOp = AMDGPU::V_CNDMASK_B32_e32; 32410b57cec5SDimitry Andric const 
TargetRegisterClass *EltRC = &AMDGPU::VGPR_32RegClass; 32420b57cec5SDimitry Andric const int16_t *SubIndices = Sub0_15; 32430b57cec5SDimitry Andric int NElts = DstSize / 32; 32440b57cec5SDimitry Andric 32450b57cec5SDimitry Andric // 64-bit select is only available for SALU. 32460b57cec5SDimitry Andric // TODO: Split 96-bit into 64-bit and 32-bit, not 3x 32-bit. 32470b57cec5SDimitry Andric if (Pred == SCC_TRUE) { 32480b57cec5SDimitry Andric if (NElts % 2) { 32490b57cec5SDimitry Andric SelOp = AMDGPU::S_CSELECT_B32; 32500b57cec5SDimitry Andric EltRC = &AMDGPU::SGPR_32RegClass; 32510b57cec5SDimitry Andric } else { 32520b57cec5SDimitry Andric SelOp = AMDGPU::S_CSELECT_B64; 32530b57cec5SDimitry Andric EltRC = &AMDGPU::SGPR_64RegClass; 32540b57cec5SDimitry Andric SubIndices = Sub0_15_64; 32550b57cec5SDimitry Andric NElts /= 2; 32560b57cec5SDimitry Andric } 32570b57cec5SDimitry Andric } 32580b57cec5SDimitry Andric 32590b57cec5SDimitry Andric MachineInstrBuilder MIB = BuildMI( 32600b57cec5SDimitry Andric MBB, I, DL, get(AMDGPU::REG_SEQUENCE), DstReg); 32610b57cec5SDimitry Andric 32620b57cec5SDimitry Andric I = MIB->getIterator(); 32630b57cec5SDimitry Andric 32645ffd83dbSDimitry Andric SmallVector<Register, 8> Regs; 32650b57cec5SDimitry Andric for (int Idx = 0; Idx != NElts; ++Idx) { 32668bcb0991SDimitry Andric Register DstElt = MRI.createVirtualRegister(EltRC); 32670b57cec5SDimitry Andric Regs.push_back(DstElt); 32680b57cec5SDimitry Andric 32690b57cec5SDimitry Andric unsigned SubIdx = SubIndices[Idx]; 32700b57cec5SDimitry Andric 32715ffd83dbSDimitry Andric MachineInstr *Select; 32725ffd83dbSDimitry Andric if (SelOp == AMDGPU::V_CNDMASK_B32_e32) { 32735ffd83dbSDimitry Andric Select = 32740b57cec5SDimitry Andric BuildMI(MBB, I, DL, get(SelOp), DstElt) 32750b57cec5SDimitry Andric .addReg(FalseReg, 0, SubIdx) 32760b57cec5SDimitry Andric .addReg(TrueReg, 0, SubIdx); 32775ffd83dbSDimitry Andric } else { 32785ffd83dbSDimitry Andric Select = 32795ffd83dbSDimitry Andric BuildMI(MBB, I, DL, get(SelOp), DstElt) 32805ffd83dbSDimitry Andric .addReg(TrueReg, 0, SubIdx) 32815ffd83dbSDimitry Andric .addReg(FalseReg, 0, SubIdx); 32825ffd83dbSDimitry Andric } 32835ffd83dbSDimitry Andric 32840b57cec5SDimitry Andric preserveCondRegFlags(Select->getOperand(3), Cond[1]); 32850b57cec5SDimitry Andric fixImplicitOperands(*Select); 32860b57cec5SDimitry Andric 32870b57cec5SDimitry Andric MIB.addReg(DstElt) 32880b57cec5SDimitry Andric .addImm(SubIdx); 32890b57cec5SDimitry Andric } 32900b57cec5SDimitry Andric } 32910b57cec5SDimitry Andric 3292349cc55cSDimitry Andric bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) { 32930b57cec5SDimitry Andric switch (MI.getOpcode()) { 32940b57cec5SDimitry Andric case AMDGPU::V_MOV_B32_e32: 32950b57cec5SDimitry Andric case AMDGPU::V_MOV_B32_e64: 3296349cc55cSDimitry Andric case AMDGPU::V_MOV_B64_PSEUDO: 329781ad6265SDimitry Andric case AMDGPU::V_MOV_B64_e32: 329881ad6265SDimitry Andric case AMDGPU::V_MOV_B64_e64: 32990b57cec5SDimitry Andric case AMDGPU::S_MOV_B32: 33000b57cec5SDimitry Andric case AMDGPU::S_MOV_B64: 3301*5f757f3fSDimitry Andric case AMDGPU::S_MOV_B64_IMM_PSEUDO: 33020b57cec5SDimitry Andric case AMDGPU::COPY: 3303*5f757f3fSDimitry Andric case AMDGPU::WWM_COPY: 3304e8d8bef9SDimitry Andric case AMDGPU::V_ACCVGPR_WRITE_B32_e64: 3305e8d8bef9SDimitry Andric case AMDGPU::V_ACCVGPR_READ_B32_e64: 3306fe6060f1SDimitry Andric case AMDGPU::V_ACCVGPR_MOV_B32: 33070b57cec5SDimitry Andric return true; 33080b57cec5SDimitry Andric default: 33090b57cec5SDimitry Andric return false; 
33100b57cec5SDimitry Andric } 33110b57cec5SDimitry Andric } 33120b57cec5SDimitry Andric 331381ad6265SDimitry Andric static constexpr unsigned ModifierOpNames[] = { 331481ad6265SDimitry Andric AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers, 331581ad6265SDimitry Andric AMDGPU::OpName::src2_modifiers, AMDGPU::OpName::clamp, 3316bdd1243dSDimitry Andric AMDGPU::OpName::omod, AMDGPU::OpName::op_sel}; 33170b57cec5SDimitry Andric 331881ad6265SDimitry Andric void SIInstrInfo::removeModOperands(MachineInstr &MI) const { 33190b57cec5SDimitry Andric unsigned Opc = MI.getOpcode(); 3320bdd1243dSDimitry Andric for (unsigned Name : reverse(ModifierOpNames)) { 3321bdd1243dSDimitry Andric int Idx = AMDGPU::getNamedOperandIdx(Opc, Name); 3322bdd1243dSDimitry Andric if (Idx >= 0) 3323bdd1243dSDimitry Andric MI.removeOperand(Idx); 3324bdd1243dSDimitry Andric } 33250b57cec5SDimitry Andric } 33260b57cec5SDimitry Andric 33270b57cec5SDimitry Andric bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, 33285ffd83dbSDimitry Andric Register Reg, MachineRegisterInfo *MRI) const { 33290b57cec5SDimitry Andric if (!MRI->hasOneNonDBGUse(Reg)) 33300b57cec5SDimitry Andric return false; 33310b57cec5SDimitry Andric 33320b57cec5SDimitry Andric switch (DefMI.getOpcode()) { 33330b57cec5SDimitry Andric default: 33340b57cec5SDimitry Andric return false; 3335*5f757f3fSDimitry Andric case AMDGPU::V_MOV_B64_e32: 33360b57cec5SDimitry Andric case AMDGPU::S_MOV_B64: 3337*5f757f3fSDimitry Andric case AMDGPU::V_MOV_B64_PSEUDO: 3338*5f757f3fSDimitry Andric case AMDGPU::S_MOV_B64_IMM_PSEUDO: 33390b57cec5SDimitry Andric case AMDGPU::V_MOV_B32_e32: 33400b57cec5SDimitry Andric case AMDGPU::S_MOV_B32: 3341e8d8bef9SDimitry Andric case AMDGPU::V_ACCVGPR_WRITE_B32_e64: 33420b57cec5SDimitry Andric break; 33430b57cec5SDimitry Andric } 33440b57cec5SDimitry Andric 33450b57cec5SDimitry Andric const MachineOperand *ImmOp = getNamedOperand(DefMI, AMDGPU::OpName::src0); 33460b57cec5SDimitry Andric assert(ImmOp); 33470b57cec5SDimitry Andric // FIXME: We could handle FrameIndex values here. 
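// Informal example of the subregister handling further below (the immediate
// value is made up): if the def provides a 64-bit immediate such as
// 0xAAAABBBBCCCCDDDD and the single use only reads half of it, the matching
// piece is folded, e.g. 0xCCCCDDDD for a sub0 use and 0xAAAABBBB for a sub1
// use.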
33480b57cec5SDimitry Andric if (!ImmOp->isImm()) 33490b57cec5SDimitry Andric return false; 33500b57cec5SDimitry Andric 3351*5f757f3fSDimitry Andric auto getImmFor = [ImmOp](const MachineOperand &UseOp) -> int64_t { 3352*5f757f3fSDimitry Andric int64_t Imm = ImmOp->getImm(); 3353*5f757f3fSDimitry Andric switch (UseOp.getSubReg()) { 3354*5f757f3fSDimitry Andric default: 3355*5f757f3fSDimitry Andric return Imm; 3356*5f757f3fSDimitry Andric case AMDGPU::sub0: 3357*5f757f3fSDimitry Andric return Lo_32(Imm); 3358*5f757f3fSDimitry Andric case AMDGPU::sub1: 3359*5f757f3fSDimitry Andric return Hi_32(Imm); 3360*5f757f3fSDimitry Andric case AMDGPU::lo16: 3361*5f757f3fSDimitry Andric return APInt(16, Imm).getSExtValue(); 3362*5f757f3fSDimitry Andric case AMDGPU::hi16: 3363*5f757f3fSDimitry Andric return APInt(32, Imm).ashr(16).getSExtValue(); 3364*5f757f3fSDimitry Andric case AMDGPU::sub1_lo16: 3365*5f757f3fSDimitry Andric return APInt(16, Hi_32(Imm)).getSExtValue(); 3366*5f757f3fSDimitry Andric case AMDGPU::sub1_hi16: 3367*5f757f3fSDimitry Andric return APInt(32, Hi_32(Imm)).ashr(16).getSExtValue(); 3368*5f757f3fSDimitry Andric } 3369*5f757f3fSDimitry Andric }; 3370*5f757f3fSDimitry Andric 3371*5f757f3fSDimitry Andric assert(!DefMI.getOperand(0).getSubReg() && "Expected SSA form"); 3372*5f757f3fSDimitry Andric 33730b57cec5SDimitry Andric unsigned Opc = UseMI.getOpcode(); 33740b57cec5SDimitry Andric if (Opc == AMDGPU::COPY) { 3375*5f757f3fSDimitry Andric assert(!UseMI.getOperand(0).getSubReg() && "Expected SSA form"); 33765ffd83dbSDimitry Andric 3377*5f757f3fSDimitry Andric Register DstReg = UseMI.getOperand(0).getReg(); 3378*5f757f3fSDimitry Andric unsigned OpSize = getOpSize(UseMI, 0); 3379*5f757f3fSDimitry Andric bool Is16Bit = OpSize == 2; 3380*5f757f3fSDimitry Andric bool Is64Bit = OpSize == 8; 3381*5f757f3fSDimitry Andric bool isVGPRCopy = RI.isVGPR(*MRI, DstReg); 3382*5f757f3fSDimitry Andric unsigned NewOpc = isVGPRCopy ? Is64Bit ? AMDGPU::V_MOV_B64_PSEUDO 3383*5f757f3fSDimitry Andric : AMDGPU::V_MOV_B32_e32 3384*5f757f3fSDimitry Andric : Is64Bit ? AMDGPU::S_MOV_B64_IMM_PSEUDO 3385*5f757f3fSDimitry Andric : AMDGPU::S_MOV_B32; 3386*5f757f3fSDimitry Andric APInt Imm(Is64Bit ? 
64 : 32, getImmFor(UseMI.getOperand(1))); 33875ffd83dbSDimitry Andric 33885ffd83dbSDimitry Andric if (RI.isAGPR(*MRI, DstReg)) { 3389*5f757f3fSDimitry Andric if (Is64Bit || !isInlineConstant(Imm)) 33900b57cec5SDimitry Andric return false; 3391e8d8bef9SDimitry Andric NewOpc = AMDGPU::V_ACCVGPR_WRITE_B32_e64; 33920b57cec5SDimitry Andric } 33935ffd83dbSDimitry Andric 33945ffd83dbSDimitry Andric if (Is16Bit) { 33955ffd83dbSDimitry Andric if (isVGPRCopy) 33965ffd83dbSDimitry Andric return false; // Do not clobber vgpr_hi16 33975ffd83dbSDimitry Andric 33984824e7fdSDimitry Andric if (DstReg.isVirtual() && UseMI.getOperand(0).getSubReg() != AMDGPU::lo16) 33995ffd83dbSDimitry Andric return false; 34005ffd83dbSDimitry Andric 34015ffd83dbSDimitry Andric UseMI.getOperand(0).setSubReg(0); 34025ffd83dbSDimitry Andric if (DstReg.isPhysical()) { 34035ffd83dbSDimitry Andric DstReg = RI.get32BitRegister(DstReg); 34045ffd83dbSDimitry Andric UseMI.getOperand(0).setReg(DstReg); 34055ffd83dbSDimitry Andric } 34065ffd83dbSDimitry Andric assert(UseMI.getOperand(1).getReg().isVirtual()); 34075ffd83dbSDimitry Andric } 34085ffd83dbSDimitry Andric 340906c3fb27SDimitry Andric const MCInstrDesc &NewMCID = get(NewOpc); 341006c3fb27SDimitry Andric if (DstReg.isPhysical() && 341106c3fb27SDimitry Andric !RI.getRegClass(NewMCID.operands()[0].RegClass)->contains(DstReg)) 341206c3fb27SDimitry Andric return false; 341306c3fb27SDimitry Andric 341406c3fb27SDimitry Andric UseMI.setDesc(NewMCID); 34155ffd83dbSDimitry Andric UseMI.getOperand(1).ChangeToImmediate(Imm.getSExtValue()); 34160b57cec5SDimitry Andric UseMI.addImplicitDefUseOperands(*UseMI.getParent()->getParent()); 34170b57cec5SDimitry Andric return true; 34180b57cec5SDimitry Andric } 34190b57cec5SDimitry Andric 3420e8d8bef9SDimitry Andric if (Opc == AMDGPU::V_MAD_F32_e64 || Opc == AMDGPU::V_MAC_F32_e64 || 3421e8d8bef9SDimitry Andric Opc == AMDGPU::V_MAD_F16_e64 || Opc == AMDGPU::V_MAC_F16_e64 || 3422e8d8bef9SDimitry Andric Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 || 3423bdd1243dSDimitry Andric Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 || 3424bdd1243dSDimitry Andric Opc == AMDGPU::V_FMAC_F16_t16_e64) { 34250b57cec5SDimitry Andric // Don't fold if we are using source or output modifiers. The new VOP2 34260b57cec5SDimitry Andric // instructions don't have them. 34270b57cec5SDimitry Andric if (hasAnyModifiersSet(UseMI)) 34280b57cec5SDimitry Andric return false; 34290b57cec5SDimitry Andric 34300b57cec5SDimitry Andric // If this is a free constant, there's no reason to do this. 34310b57cec5SDimitry Andric // TODO: We could fold this here instead of letting SIFoldOperands do it 34320b57cec5SDimitry Andric // later. 34330b57cec5SDimitry Andric MachineOperand *Src0 = getNamedOperand(UseMI, AMDGPU::OpName::src0); 34340b57cec5SDimitry Andric 34350b57cec5SDimitry Andric // Any src operand can be used for the legality check. 
34360b57cec5SDimitry Andric if (isInlineConstant(UseMI, *Src0, *ImmOp)) 34370b57cec5SDimitry Andric return false; 34380b57cec5SDimitry Andric 3439e8d8bef9SDimitry Andric bool IsF32 = Opc == AMDGPU::V_MAD_F32_e64 || Opc == AMDGPU::V_MAC_F32_e64 || 3440e8d8bef9SDimitry Andric Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64; 3441bdd1243dSDimitry Andric bool IsFMA = 3442bdd1243dSDimitry Andric Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 || 3443bdd1243dSDimitry Andric Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 || 3444bdd1243dSDimitry Andric Opc == AMDGPU::V_FMAC_F16_t16_e64; 34450b57cec5SDimitry Andric MachineOperand *Src1 = getNamedOperand(UseMI, AMDGPU::OpName::src1); 34460b57cec5SDimitry Andric MachineOperand *Src2 = getNamedOperand(UseMI, AMDGPU::OpName::src2); 34470b57cec5SDimitry Andric 34480b57cec5SDimitry Andric // Multiplied part is the constant: Use v_madmk_{f16, f32}. 3449*5f757f3fSDimitry Andric if ((Src0->isReg() && Src0->getReg() == Reg) || 3450*5f757f3fSDimitry Andric (Src1->isReg() && Src1->getReg() == Reg)) { 3451*5f757f3fSDimitry Andric MachineOperand *RegSrc = 3452*5f757f3fSDimitry Andric Src1->isReg() && Src1->getReg() == Reg ? Src0 : Src1; 3453*5f757f3fSDimitry Andric if (!RegSrc->isReg()) 3454*5f757f3fSDimitry Andric return false; 3455*5f757f3fSDimitry Andric if (RI.isSGPRClass(MRI->getRegClass(RegSrc->getReg())) && 3456*5f757f3fSDimitry Andric ST.getConstantBusLimit(Opc) < 2) 34570b57cec5SDimitry Andric return false; 34580b57cec5SDimitry Andric 34590b57cec5SDimitry Andric if (!Src2->isReg() || RI.isSGPRClass(MRI->getRegClass(Src2->getReg()))) 34600b57cec5SDimitry Andric return false; 34610b57cec5SDimitry Andric 3462*5f757f3fSDimitry Andric // If src2 is also a literal constant then we have to choose which one to 3463*5f757f3fSDimitry Andric // fold. In general it is better to choose madak so that the other literal 3464*5f757f3fSDimitry Andric // can be materialized in an sgpr instead of a vgpr: 3465*5f757f3fSDimitry Andric // s_mov_b32 s0, literal 3466*5f757f3fSDimitry Andric // v_madak_f32 v0, s0, v0, literal 3467*5f757f3fSDimitry Andric // Instead of: 3468*5f757f3fSDimitry Andric // v_mov_b32 v1, literal 3469*5f757f3fSDimitry Andric // v_madmk_f32 v0, v0, literal, v1 3470*5f757f3fSDimitry Andric MachineInstr *Def = MRI->getUniqueVRegDef(Src2->getReg()); 3471*5f757f3fSDimitry Andric if (Def && Def->isMoveImmediate() && 3472*5f757f3fSDimitry Andric !isInlineConstant(Def->getOperand(1))) 3473*5f757f3fSDimitry Andric return false; 3474*5f757f3fSDimitry Andric 34750b57cec5SDimitry Andric unsigned NewOpc = 3476bdd1243dSDimitry Andric IsFMA ? (IsF32 ? AMDGPU::V_FMAMK_F32 3477bdd1243dSDimitry Andric : ST.hasTrue16BitInsts() ? AMDGPU::V_FMAMK_F16_t16 3478bdd1243dSDimitry Andric : AMDGPU::V_FMAMK_F16) 34790b57cec5SDimitry Andric : (IsF32 ? AMDGPU::V_MADMK_F32 : AMDGPU::V_MADMK_F16); 34800b57cec5SDimitry Andric if (pseudoToMCOpcode(NewOpc) == -1) 34810b57cec5SDimitry Andric return false; 34820b57cec5SDimitry Andric 3483*5f757f3fSDimitry Andric // V_FMAMK_F16_t16 takes VGPR_32_Lo128 operands, so the rewrite 3484*5f757f3fSDimitry Andric // would also require restricting their register classes. For now 3485*5f757f3fSDimitry Andric // just bail out. 3486*5f757f3fSDimitry Andric if (NewOpc == AMDGPU::V_FMAMK_F16_t16) 3487*5f757f3fSDimitry Andric return false; 34880b57cec5SDimitry Andric 3489*5f757f3fSDimitry Andric const int64_t Imm = getImmFor(RegSrc == Src1 ? 
*Src0 : *Src1); 34900b57cec5SDimitry Andric 34910b57cec5SDimitry Andric // FIXME: This would be a lot easier if we could return a new instruction 34920b57cec5SDimitry Andric // instead of having to modify in place. 34930b57cec5SDimitry Andric 3494*5f757f3fSDimitry Andric Register SrcReg = RegSrc->getReg(); 3495*5f757f3fSDimitry Andric unsigned SrcSubReg = RegSrc->getSubReg(); 3496*5f757f3fSDimitry Andric Src0->setReg(SrcReg); 3497*5f757f3fSDimitry Andric Src0->setSubReg(SrcSubReg); 3498*5f757f3fSDimitry Andric Src0->setIsKill(RegSrc->isKill()); 34990b57cec5SDimitry Andric 3500bdd1243dSDimitry Andric if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 || 3501bdd1243dSDimitry Andric Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 || 35020b57cec5SDimitry Andric Opc == AMDGPU::V_FMAC_F16_e64) 35030b57cec5SDimitry Andric UseMI.untieRegOperand( 35040b57cec5SDimitry Andric AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)); 35050b57cec5SDimitry Andric 35060b57cec5SDimitry Andric Src1->ChangeToImmediate(Imm); 35070b57cec5SDimitry Andric 35080b57cec5SDimitry Andric removeModOperands(UseMI); 35090b57cec5SDimitry Andric UseMI.setDesc(get(NewOpc)); 35100b57cec5SDimitry Andric 351181ad6265SDimitry Andric bool DeleteDef = MRI->use_nodbg_empty(Reg); 35120b57cec5SDimitry Andric if (DeleteDef) 35130b57cec5SDimitry Andric DefMI.eraseFromParent(); 35140b57cec5SDimitry Andric 35150b57cec5SDimitry Andric return true; 35160b57cec5SDimitry Andric } 35170b57cec5SDimitry Andric 35180b57cec5SDimitry Andric // Added part is the constant: Use v_madak_{f16, f32}. 35190b57cec5SDimitry Andric if (Src2->isReg() && Src2->getReg() == Reg) { 3520*5f757f3fSDimitry Andric if (ST.getConstantBusLimit(Opc) < 2) { 35210b57cec5SDimitry Andric // Not allowed to use constant bus for another operand. 35220b57cec5SDimitry Andric // We can however allow an inline immediate as src0. 35230b57cec5SDimitry Andric bool Src0Inlined = false; 35240b57cec5SDimitry Andric if (Src0->isReg()) { 35250b57cec5SDimitry Andric // Try to inline constant if possible. 35260b57cec5SDimitry Andric // If the Def moves immediate and the use is single 35270b57cec5SDimitry Andric // We are saving VGPR here. 
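// For instance (made-up registers/values, mirroring the madak/madmk comment
// earlier): if src0 comes from "s_mov_b32 s0, 64" and that def has a single
// use, 64 is an inline constant, so it can be folded directly into src0 and
// the resulting v_madak only needs the one literal in its added operand.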
35280b57cec5SDimitry Andric MachineInstr *Def = MRI->getUniqueVRegDef(Src0->getReg()); 35290b57cec5SDimitry Andric if (Def && Def->isMoveImmediate() && 35300b57cec5SDimitry Andric isInlineConstant(Def->getOperand(1)) && 35310b57cec5SDimitry Andric MRI->hasOneUse(Src0->getReg())) { 35320b57cec5SDimitry Andric Src0->ChangeToImmediate(Def->getOperand(1).getImm()); 35330b57cec5SDimitry Andric Src0Inlined = true; 3534*5f757f3fSDimitry Andric } else if (ST.getConstantBusLimit(Opc) <= 1 && 3535*5f757f3fSDimitry Andric RI.isSGPRReg(*MRI, Src0->getReg())) { 35360b57cec5SDimitry Andric return false; 3537*5f757f3fSDimitry Andric } 35380b57cec5SDimitry Andric // VGPR is okay as Src0 - fallthrough 35390b57cec5SDimitry Andric } 35400b57cec5SDimitry Andric 35410b57cec5SDimitry Andric if (Src1->isReg() && !Src0Inlined) { 35420b57cec5SDimitry Andric // We have one slot for inlinable constant so far - try to fill it 35430b57cec5SDimitry Andric MachineInstr *Def = MRI->getUniqueVRegDef(Src1->getReg()); 35440b57cec5SDimitry Andric if (Def && Def->isMoveImmediate() && 35450b57cec5SDimitry Andric isInlineConstant(Def->getOperand(1)) && 3546*5f757f3fSDimitry Andric MRI->hasOneUse(Src1->getReg()) && commuteInstruction(UseMI)) 35470b57cec5SDimitry Andric Src0->ChangeToImmediate(Def->getOperand(1).getImm()); 3548*5f757f3fSDimitry Andric else if (RI.isSGPRReg(*MRI, Src1->getReg())) 35490b57cec5SDimitry Andric return false; 35500b57cec5SDimitry Andric // VGPR is okay as Src1 - fallthrough 35510b57cec5SDimitry Andric } 3552*5f757f3fSDimitry Andric } 35530b57cec5SDimitry Andric 35540b57cec5SDimitry Andric unsigned NewOpc = 3555bdd1243dSDimitry Andric IsFMA ? (IsF32 ? AMDGPU::V_FMAAK_F32 3556bdd1243dSDimitry Andric : ST.hasTrue16BitInsts() ? AMDGPU::V_FMAAK_F16_t16 3557bdd1243dSDimitry Andric : AMDGPU::V_FMAAK_F16) 35580b57cec5SDimitry Andric : (IsF32 ? AMDGPU::V_MADAK_F32 : AMDGPU::V_MADAK_F16); 35590b57cec5SDimitry Andric if (pseudoToMCOpcode(NewOpc) == -1) 35600b57cec5SDimitry Andric return false; 35610b57cec5SDimitry Andric 3562*5f757f3fSDimitry Andric // V_FMAAK_F16_t16 takes VGPR_32_Lo128 operands, so the rewrite 3563*5f757f3fSDimitry Andric // would also require restricting their register classes. For now 3564*5f757f3fSDimitry Andric // just bail out. 3565*5f757f3fSDimitry Andric if (NewOpc == AMDGPU::V_FMAAK_F16_t16) 3566*5f757f3fSDimitry Andric return false; 35670b57cec5SDimitry Andric 35680b57cec5SDimitry Andric // FIXME: This would be a lot easier if we could return a new instruction 35690b57cec5SDimitry Andric // instead of having to modify in place. 35700b57cec5SDimitry Andric 3571bdd1243dSDimitry Andric if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 || 3572bdd1243dSDimitry Andric Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 || 35730b57cec5SDimitry Andric Opc == AMDGPU::V_FMAC_F16_e64) 35740b57cec5SDimitry Andric UseMI.untieRegOperand( 35750b57cec5SDimitry Andric AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)); 35760b57cec5SDimitry Andric 35770b57cec5SDimitry Andric // ChangingToImmediate adds Src2 back to the instruction. 3578*5f757f3fSDimitry Andric Src2->ChangeToImmediate(getImmFor(*Src2)); 35790b57cec5SDimitry Andric 35800b57cec5SDimitry Andric // These come before src2. 35810b57cec5SDimitry Andric removeModOperands(UseMI); 35820b57cec5SDimitry Andric UseMI.setDesc(get(NewOpc)); 35830b57cec5SDimitry Andric // It might happen that UseMI was commuted 35840b57cec5SDimitry Andric // and we now have SGPR as SRC1. 
If so 2 inlined 35850b57cec5SDimitry Andric // constant and SGPR are illegal. 35860b57cec5SDimitry Andric legalizeOperands(UseMI); 35870b57cec5SDimitry Andric 358881ad6265SDimitry Andric bool DeleteDef = MRI->use_nodbg_empty(Reg); 35890b57cec5SDimitry Andric if (DeleteDef) 35900b57cec5SDimitry Andric DefMI.eraseFromParent(); 35910b57cec5SDimitry Andric 35920b57cec5SDimitry Andric return true; 35930b57cec5SDimitry Andric } 35940b57cec5SDimitry Andric } 35950b57cec5SDimitry Andric 35960b57cec5SDimitry Andric return false; 35970b57cec5SDimitry Andric } 35980b57cec5SDimitry Andric 35995ffd83dbSDimitry Andric static bool 36005ffd83dbSDimitry Andric memOpsHaveSameBaseOperands(ArrayRef<const MachineOperand *> BaseOps1, 36015ffd83dbSDimitry Andric ArrayRef<const MachineOperand *> BaseOps2) { 36025ffd83dbSDimitry Andric if (BaseOps1.size() != BaseOps2.size()) 36035ffd83dbSDimitry Andric return false; 36045ffd83dbSDimitry Andric for (size_t I = 0, E = BaseOps1.size(); I < E; ++I) { 36055ffd83dbSDimitry Andric if (!BaseOps1[I]->isIdenticalTo(*BaseOps2[I])) 36065ffd83dbSDimitry Andric return false; 36075ffd83dbSDimitry Andric } 36085ffd83dbSDimitry Andric return true; 36095ffd83dbSDimitry Andric } 36105ffd83dbSDimitry Andric 36110b57cec5SDimitry Andric static bool offsetsDoNotOverlap(int WidthA, int OffsetA, 36120b57cec5SDimitry Andric int WidthB, int OffsetB) { 36130b57cec5SDimitry Andric int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB; 36140b57cec5SDimitry Andric int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA; 36150b57cec5SDimitry Andric int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB; 36160b57cec5SDimitry Andric return LowOffset + LowWidth <= HighOffset; 36170b57cec5SDimitry Andric } 36180b57cec5SDimitry Andric 36190b57cec5SDimitry Andric bool SIInstrInfo::checkInstOffsetsDoNotOverlap(const MachineInstr &MIa, 36200b57cec5SDimitry Andric const MachineInstr &MIb) const { 36215ffd83dbSDimitry Andric SmallVector<const MachineOperand *, 4> BaseOps0, BaseOps1; 36220b57cec5SDimitry Andric int64_t Offset0, Offset1; 36235ffd83dbSDimitry Andric unsigned Dummy0, Dummy1; 36245ffd83dbSDimitry Andric bool Offset0IsScalable, Offset1IsScalable; 36255ffd83dbSDimitry Andric if (!getMemOperandsWithOffsetWidth(MIa, BaseOps0, Offset0, Offset0IsScalable, 36265ffd83dbSDimitry Andric Dummy0, &RI) || 36275ffd83dbSDimitry Andric !getMemOperandsWithOffsetWidth(MIb, BaseOps1, Offset1, Offset1IsScalable, 36285ffd83dbSDimitry Andric Dummy1, &RI)) 36295ffd83dbSDimitry Andric return false; 36300b57cec5SDimitry Andric 36315ffd83dbSDimitry Andric if (!memOpsHaveSameBaseOperands(BaseOps0, BaseOps1)) 36320b57cec5SDimitry Andric return false; 36330b57cec5SDimitry Andric 36340b57cec5SDimitry Andric if (!MIa.hasOneMemOperand() || !MIb.hasOneMemOperand()) { 36350b57cec5SDimitry Andric // FIXME: Handle ds_read2 / ds_write2. 
36360b57cec5SDimitry Andric return false; 36370b57cec5SDimitry Andric } 36385ffd83dbSDimitry Andric unsigned Width0 = MIa.memoperands().front()->getSize(); 36395ffd83dbSDimitry Andric unsigned Width1 = MIb.memoperands().front()->getSize(); 36405ffd83dbSDimitry Andric return offsetsDoNotOverlap(Width0, Offset0, Width1, Offset1); 36410b57cec5SDimitry Andric } 36420b57cec5SDimitry Andric 36430b57cec5SDimitry Andric bool SIInstrInfo::areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, 36448bcb0991SDimitry Andric const MachineInstr &MIb) const { 3645480093f4SDimitry Andric assert(MIa.mayLoadOrStore() && 36460b57cec5SDimitry Andric "MIa must load from or modify a memory location"); 3647480093f4SDimitry Andric assert(MIb.mayLoadOrStore() && 36480b57cec5SDimitry Andric "MIb must load from or modify a memory location"); 36490b57cec5SDimitry Andric 36500b57cec5SDimitry Andric if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects()) 36510b57cec5SDimitry Andric return false; 36520b57cec5SDimitry Andric 36530b57cec5SDimitry Andric // XXX - Can we relax this between address spaces? 36540b57cec5SDimitry Andric if (MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef()) 36550b57cec5SDimitry Andric return false; 36560b57cec5SDimitry Andric 36570b57cec5SDimitry Andric // TODO: Should we check the address space from the MachineMemOperand? That 36580b57cec5SDimitry Andric // would allow us to distinguish objects we know don't alias based on the 36590b57cec5SDimitry Andric // underlying address space, even if it was lowered to a different one, 36600b57cec5SDimitry Andric // e.g. private accesses lowered to use MUBUF instructions on a scratch 36610b57cec5SDimitry Andric // buffer. 36620b57cec5SDimitry Andric if (isDS(MIa)) { 36630b57cec5SDimitry Andric if (isDS(MIb)) 36640b57cec5SDimitry Andric return checkInstOffsetsDoNotOverlap(MIa, MIb); 36650b57cec5SDimitry Andric 36660b57cec5SDimitry Andric return !isFLAT(MIb) || isSegmentSpecificFLAT(MIb); 36670b57cec5SDimitry Andric } 36680b57cec5SDimitry Andric 36690b57cec5SDimitry Andric if (isMUBUF(MIa) || isMTBUF(MIa)) { 36700b57cec5SDimitry Andric if (isMUBUF(MIb) || isMTBUF(MIb)) 36710b57cec5SDimitry Andric return checkInstOffsetsDoNotOverlap(MIa, MIb); 36720b57cec5SDimitry Andric 3673*5f757f3fSDimitry Andric if (isFLAT(MIb)) 3674*5f757f3fSDimitry Andric return isFLATScratch(MIb); 3675*5f757f3fSDimitry Andric 3676*5f757f3fSDimitry Andric return !isSMRD(MIb); 36770b57cec5SDimitry Andric } 36780b57cec5SDimitry Andric 36790b57cec5SDimitry Andric if (isSMRD(MIa)) { 36800b57cec5SDimitry Andric if (isSMRD(MIb)) 36810b57cec5SDimitry Andric return checkInstOffsetsDoNotOverlap(MIa, MIb); 36820b57cec5SDimitry Andric 3683*5f757f3fSDimitry Andric if (isFLAT(MIb)) 3684*5f757f3fSDimitry Andric return isFLATScratch(MIb); 3685*5f757f3fSDimitry Andric 3686*5f757f3fSDimitry Andric return !isMUBUF(MIb) && !isMTBUF(MIb); 36870b57cec5SDimitry Andric } 36880b57cec5SDimitry Andric 36890b57cec5SDimitry Andric if (isFLAT(MIa)) { 3690*5f757f3fSDimitry Andric if (isFLAT(MIb)) { 3691*5f757f3fSDimitry Andric if ((isFLATScratch(MIa) && isFLATGlobal(MIb)) || 3692*5f757f3fSDimitry Andric (isFLATGlobal(MIa) && isFLATScratch(MIb))) 3693*5f757f3fSDimitry Andric return true; 3694*5f757f3fSDimitry Andric 36950b57cec5SDimitry Andric return checkInstOffsetsDoNotOverlap(MIa, MIb); 3696*5f757f3fSDimitry Andric } 36970b57cec5SDimitry Andric 36980b57cec5SDimitry Andric return false; 36990b57cec5SDimitry Andric } 37000b57cec5SDimitry Andric 37010b57cec5SDimitry Andric return false; 
37020b57cec5SDimitry Andric } 37030b57cec5SDimitry Andric 3704349cc55cSDimitry Andric static bool getFoldableImm(Register Reg, const MachineRegisterInfo &MRI, 37050eae32dcSDimitry Andric int64_t &Imm, MachineInstr **DefMI = nullptr) { 3706349cc55cSDimitry Andric if (Reg.isPhysical()) 3707349cc55cSDimitry Andric return false; 3708349cc55cSDimitry Andric auto *Def = MRI.getUniqueVRegDef(Reg); 3709349cc55cSDimitry Andric if (Def && SIInstrInfo::isFoldableCopy(*Def) && Def->getOperand(1).isImm()) { 3710349cc55cSDimitry Andric Imm = Def->getOperand(1).getImm(); 37110eae32dcSDimitry Andric if (DefMI) 37120eae32dcSDimitry Andric *DefMI = Def; 3713349cc55cSDimitry Andric return true; 3714349cc55cSDimitry Andric } 3715349cc55cSDimitry Andric return false; 3716349cc55cSDimitry Andric } 3717349cc55cSDimitry Andric 37180eae32dcSDimitry Andric static bool getFoldableImm(const MachineOperand *MO, int64_t &Imm, 37190eae32dcSDimitry Andric MachineInstr **DefMI = nullptr) { 37200b57cec5SDimitry Andric if (!MO->isReg()) 37210b57cec5SDimitry Andric return false; 37220b57cec5SDimitry Andric const MachineFunction *MF = MO->getParent()->getParent()->getParent(); 37230b57cec5SDimitry Andric const MachineRegisterInfo &MRI = MF->getRegInfo(); 37240eae32dcSDimitry Andric return getFoldableImm(MO->getReg(), MRI, Imm, DefMI); 37250b57cec5SDimitry Andric } 37260b57cec5SDimitry Andric 3727e8d8bef9SDimitry Andric static void updateLiveVariables(LiveVariables *LV, MachineInstr &MI, 3728e8d8bef9SDimitry Andric MachineInstr &NewMI) { 3729e8d8bef9SDimitry Andric if (LV) { 3730e8d8bef9SDimitry Andric unsigned NumOps = MI.getNumOperands(); 3731e8d8bef9SDimitry Andric for (unsigned I = 1; I < NumOps; ++I) { 3732e8d8bef9SDimitry Andric MachineOperand &Op = MI.getOperand(I); 3733e8d8bef9SDimitry Andric if (Op.isReg() && Op.isKill()) 3734e8d8bef9SDimitry Andric LV->replaceKillInstruction(Op.getReg(), MI, NewMI); 3735e8d8bef9SDimitry Andric } 3736e8d8bef9SDimitry Andric } 3737e8d8bef9SDimitry Andric } 3738e8d8bef9SDimitry Andric 3739349cc55cSDimitry Andric MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI, 3740349cc55cSDimitry Andric LiveVariables *LV, 3741349cc55cSDimitry Andric LiveIntervals *LIS) const { 374204eeddc0SDimitry Andric MachineBasicBlock &MBB = *MI.getParent(); 374381ad6265SDimitry Andric unsigned Opc = MI.getOpcode(); 374404eeddc0SDimitry Andric 374581ad6265SDimitry Andric // Handle MFMA. 
374681ad6265SDimitry Andric int NewMFMAOpc = AMDGPU::getMFMAEarlyClobberOp(Opc); 374704eeddc0SDimitry Andric if (NewMFMAOpc != -1) { 374881ad6265SDimitry Andric MachineInstrBuilder MIB = 374981ad6265SDimitry Andric BuildMI(MBB, MI, MI.getDebugLoc(), get(NewMFMAOpc)); 375004eeddc0SDimitry Andric for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) 375104eeddc0SDimitry Andric MIB.add(MI.getOperand(I)); 375204eeddc0SDimitry Andric updateLiveVariables(LV, MI, *MIB); 375304eeddc0SDimitry Andric if (LIS) 375404eeddc0SDimitry Andric LIS->ReplaceMachineInstrInMaps(MI, *MIB); 375504eeddc0SDimitry Andric return MIB; 375604eeddc0SDimitry Andric } 375704eeddc0SDimitry Andric 375881ad6265SDimitry Andric if (SIInstrInfo::isWMMA(MI)) { 375981ad6265SDimitry Andric unsigned NewOpc = AMDGPU::mapWMMA2AddrTo3AddrOpcode(MI.getOpcode()); 376081ad6265SDimitry Andric MachineInstrBuilder MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc)) 376181ad6265SDimitry Andric .setMIFlags(MI.getFlags()); 376281ad6265SDimitry Andric for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) 376381ad6265SDimitry Andric MIB->addOperand(MI.getOperand(I)); 376481ad6265SDimitry Andric 376581ad6265SDimitry Andric updateLiveVariables(LV, MI, *MIB); 376681ad6265SDimitry Andric if (LIS) 376781ad6265SDimitry Andric LIS->ReplaceMachineInstrInMaps(MI, *MIB); 376881ad6265SDimitry Andric 376981ad6265SDimitry Andric return MIB; 377081ad6265SDimitry Andric } 377181ad6265SDimitry Andric 3772bdd1243dSDimitry Andric assert(Opc != AMDGPU::V_FMAC_F16_t16_e32 && 3773bdd1243dSDimitry Andric "V_FMAC_F16_t16_e32 is not supported and not expected to be present " 3774bdd1243dSDimitry Andric "pre-RA"); 3775bdd1243dSDimitry Andric 377681ad6265SDimitry Andric // Handle MAC/FMAC. 377781ad6265SDimitry Andric bool IsF16 = Opc == AMDGPU::V_MAC_F16_e32 || Opc == AMDGPU::V_MAC_F16_e64 || 3778bdd1243dSDimitry Andric Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64 || 3779bdd1243dSDimitry Andric Opc == AMDGPU::V_FMAC_F16_t16_e64; 378081ad6265SDimitry Andric bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F32_e64 || 378181ad6265SDimitry Andric Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 || 378281ad6265SDimitry Andric Opc == AMDGPU::V_FMAC_LEGACY_F32_e64 || 378381ad6265SDimitry Andric Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64 || 3784bdd1243dSDimitry Andric Opc == AMDGPU::V_FMAC_F16_t16_e64 || 378581ad6265SDimitry Andric Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64; 378681ad6265SDimitry Andric bool IsF64 = Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64; 378781ad6265SDimitry Andric bool IsLegacy = Opc == AMDGPU::V_MAC_LEGACY_F32_e32 || 378881ad6265SDimitry Andric Opc == AMDGPU::V_MAC_LEGACY_F32_e64 || 378981ad6265SDimitry Andric Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 || 379081ad6265SDimitry Andric Opc == AMDGPU::V_FMAC_LEGACY_F32_e64; 379181ad6265SDimitry Andric bool Src0Literal = false; 379281ad6265SDimitry Andric 379381ad6265SDimitry Andric switch (Opc) { 379481ad6265SDimitry Andric default: 379581ad6265SDimitry Andric return nullptr; 379681ad6265SDimitry Andric case AMDGPU::V_MAC_F16_e64: 379781ad6265SDimitry Andric case AMDGPU::V_FMAC_F16_e64: 3798bdd1243dSDimitry Andric case AMDGPU::V_FMAC_F16_t16_e64: 379981ad6265SDimitry Andric case AMDGPU::V_MAC_F32_e64: 380081ad6265SDimitry Andric case AMDGPU::V_MAC_LEGACY_F32_e64: 380181ad6265SDimitry Andric case AMDGPU::V_FMAC_F32_e64: 380281ad6265SDimitry Andric case AMDGPU::V_FMAC_LEGACY_F32_e64: 380381ad6265SDimitry Andric case 
AMDGPU::V_FMAC_F64_e64: 380481ad6265SDimitry Andric break; 380581ad6265SDimitry Andric case AMDGPU::V_MAC_F16_e32: 380681ad6265SDimitry Andric case AMDGPU::V_FMAC_F16_e32: 380781ad6265SDimitry Andric case AMDGPU::V_MAC_F32_e32: 380881ad6265SDimitry Andric case AMDGPU::V_MAC_LEGACY_F32_e32: 380981ad6265SDimitry Andric case AMDGPU::V_FMAC_F32_e32: 381081ad6265SDimitry Andric case AMDGPU::V_FMAC_LEGACY_F32_e32: 381181ad6265SDimitry Andric case AMDGPU::V_FMAC_F64_e32: { 381281ad6265SDimitry Andric int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), 381381ad6265SDimitry Andric AMDGPU::OpName::src0); 381481ad6265SDimitry Andric const MachineOperand *Src0 = &MI.getOperand(Src0Idx); 381581ad6265SDimitry Andric if (!Src0->isReg() && !Src0->isImm()) 381681ad6265SDimitry Andric return nullptr; 381781ad6265SDimitry Andric 381881ad6265SDimitry Andric if (Src0->isImm() && !isInlineConstant(MI, Src0Idx, *Src0)) 381981ad6265SDimitry Andric Src0Literal = true; 382081ad6265SDimitry Andric 382181ad6265SDimitry Andric break; 382281ad6265SDimitry Andric } 382381ad6265SDimitry Andric } 382481ad6265SDimitry Andric 382581ad6265SDimitry Andric MachineInstrBuilder MIB; 38260b57cec5SDimitry Andric const MachineOperand *Dst = getNamedOperand(MI, AMDGPU::OpName::vdst); 38270b57cec5SDimitry Andric const MachineOperand *Src0 = getNamedOperand(MI, AMDGPU::OpName::src0); 38280b57cec5SDimitry Andric const MachineOperand *Src0Mods = 38290b57cec5SDimitry Andric getNamedOperand(MI, AMDGPU::OpName::src0_modifiers); 38300b57cec5SDimitry Andric const MachineOperand *Src1 = getNamedOperand(MI, AMDGPU::OpName::src1); 38310b57cec5SDimitry Andric const MachineOperand *Src1Mods = 38320b57cec5SDimitry Andric getNamedOperand(MI, AMDGPU::OpName::src1_modifiers); 38330b57cec5SDimitry Andric const MachineOperand *Src2 = getNamedOperand(MI, AMDGPU::OpName::src2); 383481ad6265SDimitry Andric const MachineOperand *Src2Mods = 383581ad6265SDimitry Andric getNamedOperand(MI, AMDGPU::OpName::src2_modifiers); 38360b57cec5SDimitry Andric const MachineOperand *Clamp = getNamedOperand(MI, AMDGPU::OpName::clamp); 38370b57cec5SDimitry Andric const MachineOperand *Omod = getNamedOperand(MI, AMDGPU::OpName::omod); 3838bdd1243dSDimitry Andric const MachineOperand *OpSel = getNamedOperand(MI, AMDGPU::OpName::op_sel); 38390b57cec5SDimitry Andric 384081ad6265SDimitry Andric if (!Src0Mods && !Src1Mods && !Src2Mods && !Clamp && !Omod && !IsF64 && 384181ad6265SDimitry Andric !IsLegacy && 38420b57cec5SDimitry Andric // If we have an SGPR input, we will violate the constant bus restriction. 3843e8d8bef9SDimitry Andric (ST.getConstantBusLimit(Opc) > 1 || !Src0->isReg() || 3844349cc55cSDimitry Andric !RI.isSGPRReg(MBB.getParent()->getRegInfo(), Src0->getReg()))) { 38450eae32dcSDimitry Andric MachineInstr *DefMI; 3846753f127fSDimitry Andric const auto killDef = [&]() -> void { 38470eae32dcSDimitry Andric const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 38480eae32dcSDimitry Andric // The only user is the instruction which will be killed. 3849753f127fSDimitry Andric Register DefReg = DefMI->getOperand(0).getReg(); 3850753f127fSDimitry Andric if (!MRI.hasOneNonDBGUse(DefReg)) 38510eae32dcSDimitry Andric return; 38520eae32dcSDimitry Andric // We cannot just remove the DefMI here, calling pass will crash. 
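// Sketch of the workaround used below: the def is rewritten into an
// IMPLICIT_DEF of its destination and every other operand is dropped, so it
// stays structurally valid for the caller but is trivially dead and can be
// cleaned up later.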
38530eae32dcSDimitry Andric DefMI->setDesc(get(AMDGPU::IMPLICIT_DEF)); 38540eae32dcSDimitry Andric for (unsigned I = DefMI->getNumOperands() - 1; I != 0; --I) 385581ad6265SDimitry Andric DefMI->removeOperand(I); 3856753f127fSDimitry Andric if (LV) 3857753f127fSDimitry Andric LV->getVarInfo(DefReg).AliveBlocks.clear(); 38580eae32dcSDimitry Andric }; 38590eae32dcSDimitry Andric 3860349cc55cSDimitry Andric int64_t Imm; 386181ad6265SDimitry Andric if (!Src0Literal && getFoldableImm(Src2, Imm, &DefMI)) { 38620b57cec5SDimitry Andric unsigned NewOpc = 3863bdd1243dSDimitry Andric IsFMA ? (IsF16 ? (ST.hasTrue16BitInsts() ? AMDGPU::V_FMAAK_F16_t16 3864bdd1243dSDimitry Andric : AMDGPU::V_FMAAK_F16) 3865bdd1243dSDimitry Andric : AMDGPU::V_FMAAK_F32) 38660b57cec5SDimitry Andric : (IsF16 ? AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32); 3867e8d8bef9SDimitry Andric if (pseudoToMCOpcode(NewOpc) != -1) { 3868349cc55cSDimitry Andric MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc)) 38690b57cec5SDimitry Andric .add(*Dst) 38700b57cec5SDimitry Andric .add(*Src0) 38710b57cec5SDimitry Andric .add(*Src1) 38720b57cec5SDimitry Andric .addImm(Imm); 3873e8d8bef9SDimitry Andric updateLiveVariables(LV, MI, *MIB); 3874349cc55cSDimitry Andric if (LIS) 3875349cc55cSDimitry Andric LIS->ReplaceMachineInstrInMaps(MI, *MIB); 38760eae32dcSDimitry Andric killDef(); 3877e8d8bef9SDimitry Andric return MIB; 38780b57cec5SDimitry Andric } 3879e8d8bef9SDimitry Andric } 3880bdd1243dSDimitry Andric unsigned NewOpc = 3881bdd1243dSDimitry Andric IsFMA ? (IsF16 ? (ST.hasTrue16BitInsts() ? AMDGPU::V_FMAMK_F16_t16 3882bdd1243dSDimitry Andric : AMDGPU::V_FMAMK_F16) 3883bdd1243dSDimitry Andric : AMDGPU::V_FMAMK_F32) 38840b57cec5SDimitry Andric : (IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32); 388581ad6265SDimitry Andric if (!Src0Literal && getFoldableImm(Src1, Imm, &DefMI)) { 3886e8d8bef9SDimitry Andric if (pseudoToMCOpcode(NewOpc) != -1) { 3887349cc55cSDimitry Andric MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc)) 38880b57cec5SDimitry Andric .add(*Dst) 38890b57cec5SDimitry Andric .add(*Src0) 38900b57cec5SDimitry Andric .addImm(Imm) 38910b57cec5SDimitry Andric .add(*Src2); 3892e8d8bef9SDimitry Andric updateLiveVariables(LV, MI, *MIB); 3893349cc55cSDimitry Andric if (LIS) 3894349cc55cSDimitry Andric LIS->ReplaceMachineInstrInMaps(MI, *MIB); 38950eae32dcSDimitry Andric killDef(); 3896e8d8bef9SDimitry Andric return MIB; 3897e8d8bef9SDimitry Andric } 38980b57cec5SDimitry Andric } 389981ad6265SDimitry Andric if (Src0Literal || getFoldableImm(Src0, Imm, &DefMI)) { 390081ad6265SDimitry Andric if (Src0Literal) { 390181ad6265SDimitry Andric Imm = Src0->getImm(); 390281ad6265SDimitry Andric DefMI = nullptr; 390381ad6265SDimitry Andric } 39040b57cec5SDimitry Andric if (pseudoToMCOpcode(NewOpc) != -1 && 3905e8d8bef9SDimitry Andric isOperandLegal( 3906e8d8bef9SDimitry Andric MI, AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::src0), 3907e8d8bef9SDimitry Andric Src1)) { 3908349cc55cSDimitry Andric MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc)) 39090b57cec5SDimitry Andric .add(*Dst) 39100b57cec5SDimitry Andric .add(*Src1) 39110b57cec5SDimitry Andric .addImm(Imm) 39120b57cec5SDimitry Andric .add(*Src2); 3913e8d8bef9SDimitry Andric updateLiveVariables(LV, MI, *MIB); 3914349cc55cSDimitry Andric if (LIS) 3915349cc55cSDimitry Andric LIS->ReplaceMachineInstrInMaps(MI, *MIB); 391681ad6265SDimitry Andric if (DefMI) 39170eae32dcSDimitry Andric killDef(); 3918e8d8bef9SDimitry Andric return MIB; 3919e8d8bef9SDimitry Andric } 39200b57cec5SDimitry 
Andric } 39210b57cec5SDimitry Andric } 39220b57cec5SDimitry Andric 392381ad6265SDimitry Andric // VOP2 mac/fmac with a literal operand cannot be converted to VOP3 mad/fma 3924bdd1243dSDimitry Andric // if VOP3 does not allow a literal operand. 3925bdd1243dSDimitry Andric if (Src0Literal && !ST.hasVOP3Literal()) 392681ad6265SDimitry Andric return nullptr; 392781ad6265SDimitry Andric 392881ad6265SDimitry Andric unsigned NewOpc = IsFMA ? IsF16 ? AMDGPU::V_FMA_F16_gfx9_e64 3929fe6060f1SDimitry Andric : IsF64 ? AMDGPU::V_FMA_F64_e64 393081ad6265SDimitry Andric : IsLegacy 393181ad6265SDimitry Andric ? AMDGPU::V_FMA_LEGACY_F32_e64 393281ad6265SDimitry Andric : AMDGPU::V_FMA_F32_e64 393381ad6265SDimitry Andric : IsF16 ? AMDGPU::V_MAD_F16_e64 393481ad6265SDimitry Andric : IsLegacy ? AMDGPU::V_MAD_LEGACY_F32_e64 393581ad6265SDimitry Andric : AMDGPU::V_MAD_F32_e64; 39360b57cec5SDimitry Andric if (pseudoToMCOpcode(NewOpc) == -1) 39370b57cec5SDimitry Andric return nullptr; 39380b57cec5SDimitry Andric 3939349cc55cSDimitry Andric MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc)) 39400b57cec5SDimitry Andric .add(*Dst) 39410b57cec5SDimitry Andric .addImm(Src0Mods ? Src0Mods->getImm() : 0) 39420b57cec5SDimitry Andric .add(*Src0) 39430b57cec5SDimitry Andric .addImm(Src1Mods ? Src1Mods->getImm() : 0) 39440b57cec5SDimitry Andric .add(*Src1) 394581ad6265SDimitry Andric .addImm(Src2Mods ? Src2Mods->getImm() : 0) 39460b57cec5SDimitry Andric .add(*Src2) 39470b57cec5SDimitry Andric .addImm(Clamp ? Clamp->getImm() : 0) 39480b57cec5SDimitry Andric .addImm(Omod ? Omod->getImm() : 0); 3949bdd1243dSDimitry Andric if (AMDGPU::hasNamedOperand(NewOpc, AMDGPU::OpName::op_sel)) 3950bdd1243dSDimitry Andric MIB.addImm(OpSel ? OpSel->getImm() : 0); 3951e8d8bef9SDimitry Andric updateLiveVariables(LV, MI, *MIB); 3952349cc55cSDimitry Andric if (LIS) 3953349cc55cSDimitry Andric LIS->ReplaceMachineInstrInMaps(MI, *MIB); 3954e8d8bef9SDimitry Andric return MIB; 39550b57cec5SDimitry Andric } 39560b57cec5SDimitry Andric 39570b57cec5SDimitry Andric // It's not generally safe to move VALU instructions across these since it will 39580b57cec5SDimitry Andric // start using the register as a base index rather than directly. 39590b57cec5SDimitry Andric // XXX - Why isn't hasSideEffects sufficient for these? 39600b57cec5SDimitry Andric static bool changesVGPRIndexingMode(const MachineInstr &MI) { 39610b57cec5SDimitry Andric switch (MI.getOpcode()) { 39620b57cec5SDimitry Andric case AMDGPU::S_SET_GPR_IDX_ON: 39630b57cec5SDimitry Andric case AMDGPU::S_SET_GPR_IDX_MODE: 39640b57cec5SDimitry Andric case AMDGPU::S_SET_GPR_IDX_OFF: 39650b57cec5SDimitry Andric return true; 39660b57cec5SDimitry Andric default: 39670b57cec5SDimitry Andric return false; 39680b57cec5SDimitry Andric } 39690b57cec5SDimitry Andric } 39700b57cec5SDimitry Andric 39710b57cec5SDimitry Andric bool SIInstrInfo::isSchedulingBoundary(const MachineInstr &MI, 39720b57cec5SDimitry Andric const MachineBasicBlock *MBB, 39730b57cec5SDimitry Andric const MachineFunction &MF) const { 39745ffd83dbSDimitry Andric // Skipping the check for SP writes in the base implementation. The reason it 39755ffd83dbSDimitry Andric // was added was apparently due to compile time concerns. 39765ffd83dbSDimitry Andric // 39775ffd83dbSDimitry Andric // TODO: Do we really want this barrier? It triggers unnecessary hazard nops 39785ffd83dbSDimitry Andric // but is probably avoidable. 39795ffd83dbSDimitry Andric 39805ffd83dbSDimitry Andric // Copied from base implementation. 
39815ffd83dbSDimitry Andric // Terminators and labels can't be scheduled around. 39825ffd83dbSDimitry Andric if (MI.isTerminator() || MI.isPosition()) 39835ffd83dbSDimitry Andric return true; 39845ffd83dbSDimitry Andric 39855ffd83dbSDimitry Andric // INLINEASM_BR can jump to another block 39865ffd83dbSDimitry Andric if (MI.getOpcode() == TargetOpcode::INLINEASM_BR) 39875ffd83dbSDimitry Andric return true; 39880b57cec5SDimitry Andric 398981ad6265SDimitry Andric if (MI.getOpcode() == AMDGPU::SCHED_BARRIER && MI.getOperand(0).getImm() == 0) 399081ad6265SDimitry Andric return true; 399181ad6265SDimitry Andric 39920b57cec5SDimitry Andric // Target-independent instructions do not have an implicit-use of EXEC, even 39930b57cec5SDimitry Andric // when they operate on VGPRs. Treating EXEC modifications as scheduling 39940b57cec5SDimitry Andric // boundaries prevents incorrect movements of such instructions. 39955ffd83dbSDimitry Andric return MI.modifiesRegister(AMDGPU::EXEC, &RI) || 39960b57cec5SDimitry Andric MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 || 39970b57cec5SDimitry Andric MI.getOpcode() == AMDGPU::S_SETREG_B32 || 3998bdd1243dSDimitry Andric MI.getOpcode() == AMDGPU::S_SETPRIO || 39990b57cec5SDimitry Andric changesVGPRIndexingMode(MI); 40000b57cec5SDimitry Andric } 40010b57cec5SDimitry Andric 40020b57cec5SDimitry Andric bool SIInstrInfo::isAlwaysGDS(uint16_t Opcode) const { 4003*5f757f3fSDimitry Andric return Opcode == AMDGPU::DS_ORDERED_COUNT || isGWS(Opcode); 40040b57cec5SDimitry Andric } 40050b57cec5SDimitry Andric 40065ffd83dbSDimitry Andric bool SIInstrInfo::modifiesModeRegister(const MachineInstr &MI) { 40075ffd83dbSDimitry Andric // Skip the full operand and register alias search modifiesRegister 40085ffd83dbSDimitry Andric // does. There's only a handful of instructions that touch this, it's only an 40095ffd83dbSDimitry Andric // implicit def, and doesn't alias any other registers. 4010bdd1243dSDimitry Andric return is_contained(MI.getDesc().implicit_defs(), AMDGPU::MODE); 40115ffd83dbSDimitry Andric } 40125ffd83dbSDimitry Andric 40130b57cec5SDimitry Andric bool SIInstrInfo::hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const { 40140b57cec5SDimitry Andric unsigned Opcode = MI.getOpcode(); 40150b57cec5SDimitry Andric 40160b57cec5SDimitry Andric if (MI.mayStore() && isSMRD(MI)) 40170b57cec5SDimitry Andric return true; // scalar store or atomic 40180b57cec5SDimitry Andric 40190b57cec5SDimitry Andric // This will terminate the function when other lanes may need to continue. 40200b57cec5SDimitry Andric if (MI.isReturn()) 40210b57cec5SDimitry Andric return true; 40220b57cec5SDimitry Andric 40230b57cec5SDimitry Andric // These instructions cause shader I/O that may cause hardware lockups 40240b57cec5SDimitry Andric // when executed with an empty EXEC mask. 40250b57cec5SDimitry Andric // 40260b57cec5SDimitry Andric // Note: exp with VM = DONE = 0 is automatically skipped by hardware when 40270b57cec5SDimitry Andric // EXEC = 0, but checking for that case here seems not worth it 40280b57cec5SDimitry Andric // given the typical code patterns. 
40290b57cec5SDimitry Andric if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT || 4030e8d8bef9SDimitry Andric isEXP(Opcode) || 40310b57cec5SDimitry Andric Opcode == AMDGPU::DS_ORDERED_COUNT || Opcode == AMDGPU::S_TRAP || 40320b57cec5SDimitry Andric Opcode == AMDGPU::DS_GWS_INIT || Opcode == AMDGPU::DS_GWS_BARRIER) 40330b57cec5SDimitry Andric return true; 40340b57cec5SDimitry Andric 40350b57cec5SDimitry Andric if (MI.isCall() || MI.isInlineAsm()) 40360b57cec5SDimitry Andric return true; // conservative assumption 40370b57cec5SDimitry Andric 40385ffd83dbSDimitry Andric // A mode change is a scalar operation that influences vector instructions. 40395ffd83dbSDimitry Andric if (modifiesModeRegister(MI)) 40405ffd83dbSDimitry Andric return true; 40415ffd83dbSDimitry Andric 40420b57cec5SDimitry Andric // These are like SALU instructions in terms of effects, so it's questionable 40430b57cec5SDimitry Andric // whether we should return true for those. 40440b57cec5SDimitry Andric // 40450b57cec5SDimitry Andric // However, executing them with EXEC = 0 causes them to operate on undefined 40460b57cec5SDimitry Andric // data, which we avoid by returning true here. 4047e8d8bef9SDimitry Andric if (Opcode == AMDGPU::V_READFIRSTLANE_B32 || 4048*5f757f3fSDimitry Andric Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32 || 4049*5f757f3fSDimitry Andric Opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR || 4050*5f757f3fSDimitry Andric Opcode == AMDGPU::SI_SPILL_S32_TO_VGPR) 40510b57cec5SDimitry Andric return true; 40520b57cec5SDimitry Andric 40530b57cec5SDimitry Andric return false; 40540b57cec5SDimitry Andric } 40550b57cec5SDimitry Andric 40560b57cec5SDimitry Andric bool SIInstrInfo::mayReadEXEC(const MachineRegisterInfo &MRI, 40570b57cec5SDimitry Andric const MachineInstr &MI) const { 40580b57cec5SDimitry Andric if (MI.isMetaInstruction()) 40590b57cec5SDimitry Andric return false; 40600b57cec5SDimitry Andric 40610b57cec5SDimitry Andric // This won't read exec if this is an SGPR->SGPR copy. 40620b57cec5SDimitry Andric if (MI.isCopyLike()) { 40630b57cec5SDimitry Andric if (!RI.isSGPRReg(MRI, MI.getOperand(0).getReg())) 40640b57cec5SDimitry Andric return true; 40650b57cec5SDimitry Andric 40660b57cec5SDimitry Andric // Make sure this isn't copying exec as a normal operand 40670b57cec5SDimitry Andric return MI.readsRegister(AMDGPU::EXEC, &RI); 40680b57cec5SDimitry Andric } 40690b57cec5SDimitry Andric 40700b57cec5SDimitry Andric // Make a conservative assumption about the callee. 40710b57cec5SDimitry Andric if (MI.isCall()) 40720b57cec5SDimitry Andric return true; 40730b57cec5SDimitry Andric 40740b57cec5SDimitry Andric // Be conservative with any unhandled generic opcodes. 40750b57cec5SDimitry Andric if (!isTargetSpecificOpcode(MI.getOpcode())) 40760b57cec5SDimitry Andric return true; 40770b57cec5SDimitry Andric 40780b57cec5SDimitry Andric return !isSALU(MI) || MI.readsRegister(AMDGPU::EXEC, &RI); 40790b57cec5SDimitry Andric } 40800b57cec5SDimitry Andric 40810b57cec5SDimitry Andric bool SIInstrInfo::isInlineConstant(const APInt &Imm) const { 40820b57cec5SDimitry Andric switch (Imm.getBitWidth()) { 40830b57cec5SDimitry Andric case 1: // This likely will be a condition code mask. 
40840b57cec5SDimitry Andric return true; 40850b57cec5SDimitry Andric 40860b57cec5SDimitry Andric case 32: 40870b57cec5SDimitry Andric return AMDGPU::isInlinableLiteral32(Imm.getSExtValue(), 40880b57cec5SDimitry Andric ST.hasInv2PiInlineImm()); 40890b57cec5SDimitry Andric case 64: 40900b57cec5SDimitry Andric return AMDGPU::isInlinableLiteral64(Imm.getSExtValue(), 40910b57cec5SDimitry Andric ST.hasInv2PiInlineImm()); 40920b57cec5SDimitry Andric case 16: 40930b57cec5SDimitry Andric return ST.has16BitInsts() && 40940b57cec5SDimitry Andric AMDGPU::isInlinableLiteral16(Imm.getSExtValue(), 40950b57cec5SDimitry Andric ST.hasInv2PiInlineImm()); 40960b57cec5SDimitry Andric default: 40970b57cec5SDimitry Andric llvm_unreachable("invalid bitwidth"); 40980b57cec5SDimitry Andric } 40990b57cec5SDimitry Andric } 41000b57cec5SDimitry Andric 41010b57cec5SDimitry Andric bool SIInstrInfo::isInlineConstant(const MachineOperand &MO, 41020b57cec5SDimitry Andric uint8_t OperandType) const { 4103bdd1243dSDimitry Andric assert(!MO.isReg() && "isInlineConstant called on register operand!"); 4104*5f757f3fSDimitry Andric if (!MO.isImm()) 41050b57cec5SDimitry Andric return false; 41060b57cec5SDimitry Andric 41070b57cec5SDimitry Andric // MachineOperand provides no way to tell the true operand size, since it only 41080b57cec5SDimitry Andric // records a 64-bit value. We need to know the size to determine if a 32-bit 41090b57cec5SDimitry Andric // floating point immediate bit pattern is legal for an integer immediate. It 41100b57cec5SDimitry Andric // would be for any 32-bit integer operand, but would not be for a 64-bit one. 41110b57cec5SDimitry Andric 41120b57cec5SDimitry Andric int64_t Imm = MO.getImm(); 41130b57cec5SDimitry Andric switch (OperandType) { 41140b57cec5SDimitry Andric case AMDGPU::OPERAND_REG_IMM_INT32: 41150b57cec5SDimitry Andric case AMDGPU::OPERAND_REG_IMM_FP32: 4116349cc55cSDimitry Andric case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 41170b57cec5SDimitry Andric case AMDGPU::OPERAND_REG_INLINE_C_INT32: 41180b57cec5SDimitry Andric case AMDGPU::OPERAND_REG_INLINE_C_FP32: 4119fe6060f1SDimitry Andric case AMDGPU::OPERAND_REG_IMM_V2FP32: 4120fe6060f1SDimitry Andric case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 4121fe6060f1SDimitry Andric case AMDGPU::OPERAND_REG_IMM_V2INT32: 4122fe6060f1SDimitry Andric case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 41230b57cec5SDimitry Andric case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 4124*5f757f3fSDimitry Andric case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 4125*5f757f3fSDimitry Andric case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32: { 41260b57cec5SDimitry Andric int32_t Trunc = static_cast<int32_t>(Imm); 41270b57cec5SDimitry Andric return AMDGPU::isInlinableLiteral32(Trunc, ST.hasInv2PiInlineImm()); 41280b57cec5SDimitry Andric } 41290b57cec5SDimitry Andric case AMDGPU::OPERAND_REG_IMM_INT64: 41300b57cec5SDimitry Andric case AMDGPU::OPERAND_REG_IMM_FP64: 41310b57cec5SDimitry Andric case AMDGPU::OPERAND_REG_INLINE_C_INT64: 41320b57cec5SDimitry Andric case AMDGPU::OPERAND_REG_INLINE_C_FP64: 4133fe6060f1SDimitry Andric case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 41340b57cec5SDimitry Andric return AMDGPU::isInlinableLiteral64(MO.getImm(), 41350b57cec5SDimitry Andric ST.hasInv2PiInlineImm()); 41360b57cec5SDimitry Andric case AMDGPU::OPERAND_REG_IMM_INT16: 41370b57cec5SDimitry Andric case AMDGPU::OPERAND_REG_INLINE_C_INT16: 41380b57cec5SDimitry Andric case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 41395ffd83dbSDimitry Andric // We would expect inline immediates to not be concerned with an integer/fp 
41405ffd83dbSDimitry Andric // distinction. However, in the case of 16-bit integer operations, the 41415ffd83dbSDimitry Andric // "floating point" values appear to not work. It seems to read the low 16 bits 41425ffd83dbSDimitry Andric // of 32-bit immediates, which happens to always work for the integer 41435ffd83dbSDimitry Andric // values. 41445ffd83dbSDimitry Andric // 41455ffd83dbSDimitry Andric // See llvm bugzilla 46302. 41465ffd83dbSDimitry Andric // 41475ffd83dbSDimitry Andric // TODO: Theoretically we could use op-sel to use the high bits of the 41485ffd83dbSDimitry Andric // 32-bit FP values. 41495ffd83dbSDimitry Andric return AMDGPU::isInlinableIntLiteral(Imm); 41505ffd83dbSDimitry Andric case AMDGPU::OPERAND_REG_IMM_V2INT16: 41515ffd83dbSDimitry Andric case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 41525ffd83dbSDimitry Andric case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 4153*5f757f3fSDimitry Andric return (isInt<16>(Imm) || isUInt<16>(Imm)) && 4154*5f757f3fSDimitry Andric AMDGPU::isInlinableIntLiteral((int16_t)Imm); 41555ffd83dbSDimitry Andric case AMDGPU::OPERAND_REG_IMM_FP16: 4156349cc55cSDimitry Andric case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 41575ffd83dbSDimitry Andric case AMDGPU::OPERAND_REG_INLINE_C_FP16: 4158*5f757f3fSDimitry Andric case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 4159*5f757f3fSDimitry Andric case AMDGPU::OPERAND_REG_IMM_V2FP16: 4160*5f757f3fSDimitry Andric case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 4161*5f757f3fSDimitry Andric case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 41620b57cec5SDimitry Andric if (isInt<16>(Imm) || isUInt<16>(Imm)) { 41630b57cec5SDimitry Andric // A few special case instructions have 16-bit operands on subtargets 41640b57cec5SDimitry Andric // where 16-bit instructions are not legal. 41650b57cec5SDimitry Andric // TODO: Do the 32-bit immediates work? We shouldn't really need to handle 41660b57cec5SDimitry Andric // constants in these cases. 41670b57cec5SDimitry Andric int16_t Trunc = static_cast<int16_t>(Imm); 41680b57cec5SDimitry Andric return ST.has16BitInsts() && 41690b57cec5SDimitry Andric AMDGPU::isInlinableLiteral16(Trunc, ST.hasInv2PiInlineImm()); 41700b57cec5SDimitry Andric } 41710b57cec5SDimitry Andric 41720b57cec5SDimitry Andric return false; 41730b57cec5SDimitry Andric } 4174349cc55cSDimitry Andric case AMDGPU::OPERAND_KIMM32: 4175349cc55cSDimitry Andric case AMDGPU::OPERAND_KIMM16: 4176349cc55cSDimitry Andric return false; 4177*5f757f3fSDimitry Andric case AMDGPU::OPERAND_INPUT_MODS: 4178*5f757f3fSDimitry Andric case MCOI::OPERAND_IMMEDIATE: 4179*5f757f3fSDimitry Andric // Always embedded in the instruction for free. 4180*5f757f3fSDimitry Andric return true; 4181*5f757f3fSDimitry Andric case MCOI::OPERAND_UNKNOWN: 4182*5f757f3fSDimitry Andric case MCOI::OPERAND_REGISTER: 4183*5f757f3fSDimitry Andric case MCOI::OPERAND_PCREL: 4184*5f757f3fSDimitry Andric case MCOI::OPERAND_GENERIC_0: 4185*5f757f3fSDimitry Andric case MCOI::OPERAND_GENERIC_1: 4186*5f757f3fSDimitry Andric case MCOI::OPERAND_GENERIC_2: 4187*5f757f3fSDimitry Andric case MCOI::OPERAND_GENERIC_3: 4188*5f757f3fSDimitry Andric case MCOI::OPERAND_GENERIC_4: 4189*5f757f3fSDimitry Andric case MCOI::OPERAND_GENERIC_5: 4190*5f757f3fSDimitry Andric // Just ignore anything else.
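    // As a rough reference for the register-immediate cases handled above, the
    // inline-constant set on this target is the integers -16..64 plus the FP
    // values +/-0.5, +/-1.0, +/-2.0 and +/-4.0 (and 1/(2*pi) when
    // ST.hasInv2PiInlineImm()); e.g. 64 or the bit pattern of 1.0f stays
    // inline, while 65 has to be emitted as a literal.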
4191*5f757f3fSDimitry Andric return true; 41920b57cec5SDimitry Andric default: 4193*5f757f3fSDimitry Andric llvm_unreachable("invalid operand type"); 41940b57cec5SDimitry Andric } 41950b57cec5SDimitry Andric } 41960b57cec5SDimitry Andric 41970b57cec5SDimitry Andric static bool compareMachineOp(const MachineOperand &Op0, 41980b57cec5SDimitry Andric const MachineOperand &Op1) { 41990b57cec5SDimitry Andric if (Op0.getType() != Op1.getType()) 42000b57cec5SDimitry Andric return false; 42010b57cec5SDimitry Andric 42020b57cec5SDimitry Andric switch (Op0.getType()) { 42030b57cec5SDimitry Andric case MachineOperand::MO_Register: 42040b57cec5SDimitry Andric return Op0.getReg() == Op1.getReg(); 42050b57cec5SDimitry Andric case MachineOperand::MO_Immediate: 42060b57cec5SDimitry Andric return Op0.getImm() == Op1.getImm(); 42070b57cec5SDimitry Andric default: 42080b57cec5SDimitry Andric llvm_unreachable("Didn't expect to be comparing these operand types"); 42090b57cec5SDimitry Andric } 42100b57cec5SDimitry Andric } 42110b57cec5SDimitry Andric 42120b57cec5SDimitry Andric bool SIInstrInfo::isImmOperandLegal(const MachineInstr &MI, unsigned OpNo, 42130b57cec5SDimitry Andric const MachineOperand &MO) const { 42140b57cec5SDimitry Andric const MCInstrDesc &InstDesc = MI.getDesc(); 4215bdd1243dSDimitry Andric const MCOperandInfo &OpInfo = InstDesc.operands()[OpNo]; 42160b57cec5SDimitry Andric 42170b57cec5SDimitry Andric assert(MO.isImm() || MO.isTargetIndex() || MO.isFI() || MO.isGlobal()); 42180b57cec5SDimitry Andric 42190b57cec5SDimitry Andric if (OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE) 42200b57cec5SDimitry Andric return true; 42210b57cec5SDimitry Andric 42220b57cec5SDimitry Andric if (OpInfo.RegClass < 0) 42230b57cec5SDimitry Andric return false; 42240b57cec5SDimitry Andric 42258bcb0991SDimitry Andric if (MO.isImm() && isInlineConstant(MO, OpInfo)) { 42268bcb0991SDimitry Andric if (isMAI(MI) && ST.hasMFMAInlineLiteralBug() && 42278bcb0991SDimitry Andric OpNo ==(unsigned)AMDGPU::getNamedOperandIdx(MI.getOpcode(), 42288bcb0991SDimitry Andric AMDGPU::OpName::src2)) 42298bcb0991SDimitry Andric return false; 42300b57cec5SDimitry Andric return RI.opCanUseInlineConstant(OpInfo.OperandType); 42318bcb0991SDimitry Andric } 42320b57cec5SDimitry Andric 42330b57cec5SDimitry Andric if (!RI.opCanUseLiteralConstant(OpInfo.OperandType)) 42340b57cec5SDimitry Andric return false; 42350b57cec5SDimitry Andric 42360b57cec5SDimitry Andric if (!isVOP3(MI) || !AMDGPU::isSISrcOperand(InstDesc, OpNo)) 42370b57cec5SDimitry Andric return true; 42380b57cec5SDimitry Andric 42390b57cec5SDimitry Andric return ST.hasVOP3Literal(); 42400b57cec5SDimitry Andric } 42410b57cec5SDimitry Andric 42420b57cec5SDimitry Andric bool SIInstrInfo::hasVALU32BitEncoding(unsigned Opcode) const { 4243fe6060f1SDimitry Andric // GFX90A does not have V_MUL_LEGACY_F32_e32. 4244fe6060f1SDimitry Andric if (Opcode == AMDGPU::V_MUL_LEGACY_F32_e64 && ST.hasGFX90AInsts()) 4245fe6060f1SDimitry Andric return false; 4246fe6060f1SDimitry Andric 42470b57cec5SDimitry Andric int Op32 = AMDGPU::getVOPe32(Opcode); 42480b57cec5SDimitry Andric if (Op32 == -1) 42490b57cec5SDimitry Andric return false; 42500b57cec5SDimitry Andric 42510b57cec5SDimitry Andric return pseudoToMCOpcode(Op32) != -1; 42520b57cec5SDimitry Andric } 42530b57cec5SDimitry Andric 42540b57cec5SDimitry Andric bool SIInstrInfo::hasModifiers(unsigned Opcode) const { 42550b57cec5SDimitry Andric // The src0_modifier operand is present on all instructions 42560b57cec5SDimitry Andric // that have modifiers. 
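  // For example, V_ADD_F32_e64 carries a src0_modifiers operand and so reports
  // true here, while its VOP2 form V_ADD_F32_e32 has no modifier operands.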
42570b57cec5SDimitry Andric 4258bdd1243dSDimitry Andric return AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src0_modifiers); 42590b57cec5SDimitry Andric } 42600b57cec5SDimitry Andric 42610b57cec5SDimitry Andric bool SIInstrInfo::hasModifiersSet(const MachineInstr &MI, 42620b57cec5SDimitry Andric unsigned OpName) const { 42630b57cec5SDimitry Andric const MachineOperand *Mods = getNamedOperand(MI, OpName); 42640b57cec5SDimitry Andric return Mods && Mods->getImm(); 42650b57cec5SDimitry Andric } 42660b57cec5SDimitry Andric 42670b57cec5SDimitry Andric bool SIInstrInfo::hasAnyModifiersSet(const MachineInstr &MI) const { 426881ad6265SDimitry Andric return any_of(ModifierOpNames, 426981ad6265SDimitry Andric [&](unsigned Name) { return hasModifiersSet(MI, Name); }); 42700b57cec5SDimitry Andric } 42710b57cec5SDimitry Andric 42720b57cec5SDimitry Andric bool SIInstrInfo::canShrink(const MachineInstr &MI, 42730b57cec5SDimitry Andric const MachineRegisterInfo &MRI) const { 42740b57cec5SDimitry Andric const MachineOperand *Src2 = getNamedOperand(MI, AMDGPU::OpName::src2); 42750b57cec5SDimitry Andric // Can't shrink instruction with three operands. 42760b57cec5SDimitry Andric if (Src2) { 42770b57cec5SDimitry Andric switch (MI.getOpcode()) { 42780b57cec5SDimitry Andric default: return false; 42790b57cec5SDimitry Andric 42800b57cec5SDimitry Andric case AMDGPU::V_ADDC_U32_e64: 42810b57cec5SDimitry Andric case AMDGPU::V_SUBB_U32_e64: 42820b57cec5SDimitry Andric case AMDGPU::V_SUBBREV_U32_e64: { 42830b57cec5SDimitry Andric const MachineOperand *Src1 42840b57cec5SDimitry Andric = getNamedOperand(MI, AMDGPU::OpName::src1); 42850b57cec5SDimitry Andric if (!Src1->isReg() || !RI.isVGPR(MRI, Src1->getReg())) 42860b57cec5SDimitry Andric return false; 42870b57cec5SDimitry Andric // Additional verification is needed for sdst/src2. 42880b57cec5SDimitry Andric return true; 42890b57cec5SDimitry Andric } 42900b57cec5SDimitry Andric case AMDGPU::V_MAC_F16_e64: 4291349cc55cSDimitry Andric case AMDGPU::V_MAC_F32_e64: 4292349cc55cSDimitry Andric case AMDGPU::V_MAC_LEGACY_F32_e64: 42930b57cec5SDimitry Andric case AMDGPU::V_FMAC_F16_e64: 4294bdd1243dSDimitry Andric case AMDGPU::V_FMAC_F16_t16_e64: 4295349cc55cSDimitry Andric case AMDGPU::V_FMAC_F32_e64: 4296fe6060f1SDimitry Andric case AMDGPU::V_FMAC_F64_e64: 4297349cc55cSDimitry Andric case AMDGPU::V_FMAC_LEGACY_F32_e64: 42980b57cec5SDimitry Andric if (!Src2->isReg() || !RI.isVGPR(MRI, Src2->getReg()) || 42990b57cec5SDimitry Andric hasModifiersSet(MI, AMDGPU::OpName::src2_modifiers)) 43000b57cec5SDimitry Andric return false; 43010b57cec5SDimitry Andric break; 43020b57cec5SDimitry Andric 43030b57cec5SDimitry Andric case AMDGPU::V_CNDMASK_B32_e64: 43040b57cec5SDimitry Andric break; 43050b57cec5SDimitry Andric } 43060b57cec5SDimitry Andric } 43070b57cec5SDimitry Andric 43080b57cec5SDimitry Andric const MachineOperand *Src1 = getNamedOperand(MI, AMDGPU::OpName::src1); 43090b57cec5SDimitry Andric if (Src1 && (!Src1->isReg() || !RI.isVGPR(MRI, Src1->getReg()) || 43100b57cec5SDimitry Andric hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers))) 43110b57cec5SDimitry Andric return false; 43120b57cec5SDimitry Andric 43130b57cec5SDimitry Andric // We don't need to check src0, all input types are legal, so just make sure 43140b57cec5SDimitry Andric // src0 isn't using any modifiers. 
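  // For instance, a V_ADD_F32_e64 whose src0 carries a neg or abs modifier
  // cannot be shrunk, since the 32-bit VOP2 encoding has no way to express
  // source modifiers.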
43150b57cec5SDimitry Andric if (hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers)) 43160b57cec5SDimitry Andric return false; 43170b57cec5SDimitry Andric 43180b57cec5SDimitry Andric // Can it be shrunk to a valid 32 bit opcode? 43190b57cec5SDimitry Andric if (!hasVALU32BitEncoding(MI.getOpcode())) 43200b57cec5SDimitry Andric return false; 43210b57cec5SDimitry Andric 43220b57cec5SDimitry Andric // Check output modifiers 43230b57cec5SDimitry Andric return !hasModifiersSet(MI, AMDGPU::OpName::omod) && 43240b57cec5SDimitry Andric !hasModifiersSet(MI, AMDGPU::OpName::clamp); 43250b57cec5SDimitry Andric } 43260b57cec5SDimitry Andric 43270b57cec5SDimitry Andric // Set VCC operand with all flags from \p Orig, except for setting it as 43280b57cec5SDimitry Andric // implicit. 43290b57cec5SDimitry Andric static void copyFlagsToImplicitVCC(MachineInstr &MI, 43300b57cec5SDimitry Andric const MachineOperand &Orig) { 43310b57cec5SDimitry Andric 43320b57cec5SDimitry Andric for (MachineOperand &Use : MI.implicit_operands()) { 43335ffd83dbSDimitry Andric if (Use.isUse() && 43345ffd83dbSDimitry Andric (Use.getReg() == AMDGPU::VCC || Use.getReg() == AMDGPU::VCC_LO)) { 43350b57cec5SDimitry Andric Use.setIsUndef(Orig.isUndef()); 43360b57cec5SDimitry Andric Use.setIsKill(Orig.isKill()); 43370b57cec5SDimitry Andric return; 43380b57cec5SDimitry Andric } 43390b57cec5SDimitry Andric } 43400b57cec5SDimitry Andric } 43410b57cec5SDimitry Andric 43420b57cec5SDimitry Andric MachineInstr *SIInstrInfo::buildShrunkInst(MachineInstr &MI, 43430b57cec5SDimitry Andric unsigned Op32) const { 434481ad6265SDimitry Andric MachineBasicBlock *MBB = MI.getParent(); 43450b57cec5SDimitry Andric MachineInstrBuilder Inst32 = 43465ffd83dbSDimitry Andric BuildMI(*MBB, MI, MI.getDebugLoc(), get(Op32)) 43475ffd83dbSDimitry Andric .setMIFlags(MI.getFlags()); 43480b57cec5SDimitry Andric 43490b57cec5SDimitry Andric // Add the dst operand if the 32-bit encoding also has an explicit $vdst. 43500b57cec5SDimitry Andric // For VOPC instructions, this is replaced by an implicit def of vcc. 4351bdd1243dSDimitry Andric if (AMDGPU::hasNamedOperand(Op32, AMDGPU::OpName::vdst)) { 43520b57cec5SDimitry Andric // dst 43530b57cec5SDimitry Andric Inst32.add(MI.getOperand(0)); 4354bdd1243dSDimitry Andric } else if (AMDGPU::hasNamedOperand(Op32, AMDGPU::OpName::sdst)) { 435581ad6265SDimitry Andric // VOPCX instructions won't be writing to an explicit dst, so this should 435681ad6265SDimitry Andric // not fail for these instructions. 
43570b57cec5SDimitry Andric assert(((MI.getOperand(0).getReg() == AMDGPU::VCC) || 43580b57cec5SDimitry Andric (MI.getOperand(0).getReg() == AMDGPU::VCC_LO)) && 43590b57cec5SDimitry Andric "Unexpected case"); 43600b57cec5SDimitry Andric } 43610b57cec5SDimitry Andric 43620b57cec5SDimitry Andric Inst32.add(*getNamedOperand(MI, AMDGPU::OpName::src0)); 43630b57cec5SDimitry Andric 43640b57cec5SDimitry Andric const MachineOperand *Src1 = getNamedOperand(MI, AMDGPU::OpName::src1); 43650b57cec5SDimitry Andric if (Src1) 43660b57cec5SDimitry Andric Inst32.add(*Src1); 43670b57cec5SDimitry Andric 43680b57cec5SDimitry Andric const MachineOperand *Src2 = getNamedOperand(MI, AMDGPU::OpName::src2); 43690b57cec5SDimitry Andric 43700b57cec5SDimitry Andric if (Src2) { 43710b57cec5SDimitry Andric int Op32Src2Idx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::src2); 43720b57cec5SDimitry Andric if (Op32Src2Idx != -1) { 43730b57cec5SDimitry Andric Inst32.add(*Src2); 43740b57cec5SDimitry Andric } else { 43750b57cec5SDimitry Andric // In the case of V_CNDMASK_B32_e32, the explicit operand src2 is 4376e8d8bef9SDimitry Andric // replaced with an implicit read of vcc or vcc_lo. The implicit read 4377e8d8bef9SDimitry Andric // of vcc was already added during the initial BuildMI, but we 4378e8d8bef9SDimitry Andric // 1) may need to change vcc to vcc_lo to preserve the original register 4379e8d8bef9SDimitry Andric // 2) have to preserve the original flags. 4380e8d8bef9SDimitry Andric fixImplicitOperands(*Inst32); 43810b57cec5SDimitry Andric copyFlagsToImplicitVCC(*Inst32, *Src2); 43820b57cec5SDimitry Andric } 43830b57cec5SDimitry Andric } 43840b57cec5SDimitry Andric 43850b57cec5SDimitry Andric return Inst32; 43860b57cec5SDimitry Andric } 43870b57cec5SDimitry Andric 43880b57cec5SDimitry Andric bool SIInstrInfo::usesConstantBus(const MachineRegisterInfo &MRI, 43890b57cec5SDimitry Andric const MachineOperand &MO, 43900b57cec5SDimitry Andric const MCOperandInfo &OpInfo) const { 43910b57cec5SDimitry Andric // Literal constants use the constant bus. 43920b57cec5SDimitry Andric if (!MO.isReg()) 4393bdd1243dSDimitry Andric return !isInlineConstant(MO, OpInfo); 43940b57cec5SDimitry Andric 43950b57cec5SDimitry Andric if (!MO.isUse()) 43960b57cec5SDimitry Andric return false; 43970b57cec5SDimitry Andric 4398e8d8bef9SDimitry Andric if (MO.getReg().isVirtual()) 43990b57cec5SDimitry Andric return RI.isSGPRClass(MRI.getRegClass(MO.getReg())); 44000b57cec5SDimitry Andric 44010b57cec5SDimitry Andric // Null is free 440281ad6265SDimitry Andric if (MO.getReg() == AMDGPU::SGPR_NULL || MO.getReg() == AMDGPU::SGPR_NULL64) 44030b57cec5SDimitry Andric return false; 44040b57cec5SDimitry Andric 44050b57cec5SDimitry Andric // SGPRs use the constant bus 44060b57cec5SDimitry Andric if (MO.isImplicit()) { 44070b57cec5SDimitry Andric return MO.getReg() == AMDGPU::M0 || 44080b57cec5SDimitry Andric MO.getReg() == AMDGPU::VCC || 44090b57cec5SDimitry Andric MO.getReg() == AMDGPU::VCC_LO; 44100b57cec5SDimitry Andric } else { 44110b57cec5SDimitry Andric return AMDGPU::SReg_32RegClass.contains(MO.getReg()) || 44120b57cec5SDimitry Andric AMDGPU::SReg_64RegClass.contains(MO.getReg()); 44130b57cec5SDimitry Andric } 44140b57cec5SDimitry Andric } 44150b57cec5SDimitry Andric 44165ffd83dbSDimitry Andric static Register findImplicitSGPRRead(const MachineInstr &MI) { 44170b57cec5SDimitry Andric for (const MachineOperand &MO : MI.implicit_operands()) { 44180b57cec5SDimitry Andric // We only care about reads. 
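    // (What this scan is after: e.g. V_ADDC_U32_e32 implicitly reads VCC as
    // its carry-in, and such an implicit read occupies the constant bus just
    // like an explicit SGPR operand would.)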
44190b57cec5SDimitry Andric if (MO.isDef()) 44200b57cec5SDimitry Andric continue; 44210b57cec5SDimitry Andric 44220b57cec5SDimitry Andric switch (MO.getReg()) { 44230b57cec5SDimitry Andric case AMDGPU::VCC: 44240b57cec5SDimitry Andric case AMDGPU::VCC_LO: 44250b57cec5SDimitry Andric case AMDGPU::VCC_HI: 44260b57cec5SDimitry Andric case AMDGPU::M0: 44270b57cec5SDimitry Andric case AMDGPU::FLAT_SCR: 44280b57cec5SDimitry Andric return MO.getReg(); 44290b57cec5SDimitry Andric 44300b57cec5SDimitry Andric default: 44310b57cec5SDimitry Andric break; 44320b57cec5SDimitry Andric } 44330b57cec5SDimitry Andric } 44340b57cec5SDimitry Andric 4435bdd1243dSDimitry Andric return Register(); 44360b57cec5SDimitry Andric } 44370b57cec5SDimitry Andric 44380b57cec5SDimitry Andric static bool shouldReadExec(const MachineInstr &MI) { 44390b57cec5SDimitry Andric if (SIInstrInfo::isVALU(MI)) { 44400b57cec5SDimitry Andric switch (MI.getOpcode()) { 44410b57cec5SDimitry Andric case AMDGPU::V_READLANE_B32: 4442*5f757f3fSDimitry Andric case AMDGPU::SI_RESTORE_S32_FROM_VGPR: 44430b57cec5SDimitry Andric case AMDGPU::V_WRITELANE_B32: 4444*5f757f3fSDimitry Andric case AMDGPU::SI_SPILL_S32_TO_VGPR: 44450b57cec5SDimitry Andric return false; 44460b57cec5SDimitry Andric } 44470b57cec5SDimitry Andric 44480b57cec5SDimitry Andric return true; 44490b57cec5SDimitry Andric } 44500b57cec5SDimitry Andric 44518bcb0991SDimitry Andric if (MI.isPreISelOpcode() || 44528bcb0991SDimitry Andric SIInstrInfo::isGenericOpcode(MI.getOpcode()) || 44530b57cec5SDimitry Andric SIInstrInfo::isSALU(MI) || 44540b57cec5SDimitry Andric SIInstrInfo::isSMRD(MI)) 44550b57cec5SDimitry Andric return false; 44560b57cec5SDimitry Andric 44570b57cec5SDimitry Andric return true; 44580b57cec5SDimitry Andric } 44590b57cec5SDimitry Andric 44600b57cec5SDimitry Andric static bool isSubRegOf(const SIRegisterInfo &TRI, 44610b57cec5SDimitry Andric const MachineOperand &SuperVec, 44620b57cec5SDimitry Andric const MachineOperand &SubReg) { 4463e8d8bef9SDimitry Andric if (SubReg.getReg().isPhysical()) 44640b57cec5SDimitry Andric return TRI.isSubRegister(SuperVec.getReg(), SubReg.getReg()); 44650b57cec5SDimitry Andric 44660b57cec5SDimitry Andric return SubReg.getSubReg() != AMDGPU::NoSubRegister && 44670b57cec5SDimitry Andric SubReg.getReg() == SuperVec.getReg(); 44680b57cec5SDimitry Andric } 44690b57cec5SDimitry Andric 44700b57cec5SDimitry Andric bool SIInstrInfo::verifyInstruction(const MachineInstr &MI, 44710b57cec5SDimitry Andric StringRef &ErrInfo) const { 44720b57cec5SDimitry Andric uint16_t Opcode = MI.getOpcode(); 44730b57cec5SDimitry Andric if (SIInstrInfo::isGenericOpcode(MI.getOpcode())) 44740b57cec5SDimitry Andric return true; 44750b57cec5SDimitry Andric 44760b57cec5SDimitry Andric const MachineFunction *MF = MI.getParent()->getParent(); 44770b57cec5SDimitry Andric const MachineRegisterInfo &MRI = MF->getRegInfo(); 44780b57cec5SDimitry Andric 44790b57cec5SDimitry Andric int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 44800b57cec5SDimitry Andric int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 44810b57cec5SDimitry Andric int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 4482753f127fSDimitry Andric int Src3Idx = -1; 4483753f127fSDimitry Andric if (Src0Idx == -1) { 4484753f127fSDimitry Andric // VOPD V_DUAL_* instructions use different operand names. 
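    // (On GFX11, a VOPD instruction bundles an X and a Y VALU half, e.g. a
    // v_dual_mul_f32 paired with a v_dual_add_f32, so its sources are named
    // src0X/vsrc1X and src0Y/vsrc1Y.)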
4485753f127fSDimitry Andric Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0X); 4486753f127fSDimitry Andric Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vsrc1X); 4487753f127fSDimitry Andric Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0Y); 4488753f127fSDimitry Andric Src3Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vsrc1Y); 4489753f127fSDimitry Andric } 44900b57cec5SDimitry Andric 44910b57cec5SDimitry Andric // Make sure the number of operands is correct. 44920b57cec5SDimitry Andric const MCInstrDesc &Desc = get(Opcode); 44930b57cec5SDimitry Andric if (!Desc.isVariadic() && 44940b57cec5SDimitry Andric Desc.getNumOperands() != MI.getNumExplicitOperands()) { 44950b57cec5SDimitry Andric ErrInfo = "Instruction has wrong number of operands."; 44960b57cec5SDimitry Andric return false; 44970b57cec5SDimitry Andric } 44980b57cec5SDimitry Andric 44990b57cec5SDimitry Andric if (MI.isInlineAsm()) { 45000b57cec5SDimitry Andric // Verify register classes for inlineasm constraints. 45010b57cec5SDimitry Andric for (unsigned I = InlineAsm::MIOp_FirstOperand, E = MI.getNumOperands(); 45020b57cec5SDimitry Andric I != E; ++I) { 45030b57cec5SDimitry Andric const TargetRegisterClass *RC = MI.getRegClassConstraint(I, this, &RI); 45040b57cec5SDimitry Andric if (!RC) 45050b57cec5SDimitry Andric continue; 45060b57cec5SDimitry Andric 45070b57cec5SDimitry Andric const MachineOperand &Op = MI.getOperand(I); 45080b57cec5SDimitry Andric if (!Op.isReg()) 45090b57cec5SDimitry Andric continue; 45100b57cec5SDimitry Andric 45118bcb0991SDimitry Andric Register Reg = Op.getReg(); 4512e8d8bef9SDimitry Andric if (!Reg.isVirtual() && !RC->contains(Reg)) { 45130b57cec5SDimitry Andric ErrInfo = "inlineasm operand has incorrect register class."; 45140b57cec5SDimitry Andric return false; 45150b57cec5SDimitry Andric } 45160b57cec5SDimitry Andric } 45170b57cec5SDimitry Andric 45180b57cec5SDimitry Andric return true; 45190b57cec5SDimitry Andric } 45200b57cec5SDimitry Andric 4521*5f757f3fSDimitry Andric if (isImage(MI) && MI.memoperands_empty() && MI.mayLoadOrStore()) { 4522*5f757f3fSDimitry Andric ErrInfo = "missing memory operand from image instruction."; 45235ffd83dbSDimitry Andric return false; 45245ffd83dbSDimitry Andric } 45255ffd83dbSDimitry Andric 45260b57cec5SDimitry Andric // Make sure the register classes are correct. 45270b57cec5SDimitry Andric for (int i = 0, e = Desc.getNumOperands(); i != e; ++i) { 4528fe6060f1SDimitry Andric const MachineOperand &MO = MI.getOperand(i); 4529fe6060f1SDimitry Andric if (MO.isFPImm()) { 45300b57cec5SDimitry Andric ErrInfo = "FPImm Machine Operands are not supported. 
ISel should bitcast " 45310b57cec5SDimitry Andric "all fp values to integers."; 45320b57cec5SDimitry Andric return false; 45330b57cec5SDimitry Andric } 45340b57cec5SDimitry Andric 4535bdd1243dSDimitry Andric int RegClass = Desc.operands()[i].RegClass; 45360b57cec5SDimitry Andric 4537bdd1243dSDimitry Andric switch (Desc.operands()[i].OperandType) { 45380b57cec5SDimitry Andric case MCOI::OPERAND_REGISTER: 45390b57cec5SDimitry Andric if (MI.getOperand(i).isImm() || MI.getOperand(i).isGlobal()) { 45400b57cec5SDimitry Andric ErrInfo = "Illegal immediate value for operand."; 45410b57cec5SDimitry Andric return false; 45420b57cec5SDimitry Andric } 45430b57cec5SDimitry Andric break; 45440b57cec5SDimitry Andric case AMDGPU::OPERAND_REG_IMM_INT32: 45450b57cec5SDimitry Andric case AMDGPU::OPERAND_REG_IMM_FP32: 4546349cc55cSDimitry Andric case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 454781ad6265SDimitry Andric case AMDGPU::OPERAND_REG_IMM_V2FP32: 45480b57cec5SDimitry Andric break; 45490b57cec5SDimitry Andric case AMDGPU::OPERAND_REG_INLINE_C_INT32: 45500b57cec5SDimitry Andric case AMDGPU::OPERAND_REG_INLINE_C_FP32: 45510b57cec5SDimitry Andric case AMDGPU::OPERAND_REG_INLINE_C_INT64: 45520b57cec5SDimitry Andric case AMDGPU::OPERAND_REG_INLINE_C_FP64: 45530b57cec5SDimitry Andric case AMDGPU::OPERAND_REG_INLINE_C_INT16: 45540b57cec5SDimitry Andric case AMDGPU::OPERAND_REG_INLINE_C_FP16: 45550b57cec5SDimitry Andric case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 45560b57cec5SDimitry Andric case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 45570b57cec5SDimitry Andric case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 4558fe6060f1SDimitry Andric case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 4559fe6060f1SDimitry Andric case AMDGPU::OPERAND_REG_INLINE_AC_FP64: { 45600b57cec5SDimitry Andric if (!MO.isReg() && (!MO.isImm() || !isInlineConstant(MI, i))) { 45610b57cec5SDimitry Andric ErrInfo = "Illegal immediate value for operand."; 45620b57cec5SDimitry Andric return false; 45630b57cec5SDimitry Andric } 45640b57cec5SDimitry Andric break; 45650b57cec5SDimitry Andric } 4566*5f757f3fSDimitry Andric case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32: 4567*5f757f3fSDimitry Andric if (!MI.getOperand(i).isImm() || !isInlineConstant(MI, i)) { 4568*5f757f3fSDimitry Andric ErrInfo = "Expected inline constant for operand."; 4569*5f757f3fSDimitry Andric return false; 4570*5f757f3fSDimitry Andric } 4571*5f757f3fSDimitry Andric break; 45720b57cec5SDimitry Andric case MCOI::OPERAND_IMMEDIATE: 45730b57cec5SDimitry Andric case AMDGPU::OPERAND_KIMM32: 45740b57cec5SDimitry Andric // Check if this operand is an immediate. 45750b57cec5SDimitry Andric // FrameIndex operands will be replaced by immediates, so they are 45760b57cec5SDimitry Andric // allowed. 45770b57cec5SDimitry Andric if (!MI.getOperand(i).isImm() && !MI.getOperand(i).isFI()) { 45780b57cec5SDimitry Andric ErrInfo = "Expected immediate, but got non-immediate"; 45790b57cec5SDimitry Andric return false; 45800b57cec5SDimitry Andric } 4581bdd1243dSDimitry Andric [[fallthrough]]; 45820b57cec5SDimitry Andric default: 45830b57cec5SDimitry Andric continue; 45840b57cec5SDimitry Andric } 45850b57cec5SDimitry Andric 4586fe6060f1SDimitry Andric if (!MO.isReg()) 4587fe6060f1SDimitry Andric continue; 4588fe6060f1SDimitry Andric Register Reg = MO.getReg(); 4589fe6060f1SDimitry Andric if (!Reg) 45900b57cec5SDimitry Andric continue; 45910b57cec5SDimitry Andric 4592fe6060f1SDimitry Andric // FIXME: Ideally we would have separate instruction definitions with the 4593fe6060f1SDimitry Andric // aligned register constraint. 
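    // On subtargets where needsAlignedVGPRs() is true (e.g. gfx90a-style
    // targets), VGPR/AGPR tuples wider than 32 bits must start at an
    // even-numbered register: a 64-bit operand in v[2:3] is accepted, while
    // one in v[1:2] fails the check below.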
4594fe6060f1SDimitry Andric // FIXME: We do not verify inline asm operands, but custom inline asm 4595fe6060f1SDimitry Andric // verification is broken anyway 4596fe6060f1SDimitry Andric if (ST.needsAlignedVGPRs()) { 4597fe6060f1SDimitry Andric const TargetRegisterClass *RC = RI.getRegClassForReg(MRI, Reg); 45984824e7fdSDimitry Andric if (RI.hasVectorRegisters(RC) && MO.getSubReg()) { 4599fe6060f1SDimitry Andric const TargetRegisterClass *SubRC = 4600bdd1243dSDimitry Andric RI.getSubRegisterClass(RC, MO.getSubReg()); 4601fe6060f1SDimitry Andric RC = RI.getCompatibleSubRegClass(RC, SubRC, MO.getSubReg()); 4602fe6060f1SDimitry Andric if (RC) 4603fe6060f1SDimitry Andric RC = SubRC; 4604fe6060f1SDimitry Andric } 4605fe6060f1SDimitry Andric 4606fe6060f1SDimitry Andric // Check that this is the aligned version of the class. 4607fe6060f1SDimitry Andric if (!RC || !RI.isProperlyAlignedRC(*RC)) { 4608fe6060f1SDimitry Andric ErrInfo = "Subtarget requires even aligned vector registers"; 4609fe6060f1SDimitry Andric return false; 4610fe6060f1SDimitry Andric } 4611fe6060f1SDimitry Andric } 4612fe6060f1SDimitry Andric 46130b57cec5SDimitry Andric if (RegClass != -1) { 4614fe6060f1SDimitry Andric if (Reg.isVirtual()) 46150b57cec5SDimitry Andric continue; 46160b57cec5SDimitry Andric 46170b57cec5SDimitry Andric const TargetRegisterClass *RC = RI.getRegClass(RegClass); 46180b57cec5SDimitry Andric if (!RC->contains(Reg)) { 46190b57cec5SDimitry Andric ErrInfo = "Operand has incorrect register class."; 46200b57cec5SDimitry Andric return false; 46210b57cec5SDimitry Andric } 46220b57cec5SDimitry Andric } 46230b57cec5SDimitry Andric } 46240b57cec5SDimitry Andric 46250b57cec5SDimitry Andric // Verify SDWA 46260b57cec5SDimitry Andric if (isSDWA(MI)) { 46270b57cec5SDimitry Andric if (!ST.hasSDWA()) { 46280b57cec5SDimitry Andric ErrInfo = "SDWA is not supported on this target"; 46290b57cec5SDimitry Andric return false; 46300b57cec5SDimitry Andric } 46310b57cec5SDimitry Andric 46320b57cec5SDimitry Andric int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 46330b57cec5SDimitry Andric 463481ad6265SDimitry Andric for (int OpIdx : {DstIdx, Src0Idx, Src1Idx, Src2Idx}) { 46350b57cec5SDimitry Andric if (OpIdx == -1) 46360b57cec5SDimitry Andric continue; 46370b57cec5SDimitry Andric const MachineOperand &MO = MI.getOperand(OpIdx); 46380b57cec5SDimitry Andric 46390b57cec5SDimitry Andric if (!ST.hasSDWAScalar()) { 46400b57cec5SDimitry Andric // Only VGPRS on VI 46410b57cec5SDimitry Andric if (!MO.isReg() || !RI.hasVGPRs(RI.getRegClassForReg(MRI, MO.getReg()))) { 46420b57cec5SDimitry Andric ErrInfo = "Only VGPRs allowed as operands in SDWA instructions on VI"; 46430b57cec5SDimitry Andric return false; 46440b57cec5SDimitry Andric } 46450b57cec5SDimitry Andric } else { 46460b57cec5SDimitry Andric // No immediates on GFX9 46470b57cec5SDimitry Andric if (!MO.isReg()) { 4648e8d8bef9SDimitry Andric ErrInfo = 4649e8d8bef9SDimitry Andric "Only reg allowed as operands in SDWA instructions on GFX9+"; 46500b57cec5SDimitry Andric return false; 46510b57cec5SDimitry Andric } 46520b57cec5SDimitry Andric } 46530b57cec5SDimitry Andric } 46540b57cec5SDimitry Andric 46550b57cec5SDimitry Andric if (!ST.hasSDWAOmod()) { 46560b57cec5SDimitry Andric // No omod allowed on VI 46570b57cec5SDimitry Andric const MachineOperand *OMod = getNamedOperand(MI, AMDGPU::OpName::omod); 46580b57cec5SDimitry Andric if (OMod != nullptr && 46590b57cec5SDimitry Andric (!OMod->isImm() || OMod->getImm() != 0)) { 46600b57cec5SDimitry Andric ErrInfo = "OMod 
not allowed in SDWA instructions on VI"; 46610b57cec5SDimitry Andric return false; 46620b57cec5SDimitry Andric } 46630b57cec5SDimitry Andric } 46640b57cec5SDimitry Andric 46650b57cec5SDimitry Andric uint16_t BasicOpcode = AMDGPU::getBasicFromSDWAOp(Opcode); 46660b57cec5SDimitry Andric if (isVOPC(BasicOpcode)) { 46670b57cec5SDimitry Andric if (!ST.hasSDWASdst() && DstIdx != -1) { 46680b57cec5SDimitry Andric // Only vcc allowed as dst on VI for VOPC 46690b57cec5SDimitry Andric const MachineOperand &Dst = MI.getOperand(DstIdx); 46700b57cec5SDimitry Andric if (!Dst.isReg() || Dst.getReg() != AMDGPU::VCC) { 46710b57cec5SDimitry Andric ErrInfo = "Only VCC allowed as dst in SDWA instructions on VI"; 46720b57cec5SDimitry Andric return false; 46730b57cec5SDimitry Andric } 46740b57cec5SDimitry Andric } else if (!ST.hasSDWAOutModsVOPC()) { 46750b57cec5SDimitry Andric // No clamp allowed on GFX9 for VOPC 46760b57cec5SDimitry Andric const MachineOperand *Clamp = getNamedOperand(MI, AMDGPU::OpName::clamp); 46770b57cec5SDimitry Andric if (Clamp && (!Clamp->isImm() || Clamp->getImm() != 0)) { 46780b57cec5SDimitry Andric ErrInfo = "Clamp not allowed in VOPC SDWA instructions on VI"; 46790b57cec5SDimitry Andric return false; 46800b57cec5SDimitry Andric } 46810b57cec5SDimitry Andric 46820b57cec5SDimitry Andric // No omod allowed on GFX9 for VOPC 46830b57cec5SDimitry Andric const MachineOperand *OMod = getNamedOperand(MI, AMDGPU::OpName::omod); 46840b57cec5SDimitry Andric if (OMod && (!OMod->isImm() || OMod->getImm() != 0)) { 46850b57cec5SDimitry Andric ErrInfo = "OMod not allowed in VOPC SDWA instructions on VI"; 46860b57cec5SDimitry Andric return false; 46870b57cec5SDimitry Andric } 46880b57cec5SDimitry Andric } 46890b57cec5SDimitry Andric } 46900b57cec5SDimitry Andric 46910b57cec5SDimitry Andric const MachineOperand *DstUnused = getNamedOperand(MI, AMDGPU::OpName::dst_unused); 46920b57cec5SDimitry Andric if (DstUnused && DstUnused->isImm() && 46930b57cec5SDimitry Andric DstUnused->getImm() == AMDGPU::SDWA::UNUSED_PRESERVE) { 46940b57cec5SDimitry Andric const MachineOperand &Dst = MI.getOperand(DstIdx); 46950b57cec5SDimitry Andric if (!Dst.isReg() || !Dst.isTied()) { 46960b57cec5SDimitry Andric ErrInfo = "Dst register should have tied register"; 46970b57cec5SDimitry Andric return false; 46980b57cec5SDimitry Andric } 46990b57cec5SDimitry Andric 47000b57cec5SDimitry Andric const MachineOperand &TiedMO = 47010b57cec5SDimitry Andric MI.getOperand(MI.findTiedOperandIdx(DstIdx)); 47020b57cec5SDimitry Andric if (!TiedMO.isReg() || !TiedMO.isImplicit() || !TiedMO.isUse()) { 47030b57cec5SDimitry Andric ErrInfo = 47040b57cec5SDimitry Andric "Dst register should be tied to implicit use of preserved register"; 47050b57cec5SDimitry Andric return false; 4706e8d8bef9SDimitry Andric } else if (TiedMO.getReg().isPhysical() && 47070b57cec5SDimitry Andric Dst.getReg() != TiedMO.getReg()) { 47080b57cec5SDimitry Andric ErrInfo = "Dst register should use same physical register as preserved"; 47090b57cec5SDimitry Andric return false; 47100b57cec5SDimitry Andric } 47110b57cec5SDimitry Andric } 47120b57cec5SDimitry Andric } 47130b57cec5SDimitry Andric 4714*5f757f3fSDimitry Andric // Verify MIMG / VIMAGE / VSAMPLE 4715*5f757f3fSDimitry Andric if (isImage(MI.getOpcode()) && !MI.mayStore()) { 47160b57cec5SDimitry Andric // Ensure that the return type used is large enough for all the options 47170b57cec5SDimitry Andric // being used TFE/LWE require an extra result register. 
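    // For example, a load with dmask = 0xB enables three channels and so needs
    // three result dwords (a gather4 always produces four); packed D16 halves
    // that count (rounded up), and enabling TFE or LWE adds one more dword.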
47180b57cec5SDimitry Andric const MachineOperand *DMask = getNamedOperand(MI, AMDGPU::OpName::dmask); 47190b57cec5SDimitry Andric if (DMask) { 47200b57cec5SDimitry Andric uint64_t DMaskImm = DMask->getImm(); 47210b57cec5SDimitry Andric uint32_t RegCount = 4722bdd1243dSDimitry Andric isGather4(MI.getOpcode()) ? 4 : llvm::popcount(DMaskImm); 47230b57cec5SDimitry Andric const MachineOperand *TFE = getNamedOperand(MI, AMDGPU::OpName::tfe); 47240b57cec5SDimitry Andric const MachineOperand *LWE = getNamedOperand(MI, AMDGPU::OpName::lwe); 47250b57cec5SDimitry Andric const MachineOperand *D16 = getNamedOperand(MI, AMDGPU::OpName::d16); 47260b57cec5SDimitry Andric 47270b57cec5SDimitry Andric // Adjust for packed 16 bit values 47280b57cec5SDimitry Andric if (D16 && D16->getImm() && !ST.hasUnpackedD16VMem()) 472906c3fb27SDimitry Andric RegCount = divideCeil(RegCount, 2); 47300b57cec5SDimitry Andric 47310b57cec5SDimitry Andric // Adjust if using LWE or TFE 47320b57cec5SDimitry Andric if ((LWE && LWE->getImm()) || (TFE && TFE->getImm())) 47330b57cec5SDimitry Andric RegCount += 1; 47340b57cec5SDimitry Andric 47350b57cec5SDimitry Andric const uint32_t DstIdx = 47360b57cec5SDimitry Andric AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata); 47370b57cec5SDimitry Andric const MachineOperand &Dst = MI.getOperand(DstIdx); 47380b57cec5SDimitry Andric if (Dst.isReg()) { 47390b57cec5SDimitry Andric const TargetRegisterClass *DstRC = getOpRegClass(MI, DstIdx); 47400b57cec5SDimitry Andric uint32_t DstSize = RI.getRegSizeInBits(*DstRC) / 32; 47410b57cec5SDimitry Andric if (RegCount > DstSize) { 474206c3fb27SDimitry Andric ErrInfo = "Image instruction returns too many registers for dst " 47430b57cec5SDimitry Andric "register class"; 47440b57cec5SDimitry Andric return false; 47450b57cec5SDimitry Andric } 47460b57cec5SDimitry Andric } 47470b57cec5SDimitry Andric } 47480b57cec5SDimitry Andric } 47490b57cec5SDimitry Andric 47500b57cec5SDimitry Andric // Verify VOP*. Ignore multiple sgpr operands on writelane. 475181ad6265SDimitry Andric if (isVALU(MI) && Desc.getOpcode() != AMDGPU::V_WRITELANE_B32) { 47520b57cec5SDimitry Andric unsigned ConstantBusCount = 0; 4753fe6060f1SDimitry Andric bool UsesLiteral = false; 4754fe6060f1SDimitry Andric const MachineOperand *LiteralVal = nullptr; 47550b57cec5SDimitry Andric 475681ad6265SDimitry Andric int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm); 475781ad6265SDimitry Andric if (ImmIdx != -1) { 47580b57cec5SDimitry Andric ++ConstantBusCount; 475981ad6265SDimitry Andric UsesLiteral = true; 476081ad6265SDimitry Andric LiteralVal = &MI.getOperand(ImmIdx); 476181ad6265SDimitry Andric } 47620b57cec5SDimitry Andric 47635ffd83dbSDimitry Andric SmallVector<Register, 2> SGPRsUsed; 4764e8d8bef9SDimitry Andric Register SGPRUsed; 47650b57cec5SDimitry Andric 476681ad6265SDimitry Andric // Only look at the true operands. Only a real operand can use the constant 476781ad6265SDimitry Andric // bus, and we don't want to check pseudo-operands like the source modifier 476881ad6265SDimitry Andric // flags. 
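    // For example, with a single constant-bus slot (as on pre-GFX10 targets),
    // a VOP3 add reading two different SGPRs such as s0 and s1 is rejected
    // here, while reusing the same SGPR twice counts only once; GFX10+
    // generally allows two slots.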
4769753f127fSDimitry Andric for (int OpIdx : {Src0Idx, Src1Idx, Src2Idx, Src3Idx}) { 47700b57cec5SDimitry Andric if (OpIdx == -1) 4771753f127fSDimitry Andric continue; 47720b57cec5SDimitry Andric const MachineOperand &MO = MI.getOperand(OpIdx); 4773bdd1243dSDimitry Andric if (usesConstantBus(MRI, MO, MI.getDesc().operands()[OpIdx])) { 47740b57cec5SDimitry Andric if (MO.isReg()) { 47750b57cec5SDimitry Andric SGPRUsed = MO.getReg(); 4776bdd1243dSDimitry Andric if (!llvm::is_contained(SGPRsUsed, SGPRUsed)) { 47770b57cec5SDimitry Andric ++ConstantBusCount; 47780b57cec5SDimitry Andric SGPRsUsed.push_back(SGPRUsed); 47790b57cec5SDimitry Andric } 47800b57cec5SDimitry Andric } else { 4781fe6060f1SDimitry Andric if (!UsesLiteral) { 47820b57cec5SDimitry Andric ++ConstantBusCount; 4783fe6060f1SDimitry Andric UsesLiteral = true; 4784fe6060f1SDimitry Andric LiteralVal = &MO; 4785fe6060f1SDimitry Andric } else if (!MO.isIdenticalTo(*LiteralVal)) { 478681ad6265SDimitry Andric assert(isVOP2(MI) || isVOP3(MI)); 478781ad6265SDimitry Andric ErrInfo = "VOP2/VOP3 instruction uses more than one literal"; 4788fe6060f1SDimitry Andric return false; 4789fe6060f1SDimitry Andric } 47900b57cec5SDimitry Andric } 47910b57cec5SDimitry Andric } 47920b57cec5SDimitry Andric } 4793e8d8bef9SDimitry Andric 4794e8d8bef9SDimitry Andric SGPRUsed = findImplicitSGPRRead(MI); 4795bdd1243dSDimitry Andric if (SGPRUsed) { 479681ad6265SDimitry Andric // Implicit uses may safely overlap true operands 4797e8d8bef9SDimitry Andric if (llvm::all_of(SGPRsUsed, [this, SGPRUsed](unsigned SGPR) { 4798e8d8bef9SDimitry Andric return !RI.regsOverlap(SGPRUsed, SGPR); 4799e8d8bef9SDimitry Andric })) { 4800e8d8bef9SDimitry Andric ++ConstantBusCount; 4801e8d8bef9SDimitry Andric SGPRsUsed.push_back(SGPRUsed); 4802e8d8bef9SDimitry Andric } 4803e8d8bef9SDimitry Andric } 4804e8d8bef9SDimitry Andric 48050b57cec5SDimitry Andric // v_writelane_b32 is an exception from constant bus restriction: 48060b57cec5SDimitry Andric // vsrc0 can be sgpr, const or m0 and lane select sgpr, m0 or inline-const 48070b57cec5SDimitry Andric if (ConstantBusCount > ST.getConstantBusLimit(Opcode) && 48080b57cec5SDimitry Andric Opcode != AMDGPU::V_WRITELANE_B32) { 48090b57cec5SDimitry Andric ErrInfo = "VOP* instruction violates constant bus restriction"; 48100b57cec5SDimitry Andric return false; 48110b57cec5SDimitry Andric } 48120b57cec5SDimitry Andric 4813fe6060f1SDimitry Andric if (isVOP3(MI) && UsesLiteral && !ST.hasVOP3Literal()) { 48140b57cec5SDimitry Andric ErrInfo = "VOP3 instruction uses literal"; 48150b57cec5SDimitry Andric return false; 48160b57cec5SDimitry Andric } 48170b57cec5SDimitry Andric } 48180b57cec5SDimitry Andric 48198bcb0991SDimitry Andric // Special case for writelane - this can break the multiple constant bus rule, 48208bcb0991SDimitry Andric // but still can't use more than one SGPR register 48218bcb0991SDimitry Andric if (Desc.getOpcode() == AMDGPU::V_WRITELANE_B32) { 48228bcb0991SDimitry Andric unsigned SGPRCount = 0; 4823bdd1243dSDimitry Andric Register SGPRUsed; 48248bcb0991SDimitry Andric 482581ad6265SDimitry Andric for (int OpIdx : {Src0Idx, Src1Idx}) { 48268bcb0991SDimitry Andric if (OpIdx == -1) 48278bcb0991SDimitry Andric break; 48288bcb0991SDimitry Andric 48298bcb0991SDimitry Andric const MachineOperand &MO = MI.getOperand(OpIdx); 48308bcb0991SDimitry Andric 4831bdd1243dSDimitry Andric if (usesConstantBus(MRI, MO, MI.getDesc().operands()[OpIdx])) { 48328bcb0991SDimitry Andric if (MO.isReg() && MO.getReg() != AMDGPU::M0) { 48338bcb0991SDimitry 
Andric if (MO.getReg() != SGPRUsed) 48348bcb0991SDimitry Andric ++SGPRCount; 48358bcb0991SDimitry Andric SGPRUsed = MO.getReg(); 48368bcb0991SDimitry Andric } 48378bcb0991SDimitry Andric } 48388bcb0991SDimitry Andric if (SGPRCount > ST.getConstantBusLimit(Opcode)) { 48398bcb0991SDimitry Andric ErrInfo = "WRITELANE instruction violates constant bus restriction"; 48408bcb0991SDimitry Andric return false; 48418bcb0991SDimitry Andric } 48428bcb0991SDimitry Andric } 48438bcb0991SDimitry Andric } 48448bcb0991SDimitry Andric 48450b57cec5SDimitry Andric // Verify misc. restrictions on specific instructions. 4846e8d8bef9SDimitry Andric if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32_e64 || 4847e8d8bef9SDimitry Andric Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64_e64) { 48480b57cec5SDimitry Andric const MachineOperand &Src0 = MI.getOperand(Src0Idx); 48490b57cec5SDimitry Andric const MachineOperand &Src1 = MI.getOperand(Src1Idx); 48500b57cec5SDimitry Andric const MachineOperand &Src2 = MI.getOperand(Src2Idx); 48510b57cec5SDimitry Andric if (Src0.isReg() && Src1.isReg() && Src2.isReg()) { 48520b57cec5SDimitry Andric if (!compareMachineOp(Src0, Src1) && 48530b57cec5SDimitry Andric !compareMachineOp(Src0, Src2)) { 48540b57cec5SDimitry Andric ErrInfo = "v_div_scale_{f32|f64} require src0 = src1 or src2"; 48550b57cec5SDimitry Andric return false; 48560b57cec5SDimitry Andric } 48570b57cec5SDimitry Andric } 4858e8d8bef9SDimitry Andric if ((getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)->getImm() & 4859e8d8bef9SDimitry Andric SISrcMods::ABS) || 4860e8d8bef9SDimitry Andric (getNamedOperand(MI, AMDGPU::OpName::src1_modifiers)->getImm() & 4861e8d8bef9SDimitry Andric SISrcMods::ABS) || 4862e8d8bef9SDimitry Andric (getNamedOperand(MI, AMDGPU::OpName::src2_modifiers)->getImm() & 4863e8d8bef9SDimitry Andric SISrcMods::ABS)) { 4864e8d8bef9SDimitry Andric ErrInfo = "ABS not allowed in VOP3B instructions"; 4865e8d8bef9SDimitry Andric return false; 4866e8d8bef9SDimitry Andric } 48670b57cec5SDimitry Andric } 48680b57cec5SDimitry Andric 48690b57cec5SDimitry Andric if (isSOP2(MI) || isSOPC(MI)) { 48700b57cec5SDimitry Andric const MachineOperand &Src0 = MI.getOperand(Src0Idx); 48710b57cec5SDimitry Andric const MachineOperand &Src1 = MI.getOperand(Src1Idx); 48720b57cec5SDimitry Andric 487381ad6265SDimitry Andric if (!Src0.isReg() && !Src1.isReg() && 4874bdd1243dSDimitry Andric !isInlineConstant(Src0, Desc.operands()[Src0Idx]) && 4875bdd1243dSDimitry Andric !isInlineConstant(Src1, Desc.operands()[Src1Idx]) && 487681ad6265SDimitry Andric !Src0.isIdenticalTo(Src1)) { 48770b57cec5SDimitry Andric ErrInfo = "SOP2/SOPC instruction requires too many immediate constants"; 48780b57cec5SDimitry Andric return false; 48790b57cec5SDimitry Andric } 48800b57cec5SDimitry Andric } 48810b57cec5SDimitry Andric 48820b57cec5SDimitry Andric if (isSOPK(MI)) { 48830b57cec5SDimitry Andric auto Op = getNamedOperand(MI, AMDGPU::OpName::simm16); 48840b57cec5SDimitry Andric if (Desc.isBranch()) { 48850b57cec5SDimitry Andric if (!Op->isMBB()) { 48860b57cec5SDimitry Andric ErrInfo = "invalid branch target for SOPK instruction"; 48870b57cec5SDimitry Andric return false; 48880b57cec5SDimitry Andric } 48890b57cec5SDimitry Andric } else { 48900b57cec5SDimitry Andric uint64_t Imm = Op->getImm(); 48910b57cec5SDimitry Andric if (sopkIsZext(MI)) { 48920b57cec5SDimitry Andric if (!isUInt<16>(Imm)) { 48930b57cec5SDimitry Andric ErrInfo = "invalid immediate for SOPK instruction"; 48940b57cec5SDimitry Andric return false; 48950b57cec5SDimitry Andric } 
48960b57cec5SDimitry Andric } else { 48970b57cec5SDimitry Andric if (!isInt<16>(Imm)) { 48980b57cec5SDimitry Andric ErrInfo = "invalid immediate for SOPK instruction"; 48990b57cec5SDimitry Andric return false; 49000b57cec5SDimitry Andric } 49010b57cec5SDimitry Andric } 49020b57cec5SDimitry Andric } 49030b57cec5SDimitry Andric } 49040b57cec5SDimitry Andric 49050b57cec5SDimitry Andric if (Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e32 || 49060b57cec5SDimitry Andric Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e64 || 49070b57cec5SDimitry Andric Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 || 49080b57cec5SDimitry Andric Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64) { 49090b57cec5SDimitry Andric const bool IsDst = Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 || 49100b57cec5SDimitry Andric Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64; 49110b57cec5SDimitry Andric 4912bdd1243dSDimitry Andric const unsigned StaticNumOps = 4913bdd1243dSDimitry Andric Desc.getNumOperands() + Desc.implicit_uses().size(); 49140b57cec5SDimitry Andric const unsigned NumImplicitOps = IsDst ? 2 : 1; 49150b57cec5SDimitry Andric 49160b57cec5SDimitry Andric // Allow additional implicit operands. This allows a fixup done by the post 49170b57cec5SDimitry Andric // RA scheduler where the main implicit operand is killed and implicit-defs 49180b57cec5SDimitry Andric // are added for sub-registers that remain live after this instruction. 49190b57cec5SDimitry Andric if (MI.getNumOperands() < StaticNumOps + NumImplicitOps) { 49200b57cec5SDimitry Andric ErrInfo = "missing implicit register operands"; 49210b57cec5SDimitry Andric return false; 49220b57cec5SDimitry Andric } 49230b57cec5SDimitry Andric 49240b57cec5SDimitry Andric const MachineOperand *Dst = getNamedOperand(MI, AMDGPU::OpName::vdst); 49250b57cec5SDimitry Andric if (IsDst) { 49260b57cec5SDimitry Andric if (!Dst->isUse()) { 49270b57cec5SDimitry Andric ErrInfo = "v_movreld_b32 vdst should be a use operand"; 49280b57cec5SDimitry Andric return false; 49290b57cec5SDimitry Andric } 49300b57cec5SDimitry Andric 49310b57cec5SDimitry Andric unsigned UseOpIdx; 49320b57cec5SDimitry Andric if (!MI.isRegTiedToUseOperand(StaticNumOps, &UseOpIdx) || 49330b57cec5SDimitry Andric UseOpIdx != StaticNumOps + 1) { 49340b57cec5SDimitry Andric ErrInfo = "movrel implicit operands should be tied"; 49350b57cec5SDimitry Andric return false; 49360b57cec5SDimitry Andric } 49370b57cec5SDimitry Andric } 49380b57cec5SDimitry Andric 49390b57cec5SDimitry Andric const MachineOperand &Src0 = MI.getOperand(Src0Idx); 49400b57cec5SDimitry Andric const MachineOperand &ImpUse 49410b57cec5SDimitry Andric = MI.getOperand(StaticNumOps + NumImplicitOps - 1); 49420b57cec5SDimitry Andric if (!ImpUse.isReg() || !ImpUse.isUse() || 49430b57cec5SDimitry Andric !isSubRegOf(RI, ImpUse, IsDst ? *Dst : Src0)) { 49440b57cec5SDimitry Andric ErrInfo = "src0 should be subreg of implicit vector use"; 49450b57cec5SDimitry Andric return false; 49460b57cec5SDimitry Andric } 49470b57cec5SDimitry Andric } 49480b57cec5SDimitry Andric 49490b57cec5SDimitry Andric // Make sure we aren't losing exec uses in the td files. This mostly requires 49500b57cec5SDimitry Andric // being careful when using let Uses to try to add other use registers. 
49510b57cec5SDimitry Andric if (shouldReadExec(MI)) { 49520b57cec5SDimitry Andric if (!MI.hasRegisterImplicitUseOperand(AMDGPU::EXEC)) { 49530b57cec5SDimitry Andric ErrInfo = "VALU instruction does not implicitly read exec mask"; 49540b57cec5SDimitry Andric return false; 49550b57cec5SDimitry Andric } 49560b57cec5SDimitry Andric } 49570b57cec5SDimitry Andric 49580b57cec5SDimitry Andric if (isSMRD(MI)) { 495981ad6265SDimitry Andric if (MI.mayStore() && 496081ad6265SDimitry Andric ST.getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS) { 49610b57cec5SDimitry Andric // The register offset form of scalar stores may only use m0 as the 49620b57cec5SDimitry Andric // soffset register. 496381ad6265SDimitry Andric const MachineOperand *Soff = getNamedOperand(MI, AMDGPU::OpName::soffset); 49640b57cec5SDimitry Andric if (Soff && Soff->getReg() != AMDGPU::M0) { 49650b57cec5SDimitry Andric ErrInfo = "scalar stores must use m0 as offset register"; 49660b57cec5SDimitry Andric return false; 49670b57cec5SDimitry Andric } 49680b57cec5SDimitry Andric } 49690b57cec5SDimitry Andric } 49700b57cec5SDimitry Andric 4971e8d8bef9SDimitry Andric if (isFLAT(MI) && !ST.hasFlatInstOffsets()) { 49720b57cec5SDimitry Andric const MachineOperand *Offset = getNamedOperand(MI, AMDGPU::OpName::offset); 49730b57cec5SDimitry Andric if (Offset->getImm() != 0) { 49740b57cec5SDimitry Andric ErrInfo = "subtarget does not support offsets in flat instructions"; 49750b57cec5SDimitry Andric return false; 49760b57cec5SDimitry Andric } 49770b57cec5SDimitry Andric } 49780b57cec5SDimitry Andric 4979*5f757f3fSDimitry Andric if (isImage(MI)) { 49800b57cec5SDimitry Andric const MachineOperand *DimOp = getNamedOperand(MI, AMDGPU::OpName::dim); 49810b57cec5SDimitry Andric if (DimOp) { 49820b57cec5SDimitry Andric int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opcode, 49830b57cec5SDimitry Andric AMDGPU::OpName::vaddr0); 4984*5f757f3fSDimitry Andric int RSrcOpName = 4985*5f757f3fSDimitry Andric isMIMG(MI) ? 
AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc; 4986*5f757f3fSDimitry Andric int RsrcIdx = AMDGPU::getNamedOperandIdx(Opcode, RSrcOpName); 49870b57cec5SDimitry Andric const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opcode); 49880b57cec5SDimitry Andric const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 49890b57cec5SDimitry Andric AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 49900b57cec5SDimitry Andric const AMDGPU::MIMGDimInfo *Dim = 49910b57cec5SDimitry Andric AMDGPU::getMIMGDimInfoByEncoding(DimOp->getImm()); 49920b57cec5SDimitry Andric 49930b57cec5SDimitry Andric if (!Dim) { 49940b57cec5SDimitry Andric ErrInfo = "dim is out of range"; 49950b57cec5SDimitry Andric return false; 49960b57cec5SDimitry Andric } 49970b57cec5SDimitry Andric 49985ffd83dbSDimitry Andric bool IsA16 = false; 49995ffd83dbSDimitry Andric if (ST.hasR128A16()) { 50005ffd83dbSDimitry Andric const MachineOperand *R128A16 = getNamedOperand(MI, AMDGPU::OpName::r128); 50015ffd83dbSDimitry Andric IsA16 = R128A16->getImm() != 0; 5002bdd1243dSDimitry Andric } else if (ST.hasA16()) { 50035ffd83dbSDimitry Andric const MachineOperand *A16 = getNamedOperand(MI, AMDGPU::OpName::a16); 50045ffd83dbSDimitry Andric IsA16 = A16->getImm() != 0; 50055ffd83dbSDimitry Andric } 50065ffd83dbSDimitry Andric 5007*5f757f3fSDimitry Andric bool IsNSA = RsrcIdx - VAddr0Idx > 1; 50085ffd83dbSDimitry Andric 5009fe6060f1SDimitry Andric unsigned AddrWords = 5010fe6060f1SDimitry Andric AMDGPU::getAddrSizeMIMGOp(BaseOpcode, Dim, IsA16, ST.hasG16()); 50110b57cec5SDimitry Andric 50120b57cec5SDimitry Andric unsigned VAddrWords; 50130b57cec5SDimitry Andric if (IsNSA) { 5014*5f757f3fSDimitry Andric VAddrWords = RsrcIdx - VAddr0Idx; 5015*5f757f3fSDimitry Andric if (ST.hasPartialNSAEncoding() && 5016*5f757f3fSDimitry Andric AddrWords > ST.getNSAMaxSize(isVSAMPLE(MI))) { 5017*5f757f3fSDimitry Andric unsigned LastVAddrIdx = RsrcIdx - 1; 501806c3fb27SDimitry Andric VAddrWords += getOpSize(MI, LastVAddrIdx) / 4 - 1; 501906c3fb27SDimitry Andric } 50200b57cec5SDimitry Andric } else { 502106c3fb27SDimitry Andric VAddrWords = getOpSize(MI, VAddr0Idx) / 4; 5022bdd1243dSDimitry Andric if (AddrWords > 12) 50230b57cec5SDimitry Andric AddrWords = 16; 50240b57cec5SDimitry Andric } 50250b57cec5SDimitry Andric 50260b57cec5SDimitry Andric if (VAddrWords != AddrWords) { 50275ffd83dbSDimitry Andric LLVM_DEBUG(dbgs() << "bad vaddr size, expected " << AddrWords 50285ffd83dbSDimitry Andric << " but got " << VAddrWords << "\n"); 50290b57cec5SDimitry Andric ErrInfo = "bad vaddr size"; 50300b57cec5SDimitry Andric return false; 50310b57cec5SDimitry Andric } 50320b57cec5SDimitry Andric } 50330b57cec5SDimitry Andric } 50340b57cec5SDimitry Andric 50350b57cec5SDimitry Andric const MachineOperand *DppCt = getNamedOperand(MI, AMDGPU::OpName::dpp_ctrl); 50360b57cec5SDimitry Andric if (DppCt) { 50370b57cec5SDimitry Andric using namespace AMDGPU::DPP; 50380b57cec5SDimitry Andric 50390b57cec5SDimitry Andric unsigned DC = DppCt->getImm(); 50400b57cec5SDimitry Andric if (DC == DppCtrl::DPP_UNUSED1 || DC == DppCtrl::DPP_UNUSED2 || 50410b57cec5SDimitry Andric DC == DppCtrl::DPP_UNUSED3 || DC > DppCtrl::DPP_LAST || 50420b57cec5SDimitry Andric (DC >= DppCtrl::DPP_UNUSED4_FIRST && DC <= DppCtrl::DPP_UNUSED4_LAST) || 50430b57cec5SDimitry Andric (DC >= DppCtrl::DPP_UNUSED5_FIRST && DC <= DppCtrl::DPP_UNUSED5_LAST) || 50440b57cec5SDimitry Andric (DC >= DppCtrl::DPP_UNUSED6_FIRST && DC <= DppCtrl::DPP_UNUSED6_LAST) || 50450b57cec5SDimitry Andric (DC >= DppCtrl::DPP_UNUSED7_FIRST && DC <= 
DppCtrl::DPP_UNUSED7_LAST) || 50460b57cec5SDimitry Andric (DC >= DppCtrl::DPP_UNUSED8_FIRST && DC <= DppCtrl::DPP_UNUSED8_LAST)) { 50470b57cec5SDimitry Andric ErrInfo = "Invalid dpp_ctrl value"; 50480b57cec5SDimitry Andric return false; 50490b57cec5SDimitry Andric } 50500b57cec5SDimitry Andric if (DC >= DppCtrl::WAVE_SHL1 && DC <= DppCtrl::WAVE_ROR1 && 50510b57cec5SDimitry Andric ST.getGeneration() >= AMDGPUSubtarget::GFX10) { 50520b57cec5SDimitry Andric ErrInfo = "Invalid dpp_ctrl value: " 50530b57cec5SDimitry Andric "wavefront shifts are not supported on GFX10+"; 50540b57cec5SDimitry Andric return false; 50550b57cec5SDimitry Andric } 50560b57cec5SDimitry Andric if (DC >= DppCtrl::BCAST15 && DC <= DppCtrl::BCAST31 && 50570b57cec5SDimitry Andric ST.getGeneration() >= AMDGPUSubtarget::GFX10) { 50580b57cec5SDimitry Andric ErrInfo = "Invalid dpp_ctrl value: " 50598bcb0991SDimitry Andric "broadcasts are not supported on GFX10+"; 50600b57cec5SDimitry Andric return false; 50610b57cec5SDimitry Andric } 50620b57cec5SDimitry Andric if (DC >= DppCtrl::ROW_SHARE_FIRST && DC <= DppCtrl::ROW_XMASK_LAST && 50630b57cec5SDimitry Andric ST.getGeneration() < AMDGPUSubtarget::GFX10) { 5064fe6060f1SDimitry Andric if (DC >= DppCtrl::ROW_NEWBCAST_FIRST && 5065fe6060f1SDimitry Andric DC <= DppCtrl::ROW_NEWBCAST_LAST && 5066fe6060f1SDimitry Andric !ST.hasGFX90AInsts()) { 5067fe6060f1SDimitry Andric ErrInfo = "Invalid dpp_ctrl value: " 5068fe6060f1SDimitry Andric "row_newbroadcast/row_share is not supported before " 5069fe6060f1SDimitry Andric "GFX90A/GFX10"; 5070fe6060f1SDimitry Andric return false; 5071fe6060f1SDimitry Andric } else if (DC > DppCtrl::ROW_NEWBCAST_LAST || !ST.hasGFX90AInsts()) { 50720b57cec5SDimitry Andric ErrInfo = "Invalid dpp_ctrl value: " 50730b57cec5SDimitry Andric "row_share and row_xmask are not supported before GFX10"; 50740b57cec5SDimitry Andric return false; 50750b57cec5SDimitry Andric } 50760b57cec5SDimitry Andric } 50770b57cec5SDimitry Andric 5078fe6060f1SDimitry Andric if (Opcode != AMDGPU::V_MOV_B64_DPP_PSEUDO && 5079*5f757f3fSDimitry Andric !AMDGPU::isLegalDPALU_DPPControl(DC) && AMDGPU::isDPALU_DPP(Desc)) { 5080fe6060f1SDimitry Andric ErrInfo = "Invalid dpp_ctrl value: " 5081*5f757f3fSDimitry Andric "DP ALU dpp only support row_newbcast"; 5082fe6060f1SDimitry Andric return false; 5083fe6060f1SDimitry Andric } 5084fe6060f1SDimitry Andric } 5085fe6060f1SDimitry Andric 5086fe6060f1SDimitry Andric if ((MI.mayStore() || MI.mayLoad()) && !isVGPRSpill(MI)) { 5087fe6060f1SDimitry Andric const MachineOperand *Dst = getNamedOperand(MI, AMDGPU::OpName::vdst); 5088fe6060f1SDimitry Andric uint16_t DataNameIdx = isDS(Opcode) ? 
AMDGPU::OpName::data0 5089fe6060f1SDimitry Andric : AMDGPU::OpName::vdata; 5090fe6060f1SDimitry Andric const MachineOperand *Data = getNamedOperand(MI, DataNameIdx); 5091fe6060f1SDimitry Andric const MachineOperand *Data2 = getNamedOperand(MI, AMDGPU::OpName::data1); 5092fe6060f1SDimitry Andric if (Data && !Data->isReg()) 5093fe6060f1SDimitry Andric Data = nullptr; 5094fe6060f1SDimitry Andric 5095fe6060f1SDimitry Andric if (ST.hasGFX90AInsts()) { 5096fe6060f1SDimitry Andric if (Dst && Data && 5097fe6060f1SDimitry Andric (RI.isAGPR(MRI, Dst->getReg()) != RI.isAGPR(MRI, Data->getReg()))) { 5098fe6060f1SDimitry Andric ErrInfo = "Invalid register class: " 5099fe6060f1SDimitry Andric "vdata and vdst should be both VGPR or AGPR"; 5100fe6060f1SDimitry Andric return false; 5101fe6060f1SDimitry Andric } 5102fe6060f1SDimitry Andric if (Data && Data2 && 5103fe6060f1SDimitry Andric (RI.isAGPR(MRI, Data->getReg()) != RI.isAGPR(MRI, Data2->getReg()))) { 5104fe6060f1SDimitry Andric ErrInfo = "Invalid register class: " 5105fe6060f1SDimitry Andric "both data operands should be VGPR or AGPR"; 5106fe6060f1SDimitry Andric return false; 5107fe6060f1SDimitry Andric } 5108fe6060f1SDimitry Andric } else { 5109fe6060f1SDimitry Andric if ((Dst && RI.isAGPR(MRI, Dst->getReg())) || 5110fe6060f1SDimitry Andric (Data && RI.isAGPR(MRI, Data->getReg())) || 5111fe6060f1SDimitry Andric (Data2 && RI.isAGPR(MRI, Data2->getReg()))) { 5112fe6060f1SDimitry Andric ErrInfo = "Invalid register class: " 5113fe6060f1SDimitry Andric "agpr loads and stores not supported on this GPU"; 5114fe6060f1SDimitry Andric return false; 5115fe6060f1SDimitry Andric } 5116fe6060f1SDimitry Andric } 5117fe6060f1SDimitry Andric } 5118fe6060f1SDimitry Andric 511981ad6265SDimitry Andric if (ST.needsAlignedVGPRs()) { 512081ad6265SDimitry Andric const auto isAlignedReg = [&MI, &MRI, this](unsigned OpName) -> bool { 512181ad6265SDimitry Andric const MachineOperand *Op = getNamedOperand(MI, OpName); 512281ad6265SDimitry Andric if (!Op) 512381ad6265SDimitry Andric return true; 5124fe6060f1SDimitry Andric Register Reg = Op->getReg(); 512581ad6265SDimitry Andric if (Reg.isPhysical()) 512681ad6265SDimitry Andric return !(RI.getHWRegIndex(Reg) & 1); 5127fe6060f1SDimitry Andric const TargetRegisterClass &RC = *MRI.getRegClass(Reg); 512881ad6265SDimitry Andric return RI.getRegSizeInBits(RC) > 32 && RI.isProperlyAlignedRC(RC) && 5129fe6060f1SDimitry Andric !(RI.getChannelFromSubReg(Op->getSubReg()) & 1); 513081ad6265SDimitry Andric }; 5131fe6060f1SDimitry Andric 513281ad6265SDimitry Andric if (MI.getOpcode() == AMDGPU::DS_GWS_INIT || 513381ad6265SDimitry Andric MI.getOpcode() == AMDGPU::DS_GWS_SEMA_BR || 513481ad6265SDimitry Andric MI.getOpcode() == AMDGPU::DS_GWS_BARRIER) { 513581ad6265SDimitry Andric 513681ad6265SDimitry Andric if (!isAlignedReg(AMDGPU::OpName::data0)) { 5137fe6060f1SDimitry Andric ErrInfo = "Subtarget requires even aligned vector registers " 5138fe6060f1SDimitry Andric "for DS_GWS instructions"; 5139fe6060f1SDimitry Andric return false; 5140fe6060f1SDimitry Andric } 5141fe6060f1SDimitry Andric } 5142fe6060f1SDimitry Andric 514381ad6265SDimitry Andric if (isMIMG(MI)) { 514481ad6265SDimitry Andric if (!isAlignedReg(AMDGPU::OpName::vaddr)) { 514581ad6265SDimitry Andric ErrInfo = "Subtarget requires even aligned vector registers " 514681ad6265SDimitry Andric "for vaddr operand of image instructions"; 514781ad6265SDimitry Andric return false; 514881ad6265SDimitry Andric } 514981ad6265SDimitry Andric } 515081ad6265SDimitry Andric } 
515181ad6265SDimitry Andric 515281ad6265SDimitry Andric if (MI.getOpcode() == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && 515381ad6265SDimitry Andric !ST.hasGFX90AInsts()) { 515481ad6265SDimitry Andric const MachineOperand *Src = getNamedOperand(MI, AMDGPU::OpName::src0); 515581ad6265SDimitry Andric if (Src->isReg() && RI.isSGPRReg(MRI, Src->getReg())) { 515681ad6265SDimitry Andric ErrInfo = "Invalid register class: " 515781ad6265SDimitry Andric "v_accvgpr_write with an SGPR is not supported on this GPU"; 515881ad6265SDimitry Andric return false; 515981ad6265SDimitry Andric } 516081ad6265SDimitry Andric } 516181ad6265SDimitry Andric 516204eeddc0SDimitry Andric if (Desc.getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS) { 516304eeddc0SDimitry Andric const MachineOperand &SrcOp = MI.getOperand(1); 516404eeddc0SDimitry Andric if (!SrcOp.isReg() || SrcOp.getReg().isVirtual()) { 516504eeddc0SDimitry Andric ErrInfo = "pseudo expects only physical SGPRs"; 516604eeddc0SDimitry Andric return false; 516704eeddc0SDimitry Andric } 516804eeddc0SDimitry Andric } 516904eeddc0SDimitry Andric 51700b57cec5SDimitry Andric return true; 51710b57cec5SDimitry Andric } 51720b57cec5SDimitry Andric 5173*5f757f3fSDimitry Andric // It is more readable to list mapped opcodes on the same line. 5174*5f757f3fSDimitry Andric // clang-format off 5175*5f757f3fSDimitry Andric 51760b57cec5SDimitry Andric unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const { 51770b57cec5SDimitry Andric switch (MI.getOpcode()) { 51780b57cec5SDimitry Andric default: return AMDGPU::INSTRUCTION_LIST_END; 51790b57cec5SDimitry Andric case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE; 51800b57cec5SDimitry Andric case AMDGPU::COPY: return AMDGPU::COPY; 51810b57cec5SDimitry Andric case AMDGPU::PHI: return AMDGPU::PHI; 51820b57cec5SDimitry Andric case AMDGPU::INSERT_SUBREG: return AMDGPU::INSERT_SUBREG; 51830b57cec5SDimitry Andric case AMDGPU::WQM: return AMDGPU::WQM; 51848bcb0991SDimitry Andric case AMDGPU::SOFT_WQM: return AMDGPU::SOFT_WQM; 5185fe6060f1SDimitry Andric case AMDGPU::STRICT_WWM: return AMDGPU::STRICT_WWM; 5186fe6060f1SDimitry Andric case AMDGPU::STRICT_WQM: return AMDGPU::STRICT_WQM; 51870b57cec5SDimitry Andric case AMDGPU::S_MOV_B32: { 51880b57cec5SDimitry Andric const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); 51890b57cec5SDimitry Andric return MI.getOperand(1).isReg() || 51900b57cec5SDimitry Andric RI.isAGPR(MRI, MI.getOperand(0).getReg()) ? 51910b57cec5SDimitry Andric AMDGPU::COPY : AMDGPU::V_MOV_B32_e32; 51920b57cec5SDimitry Andric } 51930b57cec5SDimitry Andric case AMDGPU::S_ADD_I32: 5194e8d8bef9SDimitry Andric return ST.hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32; 51950b57cec5SDimitry Andric case AMDGPU::S_ADDC_U32: 51960b57cec5SDimitry Andric return AMDGPU::V_ADDC_U32_e32; 51970b57cec5SDimitry Andric case AMDGPU::S_SUB_I32: 5198e8d8bef9SDimitry Andric return ST.hasAddNoCarry() ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_CO_U32_e32; 51990b57cec5SDimitry Andric // FIXME: These are not consistently handled, and selected when the carry is 52000b57cec5SDimitry Andric // used. 
52010b57cec5SDimitry Andric case AMDGPU::S_ADD_U32: 5202e8d8bef9SDimitry Andric return AMDGPU::V_ADD_CO_U32_e32; 52030b57cec5SDimitry Andric case AMDGPU::S_SUB_U32: 5204e8d8bef9SDimitry Andric return AMDGPU::V_SUB_CO_U32_e32; 52050b57cec5SDimitry Andric case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32; 5206e8d8bef9SDimitry Andric case AMDGPU::S_MUL_I32: return AMDGPU::V_MUL_LO_U32_e64; 5207e8d8bef9SDimitry Andric case AMDGPU::S_MUL_HI_U32: return AMDGPU::V_MUL_HI_U32_e64; 5208e8d8bef9SDimitry Andric case AMDGPU::S_MUL_HI_I32: return AMDGPU::V_MUL_HI_I32_e64; 52090b57cec5SDimitry Andric case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e64; 52100b57cec5SDimitry Andric case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e64; 52110b57cec5SDimitry Andric case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e64; 52120b57cec5SDimitry Andric case AMDGPU::S_XNOR_B32: 52130b57cec5SDimitry Andric return ST.hasDLInsts() ? AMDGPU::V_XNOR_B32_e64 : AMDGPU::INSTRUCTION_LIST_END; 52140b57cec5SDimitry Andric case AMDGPU::S_MIN_I32: return AMDGPU::V_MIN_I32_e64; 52150b57cec5SDimitry Andric case AMDGPU::S_MIN_U32: return AMDGPU::V_MIN_U32_e64; 52160b57cec5SDimitry Andric case AMDGPU::S_MAX_I32: return AMDGPU::V_MAX_I32_e64; 52170b57cec5SDimitry Andric case AMDGPU::S_MAX_U32: return AMDGPU::V_MAX_U32_e64; 52180b57cec5SDimitry Andric case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32; 5219e8d8bef9SDimitry Andric case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64_e64; 52200b57cec5SDimitry Andric case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32; 5221e8d8bef9SDimitry Andric case AMDGPU::S_LSHL_B64: return AMDGPU::V_LSHL_B64_e64; 52220b57cec5SDimitry Andric case AMDGPU::S_LSHR_B32: return AMDGPU::V_LSHR_B32_e32; 5223e8d8bef9SDimitry Andric case AMDGPU::S_LSHR_B64: return AMDGPU::V_LSHR_B64_e64; 5224e8d8bef9SDimitry Andric case AMDGPU::S_SEXT_I32_I8: return AMDGPU::V_BFE_I32_e64; 5225e8d8bef9SDimitry Andric case AMDGPU::S_SEXT_I32_I16: return AMDGPU::V_BFE_I32_e64; 5226e8d8bef9SDimitry Andric case AMDGPU::S_BFE_U32: return AMDGPU::V_BFE_U32_e64; 5227e8d8bef9SDimitry Andric case AMDGPU::S_BFE_I32: return AMDGPU::V_BFE_I32_e64; 52280b57cec5SDimitry Andric case AMDGPU::S_BFM_B32: return AMDGPU::V_BFM_B32_e64; 52290b57cec5SDimitry Andric case AMDGPU::S_BREV_B32: return AMDGPU::V_BFREV_B32_e32; 52300b57cec5SDimitry Andric case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32; 52310b57cec5SDimitry Andric case AMDGPU::S_NOT_B64: return AMDGPU::V_NOT_B32_e32; 5232349cc55cSDimitry Andric case AMDGPU::S_CMP_EQ_I32: return AMDGPU::V_CMP_EQ_I32_e64; 5233349cc55cSDimitry Andric case AMDGPU::S_CMP_LG_I32: return AMDGPU::V_CMP_NE_I32_e64; 5234349cc55cSDimitry Andric case AMDGPU::S_CMP_GT_I32: return AMDGPU::V_CMP_GT_I32_e64; 5235349cc55cSDimitry Andric case AMDGPU::S_CMP_GE_I32: return AMDGPU::V_CMP_GE_I32_e64; 5236349cc55cSDimitry Andric case AMDGPU::S_CMP_LT_I32: return AMDGPU::V_CMP_LT_I32_e64; 5237349cc55cSDimitry Andric case AMDGPU::S_CMP_LE_I32: return AMDGPU::V_CMP_LE_I32_e64; 5238349cc55cSDimitry Andric case AMDGPU::S_CMP_EQ_U32: return AMDGPU::V_CMP_EQ_U32_e64; 5239349cc55cSDimitry Andric case AMDGPU::S_CMP_LG_U32: return AMDGPU::V_CMP_NE_U32_e64; 5240349cc55cSDimitry Andric case AMDGPU::S_CMP_GT_U32: return AMDGPU::V_CMP_GT_U32_e64; 5241349cc55cSDimitry Andric case AMDGPU::S_CMP_GE_U32: return AMDGPU::V_CMP_GE_U32_e64; 5242349cc55cSDimitry Andric case AMDGPU::S_CMP_LT_U32: return AMDGPU::V_CMP_LT_U32_e64; 5243349cc55cSDimitry Andric case AMDGPU::S_CMP_LE_U32: return AMDGPU::V_CMP_LE_U32_e64; 5244349cc55cSDimitry 
Andric case AMDGPU::S_CMP_EQ_U64: return AMDGPU::V_CMP_EQ_U64_e64; 5245349cc55cSDimitry Andric case AMDGPU::S_CMP_LG_U64: return AMDGPU::V_CMP_NE_U64_e64; 52460b57cec5SDimitry Andric case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e64; 52470b57cec5SDimitry Andric case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32; 52480b57cec5SDimitry Andric case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32; 52490b57cec5SDimitry Andric case AMDGPU::S_FLBIT_I32: return AMDGPU::V_FFBH_I32_e64; 52500b57cec5SDimitry Andric case AMDGPU::S_CBRANCH_SCC0: return AMDGPU::S_CBRANCH_VCCZ; 52510b57cec5SDimitry Andric case AMDGPU::S_CBRANCH_SCC1: return AMDGPU::S_CBRANCH_VCCNZ; 5252*5f757f3fSDimitry Andric case AMDGPU::S_CVT_F32_I32: return AMDGPU::V_CVT_F32_I32_e64; 5253*5f757f3fSDimitry Andric case AMDGPU::S_CVT_F32_U32: return AMDGPU::V_CVT_F32_U32_e64; 5254*5f757f3fSDimitry Andric case AMDGPU::S_CVT_I32_F32: return AMDGPU::V_CVT_I32_F32_e64; 5255*5f757f3fSDimitry Andric case AMDGPU::S_CVT_U32_F32: return AMDGPU::V_CVT_U32_F32_e64; 5256*5f757f3fSDimitry Andric case AMDGPU::S_CVT_F32_F16: return AMDGPU::V_CVT_F32_F16_t16_e64; 5257*5f757f3fSDimitry Andric case AMDGPU::S_CVT_HI_F32_F16: return AMDGPU::V_CVT_F32_F16_t16_e64; 5258*5f757f3fSDimitry Andric case AMDGPU::S_CVT_F16_F32: return AMDGPU::V_CVT_F16_F32_t16_e64; 5259*5f757f3fSDimitry Andric case AMDGPU::S_CEIL_F32: return AMDGPU::V_CEIL_F32_e64; 5260*5f757f3fSDimitry Andric case AMDGPU::S_FLOOR_F32: return AMDGPU::V_FLOOR_F32_e64; 5261*5f757f3fSDimitry Andric case AMDGPU::S_TRUNC_F32: return AMDGPU::V_TRUNC_F32_e64; 5262*5f757f3fSDimitry Andric case AMDGPU::S_RNDNE_F32: return AMDGPU::V_RNDNE_F32_e64; 5263*5f757f3fSDimitry Andric case AMDGPU::S_CEIL_F16: return AMDGPU::V_CEIL_F16_t16_e64; 5264*5f757f3fSDimitry Andric case AMDGPU::S_FLOOR_F16: return AMDGPU::V_FLOOR_F16_t16_e64; 5265*5f757f3fSDimitry Andric case AMDGPU::S_TRUNC_F16: return AMDGPU::V_TRUNC_F16_t16_e64; 5266*5f757f3fSDimitry Andric case AMDGPU::S_RNDNE_F16: return AMDGPU::V_RNDNE_F16_t16_e64; 5267*5f757f3fSDimitry Andric case AMDGPU::S_ADD_F32: return AMDGPU::V_ADD_F32_e64; 5268*5f757f3fSDimitry Andric case AMDGPU::S_SUB_F32: return AMDGPU::V_SUB_F32_e64; 5269*5f757f3fSDimitry Andric case AMDGPU::S_MIN_F32: return AMDGPU::V_MIN_F32_e64; 5270*5f757f3fSDimitry Andric case AMDGPU::S_MAX_F32: return AMDGPU::V_MAX_F32_e64; 5271*5f757f3fSDimitry Andric case AMDGPU::S_MINIMUM_F32: return AMDGPU::V_MINIMUM_F32_e64; 5272*5f757f3fSDimitry Andric case AMDGPU::S_MAXIMUM_F32: return AMDGPU::V_MAXIMUM_F32_e64; 5273*5f757f3fSDimitry Andric case AMDGPU::S_MUL_F32: return AMDGPU::V_MUL_F32_e64; 5274*5f757f3fSDimitry Andric case AMDGPU::S_ADD_F16: return AMDGPU::V_ADD_F16_fake16_e64; 5275*5f757f3fSDimitry Andric case AMDGPU::S_SUB_F16: return AMDGPU::V_SUB_F16_fake16_e64; 5276*5f757f3fSDimitry Andric case AMDGPU::S_MIN_F16: return AMDGPU::V_MIN_F16_fake16_e64; 5277*5f757f3fSDimitry Andric case AMDGPU::S_MAX_F16: return AMDGPU::V_MAX_F16_fake16_e64; 5278*5f757f3fSDimitry Andric case AMDGPU::S_MINIMUM_F16: return AMDGPU::V_MINIMUM_F16_e64; 5279*5f757f3fSDimitry Andric case AMDGPU::S_MAXIMUM_F16: return AMDGPU::V_MAXIMUM_F16_e64; 5280*5f757f3fSDimitry Andric case AMDGPU::S_MUL_F16: return AMDGPU::V_MUL_F16_fake16_e64; 5281*5f757f3fSDimitry Andric case AMDGPU::S_CVT_PK_RTZ_F16_F32: return AMDGPU::V_CVT_PKRTZ_F16_F32_e64; 5282*5f757f3fSDimitry Andric case AMDGPU::S_FMAC_F32: return AMDGPU::V_FMAC_F32_e64; 5283*5f757f3fSDimitry Andric case AMDGPU::S_FMAC_F16: return AMDGPU::V_FMAC_F16_t16_e64; 
5284*5f757f3fSDimitry Andric case AMDGPU::S_FMAMK_F32: return AMDGPU::V_FMAMK_F32; 5285*5f757f3fSDimitry Andric case AMDGPU::S_FMAAK_F32: return AMDGPU::V_FMAAK_F32; 5286*5f757f3fSDimitry Andric case AMDGPU::S_CMP_LT_F32: return AMDGPU::V_CMP_LT_F32_e64; 5287*5f757f3fSDimitry Andric case AMDGPU::S_CMP_EQ_F32: return AMDGPU::V_CMP_EQ_F32_e64; 5288*5f757f3fSDimitry Andric case AMDGPU::S_CMP_LE_F32: return AMDGPU::V_CMP_LE_F32_e64; 5289*5f757f3fSDimitry Andric case AMDGPU::S_CMP_GT_F32: return AMDGPU::V_CMP_GT_F32_e64; 5290*5f757f3fSDimitry Andric case AMDGPU::S_CMP_LG_F32: return AMDGPU::V_CMP_LG_F32_e64; 5291*5f757f3fSDimitry Andric case AMDGPU::S_CMP_GE_F32: return AMDGPU::V_CMP_GE_F32_e64; 5292*5f757f3fSDimitry Andric case AMDGPU::S_CMP_O_F32: return AMDGPU::V_CMP_O_F32_e64; 5293*5f757f3fSDimitry Andric case AMDGPU::S_CMP_U_F32: return AMDGPU::V_CMP_U_F32_e64; 5294*5f757f3fSDimitry Andric case AMDGPU::S_CMP_NGE_F32: return AMDGPU::V_CMP_NGE_F32_e64; 5295*5f757f3fSDimitry Andric case AMDGPU::S_CMP_NLG_F32: return AMDGPU::V_CMP_NLG_F32_e64; 5296*5f757f3fSDimitry Andric case AMDGPU::S_CMP_NGT_F32: return AMDGPU::V_CMP_NGT_F32_e64; 5297*5f757f3fSDimitry Andric case AMDGPU::S_CMP_NLE_F32: return AMDGPU::V_CMP_NLE_F32_e64; 5298*5f757f3fSDimitry Andric case AMDGPU::S_CMP_NEQ_F32: return AMDGPU::V_CMP_NEQ_F32_e64; 5299*5f757f3fSDimitry Andric case AMDGPU::S_CMP_NLT_F32: return AMDGPU::V_CMP_NLT_F32_e64; 5300*5f757f3fSDimitry Andric case AMDGPU::S_CMP_LT_F16: return AMDGPU::V_CMP_LT_F16_t16_e64; 5301*5f757f3fSDimitry Andric case AMDGPU::S_CMP_EQ_F16: return AMDGPU::V_CMP_EQ_F16_t16_e64; 5302*5f757f3fSDimitry Andric case AMDGPU::S_CMP_LE_F16: return AMDGPU::V_CMP_LE_F16_t16_e64; 5303*5f757f3fSDimitry Andric case AMDGPU::S_CMP_GT_F16: return AMDGPU::V_CMP_GT_F16_t16_e64; 5304*5f757f3fSDimitry Andric case AMDGPU::S_CMP_LG_F16: return AMDGPU::V_CMP_LG_F16_t16_e64; 5305*5f757f3fSDimitry Andric case AMDGPU::S_CMP_GE_F16: return AMDGPU::V_CMP_GE_F16_t16_e64; 5306*5f757f3fSDimitry Andric case AMDGPU::S_CMP_O_F16: return AMDGPU::V_CMP_O_F16_t16_e64; 5307*5f757f3fSDimitry Andric case AMDGPU::S_CMP_U_F16: return AMDGPU::V_CMP_U_F16_t16_e64; 5308*5f757f3fSDimitry Andric case AMDGPU::S_CMP_NGE_F16: return AMDGPU::V_CMP_NGE_F16_t16_e64; 5309*5f757f3fSDimitry Andric case AMDGPU::S_CMP_NLG_F16: return AMDGPU::V_CMP_NLG_F16_t16_e64; 5310*5f757f3fSDimitry Andric case AMDGPU::S_CMP_NGT_F16: return AMDGPU::V_CMP_NGT_F16_t16_e64; 5311*5f757f3fSDimitry Andric case AMDGPU::S_CMP_NLE_F16: return AMDGPU::V_CMP_NLE_F16_t16_e64; 5312*5f757f3fSDimitry Andric case AMDGPU::S_CMP_NEQ_F16: return AMDGPU::V_CMP_NEQ_F16_t16_e64; 5313*5f757f3fSDimitry Andric case AMDGPU::S_CMP_NLT_F16: return AMDGPU::V_CMP_NLT_F16_t16_e64; 5314*5f757f3fSDimitry Andric case AMDGPU::V_S_EXP_F32_e64: return AMDGPU::V_EXP_F32_e64; 5315*5f757f3fSDimitry Andric case AMDGPU::V_S_EXP_F16_e64: return AMDGPU::V_EXP_F16_t16_e64; 5316*5f757f3fSDimitry Andric case AMDGPU::V_S_LOG_F32_e64: return AMDGPU::V_LOG_F32_e64; 5317*5f757f3fSDimitry Andric case AMDGPU::V_S_LOG_F16_e64: return AMDGPU::V_LOG_F16_t16_e64; 5318*5f757f3fSDimitry Andric case AMDGPU::V_S_RCP_F32_e64: return AMDGPU::V_RCP_F32_e64; 5319*5f757f3fSDimitry Andric case AMDGPU::V_S_RCP_F16_e64: return AMDGPU::V_RCP_F16_t16_e64; 5320*5f757f3fSDimitry Andric case AMDGPU::V_S_RSQ_F32_e64: return AMDGPU::V_RSQ_F32_e64; 5321*5f757f3fSDimitry Andric case AMDGPU::V_S_RSQ_F16_e64: return AMDGPU::V_RSQ_F16_t16_e64; 5322*5f757f3fSDimitry Andric case AMDGPU::V_S_SQRT_F32_e64: return AMDGPU::V_SQRT_F32_e64; 
5323*5f757f3fSDimitry Andric case AMDGPU::V_S_SQRT_F16_e64: return AMDGPU::V_SQRT_F16_t16_e64; 53240b57cec5SDimitry Andric } 53250b57cec5SDimitry Andric llvm_unreachable( 53260b57cec5SDimitry Andric "Unexpected scalar opcode without corresponding vector one!"); 53270b57cec5SDimitry Andric } 53280b57cec5SDimitry Andric 5329*5f757f3fSDimitry Andric // clang-format on 5330*5f757f3fSDimitry Andric 533106c3fb27SDimitry Andric void SIInstrInfo::insertScratchExecCopy(MachineFunction &MF, 533206c3fb27SDimitry Andric MachineBasicBlock &MBB, 533306c3fb27SDimitry Andric MachineBasicBlock::iterator MBBI, 533406c3fb27SDimitry Andric const DebugLoc &DL, Register Reg, 5335*5f757f3fSDimitry Andric bool IsSCCLive, 5336*5f757f3fSDimitry Andric SlotIndexes *Indexes) const { 533706c3fb27SDimitry Andric const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 533806c3fb27SDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo(); 533906c3fb27SDimitry Andric bool IsWave32 = ST.isWave32(); 534006c3fb27SDimitry Andric if (IsSCCLive) { 534106c3fb27SDimitry Andric // Insert two move instructions, one to save the original value of EXEC and 534206c3fb27SDimitry Andric // the other to turn on all bits in EXEC. This is required as we can't use 534306c3fb27SDimitry Andric // the single instruction S_OR_SAVEEXEC that clobbers SCC. 534406c3fb27SDimitry Andric unsigned MovOpc = IsWave32 ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; 534506c3fb27SDimitry Andric MCRegister Exec = IsWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC; 5346*5f757f3fSDimitry Andric auto StoreExecMI = BuildMI(MBB, MBBI, DL, TII->get(MovOpc), Reg) 5347*5f757f3fSDimitry Andric .addReg(Exec, RegState::Kill); 5348*5f757f3fSDimitry Andric auto FlipExecMI = BuildMI(MBB, MBBI, DL, TII->get(MovOpc), Exec).addImm(-1); 5349*5f757f3fSDimitry Andric if (Indexes) { 5350*5f757f3fSDimitry Andric Indexes->insertMachineInstrInMaps(*StoreExecMI); 5351*5f757f3fSDimitry Andric Indexes->insertMachineInstrInMaps(*FlipExecMI); 5352*5f757f3fSDimitry Andric } 535306c3fb27SDimitry Andric } else { 535406c3fb27SDimitry Andric const unsigned OrSaveExec = 535506c3fb27SDimitry Andric IsWave32 ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64; 535606c3fb27SDimitry Andric auto SaveExec = 535706c3fb27SDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(OrSaveExec), Reg).addImm(-1); 535806c3fb27SDimitry Andric SaveExec->getOperand(3).setIsDead(); // Mark SCC as dead. 5359*5f757f3fSDimitry Andric if (Indexes) 5360*5f757f3fSDimitry Andric Indexes->insertMachineInstrInMaps(*SaveExec); 536106c3fb27SDimitry Andric } 536206c3fb27SDimitry Andric } 536306c3fb27SDimitry Andric 536406c3fb27SDimitry Andric void SIInstrInfo::restoreExec(MachineFunction &MF, MachineBasicBlock &MBB, 536506c3fb27SDimitry Andric MachineBasicBlock::iterator MBBI, 5366*5f757f3fSDimitry Andric const DebugLoc &DL, Register Reg, 5367*5f757f3fSDimitry Andric SlotIndexes *Indexes) const { 536806c3fb27SDimitry Andric unsigned ExecMov = isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; 536906c3fb27SDimitry Andric MCRegister Exec = isWave32() ? 
AMDGPU::EXEC_LO : AMDGPU::EXEC; 5370*5f757f3fSDimitry Andric auto ExecRestoreMI = 537106c3fb27SDimitry Andric BuildMI(MBB, MBBI, DL, get(ExecMov), Exec).addReg(Reg, RegState::Kill); 5372*5f757f3fSDimitry Andric if (Indexes) 5373*5f757f3fSDimitry Andric Indexes->insertMachineInstrInMaps(*ExecRestoreMI); 537406c3fb27SDimitry Andric } 537506c3fb27SDimitry Andric 537681ad6265SDimitry Andric static const TargetRegisterClass * 537781ad6265SDimitry Andric adjustAllocatableRegClass(const GCNSubtarget &ST, const SIRegisterInfo &RI, 5378fe6060f1SDimitry Andric const MachineRegisterInfo &MRI, 537981ad6265SDimitry Andric const MCInstrDesc &TID, unsigned RCID, 5380fe6060f1SDimitry Andric bool IsAllocatable) { 5381fe6060f1SDimitry Andric if ((IsAllocatable || !ST.hasGFX90AInsts() || !MRI.reservedRegsFrozen()) && 53820eae32dcSDimitry Andric (((TID.mayLoad() || TID.mayStore()) && 53830eae32dcSDimitry Andric !(TID.TSFlags & SIInstrFlags::VGPRSpill)) || 5384fe6060f1SDimitry Andric (TID.TSFlags & (SIInstrFlags::DS | SIInstrFlags::MIMG)))) { 5385fe6060f1SDimitry Andric switch (RCID) { 538681ad6265SDimitry Andric case AMDGPU::AV_32RegClassID: 538781ad6265SDimitry Andric RCID = AMDGPU::VGPR_32RegClassID; 538881ad6265SDimitry Andric break; 538981ad6265SDimitry Andric case AMDGPU::AV_64RegClassID: 539081ad6265SDimitry Andric RCID = AMDGPU::VReg_64RegClassID; 539181ad6265SDimitry Andric break; 539281ad6265SDimitry Andric case AMDGPU::AV_96RegClassID: 539381ad6265SDimitry Andric RCID = AMDGPU::VReg_96RegClassID; 539481ad6265SDimitry Andric break; 539581ad6265SDimitry Andric case AMDGPU::AV_128RegClassID: 539681ad6265SDimitry Andric RCID = AMDGPU::VReg_128RegClassID; 539781ad6265SDimitry Andric break; 539881ad6265SDimitry Andric case AMDGPU::AV_160RegClassID: 539981ad6265SDimitry Andric RCID = AMDGPU::VReg_160RegClassID; 540081ad6265SDimitry Andric break; 540181ad6265SDimitry Andric case AMDGPU::AV_512RegClassID: 540281ad6265SDimitry Andric RCID = AMDGPU::VReg_512RegClassID; 540381ad6265SDimitry Andric break; 5404fe6060f1SDimitry Andric default: 5405fe6060f1SDimitry Andric break; 5406fe6060f1SDimitry Andric } 5407fe6060f1SDimitry Andric } 540881ad6265SDimitry Andric 540981ad6265SDimitry Andric return RI.getProperlyAlignedRC(RI.getRegClass(RCID)); 5410fe6060f1SDimitry Andric } 5411fe6060f1SDimitry Andric 5412fe6060f1SDimitry Andric const TargetRegisterClass *SIInstrInfo::getRegClass(const MCInstrDesc &TID, 5413fe6060f1SDimitry Andric unsigned OpNum, const TargetRegisterInfo *TRI, 5414fe6060f1SDimitry Andric const MachineFunction &MF) 5415fe6060f1SDimitry Andric const { 5416fe6060f1SDimitry Andric if (OpNum >= TID.getNumOperands()) 5417fe6060f1SDimitry Andric return nullptr; 5418bdd1243dSDimitry Andric auto RegClass = TID.operands()[OpNum].RegClass; 5419fe6060f1SDimitry Andric bool IsAllocatable = false; 5420fe6060f1SDimitry Andric if (TID.TSFlags & (SIInstrFlags::DS | SIInstrFlags::FLAT)) { 5421fe6060f1SDimitry Andric // vdst and vdata should be both VGPR or AGPR, same for the DS instructions 542281ad6265SDimitry Andric // with two data operands. Request register class constrained to VGPR only 5423fe6060f1SDimitry Andric // of both operands present as Machine Copy Propagation can not check this 5424fe6060f1SDimitry Andric // constraint and possibly other passes too. 5425fe6060f1SDimitry Andric // 5426fe6060f1SDimitry Andric // The check is limited to FLAT and DS because atomics in non-flat encoding 5427fe6060f1SDimitry Andric // have their vdst and vdata tied to be the same register. 
5428fe6060f1SDimitry Andric const int VDstIdx = AMDGPU::getNamedOperandIdx(TID.Opcode, 5429fe6060f1SDimitry Andric AMDGPU::OpName::vdst); 5430fe6060f1SDimitry Andric const int DataIdx = AMDGPU::getNamedOperandIdx(TID.Opcode, 5431fe6060f1SDimitry Andric (TID.TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0 5432fe6060f1SDimitry Andric : AMDGPU::OpName::vdata); 5433fe6060f1SDimitry Andric if (DataIdx != -1) { 5434bdd1243dSDimitry Andric IsAllocatable = VDstIdx != -1 || AMDGPU::hasNamedOperand( 5435bdd1243dSDimitry Andric TID.Opcode, AMDGPU::OpName::data1); 5436fe6060f1SDimitry Andric } 5437fe6060f1SDimitry Andric } 543881ad6265SDimitry Andric return adjustAllocatableRegClass(ST, RI, MF.getRegInfo(), TID, RegClass, 5439fe6060f1SDimitry Andric IsAllocatable); 5440fe6060f1SDimitry Andric } 5441fe6060f1SDimitry Andric 54420b57cec5SDimitry Andric const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI, 54430b57cec5SDimitry Andric unsigned OpNo) const { 54440b57cec5SDimitry Andric const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); 54450b57cec5SDimitry Andric const MCInstrDesc &Desc = get(MI.getOpcode()); 54460b57cec5SDimitry Andric if (MI.isVariadic() || OpNo >= Desc.getNumOperands() || 5447bdd1243dSDimitry Andric Desc.operands()[OpNo].RegClass == -1) { 54488bcb0991SDimitry Andric Register Reg = MI.getOperand(OpNo).getReg(); 54490b57cec5SDimitry Andric 5450e8d8bef9SDimitry Andric if (Reg.isVirtual()) 54510b57cec5SDimitry Andric return MRI.getRegClass(Reg); 5452bdd1243dSDimitry Andric return RI.getPhysRegBaseClass(Reg); 54530b57cec5SDimitry Andric } 54540b57cec5SDimitry Andric 5455bdd1243dSDimitry Andric unsigned RCID = Desc.operands()[OpNo].RegClass; 545681ad6265SDimitry Andric return adjustAllocatableRegClass(ST, RI, MRI, Desc, RCID, true); 54570b57cec5SDimitry Andric } 54580b57cec5SDimitry Andric 54590b57cec5SDimitry Andric void SIInstrInfo::legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const { 54600b57cec5SDimitry Andric MachineBasicBlock::iterator I = MI; 54610b57cec5SDimitry Andric MachineBasicBlock *MBB = MI.getParent(); 54620b57cec5SDimitry Andric MachineOperand &MO = MI.getOperand(OpIdx); 54630b57cec5SDimitry Andric MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); 5464bdd1243dSDimitry Andric unsigned RCID = get(MI.getOpcode()).operands()[OpIdx].RegClass; 54650b57cec5SDimitry Andric const TargetRegisterClass *RC = RI.getRegClass(RCID); 5466e8d8bef9SDimitry Andric unsigned Size = RI.getRegSizeInBits(*RC); 54670b57cec5SDimitry Andric unsigned Opcode = (Size == 64) ? AMDGPU::V_MOV_B64_PSEUDO : AMDGPU::V_MOV_B32_e32; 54680b57cec5SDimitry Andric if (MO.isReg()) 54690b57cec5SDimitry Andric Opcode = AMDGPU::COPY; 54700b57cec5SDimitry Andric else if (RI.isSGPRClass(RC)) 54710b57cec5SDimitry Andric Opcode = (Size == 64) ? 
AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32; 54720b57cec5SDimitry Andric 54730b57cec5SDimitry Andric const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(RC); 54748bcb0991SDimitry Andric Register Reg = MRI.createVirtualRegister(VRC); 54750b57cec5SDimitry Andric DebugLoc DL = MBB->findDebugLoc(I); 54760b57cec5SDimitry Andric BuildMI(*MI.getParent(), I, DL, get(Opcode), Reg).add(MO); 54770b57cec5SDimitry Andric MO.ChangeToRegister(Reg, false); 54780b57cec5SDimitry Andric } 54790b57cec5SDimitry Andric 5480*5f757f3fSDimitry Andric unsigned SIInstrInfo::buildExtractSubReg( 5481*5f757f3fSDimitry Andric MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, 5482*5f757f3fSDimitry Andric const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, 5483*5f757f3fSDimitry Andric unsigned SubIdx, const TargetRegisterClass *SubRC) const { 54840b57cec5SDimitry Andric MachineBasicBlock *MBB = MI->getParent(); 54850b57cec5SDimitry Andric DebugLoc DL = MI->getDebugLoc(); 54868bcb0991SDimitry Andric Register SubReg = MRI.createVirtualRegister(SubRC); 54870b57cec5SDimitry Andric 54880b57cec5SDimitry Andric if (SuperReg.getSubReg() == AMDGPU::NoSubRegister) { 54890b57cec5SDimitry Andric BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg) 54900b57cec5SDimitry Andric .addReg(SuperReg.getReg(), 0, SubIdx); 54910b57cec5SDimitry Andric return SubReg; 54920b57cec5SDimitry Andric } 54930b57cec5SDimitry Andric 54940b57cec5SDimitry Andric // Just in case the super register is itself a sub-register, copy it to a new 54950b57cec5SDimitry Andric // value so we don't need to worry about merging its subreg index with the 54960b57cec5SDimitry Andric // SubIdx passed to this function. The register coalescer should be able to 54970b57cec5SDimitry Andric // eliminate this extra copy. 
54988bcb0991SDimitry Andric Register NewSuperReg = MRI.createVirtualRegister(SuperRC); 54990b57cec5SDimitry Andric 55000b57cec5SDimitry Andric BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), NewSuperReg) 55010b57cec5SDimitry Andric .addReg(SuperReg.getReg(), 0, SuperReg.getSubReg()); 55020b57cec5SDimitry Andric 55030b57cec5SDimitry Andric BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg) 55040b57cec5SDimitry Andric .addReg(NewSuperReg, 0, SubIdx); 55050b57cec5SDimitry Andric 55060b57cec5SDimitry Andric return SubReg; 55070b57cec5SDimitry Andric } 55080b57cec5SDimitry Andric 55090b57cec5SDimitry Andric MachineOperand SIInstrInfo::buildExtractSubRegOrImm( 5510*5f757f3fSDimitry Andric MachineBasicBlock::iterator MII, MachineRegisterInfo &MRI, 5511*5f757f3fSDimitry Andric const MachineOperand &Op, const TargetRegisterClass *SuperRC, 5512*5f757f3fSDimitry Andric unsigned SubIdx, const TargetRegisterClass *SubRC) const { 55130b57cec5SDimitry Andric if (Op.isImm()) { 55140b57cec5SDimitry Andric if (SubIdx == AMDGPU::sub0) 55150b57cec5SDimitry Andric return MachineOperand::CreateImm(static_cast<int32_t>(Op.getImm())); 55160b57cec5SDimitry Andric if (SubIdx == AMDGPU::sub1) 55170b57cec5SDimitry Andric return MachineOperand::CreateImm(static_cast<int32_t>(Op.getImm() >> 32)); 55180b57cec5SDimitry Andric 55190b57cec5SDimitry Andric llvm_unreachable("Unhandled register index for immediate"); 55200b57cec5SDimitry Andric } 55210b57cec5SDimitry Andric 55220b57cec5SDimitry Andric unsigned SubReg = buildExtractSubReg(MII, MRI, Op, SuperRC, 55230b57cec5SDimitry Andric SubIdx, SubRC); 55240b57cec5SDimitry Andric return MachineOperand::CreateReg(SubReg, false); 55250b57cec5SDimitry Andric } 55260b57cec5SDimitry Andric 55270b57cec5SDimitry Andric // Change the order of operands from (0, 1, 2) to (0, 2, 1) 55280b57cec5SDimitry Andric void SIInstrInfo::swapOperands(MachineInstr &Inst) const { 55290b57cec5SDimitry Andric assert(Inst.getNumExplicitOperands() == 3); 55300b57cec5SDimitry Andric MachineOperand Op1 = Inst.getOperand(1); 553181ad6265SDimitry Andric Inst.removeOperand(1); 55320b57cec5SDimitry Andric Inst.addOperand(Op1); 55330b57cec5SDimitry Andric } 55340b57cec5SDimitry Andric 55350b57cec5SDimitry Andric bool SIInstrInfo::isLegalRegOperand(const MachineRegisterInfo &MRI, 55360b57cec5SDimitry Andric const MCOperandInfo &OpInfo, 55370b57cec5SDimitry Andric const MachineOperand &MO) const { 55380b57cec5SDimitry Andric if (!MO.isReg()) 55390b57cec5SDimitry Andric return false; 55400b57cec5SDimitry Andric 55418bcb0991SDimitry Andric Register Reg = MO.getReg(); 55420b57cec5SDimitry Andric 5543480093f4SDimitry Andric const TargetRegisterClass *DRC = RI.getRegClass(OpInfo.RegClass); 5544e8d8bef9SDimitry Andric if (Reg.isPhysical()) 5545e8d8bef9SDimitry Andric return DRC->contains(Reg); 5546e8d8bef9SDimitry Andric 5547e8d8bef9SDimitry Andric const TargetRegisterClass *RC = MRI.getRegClass(Reg); 5548e8d8bef9SDimitry Andric 5549480093f4SDimitry Andric if (MO.getSubReg()) { 5550480093f4SDimitry Andric const MachineFunction *MF = MO.getParent()->getParent()->getParent(); 5551480093f4SDimitry Andric const TargetRegisterClass *SuperRC = RI.getLargestLegalSuperClass(RC, *MF); 5552480093f4SDimitry Andric if (!SuperRC) 5553480093f4SDimitry Andric return false; 55540b57cec5SDimitry Andric 5555480093f4SDimitry Andric DRC = RI.getMatchingSuperRegClass(SuperRC, DRC, MO.getSubReg()); 5556480093f4SDimitry Andric if (!DRC) 5557480093f4SDimitry Andric return false; 5558480093f4SDimitry Andric } 5559480093f4SDimitry Andric 
return RC->hasSuperClassEq(DRC); 55600b57cec5SDimitry Andric } 55610b57cec5SDimitry Andric 55620b57cec5SDimitry Andric bool SIInstrInfo::isLegalVSrcOperand(const MachineRegisterInfo &MRI, 55630b57cec5SDimitry Andric const MCOperandInfo &OpInfo, 55640b57cec5SDimitry Andric const MachineOperand &MO) const { 55650b57cec5SDimitry Andric if (MO.isReg()) 55660b57cec5SDimitry Andric return isLegalRegOperand(MRI, OpInfo, MO); 55670b57cec5SDimitry Andric 55680b57cec5SDimitry Andric // Handle non-register types that are treated like immediates. 55690b57cec5SDimitry Andric assert(MO.isImm() || MO.isTargetIndex() || MO.isFI() || MO.isGlobal()); 55700b57cec5SDimitry Andric return true; 55710b57cec5SDimitry Andric } 55720b57cec5SDimitry Andric 55730b57cec5SDimitry Andric bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx, 55740b57cec5SDimitry Andric const MachineOperand *MO) const { 55750b57cec5SDimitry Andric const MachineFunction &MF = *MI.getParent()->getParent(); 55760b57cec5SDimitry Andric const MachineRegisterInfo &MRI = MF.getRegInfo(); 55770b57cec5SDimitry Andric const MCInstrDesc &InstDesc = MI.getDesc(); 5578bdd1243dSDimitry Andric const MCOperandInfo &OpInfo = InstDesc.operands()[OpIdx]; 55790b57cec5SDimitry Andric const TargetRegisterClass *DefinedRC = 55800b57cec5SDimitry Andric OpInfo.RegClass != -1 ? RI.getRegClass(OpInfo.RegClass) : nullptr; 55810b57cec5SDimitry Andric if (!MO) 55820b57cec5SDimitry Andric MO = &MI.getOperand(OpIdx); 55830b57cec5SDimitry Andric 55840b57cec5SDimitry Andric int ConstantBusLimit = ST.getConstantBusLimit(MI.getOpcode()); 558581ad6265SDimitry Andric int LiteralLimit = !isVOP3(MI) || ST.hasVOP3Literal() ? 1 : 0; 55860b57cec5SDimitry Andric if (isVALU(MI) && usesConstantBus(MRI, *MO, OpInfo)) { 5587bdd1243dSDimitry Andric if (!MO->isReg() && !isInlineConstant(*MO, OpInfo) && !LiteralLimit--) 55880b57cec5SDimitry Andric return false; 55890b57cec5SDimitry Andric 55900b57cec5SDimitry Andric SmallDenseSet<RegSubRegPair> SGPRsUsed; 55910b57cec5SDimitry Andric if (MO->isReg()) 55920b57cec5SDimitry Andric SGPRsUsed.insert(RegSubRegPair(MO->getReg(), MO->getSubReg())); 55930b57cec5SDimitry Andric 55940b57cec5SDimitry Andric for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { 55950b57cec5SDimitry Andric if (i == OpIdx) 55960b57cec5SDimitry Andric continue; 55970b57cec5SDimitry Andric const MachineOperand &Op = MI.getOperand(i); 55980b57cec5SDimitry Andric if (Op.isReg()) { 55990b57cec5SDimitry Andric RegSubRegPair SGPR(Op.getReg(), Op.getSubReg()); 56000b57cec5SDimitry Andric if (!SGPRsUsed.count(SGPR) && 5601bdd1243dSDimitry Andric // FIXME: This can access off the end of the operands() array. 
5602bdd1243dSDimitry Andric usesConstantBus(MRI, Op, InstDesc.operands().begin()[i])) { 56030b57cec5SDimitry Andric if (--ConstantBusLimit <= 0) 56040b57cec5SDimitry Andric return false; 56050b57cec5SDimitry Andric SGPRsUsed.insert(SGPR); 56060b57cec5SDimitry Andric } 5607*5f757f3fSDimitry Andric } else if (AMDGPU::isSISrcOperand(InstDesc, i) && 5608*5f757f3fSDimitry Andric !isInlineConstant(Op, InstDesc.operands()[i])) { 560981ad6265SDimitry Andric if (!LiteralLimit--) 56100b57cec5SDimitry Andric return false; 56110b57cec5SDimitry Andric if (--ConstantBusLimit <= 0) 56120b57cec5SDimitry Andric return false; 56130b57cec5SDimitry Andric } 56140b57cec5SDimitry Andric } 56150b57cec5SDimitry Andric } 56160b57cec5SDimitry Andric 56170b57cec5SDimitry Andric if (MO->isReg()) { 5618fcaf7f86SDimitry Andric if (!DefinedRC) 5619fcaf7f86SDimitry Andric return OpInfo.OperandType == MCOI::OPERAND_UNKNOWN; 5620fe6060f1SDimitry Andric if (!isLegalRegOperand(MRI, OpInfo, *MO)) 5621fe6060f1SDimitry Andric return false; 5622fe6060f1SDimitry Andric bool IsAGPR = RI.isAGPR(MRI, MO->getReg()); 5623fe6060f1SDimitry Andric if (IsAGPR && !ST.hasMAIInsts()) 5624fe6060f1SDimitry Andric return false; 5625fe6060f1SDimitry Andric unsigned Opc = MI.getOpcode(); 5626fe6060f1SDimitry Andric if (IsAGPR && 5627fe6060f1SDimitry Andric (!ST.hasGFX90AInsts() || !MRI.reservedRegsFrozen()) && 5628fe6060f1SDimitry Andric (MI.mayLoad() || MI.mayStore() || isDS(Opc) || isMIMG(Opc))) 5629fe6060f1SDimitry Andric return false; 5630fe6060f1SDimitry Andric // Atomics should have both vdst and vdata either vgpr or agpr. 5631fe6060f1SDimitry Andric const int VDstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst); 5632fe6060f1SDimitry Andric const int DataIdx = AMDGPU::getNamedOperandIdx(Opc, 5633fe6060f1SDimitry Andric isDS(Opc) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata); 5634fe6060f1SDimitry Andric if ((int)OpIdx == VDstIdx && DataIdx != -1 && 5635fe6060f1SDimitry Andric MI.getOperand(DataIdx).isReg() && 5636fe6060f1SDimitry Andric RI.isAGPR(MRI, MI.getOperand(DataIdx).getReg()) != IsAGPR) 5637fe6060f1SDimitry Andric return false; 5638fe6060f1SDimitry Andric if ((int)OpIdx == DataIdx) { 5639fe6060f1SDimitry Andric if (VDstIdx != -1 && 5640fe6060f1SDimitry Andric RI.isAGPR(MRI, MI.getOperand(VDstIdx).getReg()) != IsAGPR) 5641fe6060f1SDimitry Andric return false; 5642fe6060f1SDimitry Andric // DS instructions with 2 src operands also must have tied RC. 
5643fe6060f1SDimitry Andric const int Data1Idx = AMDGPU::getNamedOperandIdx(Opc, 5644fe6060f1SDimitry Andric AMDGPU::OpName::data1); 5645fe6060f1SDimitry Andric if (Data1Idx != -1 && MI.getOperand(Data1Idx).isReg() && 5646fe6060f1SDimitry Andric RI.isAGPR(MRI, MI.getOperand(Data1Idx).getReg()) != IsAGPR) 5647fe6060f1SDimitry Andric return false; 5648fe6060f1SDimitry Andric } 564981ad6265SDimitry Andric if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.hasGFX90AInsts() && 5650fe6060f1SDimitry Andric (int)OpIdx == AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0) && 5651fe6060f1SDimitry Andric RI.isSGPRReg(MRI, MO->getReg())) 5652fe6060f1SDimitry Andric return false; 5653fe6060f1SDimitry Andric return true; 56540b57cec5SDimitry Andric } 56550b57cec5SDimitry Andric 5656*5f757f3fSDimitry Andric if (MO->isImm()) { 5657*5f757f3fSDimitry Andric uint64_t Imm = MO->getImm(); 5658*5f757f3fSDimitry Andric bool Is64BitFPOp = OpInfo.OperandType == AMDGPU::OPERAND_REG_IMM_FP64; 5659*5f757f3fSDimitry Andric bool Is64BitOp = Is64BitFPOp || 5660*5f757f3fSDimitry Andric OpInfo.OperandType == AMDGPU::OPERAND_REG_IMM_INT64 || 5661*5f757f3fSDimitry Andric OpInfo.OperandType == AMDGPU::OPERAND_REG_IMM_V2INT32 || 5662*5f757f3fSDimitry Andric OpInfo.OperandType == AMDGPU::OPERAND_REG_IMM_V2FP32; 5663*5f757f3fSDimitry Andric if (Is64BitOp && 5664*5f757f3fSDimitry Andric !AMDGPU::isInlinableLiteral64(Imm, ST.hasInv2PiInlineImm())) { 5665*5f757f3fSDimitry Andric if (!AMDGPU::isValid32BitLiteral(Imm, Is64BitFPOp)) 5666*5f757f3fSDimitry Andric return false; 5667*5f757f3fSDimitry Andric 5668*5f757f3fSDimitry Andric // FIXME: We can use sign extended 64-bit literals, but only for signed 5669*5f757f3fSDimitry Andric // operands. At the moment we do not know if an operand is signed. 5670*5f757f3fSDimitry Andric // Such operand will be encoded as its low 32 bits and then either 5671*5f757f3fSDimitry Andric // correctly sign extended or incorrectly zero extended by HW. 5672*5f757f3fSDimitry Andric if (!Is64BitFPOp && (int32_t)Imm < 0) 5673*5f757f3fSDimitry Andric return false; 5674*5f757f3fSDimitry Andric } 5675*5f757f3fSDimitry Andric } 5676*5f757f3fSDimitry Andric 56770b57cec5SDimitry Andric // Handle non-register types that are treated like immediates. 56780b57cec5SDimitry Andric assert(MO->isImm() || MO->isTargetIndex() || MO->isFI() || MO->isGlobal()); 56790b57cec5SDimitry Andric 56800b57cec5SDimitry Andric if (!DefinedRC) { 56810b57cec5SDimitry Andric // This operand expects an immediate. 
56820b57cec5SDimitry Andric return true; 56830b57cec5SDimitry Andric } 56840b57cec5SDimitry Andric 56850b57cec5SDimitry Andric return isImmOperandLegal(MI, OpIdx, *MO); 56860b57cec5SDimitry Andric } 56870b57cec5SDimitry Andric 56880b57cec5SDimitry Andric void SIInstrInfo::legalizeOperandsVOP2(MachineRegisterInfo &MRI, 56890b57cec5SDimitry Andric MachineInstr &MI) const { 56900b57cec5SDimitry Andric unsigned Opc = MI.getOpcode(); 56910b57cec5SDimitry Andric const MCInstrDesc &InstrDesc = get(Opc); 56920b57cec5SDimitry Andric 56930b57cec5SDimitry Andric int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 56940b57cec5SDimitry Andric MachineOperand &Src0 = MI.getOperand(Src0Idx); 56950b57cec5SDimitry Andric 56960b57cec5SDimitry Andric int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1); 56970b57cec5SDimitry Andric MachineOperand &Src1 = MI.getOperand(Src1Idx); 56980b57cec5SDimitry Andric 56990b57cec5SDimitry Andric // If there is an implicit SGPR use such as VCC use for v_addc_u32/v_subb_u32 57000b57cec5SDimitry Andric // we need to only have one constant bus use before GFX10. 5701bdd1243dSDimitry Andric bool HasImplicitSGPR = findImplicitSGPRRead(MI); 5702bdd1243dSDimitry Andric if (HasImplicitSGPR && ST.getConstantBusLimit(Opc) <= 1 && Src0.isReg() && 5703bdd1243dSDimitry Andric RI.isSGPRReg(MRI, Src0.getReg())) 57040b57cec5SDimitry Andric legalizeOpWithMove(MI, Src0Idx); 57050b57cec5SDimitry Andric 57060b57cec5SDimitry Andric // Special case: V_WRITELANE_B32 accepts only immediate or SGPR operands for 57070b57cec5SDimitry Andric // both the value to write (src0) and lane select (src1). Fix up non-SGPR 57080b57cec5SDimitry Andric // src0/src1 with V_READFIRSTLANE. 57090b57cec5SDimitry Andric if (Opc == AMDGPU::V_WRITELANE_B32) { 57100b57cec5SDimitry Andric const DebugLoc &DL = MI.getDebugLoc(); 57110b57cec5SDimitry Andric if (Src0.isReg() && RI.isVGPR(MRI, Src0.getReg())) { 57128bcb0991SDimitry Andric Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); 57130b57cec5SDimitry Andric BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg) 57140b57cec5SDimitry Andric .add(Src0); 57150b57cec5SDimitry Andric Src0.ChangeToRegister(Reg, false); 57160b57cec5SDimitry Andric } 57170b57cec5SDimitry Andric if (Src1.isReg() && RI.isVGPR(MRI, Src1.getReg())) { 57188bcb0991SDimitry Andric Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); 57190b57cec5SDimitry Andric const DebugLoc &DL = MI.getDebugLoc(); 57200b57cec5SDimitry Andric BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg) 57210b57cec5SDimitry Andric .add(Src1); 57220b57cec5SDimitry Andric Src1.ChangeToRegister(Reg, false); 57230b57cec5SDimitry Andric } 57240b57cec5SDimitry Andric return; 57250b57cec5SDimitry Andric } 57260b57cec5SDimitry Andric 57270b57cec5SDimitry Andric // No VOP2 instructions support AGPRs. 57280b57cec5SDimitry Andric if (Src0.isReg() && RI.isAGPR(MRI, Src0.getReg())) 57290b57cec5SDimitry Andric legalizeOpWithMove(MI, Src0Idx); 57300b57cec5SDimitry Andric 57310b57cec5SDimitry Andric if (Src1.isReg() && RI.isAGPR(MRI, Src1.getReg())) 57320b57cec5SDimitry Andric legalizeOpWithMove(MI, Src1Idx); 57330b57cec5SDimitry Andric 5734*5f757f3fSDimitry Andric // Special case: V_FMAC_F32 and V_FMAC_F16 have src2. 
5735*5f757f3fSDimitry Andric if (Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F16_e32) { 5736*5f757f3fSDimitry Andric int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2); 5737*5f757f3fSDimitry Andric if (!RI.isVGPR(MRI, MI.getOperand(Src2Idx).getReg())) 5738*5f757f3fSDimitry Andric legalizeOpWithMove(MI, Src2Idx); 5739*5f757f3fSDimitry Andric } 5740*5f757f3fSDimitry Andric 57410b57cec5SDimitry Andric // VOP2 src0 instructions support all operand types, so we don't need to check 57420b57cec5SDimitry Andric // their legality. If src1 is already legal, we don't need to do anything. 5743bdd1243dSDimitry Andric if (isLegalRegOperand(MRI, InstrDesc.operands()[Src1Idx], Src1)) 57440b57cec5SDimitry Andric return; 57450b57cec5SDimitry Andric 57460b57cec5SDimitry Andric // Special case: V_READLANE_B32 accepts only immediate or SGPR operands for 57470b57cec5SDimitry Andric // lane select. Fix up using V_READFIRSTLANE, since we assume that the lane 57480b57cec5SDimitry Andric // select is uniform. 57490b57cec5SDimitry Andric if (Opc == AMDGPU::V_READLANE_B32 && Src1.isReg() && 57500b57cec5SDimitry Andric RI.isVGPR(MRI, Src1.getReg())) { 57518bcb0991SDimitry Andric Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); 57520b57cec5SDimitry Andric const DebugLoc &DL = MI.getDebugLoc(); 57530b57cec5SDimitry Andric BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg) 57540b57cec5SDimitry Andric .add(Src1); 57550b57cec5SDimitry Andric Src1.ChangeToRegister(Reg, false); 57560b57cec5SDimitry Andric return; 57570b57cec5SDimitry Andric } 57580b57cec5SDimitry Andric 57590b57cec5SDimitry Andric // We do not use commuteInstruction here because it is too aggressive and will 57600b57cec5SDimitry Andric // commute if it is possible. We only want to commute here if it improves 57610b57cec5SDimitry Andric // legality. This can be called a fairly large number of times so don't waste 57620b57cec5SDimitry Andric // compile time pointlessly swapping and checking legality again. 57630b57cec5SDimitry Andric if (HasImplicitSGPR || !MI.isCommutable()) { 57640b57cec5SDimitry Andric legalizeOpWithMove(MI, Src1Idx); 57650b57cec5SDimitry Andric return; 57660b57cec5SDimitry Andric } 57670b57cec5SDimitry Andric 57680b57cec5SDimitry Andric // If src0 can be used as src1, commuting will make the operands legal. 57690b57cec5SDimitry Andric // Otherwise we have to give up and insert a move. 57700b57cec5SDimitry Andric // 57710b57cec5SDimitry Andric // TODO: Other immediate-like operand kinds could be commuted if there was a 57720b57cec5SDimitry Andric // MachineOperand::ChangeTo* for them. 
57730b57cec5SDimitry Andric if ((!Src1.isImm() && !Src1.isReg()) || 5774bdd1243dSDimitry Andric !isLegalRegOperand(MRI, InstrDesc.operands()[Src1Idx], Src0)) { 57750b57cec5SDimitry Andric legalizeOpWithMove(MI, Src1Idx); 57760b57cec5SDimitry Andric return; 57770b57cec5SDimitry Andric } 57780b57cec5SDimitry Andric 57790b57cec5SDimitry Andric int CommutedOpc = commuteOpcode(MI); 57800b57cec5SDimitry Andric if (CommutedOpc == -1) { 57810b57cec5SDimitry Andric legalizeOpWithMove(MI, Src1Idx); 57820b57cec5SDimitry Andric return; 57830b57cec5SDimitry Andric } 57840b57cec5SDimitry Andric 57850b57cec5SDimitry Andric MI.setDesc(get(CommutedOpc)); 57860b57cec5SDimitry Andric 57878bcb0991SDimitry Andric Register Src0Reg = Src0.getReg(); 57880b57cec5SDimitry Andric unsigned Src0SubReg = Src0.getSubReg(); 57890b57cec5SDimitry Andric bool Src0Kill = Src0.isKill(); 57900b57cec5SDimitry Andric 57910b57cec5SDimitry Andric if (Src1.isImm()) 57920b57cec5SDimitry Andric Src0.ChangeToImmediate(Src1.getImm()); 57930b57cec5SDimitry Andric else if (Src1.isReg()) { 57940b57cec5SDimitry Andric Src0.ChangeToRegister(Src1.getReg(), false, false, Src1.isKill()); 57950b57cec5SDimitry Andric Src0.setSubReg(Src1.getSubReg()); 57960b57cec5SDimitry Andric } else 57970b57cec5SDimitry Andric llvm_unreachable("Should only have register or immediate operands"); 57980b57cec5SDimitry Andric 57990b57cec5SDimitry Andric Src1.ChangeToRegister(Src0Reg, false, false, Src0Kill); 58000b57cec5SDimitry Andric Src1.setSubReg(Src0SubReg); 58010b57cec5SDimitry Andric fixImplicitOperands(MI); 58020b57cec5SDimitry Andric } 58030b57cec5SDimitry Andric 58040b57cec5SDimitry Andric // Legalize VOP3 operands. All operand types are supported for any operand 58050b57cec5SDimitry Andric // but only one literal constant and only starting from GFX10. 
58060b57cec5SDimitry Andric void SIInstrInfo::legalizeOperandsVOP3(MachineRegisterInfo &MRI, 58070b57cec5SDimitry Andric MachineInstr &MI) const { 58080b57cec5SDimitry Andric unsigned Opc = MI.getOpcode(); 58090b57cec5SDimitry Andric 58100b57cec5SDimitry Andric int VOP3Idx[3] = { 58110b57cec5SDimitry Andric AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0), 58120b57cec5SDimitry Andric AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1), 58130b57cec5SDimitry Andric AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2) 58140b57cec5SDimitry Andric }; 58150b57cec5SDimitry Andric 5816e8d8bef9SDimitry Andric if (Opc == AMDGPU::V_PERMLANE16_B32_e64 || 5817e8d8bef9SDimitry Andric Opc == AMDGPU::V_PERMLANEX16_B32_e64) { 58180b57cec5SDimitry Andric // src1 and src2 must be scalar 58190b57cec5SDimitry Andric MachineOperand &Src1 = MI.getOperand(VOP3Idx[1]); 58200b57cec5SDimitry Andric MachineOperand &Src2 = MI.getOperand(VOP3Idx[2]); 58210b57cec5SDimitry Andric const DebugLoc &DL = MI.getDebugLoc(); 58220b57cec5SDimitry Andric if (Src1.isReg() && !RI.isSGPRClass(MRI.getRegClass(Src1.getReg()))) { 58238bcb0991SDimitry Andric Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); 58240b57cec5SDimitry Andric BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg) 58250b57cec5SDimitry Andric .add(Src1); 58260b57cec5SDimitry Andric Src1.ChangeToRegister(Reg, false); 58270b57cec5SDimitry Andric } 58280b57cec5SDimitry Andric if (Src2.isReg() && !RI.isSGPRClass(MRI.getRegClass(Src2.getReg()))) { 58298bcb0991SDimitry Andric Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); 58300b57cec5SDimitry Andric BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg) 58310b57cec5SDimitry Andric .add(Src2); 58320b57cec5SDimitry Andric Src2.ChangeToRegister(Reg, false); 58330b57cec5SDimitry Andric } 58340b57cec5SDimitry Andric } 58350b57cec5SDimitry Andric 58360b57cec5SDimitry Andric // Find the one SGPR operand we are allowed to use. 58370b57cec5SDimitry Andric int ConstantBusLimit = ST.getConstantBusLimit(Opc); 58380b57cec5SDimitry Andric int LiteralLimit = ST.hasVOP3Literal() ? 
1 : 0; 58390b57cec5SDimitry Andric SmallDenseSet<unsigned> SGPRsUsed; 5840e8d8bef9SDimitry Andric Register SGPRReg = findUsedSGPR(MI, VOP3Idx); 5841bdd1243dSDimitry Andric if (SGPRReg) { 58420b57cec5SDimitry Andric SGPRsUsed.insert(SGPRReg); 58430b57cec5SDimitry Andric --ConstantBusLimit; 58440b57cec5SDimitry Andric } 58450b57cec5SDimitry Andric 58460eae32dcSDimitry Andric for (int Idx : VOP3Idx) { 58470b57cec5SDimitry Andric if (Idx == -1) 58480b57cec5SDimitry Andric break; 58490b57cec5SDimitry Andric MachineOperand &MO = MI.getOperand(Idx); 58500b57cec5SDimitry Andric 58510b57cec5SDimitry Andric if (!MO.isReg()) { 5852bdd1243dSDimitry Andric if (isInlineConstant(MO, get(Opc).operands()[Idx])) 58530b57cec5SDimitry Andric continue; 58540b57cec5SDimitry Andric 58550b57cec5SDimitry Andric if (LiteralLimit > 0 && ConstantBusLimit > 0) { 58560b57cec5SDimitry Andric --LiteralLimit; 58570b57cec5SDimitry Andric --ConstantBusLimit; 58580b57cec5SDimitry Andric continue; 58590b57cec5SDimitry Andric } 58600b57cec5SDimitry Andric 58610b57cec5SDimitry Andric --LiteralLimit; 58620b57cec5SDimitry Andric --ConstantBusLimit; 58630b57cec5SDimitry Andric legalizeOpWithMove(MI, Idx); 58640b57cec5SDimitry Andric continue; 58650b57cec5SDimitry Andric } 58660b57cec5SDimitry Andric 5867349cc55cSDimitry Andric if (RI.hasAGPRs(RI.getRegClassForReg(MRI, MO.getReg())) && 58680b57cec5SDimitry Andric !isOperandLegal(MI, Idx, &MO)) { 58690b57cec5SDimitry Andric legalizeOpWithMove(MI, Idx); 58700b57cec5SDimitry Andric continue; 58710b57cec5SDimitry Andric } 58720b57cec5SDimitry Andric 5873349cc55cSDimitry Andric if (!RI.isSGPRClass(RI.getRegClassForReg(MRI, MO.getReg()))) 58740b57cec5SDimitry Andric continue; // VGPRs are legal 58750b57cec5SDimitry Andric 58760b57cec5SDimitry Andric // We can use one SGPR in each VOP3 instruction prior to GFX10 58770b57cec5SDimitry Andric // and two starting from GFX10. 58780b57cec5SDimitry Andric if (SGPRsUsed.count(MO.getReg())) 58790b57cec5SDimitry Andric continue; 58800b57cec5SDimitry Andric if (ConstantBusLimit > 0) { 58810b57cec5SDimitry Andric SGPRsUsed.insert(MO.getReg()); 58820b57cec5SDimitry Andric --ConstantBusLimit; 58830b57cec5SDimitry Andric continue; 58840b57cec5SDimitry Andric } 58850b57cec5SDimitry Andric 58860b57cec5SDimitry Andric // If we make it this far, then the operand is not legal and we must 58870b57cec5SDimitry Andric // legalize it. 58880b57cec5SDimitry Andric legalizeOpWithMove(MI, Idx); 58890b57cec5SDimitry Andric } 5890*5f757f3fSDimitry Andric 5891*5f757f3fSDimitry Andric // Special case: V_FMAC_F32 and V_FMAC_F16 have src2 tied to vdst. 
5892*5f757f3fSDimitry Andric if ((Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_e64) && 5893*5f757f3fSDimitry Andric !RI.isVGPR(MRI, MI.getOperand(VOP3Idx[2]).getReg())) 5894*5f757f3fSDimitry Andric legalizeOpWithMove(MI, VOP3Idx[2]); 58950b57cec5SDimitry Andric } 58960b57cec5SDimitry Andric 58975ffd83dbSDimitry Andric Register SIInstrInfo::readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI, 58980b57cec5SDimitry Andric MachineRegisterInfo &MRI) const { 58990b57cec5SDimitry Andric const TargetRegisterClass *VRC = MRI.getRegClass(SrcReg); 59000b57cec5SDimitry Andric const TargetRegisterClass *SRC = RI.getEquivalentSGPRClass(VRC); 59018bcb0991SDimitry Andric Register DstReg = MRI.createVirtualRegister(SRC); 59020b57cec5SDimitry Andric unsigned SubRegs = RI.getRegSizeInBits(*VRC) / 32; 59030b57cec5SDimitry Andric 59040b57cec5SDimitry Andric if (RI.hasAGPRs(VRC)) { 59050b57cec5SDimitry Andric VRC = RI.getEquivalentVGPRClass(VRC); 59068bcb0991SDimitry Andric Register NewSrcReg = MRI.createVirtualRegister(VRC); 59070b57cec5SDimitry Andric BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(), 59080b57cec5SDimitry Andric get(TargetOpcode::COPY), NewSrcReg) 59090b57cec5SDimitry Andric .addReg(SrcReg); 59100b57cec5SDimitry Andric SrcReg = NewSrcReg; 59110b57cec5SDimitry Andric } 59120b57cec5SDimitry Andric 59130b57cec5SDimitry Andric if (SubRegs == 1) { 59140b57cec5SDimitry Andric BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(), 59150b57cec5SDimitry Andric get(AMDGPU::V_READFIRSTLANE_B32), DstReg) 59160b57cec5SDimitry Andric .addReg(SrcReg); 59170b57cec5SDimitry Andric return DstReg; 59180b57cec5SDimitry Andric } 59190b57cec5SDimitry Andric 5920bdd1243dSDimitry Andric SmallVector<Register, 8> SRegs; 59210b57cec5SDimitry Andric for (unsigned i = 0; i < SubRegs; ++i) { 59228bcb0991SDimitry Andric Register SGPR = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); 59230b57cec5SDimitry Andric BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(), 59240b57cec5SDimitry Andric get(AMDGPU::V_READFIRSTLANE_B32), SGPR) 59250b57cec5SDimitry Andric .addReg(SrcReg, 0, RI.getSubRegFromChannel(i)); 59260b57cec5SDimitry Andric SRegs.push_back(SGPR); 59270b57cec5SDimitry Andric } 59280b57cec5SDimitry Andric 59290b57cec5SDimitry Andric MachineInstrBuilder MIB = 59300b57cec5SDimitry Andric BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(), 59310b57cec5SDimitry Andric get(AMDGPU::REG_SEQUENCE), DstReg); 59320b57cec5SDimitry Andric for (unsigned i = 0; i < SubRegs; ++i) { 59330b57cec5SDimitry Andric MIB.addReg(SRegs[i]); 59340b57cec5SDimitry Andric MIB.addImm(RI.getSubRegFromChannel(i)); 59350b57cec5SDimitry Andric } 59360b57cec5SDimitry Andric return DstReg; 59370b57cec5SDimitry Andric } 59380b57cec5SDimitry Andric 59390b57cec5SDimitry Andric void SIInstrInfo::legalizeOperandsSMRD(MachineRegisterInfo &MRI, 59400b57cec5SDimitry Andric MachineInstr &MI) const { 59410b57cec5SDimitry Andric 59420b57cec5SDimitry Andric // If the pointer is store in VGPRs, then we need to move them to 59430b57cec5SDimitry Andric // SGPRs using v_readfirstlane. This is safe because we only select 59440b57cec5SDimitry Andric // loads with uniform pointers to SMRD instruction so we know the 59450b57cec5SDimitry Andric // pointer value is uniform. 
59460b57cec5SDimitry Andric MachineOperand *SBase = getNamedOperand(MI, AMDGPU::OpName::sbase); 59470b57cec5SDimitry Andric if (SBase && !RI.isSGPRClass(MRI.getRegClass(SBase->getReg()))) { 5948e8d8bef9SDimitry Andric Register SGPR = readlaneVGPRToSGPR(SBase->getReg(), MI, MRI); 59490b57cec5SDimitry Andric SBase->setReg(SGPR); 59500b57cec5SDimitry Andric } 595181ad6265SDimitry Andric MachineOperand *SOff = getNamedOperand(MI, AMDGPU::OpName::soffset); 59520b57cec5SDimitry Andric if (SOff && !RI.isSGPRClass(MRI.getRegClass(SOff->getReg()))) { 5953e8d8bef9SDimitry Andric Register SGPR = readlaneVGPRToSGPR(SOff->getReg(), MI, MRI); 59540b57cec5SDimitry Andric SOff->setReg(SGPR); 59550b57cec5SDimitry Andric } 59560b57cec5SDimitry Andric } 59570b57cec5SDimitry Andric 5958fe6060f1SDimitry Andric bool SIInstrInfo::moveFlatAddrToVGPR(MachineInstr &Inst) const { 5959fe6060f1SDimitry Andric unsigned Opc = Inst.getOpcode(); 5960fe6060f1SDimitry Andric int OldSAddrIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr); 5961fe6060f1SDimitry Andric if (OldSAddrIdx < 0) 5962fe6060f1SDimitry Andric return false; 5963fe6060f1SDimitry Andric 5964fe6060f1SDimitry Andric assert(isSegmentSpecificFLAT(Inst)); 5965fe6060f1SDimitry Andric 5966fe6060f1SDimitry Andric int NewOpc = AMDGPU::getGlobalVaddrOp(Opc); 5967fe6060f1SDimitry Andric if (NewOpc < 0) 5968fe6060f1SDimitry Andric NewOpc = AMDGPU::getFlatScratchInstSVfromSS(Opc); 5969fe6060f1SDimitry Andric if (NewOpc < 0) 5970fe6060f1SDimitry Andric return false; 5971fe6060f1SDimitry Andric 5972fe6060f1SDimitry Andric MachineRegisterInfo &MRI = Inst.getMF()->getRegInfo(); 5973fe6060f1SDimitry Andric MachineOperand &SAddr = Inst.getOperand(OldSAddrIdx); 5974fe6060f1SDimitry Andric if (RI.isSGPRReg(MRI, SAddr.getReg())) 5975fe6060f1SDimitry Andric return false; 5976fe6060f1SDimitry Andric 5977fe6060f1SDimitry Andric int NewVAddrIdx = AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vaddr); 5978fe6060f1SDimitry Andric if (NewVAddrIdx < 0) 5979fe6060f1SDimitry Andric return false; 5980fe6060f1SDimitry Andric 5981fe6060f1SDimitry Andric int OldVAddrIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr); 5982fe6060f1SDimitry Andric 5983fe6060f1SDimitry Andric // Check vaddr, it shall be zero or absent. 5984fe6060f1SDimitry Andric MachineInstr *VAddrDef = nullptr; 5985fe6060f1SDimitry Andric if (OldVAddrIdx >= 0) { 5986fe6060f1SDimitry Andric MachineOperand &VAddr = Inst.getOperand(OldVAddrIdx); 5987fe6060f1SDimitry Andric VAddrDef = MRI.getUniqueVRegDef(VAddr.getReg()); 5988fe6060f1SDimitry Andric if (!VAddrDef || VAddrDef->getOpcode() != AMDGPU::V_MOV_B32_e32 || 5989fe6060f1SDimitry Andric !VAddrDef->getOperand(1).isImm() || 5990fe6060f1SDimitry Andric VAddrDef->getOperand(1).getImm() != 0) 5991fe6060f1SDimitry Andric return false; 5992fe6060f1SDimitry Andric } 5993fe6060f1SDimitry Andric 5994fe6060f1SDimitry Andric const MCInstrDesc &NewDesc = get(NewOpc); 5995fe6060f1SDimitry Andric Inst.setDesc(NewDesc); 5996fe6060f1SDimitry Andric 599781ad6265SDimitry Andric // Callers expect iterator to be valid after this call, so modify the 5998fe6060f1SDimitry Andric // instruction in place. 5999fe6060f1SDimitry Andric if (OldVAddrIdx == NewVAddrIdx) { 6000fe6060f1SDimitry Andric MachineOperand &NewVAddr = Inst.getOperand(NewVAddrIdx); 6001fe6060f1SDimitry Andric // Clear use list from the old vaddr holding a zero register. 
6002fe6060f1SDimitry Andric MRI.removeRegOperandFromUseList(&NewVAddr); 6003fe6060f1SDimitry Andric MRI.moveOperands(&NewVAddr, &SAddr, 1); 600481ad6265SDimitry Andric Inst.removeOperand(OldSAddrIdx); 6005fe6060f1SDimitry Andric // Update the use list with the pointer we have just moved from vaddr to 600681ad6265SDimitry Andric // saddr position. Otherwise new vaddr will be missing from the use list. 6007fe6060f1SDimitry Andric MRI.removeRegOperandFromUseList(&NewVAddr); 6008fe6060f1SDimitry Andric MRI.addRegOperandToUseList(&NewVAddr); 6009fe6060f1SDimitry Andric } else { 6010fe6060f1SDimitry Andric assert(OldSAddrIdx == NewVAddrIdx); 6011fe6060f1SDimitry Andric 6012fe6060f1SDimitry Andric if (OldVAddrIdx >= 0) { 6013fe6060f1SDimitry Andric int NewVDstIn = AMDGPU::getNamedOperandIdx(NewOpc, 6014fe6060f1SDimitry Andric AMDGPU::OpName::vdst_in); 6015fe6060f1SDimitry Andric 601681ad6265SDimitry Andric // removeOperand doesn't try to fixup tied operand indexes at it goes, so 6017fe6060f1SDimitry Andric // it asserts. Untie the operands for now and retie them afterwards. 6018fe6060f1SDimitry Andric if (NewVDstIn != -1) { 6019fe6060f1SDimitry Andric int OldVDstIn = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in); 6020fe6060f1SDimitry Andric Inst.untieRegOperand(OldVDstIn); 6021fe6060f1SDimitry Andric } 6022fe6060f1SDimitry Andric 602381ad6265SDimitry Andric Inst.removeOperand(OldVAddrIdx); 6024fe6060f1SDimitry Andric 6025fe6060f1SDimitry Andric if (NewVDstIn != -1) { 6026fe6060f1SDimitry Andric int NewVDst = AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst); 6027fe6060f1SDimitry Andric Inst.tieOperands(NewVDst, NewVDstIn); 6028fe6060f1SDimitry Andric } 6029fe6060f1SDimitry Andric } 6030fe6060f1SDimitry Andric } 6031fe6060f1SDimitry Andric 6032fe6060f1SDimitry Andric if (VAddrDef && MRI.use_nodbg_empty(VAddrDef->getOperand(0).getReg())) 6033fe6060f1SDimitry Andric VAddrDef->eraseFromParent(); 6034fe6060f1SDimitry Andric 6035fe6060f1SDimitry Andric return true; 6036fe6060f1SDimitry Andric } 6037fe6060f1SDimitry Andric 6038e8d8bef9SDimitry Andric // FIXME: Remove this when SelectionDAG is obsoleted. 6039e8d8bef9SDimitry Andric void SIInstrInfo::legalizeOperandsFLAT(MachineRegisterInfo &MRI, 6040e8d8bef9SDimitry Andric MachineInstr &MI) const { 6041e8d8bef9SDimitry Andric if (!isSegmentSpecificFLAT(MI)) 6042e8d8bef9SDimitry Andric return; 6043e8d8bef9SDimitry Andric 6044e8d8bef9SDimitry Andric // Fixup SGPR operands in VGPRs. We only select these when the DAG divergence 6045e8d8bef9SDimitry Andric // thinks they are uniform, so a readfirstlane should be valid. 
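// Schematically (illustrative only): a GLOBAL/SCRATCH saddr operand that is
// in a VGPR is either folded away by switching to the vaddr form of the
// instruction (moveFlatAddrToVGPR above), or rewritten as
//   %s:sgpr_32 = V_READFIRSTLANE_B32 %v
// with the saddr operand updated to use %s, which is what the code below does.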
6046e8d8bef9SDimitry Andric MachineOperand *SAddr = getNamedOperand(MI, AMDGPU::OpName::saddr); 6047e8d8bef9SDimitry Andric if (!SAddr || RI.isSGPRClass(MRI.getRegClass(SAddr->getReg()))) 6048e8d8bef9SDimitry Andric return; 6049e8d8bef9SDimitry Andric 6050fe6060f1SDimitry Andric if (moveFlatAddrToVGPR(MI)) 6051fe6060f1SDimitry Andric return; 6052fe6060f1SDimitry Andric 6053e8d8bef9SDimitry Andric Register ToSGPR = readlaneVGPRToSGPR(SAddr->getReg(), MI, MRI); 6054e8d8bef9SDimitry Andric SAddr->setReg(ToSGPR); 6055e8d8bef9SDimitry Andric } 6056e8d8bef9SDimitry Andric 60570b57cec5SDimitry Andric void SIInstrInfo::legalizeGenericOperand(MachineBasicBlock &InsertMBB, 60580b57cec5SDimitry Andric MachineBasicBlock::iterator I, 60590b57cec5SDimitry Andric const TargetRegisterClass *DstRC, 60600b57cec5SDimitry Andric MachineOperand &Op, 60610b57cec5SDimitry Andric MachineRegisterInfo &MRI, 60620b57cec5SDimitry Andric const DebugLoc &DL) const { 60638bcb0991SDimitry Andric Register OpReg = Op.getReg(); 60640b57cec5SDimitry Andric unsigned OpSubReg = Op.getSubReg(); 60650b57cec5SDimitry Andric 60660b57cec5SDimitry Andric const TargetRegisterClass *OpRC = RI.getSubClassWithSubReg( 60670b57cec5SDimitry Andric RI.getRegClassForReg(MRI, OpReg), OpSubReg); 60680b57cec5SDimitry Andric 60690b57cec5SDimitry Andric // Check if operand is already the correct register class. 60700b57cec5SDimitry Andric if (DstRC == OpRC) 60710b57cec5SDimitry Andric return; 60720b57cec5SDimitry Andric 60738bcb0991SDimitry Andric Register DstReg = MRI.createVirtualRegister(DstRC); 6074349cc55cSDimitry Andric auto Copy = BuildMI(InsertMBB, I, DL, get(AMDGPU::COPY), DstReg).add(Op); 60750b57cec5SDimitry Andric 60760b57cec5SDimitry Andric Op.setReg(DstReg); 60770b57cec5SDimitry Andric Op.setSubReg(0); 60780b57cec5SDimitry Andric 60790b57cec5SDimitry Andric MachineInstr *Def = MRI.getVRegDef(OpReg); 60800b57cec5SDimitry Andric if (!Def) 60810b57cec5SDimitry Andric return; 60820b57cec5SDimitry Andric 60830b57cec5SDimitry Andric // Try to eliminate the copy if it is copying an immediate value. 60848bcb0991SDimitry Andric if (Def->isMoveImmediate() && DstRC != &AMDGPU::VReg_1RegClass) 60850b57cec5SDimitry Andric FoldImmediate(*Copy, *Def, OpReg, &MRI); 60868bcb0991SDimitry Andric 60878bcb0991SDimitry Andric bool ImpDef = Def->isImplicitDef(); 60888bcb0991SDimitry Andric while (!ImpDef && Def && Def->isCopy()) { 60898bcb0991SDimitry Andric if (Def->getOperand(1).getReg().isPhysical()) 60908bcb0991SDimitry Andric break; 60918bcb0991SDimitry Andric Def = MRI.getUniqueVRegDef(Def->getOperand(1).getReg()); 60928bcb0991SDimitry Andric ImpDef = Def && Def->isImplicitDef(); 60938bcb0991SDimitry Andric } 60948bcb0991SDimitry Andric if (!RI.isSGPRClass(DstRC) && !Copy->readsRegister(AMDGPU::EXEC, &RI) && 60958bcb0991SDimitry Andric !ImpDef) 6096349cc55cSDimitry Andric Copy.addReg(AMDGPU::EXEC, RegState::Implicit); 60970b57cec5SDimitry Andric } 60980b57cec5SDimitry Andric 60990b57cec5SDimitry Andric // Emit the actual waterfall loop, executing the wrapped instruction for each 610006c3fb27SDimitry Andric // unique value of \p ScalarOps across all lanes. In the best case we execute 1 61010b57cec5SDimitry Andric // iteration, in the worst case we execute 64 (once per lane). 
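// Rough shape of the emitted loop (schematic, wave64 opcode names shown):
//   LoopBB:
//     %cur  = V_READFIRSTLANE_B32 %vgpr_scalar_op     ; one per 32-bit piece
//     %cond = V_CMP_EQ_U32_e64 %cur, %vgpr_scalar_op  ; or V_CMP_EQ_U64_e64 on
//                                                     ; a REG_SEQUENCE of pieces
//     %save = S_AND_SAVEEXEC_B64 %cond                ; save EXEC, EXEC &= %cond
//   BodyBB:
//     ... the wrapped instruction, now using the SGPR copies ...
//     $exec = S_XOR_B64_term $exec, %save             ; clear the lanes just handled
//     SI_WATERFALL_LOOP %LoopBB                       ; loop while any lanes remain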
610206c3fb27SDimitry Andric static void emitLoadScalarOpsFromVGPRLoop( 610306c3fb27SDimitry Andric const SIInstrInfo &TII, MachineRegisterInfo &MRI, MachineBasicBlock &OrigBB, 610406c3fb27SDimitry Andric MachineBasicBlock &LoopBB, MachineBasicBlock &BodyBB, const DebugLoc &DL, 610506c3fb27SDimitry Andric ArrayRef<MachineOperand *> ScalarOps) { 61060b57cec5SDimitry Andric MachineFunction &MF = *OrigBB.getParent(); 61070b57cec5SDimitry Andric const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 61080b57cec5SDimitry Andric const SIRegisterInfo *TRI = ST.getRegisterInfo(); 61090b57cec5SDimitry Andric unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC; 61100b57cec5SDimitry Andric unsigned SaveExecOpc = 61110b57cec5SDimitry Andric ST.isWave32() ? AMDGPU::S_AND_SAVEEXEC_B32 : AMDGPU::S_AND_SAVEEXEC_B64; 61120b57cec5SDimitry Andric unsigned XorTermOpc = 61130b57cec5SDimitry Andric ST.isWave32() ? AMDGPU::S_XOR_B32_term : AMDGPU::S_XOR_B64_term; 61140b57cec5SDimitry Andric unsigned AndOpc = 61150b57cec5SDimitry Andric ST.isWave32() ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64; 61160b57cec5SDimitry Andric const auto *BoolXExecRC = TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID); 61170b57cec5SDimitry Andric 61180b57cec5SDimitry Andric MachineBasicBlock::iterator I = LoopBB.begin(); 61190b57cec5SDimitry Andric 6120e8d8bef9SDimitry Andric SmallVector<Register, 8> ReadlanePieces; 6121bdd1243dSDimitry Andric Register CondReg; 6122e8d8bef9SDimitry Andric 612306c3fb27SDimitry Andric for (MachineOperand *ScalarOp : ScalarOps) { 612406c3fb27SDimitry Andric unsigned RegSize = TRI->getRegSizeInBits(ScalarOp->getReg(), MRI); 6125e8d8bef9SDimitry Andric unsigned NumSubRegs = RegSize / 32; 612606c3fb27SDimitry Andric Register VScalarOp = ScalarOp->getReg(); 612706c3fb27SDimitry Andric 612806c3fb27SDimitry Andric if (NumSubRegs == 1) { 612906c3fb27SDimitry Andric Register CurReg = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); 613006c3fb27SDimitry Andric 613106c3fb27SDimitry Andric BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), CurReg) 613206c3fb27SDimitry Andric .addReg(VScalarOp); 613306c3fb27SDimitry Andric 613406c3fb27SDimitry Andric Register NewCondReg = MRI.createVirtualRegister(BoolXExecRC); 613506c3fb27SDimitry Andric 613606c3fb27SDimitry Andric BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_CMP_EQ_U32_e64), NewCondReg) 613706c3fb27SDimitry Andric .addReg(CurReg) 613806c3fb27SDimitry Andric .addReg(VScalarOp); 613906c3fb27SDimitry Andric 614006c3fb27SDimitry Andric // Combine the comparison results with AND. 614106c3fb27SDimitry Andric if (!CondReg) // First. 614206c3fb27SDimitry Andric CondReg = NewCondReg; 614306c3fb27SDimitry Andric else { // If not the first, we create an AND. 614406c3fb27SDimitry Andric Register AndReg = MRI.createVirtualRegister(BoolXExecRC); 614506c3fb27SDimitry Andric BuildMI(LoopBB, I, DL, TII.get(AndOpc), AndReg) 614606c3fb27SDimitry Andric .addReg(CondReg) 614706c3fb27SDimitry Andric .addReg(NewCondReg); 614806c3fb27SDimitry Andric CondReg = AndReg; 614906c3fb27SDimitry Andric } 615006c3fb27SDimitry Andric 615106c3fb27SDimitry Andric // Update ScalarOp operand to use the SGPR ScalarOp. 
615206c3fb27SDimitry Andric ScalarOp->setReg(CurReg); 615306c3fb27SDimitry Andric ScalarOp->setIsKill(); 615406c3fb27SDimitry Andric } else { 615506c3fb27SDimitry Andric unsigned VScalarOpUndef = getUndefRegState(ScalarOp->isUndef()); 615606c3fb27SDimitry Andric assert(NumSubRegs % 2 == 0 && NumSubRegs <= 32 && 615706c3fb27SDimitry Andric "Unhandled register size"); 61580b57cec5SDimitry Andric 6159e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < NumSubRegs; Idx += 2) { 6160e8d8bef9SDimitry Andric Register CurRegLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); 6161e8d8bef9SDimitry Andric Register CurRegHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); 6162e8d8bef9SDimitry Andric 6163e8d8bef9SDimitry Andric // Read the next variant <- also loop target. 6164e8d8bef9SDimitry Andric BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegLo) 616506c3fb27SDimitry Andric .addReg(VScalarOp, VScalarOpUndef, TRI->getSubRegFromChannel(Idx)); 6166e8d8bef9SDimitry Andric 6167e8d8bef9SDimitry Andric // Read the next variant <- also loop target. 6168e8d8bef9SDimitry Andric BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegHi) 616906c3fb27SDimitry Andric .addReg(VScalarOp, VScalarOpUndef, 617006c3fb27SDimitry Andric TRI->getSubRegFromChannel(Idx + 1)); 6171e8d8bef9SDimitry Andric 6172e8d8bef9SDimitry Andric ReadlanePieces.push_back(CurRegLo); 6173e8d8bef9SDimitry Andric ReadlanePieces.push_back(CurRegHi); 6174e8d8bef9SDimitry Andric 6175e8d8bef9SDimitry Andric // Comparison is to be done as 64-bit. 6176e8d8bef9SDimitry Andric Register CurReg = MRI.createVirtualRegister(&AMDGPU::SGPR_64RegClass); 6177e8d8bef9SDimitry Andric BuildMI(LoopBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), CurReg) 6178e8d8bef9SDimitry Andric .addReg(CurRegLo) 61790b57cec5SDimitry Andric .addImm(AMDGPU::sub0) 6180e8d8bef9SDimitry Andric .addReg(CurRegHi) 6181e8d8bef9SDimitry Andric .addImm(AMDGPU::sub1); 6182e8d8bef9SDimitry Andric 6183e8d8bef9SDimitry Andric Register NewCondReg = MRI.createVirtualRegister(BoolXExecRC); 618406c3fb27SDimitry Andric auto Cmp = BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_CMP_EQ_U64_e64), 618506c3fb27SDimitry Andric NewCondReg) 6186e8d8bef9SDimitry Andric .addReg(CurReg); 6187e8d8bef9SDimitry Andric if (NumSubRegs <= 2) 618806c3fb27SDimitry Andric Cmp.addReg(VScalarOp); 6189e8d8bef9SDimitry Andric else 619006c3fb27SDimitry Andric Cmp.addReg(VScalarOp, VScalarOpUndef, 619106c3fb27SDimitry Andric TRI->getSubRegFromChannel(Idx, 2)); 6192e8d8bef9SDimitry Andric 619381ad6265SDimitry Andric // Combine the comparison results with AND. 6194bdd1243dSDimitry Andric if (!CondReg) // First. 6195e8d8bef9SDimitry Andric CondReg = NewCondReg; 6196e8d8bef9SDimitry Andric else { // If not the first, we create an AND. 6197e8d8bef9SDimitry Andric Register AndReg = MRI.createVirtualRegister(BoolXExecRC); 6198e8d8bef9SDimitry Andric BuildMI(LoopBB, I, DL, TII.get(AndOpc), AndReg) 6199e8d8bef9SDimitry Andric .addReg(CondReg) 6200e8d8bef9SDimitry Andric .addReg(NewCondReg); 6201e8d8bef9SDimitry Andric CondReg = AndReg; 6202e8d8bef9SDimitry Andric } 6203e8d8bef9SDimitry Andric } // End for loop. 6204e8d8bef9SDimitry Andric 620506c3fb27SDimitry Andric auto SScalarOpRC = 620606c3fb27SDimitry Andric TRI->getEquivalentSGPRClass(MRI.getRegClass(VScalarOp)); 620706c3fb27SDimitry Andric Register SScalarOp = MRI.createVirtualRegister(SScalarOpRC); 6208e8d8bef9SDimitry Andric 620906c3fb27SDimitry Andric // Build scalar ScalarOp. 
621006c3fb27SDimitry Andric auto Merge = 621106c3fb27SDimitry Andric BuildMI(LoopBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), SScalarOp); 6212e8d8bef9SDimitry Andric unsigned Channel = 0; 6213e8d8bef9SDimitry Andric for (Register Piece : ReadlanePieces) { 621406c3fb27SDimitry Andric Merge.addReg(Piece).addImm(TRI->getSubRegFromChannel(Channel++)); 6215e8d8bef9SDimitry Andric } 62160b57cec5SDimitry Andric 621706c3fb27SDimitry Andric // Update ScalarOp operand to use the SGPR ScalarOp. 621806c3fb27SDimitry Andric ScalarOp->setReg(SScalarOp); 621906c3fb27SDimitry Andric ScalarOp->setIsKill(); 622006c3fb27SDimitry Andric } 622106c3fb27SDimitry Andric } 62220b57cec5SDimitry Andric 6223e8d8bef9SDimitry Andric Register SaveExec = MRI.createVirtualRegister(BoolXExecRC); 6224e8d8bef9SDimitry Andric MRI.setSimpleHint(SaveExec, CondReg); 62250b57cec5SDimitry Andric 62260b57cec5SDimitry Andric // Update EXEC to matching lanes, saving original to SaveExec. 62270b57cec5SDimitry Andric BuildMI(LoopBB, I, DL, TII.get(SaveExecOpc), SaveExec) 6228e8d8bef9SDimitry Andric .addReg(CondReg, RegState::Kill); 62290b57cec5SDimitry Andric 62300b57cec5SDimitry Andric // The original instruction is here; we insert the terminators after it. 623181ad6265SDimitry Andric I = BodyBB.end(); 62320b57cec5SDimitry Andric 62330b57cec5SDimitry Andric // Update EXEC, switch all done bits to 0 and all todo bits to 1. 623481ad6265SDimitry Andric BuildMI(BodyBB, I, DL, TII.get(XorTermOpc), Exec) 62350b57cec5SDimitry Andric .addReg(Exec) 62360b57cec5SDimitry Andric .addReg(SaveExec); 6237e8d8bef9SDimitry Andric 623881ad6265SDimitry Andric BuildMI(BodyBB, I, DL, TII.get(AMDGPU::SI_WATERFALL_LOOP)).addMBB(&LoopBB); 62390b57cec5SDimitry Andric } 62400b57cec5SDimitry Andric 624106c3fb27SDimitry Andric // Build a waterfall loop around \p MI, replacing the VGPR \p ScalarOp register 62420b57cec5SDimitry Andric // with SGPRs by iterating over all unique values across all lanes. 6243e8d8bef9SDimitry Andric // Returns the loop basic block that now contains \p MI. 6244e8d8bef9SDimitry Andric static MachineBasicBlock * 624506c3fb27SDimitry Andric loadMBUFScalarOperandsFromVGPR(const SIInstrInfo &TII, MachineInstr &MI, 624606c3fb27SDimitry Andric ArrayRef<MachineOperand *> ScalarOps, 624706c3fb27SDimitry Andric MachineDominatorTree *MDT, 6248e8d8bef9SDimitry Andric MachineBasicBlock::iterator Begin = nullptr, 6249e8d8bef9SDimitry Andric MachineBasicBlock::iterator End = nullptr) { 62500b57cec5SDimitry Andric MachineBasicBlock &MBB = *MI.getParent(); 62510b57cec5SDimitry Andric MachineFunction &MF = *MBB.getParent(); 62520b57cec5SDimitry Andric const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 62530b57cec5SDimitry Andric const SIRegisterInfo *TRI = ST.getRegisterInfo(); 62540b57cec5SDimitry Andric MachineRegisterInfo &MRI = MF.getRegInfo(); 6255e8d8bef9SDimitry Andric if (!Begin.isValid()) 6256e8d8bef9SDimitry Andric Begin = &MI; 6257e8d8bef9SDimitry Andric if (!End.isValid()) { 6258e8d8bef9SDimitry Andric End = &MI; 6259e8d8bef9SDimitry Andric ++End; 6260e8d8bef9SDimitry Andric } 62610b57cec5SDimitry Andric const DebugLoc &DL = MI.getDebugLoc(); 62620b57cec5SDimitry Andric unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC; 62630b57cec5SDimitry Andric unsigned MovExecOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; 62640b57cec5SDimitry Andric const auto *BoolXExecRC = TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID); 62650b57cec5SDimitry Andric 6266*5f757f3fSDimitry Andric // Save SCC. 
Waterfall Loop may overwrite SCC. 6267*5f757f3fSDimitry Andric Register SaveSCCReg; 6268*5f757f3fSDimitry Andric bool SCCNotDead = (MBB.computeRegisterLiveness(TRI, AMDGPU::SCC, MI, 30) != 6269*5f757f3fSDimitry Andric MachineBasicBlock::LQR_Dead); 6270*5f757f3fSDimitry Andric if (SCCNotDead) { 6271*5f757f3fSDimitry Andric SaveSCCReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); 6272*5f757f3fSDimitry Andric BuildMI(MBB, Begin, DL, TII.get(AMDGPU::S_CSELECT_B32), SaveSCCReg) 6273*5f757f3fSDimitry Andric .addImm(1) 6274*5f757f3fSDimitry Andric .addImm(0); 6275*5f757f3fSDimitry Andric } 6276*5f757f3fSDimitry Andric 62778bcb0991SDimitry Andric Register SaveExec = MRI.createVirtualRegister(BoolXExecRC); 62780b57cec5SDimitry Andric 62790b57cec5SDimitry Andric // Save the EXEC mask 6280e8d8bef9SDimitry Andric BuildMI(MBB, Begin, DL, TII.get(MovExecOpc), SaveExec).addReg(Exec); 62810b57cec5SDimitry Andric 62820b57cec5SDimitry Andric // Killed uses in the instruction we are waterfalling around will be 62830b57cec5SDimitry Andric // incorrect due to the added control-flow. 6284e8d8bef9SDimitry Andric MachineBasicBlock::iterator AfterMI = MI; 6285e8d8bef9SDimitry Andric ++AfterMI; 6286e8d8bef9SDimitry Andric for (auto I = Begin; I != AfterMI; I++) { 628706c3fb27SDimitry Andric for (auto &MO : I->all_uses()) 62880b57cec5SDimitry Andric MRI.clearKillFlags(MO.getReg()); 62890b57cec5SDimitry Andric } 62900b57cec5SDimitry Andric 62910b57cec5SDimitry Andric // To insert the loop we need to split the block. Move everything after this 62920b57cec5SDimitry Andric // point to a new block, and insert a new empty block between the two. 62930b57cec5SDimitry Andric MachineBasicBlock *LoopBB = MF.CreateMachineBasicBlock(); 629481ad6265SDimitry Andric MachineBasicBlock *BodyBB = MF.CreateMachineBasicBlock(); 62950b57cec5SDimitry Andric MachineBasicBlock *RemainderBB = MF.CreateMachineBasicBlock(); 62960b57cec5SDimitry Andric MachineFunction::iterator MBBI(MBB); 62970b57cec5SDimitry Andric ++MBBI; 62980b57cec5SDimitry Andric 62990b57cec5SDimitry Andric MF.insert(MBBI, LoopBB); 630081ad6265SDimitry Andric MF.insert(MBBI, BodyBB); 63010b57cec5SDimitry Andric MF.insert(MBBI, RemainderBB); 63020b57cec5SDimitry Andric 630381ad6265SDimitry Andric LoopBB->addSuccessor(BodyBB); 630481ad6265SDimitry Andric BodyBB->addSuccessor(LoopBB); 630581ad6265SDimitry Andric BodyBB->addSuccessor(RemainderBB); 63060b57cec5SDimitry Andric 630781ad6265SDimitry Andric // Move Begin to MI to the BodyBB, and the remainder of the block to 6308e8d8bef9SDimitry Andric // RemainderBB. 63090b57cec5SDimitry Andric RemainderBB->transferSuccessorsAndUpdatePHIs(&MBB); 6310e8d8bef9SDimitry Andric RemainderBB->splice(RemainderBB->begin(), &MBB, End, MBB.end()); 631181ad6265SDimitry Andric BodyBB->splice(BodyBB->begin(), &MBB, Begin, MBB.end()); 63120b57cec5SDimitry Andric 63130b57cec5SDimitry Andric MBB.addSuccessor(LoopBB); 63140b57cec5SDimitry Andric 63150b57cec5SDimitry Andric // Update dominators. We know that MBB immediately dominates LoopBB, that 631681ad6265SDimitry Andric // LoopBB immediately dominates BodyBB, and BodyBB immediately dominates 631781ad6265SDimitry Andric // RemainderBB. RemainderBB immediately dominates all of the successors 631881ad6265SDimitry Andric // transferred to it from MBB that MBB used to properly dominate. 
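// Resulting CFG (schematic):
//   MBB -> LoopBB -> BodyBB -> RemainderBB -> (former successors of MBB)
//             ^          |
//             +----------+   back edge taken while unhandled lanes remain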
63190b57cec5SDimitry Andric if (MDT) { 63200b57cec5SDimitry Andric MDT->addNewBlock(LoopBB, &MBB); 632181ad6265SDimitry Andric MDT->addNewBlock(BodyBB, LoopBB); 632281ad6265SDimitry Andric MDT->addNewBlock(RemainderBB, BodyBB); 63230b57cec5SDimitry Andric for (auto &Succ : RemainderBB->successors()) { 6324480093f4SDimitry Andric if (MDT->properlyDominates(&MBB, Succ)) { 63250b57cec5SDimitry Andric MDT->changeImmediateDominator(Succ, RemainderBB); 63260b57cec5SDimitry Andric } 63270b57cec5SDimitry Andric } 63280b57cec5SDimitry Andric } 63290b57cec5SDimitry Andric 633006c3fb27SDimitry Andric emitLoadScalarOpsFromVGPRLoop(TII, MRI, MBB, *LoopBB, *BodyBB, DL, ScalarOps); 63310b57cec5SDimitry Andric 63320b57cec5SDimitry Andric MachineBasicBlock::iterator First = RemainderBB->begin(); 6333*5f757f3fSDimitry Andric // Restore SCC 6334*5f757f3fSDimitry Andric if (SCCNotDead) { 6335*5f757f3fSDimitry Andric BuildMI(*RemainderBB, First, DL, TII.get(AMDGPU::S_CMP_LG_U32)) 6336*5f757f3fSDimitry Andric .addReg(SaveSCCReg, RegState::Kill) 6337*5f757f3fSDimitry Andric .addImm(0); 6338*5f757f3fSDimitry Andric } 6339*5f757f3fSDimitry Andric 6340*5f757f3fSDimitry Andric // Restore the EXEC mask 63410b57cec5SDimitry Andric BuildMI(*RemainderBB, First, DL, TII.get(MovExecOpc), Exec).addReg(SaveExec); 634281ad6265SDimitry Andric return BodyBB; 63430b57cec5SDimitry Andric } 63440b57cec5SDimitry Andric 63450b57cec5SDimitry Andric // Extract pointer from Rsrc and return a zero-value Rsrc replacement. 63460b57cec5SDimitry Andric static std::tuple<unsigned, unsigned> 63470b57cec5SDimitry Andric extractRsrcPtr(const SIInstrInfo &TII, MachineInstr &MI, MachineOperand &Rsrc) { 63480b57cec5SDimitry Andric MachineBasicBlock &MBB = *MI.getParent(); 63490b57cec5SDimitry Andric MachineFunction &MF = *MBB.getParent(); 63500b57cec5SDimitry Andric MachineRegisterInfo &MRI = MF.getRegInfo(); 63510b57cec5SDimitry Andric 63520b57cec5SDimitry Andric // Extract the ptr from the resource descriptor. 
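// Schematic view of the code below: the 128-bit descriptor is treated as
// { base pointer in sub0_sub1, data-format words in sub2/sub3 }, and the
// replacement descriptor keeps only the default data format over a zero base.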
63530b57cec5SDimitry Andric unsigned RsrcPtr = 63540b57cec5SDimitry Andric TII.buildExtractSubReg(MI, MRI, Rsrc, &AMDGPU::VReg_128RegClass, 63550b57cec5SDimitry Andric AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass); 63560b57cec5SDimitry Andric 63570b57cec5SDimitry Andric // Create an empty resource descriptor 63588bcb0991SDimitry Andric Register Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); 63598bcb0991SDimitry Andric Register SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); 63608bcb0991SDimitry Andric Register SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); 63618bcb0991SDimitry Andric Register NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass); 63620b57cec5SDimitry Andric uint64_t RsrcDataFormat = TII.getDefaultRsrcDataFormat(); 63630b57cec5SDimitry Andric 63640b57cec5SDimitry Andric // Zero64 = 0 63650b57cec5SDimitry Andric BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(AMDGPU::S_MOV_B64), Zero64) 63660b57cec5SDimitry Andric .addImm(0); 63670b57cec5SDimitry Andric 63680b57cec5SDimitry Andric // SRsrcFormatLo = RSRC_DATA_FORMAT{31-0} 63690b57cec5SDimitry Andric BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), SRsrcFormatLo) 63700b57cec5SDimitry Andric .addImm(RsrcDataFormat & 0xFFFFFFFF); 63710b57cec5SDimitry Andric 63720b57cec5SDimitry Andric // SRsrcFormatHi = RSRC_DATA_FORMAT{63-32} 63730b57cec5SDimitry Andric BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), SRsrcFormatHi) 63740b57cec5SDimitry Andric .addImm(RsrcDataFormat >> 32); 63750b57cec5SDimitry Andric 63760b57cec5SDimitry Andric // NewSRsrc = {Zero64, SRsrcFormat} 63770b57cec5SDimitry Andric BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(AMDGPU::REG_SEQUENCE), NewSRsrc) 63780b57cec5SDimitry Andric .addReg(Zero64) 63790b57cec5SDimitry Andric .addImm(AMDGPU::sub0_sub1) 63800b57cec5SDimitry Andric .addReg(SRsrcFormatLo) 63810b57cec5SDimitry Andric .addImm(AMDGPU::sub2) 63820b57cec5SDimitry Andric .addReg(SRsrcFormatHi) 63830b57cec5SDimitry Andric .addImm(AMDGPU::sub3); 63840b57cec5SDimitry Andric 6385bdd1243dSDimitry Andric return std::tuple(RsrcPtr, NewSRsrc); 63860b57cec5SDimitry Andric } 63870b57cec5SDimitry Andric 6388e8d8bef9SDimitry Andric MachineBasicBlock * 6389e8d8bef9SDimitry Andric SIInstrInfo::legalizeOperands(MachineInstr &MI, 63900b57cec5SDimitry Andric MachineDominatorTree *MDT) const { 63910b57cec5SDimitry Andric MachineFunction &MF = *MI.getParent()->getParent(); 63920b57cec5SDimitry Andric MachineRegisterInfo &MRI = MF.getRegInfo(); 6393e8d8bef9SDimitry Andric MachineBasicBlock *CreatedBB = nullptr; 63940b57cec5SDimitry Andric 63950b57cec5SDimitry Andric // Legalize VOP2 63960b57cec5SDimitry Andric if (isVOP2(MI) || isVOPC(MI)) { 63970b57cec5SDimitry Andric legalizeOperandsVOP2(MRI, MI); 6398e8d8bef9SDimitry Andric return CreatedBB; 63990b57cec5SDimitry Andric } 64000b57cec5SDimitry Andric 64010b57cec5SDimitry Andric // Legalize VOP3 64020b57cec5SDimitry Andric if (isVOP3(MI)) { 64030b57cec5SDimitry Andric legalizeOperandsVOP3(MRI, MI); 6404e8d8bef9SDimitry Andric return CreatedBB; 64050b57cec5SDimitry Andric } 64060b57cec5SDimitry Andric 64070b57cec5SDimitry Andric // Legalize SMRD 64080b57cec5SDimitry Andric if (isSMRD(MI)) { 64090b57cec5SDimitry Andric legalizeOperandsSMRD(MRI, MI); 6410e8d8bef9SDimitry Andric return CreatedBB; 6411e8d8bef9SDimitry Andric } 6412e8d8bef9SDimitry Andric 6413e8d8bef9SDimitry Andric // Legalize FLAT 6414e8d8bef9SDimitry Andric if (isFLAT(MI)) { 6415e8d8bef9SDimitry Andric 
legalizeOperandsFLAT(MRI, MI);
6416e8d8bef9SDimitry Andric return CreatedBB;
64170b57cec5SDimitry Andric }
64180b57cec5SDimitry Andric
64190b57cec5SDimitry Andric // Legalize REG_SEQUENCE and PHI
64200b57cec5SDimitry Andric // The register class of the operands must be the same type as the register
64210b57cec5SDimitry Andric // class of the output.
64220b57cec5SDimitry Andric if (MI.getOpcode() == AMDGPU::PHI) {
64230b57cec5SDimitry Andric const TargetRegisterClass *RC = nullptr, *SRC = nullptr, *VRC = nullptr;
64240b57cec5SDimitry Andric for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) {
6425e8d8bef9SDimitry Andric if (!MI.getOperand(i).isReg() || !MI.getOperand(i).getReg().isVirtual())
64260b57cec5SDimitry Andric continue;
64270b57cec5SDimitry Andric const TargetRegisterClass *OpRC =
64280b57cec5SDimitry Andric MRI.getRegClass(MI.getOperand(i).getReg());
64290b57cec5SDimitry Andric if (RI.hasVectorRegisters(OpRC)) {
64300b57cec5SDimitry Andric VRC = OpRC;
64310b57cec5SDimitry Andric } else {
64320b57cec5SDimitry Andric SRC = OpRC;
64330b57cec5SDimitry Andric }
64340b57cec5SDimitry Andric }
64350b57cec5SDimitry Andric
64360b57cec5SDimitry Andric // If any of the operands are VGPR registers, then they all must be VGPRs,
64370b57cec5SDimitry Andric // otherwise we will create illegal VGPR->SGPR copies when legalizing
64380b57cec5SDimitry Andric // them.
64390b57cec5SDimitry Andric if (VRC || !RI.isSGPRClass(getOpRegClass(MI, 0))) {
64400b57cec5SDimitry Andric if (!VRC) {
64410b57cec5SDimitry Andric assert(SRC);
64428bcb0991SDimitry Andric if (getOpRegClass(MI, 0) == &AMDGPU::VReg_1RegClass) {
64438bcb0991SDimitry Andric VRC = &AMDGPU::VReg_1RegClass;
64448bcb0991SDimitry Andric } else
64454824e7fdSDimitry Andric VRC = RI.isAGPRClass(getOpRegClass(MI, 0))
64468bcb0991SDimitry Andric ? RI.getEquivalentAGPRClass(SRC)
64470b57cec5SDimitry Andric : RI.getEquivalentVGPRClass(SRC);
64488bcb0991SDimitry Andric } else {
64494824e7fdSDimitry Andric VRC = RI.isAGPRClass(getOpRegClass(MI, 0))
64508bcb0991SDimitry Andric ? RI.getEquivalentAGPRClass(VRC)
64518bcb0991SDimitry Andric : RI.getEquivalentVGPRClass(VRC);
64520b57cec5SDimitry Andric }
64530b57cec5SDimitry Andric RC = VRC;
64540b57cec5SDimitry Andric } else {
64550b57cec5SDimitry Andric RC = SRC;
64560b57cec5SDimitry Andric }
64570b57cec5SDimitry Andric
64580b57cec5SDimitry Andric // Update all the operands so they have the same type.
64590b57cec5SDimitry Andric for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
64600b57cec5SDimitry Andric MachineOperand &Op = MI.getOperand(I);
6461e8d8bef9SDimitry Andric if (!Op.isReg() || !Op.getReg().isVirtual())
64620b57cec5SDimitry Andric continue;
64630b57cec5SDimitry Andric
64640b57cec5SDimitry Andric // MI is a PHI instruction.
64650b57cec5SDimitry Andric MachineBasicBlock *InsertBB = MI.getOperand(I + 1).getMBB();
64660b57cec5SDimitry Andric MachineBasicBlock::iterator Insert = InsertBB->getFirstTerminator();
64670b57cec5SDimitry Andric
64680b57cec5SDimitry Andric // Avoid creating no-op copies with the same src and dst reg class. These
64690b57cec5SDimitry Andric // confuse some of the machine passes.
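// Schematic example (not literal MIR): for
//   %r:vgpr_32 = PHI %a:sgpr_32, %bb.0, %b:vgpr_32, %bb.1
// a COPY of %a into a fresh vgpr_32 is inserted in %bb.0 just before its
// terminators, and the PHI operand is rewritten to use that copy.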
64700b57cec5SDimitry Andric legalizeGenericOperand(*InsertBB, Insert, RC, Op, MRI, MI.getDebugLoc()); 64710b57cec5SDimitry Andric } 64720b57cec5SDimitry Andric } 64730b57cec5SDimitry Andric 64740b57cec5SDimitry Andric // REG_SEQUENCE doesn't really require operand legalization, but if one has a 64750b57cec5SDimitry Andric // VGPR dest type and SGPR sources, insert copies so all operands are 64760b57cec5SDimitry Andric // VGPRs. This seems to help operand folding / the register coalescer. 64770b57cec5SDimitry Andric if (MI.getOpcode() == AMDGPU::REG_SEQUENCE) { 64780b57cec5SDimitry Andric MachineBasicBlock *MBB = MI.getParent(); 64790b57cec5SDimitry Andric const TargetRegisterClass *DstRC = getOpRegClass(MI, 0); 64800b57cec5SDimitry Andric if (RI.hasVGPRs(DstRC)) { 64810b57cec5SDimitry Andric // Update all the operands so they are VGPR register classes. These may 64820b57cec5SDimitry Andric // not be the same register class because REG_SEQUENCE supports mixing 64830b57cec5SDimitry Andric // subregister index types e.g. sub0_sub1 + sub2 + sub3 64840b57cec5SDimitry Andric for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) { 64850b57cec5SDimitry Andric MachineOperand &Op = MI.getOperand(I); 6486e8d8bef9SDimitry Andric if (!Op.isReg() || !Op.getReg().isVirtual()) 64870b57cec5SDimitry Andric continue; 64880b57cec5SDimitry Andric 64890b57cec5SDimitry Andric const TargetRegisterClass *OpRC = MRI.getRegClass(Op.getReg()); 64900b57cec5SDimitry Andric const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(OpRC); 64910b57cec5SDimitry Andric if (VRC == OpRC) 64920b57cec5SDimitry Andric continue; 64930b57cec5SDimitry Andric 64940b57cec5SDimitry Andric legalizeGenericOperand(*MBB, MI, VRC, Op, MRI, MI.getDebugLoc()); 64950b57cec5SDimitry Andric Op.setIsKill(); 64960b57cec5SDimitry Andric } 64970b57cec5SDimitry Andric } 64980b57cec5SDimitry Andric 6499e8d8bef9SDimitry Andric return CreatedBB; 65000b57cec5SDimitry Andric } 65010b57cec5SDimitry Andric 65020b57cec5SDimitry Andric // Legalize INSERT_SUBREG 65030b57cec5SDimitry Andric // src0 must have the same register class as dst 65040b57cec5SDimitry Andric if (MI.getOpcode() == AMDGPU::INSERT_SUBREG) { 65058bcb0991SDimitry Andric Register Dst = MI.getOperand(0).getReg(); 65068bcb0991SDimitry Andric Register Src0 = MI.getOperand(1).getReg(); 65070b57cec5SDimitry Andric const TargetRegisterClass *DstRC = MRI.getRegClass(Dst); 65080b57cec5SDimitry Andric const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0); 65090b57cec5SDimitry Andric if (DstRC != Src0RC) { 65100b57cec5SDimitry Andric MachineBasicBlock *MBB = MI.getParent(); 65110b57cec5SDimitry Andric MachineOperand &Op = MI.getOperand(1); 65120b57cec5SDimitry Andric legalizeGenericOperand(*MBB, MI, DstRC, Op, MRI, MI.getDebugLoc()); 65130b57cec5SDimitry Andric } 6514e8d8bef9SDimitry Andric return CreatedBB; 65150b57cec5SDimitry Andric } 65160b57cec5SDimitry Andric 65170b57cec5SDimitry Andric // Legalize SI_INIT_M0 65180b57cec5SDimitry Andric if (MI.getOpcode() == AMDGPU::SI_INIT_M0) { 65190b57cec5SDimitry Andric MachineOperand &Src = MI.getOperand(0); 65200b57cec5SDimitry Andric if (Src.isReg() && RI.hasVectorRegisters(MRI.getRegClass(Src.getReg()))) 65210b57cec5SDimitry Andric Src.setReg(readlaneVGPRToSGPR(Src.getReg(), MI, MRI)); 6522e8d8bef9SDimitry Andric return CreatedBB; 65230b57cec5SDimitry Andric } 65240b57cec5SDimitry Andric 6525*5f757f3fSDimitry Andric // Legalize S_BITREPLICATE, S_QUADMASK and S_WQM 6526*5f757f3fSDimitry Andric if (MI.getOpcode() == 
AMDGPU::S_BITREPLICATE_B64_B32 || 6527*5f757f3fSDimitry Andric MI.getOpcode() == AMDGPU::S_QUADMASK_B32 || 6528*5f757f3fSDimitry Andric MI.getOpcode() == AMDGPU::S_QUADMASK_B64 || 6529*5f757f3fSDimitry Andric MI.getOpcode() == AMDGPU::S_WQM_B32 || 6530*5f757f3fSDimitry Andric MI.getOpcode() == AMDGPU::S_WQM_B64) { 6531*5f757f3fSDimitry Andric MachineOperand &Src = MI.getOperand(1); 6532*5f757f3fSDimitry Andric if (Src.isReg() && RI.hasVectorRegisters(MRI.getRegClass(Src.getReg()))) 6533*5f757f3fSDimitry Andric Src.setReg(readlaneVGPRToSGPR(Src.getReg(), MI, MRI)); 6534*5f757f3fSDimitry Andric return CreatedBB; 6535*5f757f3fSDimitry Andric } 6536*5f757f3fSDimitry Andric 6537*5f757f3fSDimitry Andric // Legalize MIMG/VIMAGE/VSAMPLE and MUBUF/MTBUF for shaders. 65380b57cec5SDimitry Andric // 65390b57cec5SDimitry Andric // Shaders only generate MUBUF/MTBUF instructions via intrinsics or via 65400b57cec5SDimitry Andric // scratch memory access. In both cases, the legalization never involves 65410b57cec5SDimitry Andric // conversion to the addr64 form. 6542*5f757f3fSDimitry Andric if (isImage(MI) || (AMDGPU::isGraphics(MF.getFunction().getCallingConv()) && 65430b57cec5SDimitry Andric (isMUBUF(MI) || isMTBUF(MI)))) { 6544*5f757f3fSDimitry Andric int RSrcOpName = (isVIMAGE(MI) || isVSAMPLE(MI)) ? AMDGPU::OpName::rsrc 6545*5f757f3fSDimitry Andric : AMDGPU::OpName::srsrc; 6546*5f757f3fSDimitry Andric MachineOperand *SRsrc = getNamedOperand(MI, RSrcOpName); 6547e8d8bef9SDimitry Andric if (SRsrc && !RI.isSGPRClass(MRI.getRegClass(SRsrc->getReg()))) 654806c3fb27SDimitry Andric CreatedBB = loadMBUFScalarOperandsFromVGPR(*this, MI, {SRsrc}, MDT); 65490b57cec5SDimitry Andric 6550*5f757f3fSDimitry Andric int SampOpName = isMIMG(MI) ? AMDGPU::OpName::ssamp : AMDGPU::OpName::samp; 6551*5f757f3fSDimitry Andric MachineOperand *SSamp = getNamedOperand(MI, SampOpName); 6552e8d8bef9SDimitry Andric if (SSamp && !RI.isSGPRClass(MRI.getRegClass(SSamp->getReg()))) 655306c3fb27SDimitry Andric CreatedBB = loadMBUFScalarOperandsFromVGPR(*this, MI, {SSamp}, MDT); 6554e8d8bef9SDimitry Andric 6555e8d8bef9SDimitry Andric return CreatedBB; 65560b57cec5SDimitry Andric } 6557e8d8bef9SDimitry Andric 6558e8d8bef9SDimitry Andric // Legalize SI_CALL 6559e8d8bef9SDimitry Andric if (MI.getOpcode() == AMDGPU::SI_CALL_ISEL) { 6560e8d8bef9SDimitry Andric MachineOperand *Dest = &MI.getOperand(0); 6561e8d8bef9SDimitry Andric if (!RI.isSGPRClass(MRI.getRegClass(Dest->getReg()))) { 6562e8d8bef9SDimitry Andric // Move everything between ADJCALLSTACKUP and ADJCALLSTACKDOWN and 6563e8d8bef9SDimitry Andric // following copies, we also need to move copies from and to physical 6564e8d8bef9SDimitry Andric // registers into the loop block. 
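// Passing the whole [Start, End) range to loadMBUFScalarOperandsFromVGPR()
// below keeps the argument and return-value COPYs to and from physical
// registers under the same EXEC mask as the call itself, instead of leaving
// them outside the waterfall loop.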
6565e8d8bef9SDimitry Andric unsigned FrameSetupOpcode = getCallFrameSetupOpcode(); 6566e8d8bef9SDimitry Andric unsigned FrameDestroyOpcode = getCallFrameDestroyOpcode(); 6567e8d8bef9SDimitry Andric 6568e8d8bef9SDimitry Andric // Also move the copies to physical registers into the loop block 6569e8d8bef9SDimitry Andric MachineBasicBlock &MBB = *MI.getParent(); 6570e8d8bef9SDimitry Andric MachineBasicBlock::iterator Start(&MI); 6571e8d8bef9SDimitry Andric while (Start->getOpcode() != FrameSetupOpcode) 6572e8d8bef9SDimitry Andric --Start; 6573e8d8bef9SDimitry Andric MachineBasicBlock::iterator End(&MI); 6574e8d8bef9SDimitry Andric while (End->getOpcode() != FrameDestroyOpcode) 6575e8d8bef9SDimitry Andric ++End; 6576e8d8bef9SDimitry Andric // Also include following copies of the return value 6577e8d8bef9SDimitry Andric ++End; 6578e8d8bef9SDimitry Andric while (End != MBB.end() && End->isCopy() && End->getOperand(1).isReg() && 6579e8d8bef9SDimitry Andric MI.definesRegister(End->getOperand(1).getReg())) 6580e8d8bef9SDimitry Andric ++End; 658106c3fb27SDimitry Andric CreatedBB = 658206c3fb27SDimitry Andric loadMBUFScalarOperandsFromVGPR(*this, MI, {Dest}, MDT, Start, End); 6583e8d8bef9SDimitry Andric } 65840b57cec5SDimitry Andric } 65850b57cec5SDimitry Andric 6586*5f757f3fSDimitry Andric // Legalize s_sleep_var. 6587*5f757f3fSDimitry Andric if (MI.getOpcode() == AMDGPU::S_SLEEP_VAR) { 6588*5f757f3fSDimitry Andric const DebugLoc &DL = MI.getDebugLoc(); 6589*5f757f3fSDimitry Andric Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); 6590*5f757f3fSDimitry Andric int Src0Idx = 6591*5f757f3fSDimitry Andric AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0); 6592*5f757f3fSDimitry Andric MachineOperand &Src0 = MI.getOperand(Src0Idx); 6593*5f757f3fSDimitry Andric BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg) 6594*5f757f3fSDimitry Andric .add(Src0); 6595*5f757f3fSDimitry Andric Src0.ChangeToRegister(Reg, false); 6596*5f757f3fSDimitry Andric return nullptr; 6597*5f757f3fSDimitry Andric } 6598*5f757f3fSDimitry Andric 659906c3fb27SDimitry Andric // Legalize MUBUF instructions. 660006c3fb27SDimitry Andric bool isSoffsetLegal = true; 660106c3fb27SDimitry Andric int SoffsetIdx = 660206c3fb27SDimitry Andric AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::soffset); 660306c3fb27SDimitry Andric if (SoffsetIdx != -1) { 660406c3fb27SDimitry Andric MachineOperand *Soffset = &MI.getOperand(SoffsetIdx); 6605*5f757f3fSDimitry Andric if (Soffset->isReg() && Soffset->getReg().isVirtual() && 660606c3fb27SDimitry Andric !RI.isSGPRClass(MRI.getRegClass(Soffset->getReg()))) { 660706c3fb27SDimitry Andric isSoffsetLegal = false; 660806c3fb27SDimitry Andric } 660906c3fb27SDimitry Andric } 661006c3fb27SDimitry Andric 661106c3fb27SDimitry Andric bool isRsrcLegal = true; 66120b57cec5SDimitry Andric int RsrcIdx = 66130b57cec5SDimitry Andric AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc); 66140b57cec5SDimitry Andric if (RsrcIdx != -1) { 66150b57cec5SDimitry Andric MachineOperand *Rsrc = &MI.getOperand(RsrcIdx); 661606c3fb27SDimitry Andric if (Rsrc->isReg() && !RI.isSGPRClass(MRI.getRegClass(Rsrc->getReg()))) { 661706c3fb27SDimitry Andric isRsrcLegal = false; 661806c3fb27SDimitry Andric } 66190b57cec5SDimitry Andric } 66200b57cec5SDimitry Andric 662106c3fb27SDimitry Andric // The operands are legal. 
662206c3fb27SDimitry Andric if (isRsrcLegal && isSoffsetLegal) 662306c3fb27SDimitry Andric return CreatedBB; 662406c3fb27SDimitry Andric 662506c3fb27SDimitry Andric if (!isRsrcLegal) { 662606c3fb27SDimitry Andric // Legalize a VGPR Rsrc 66270b57cec5SDimitry Andric // 66280b57cec5SDimitry Andric // If the instruction is _ADDR64, we can avoid a waterfall by extracting 66290b57cec5SDimitry Andric // the base pointer from the VGPR Rsrc, adding it to the VAddr, then using 66300b57cec5SDimitry Andric // a zero-value SRsrc. 66310b57cec5SDimitry Andric // 66320b57cec5SDimitry Andric // If the instruction is _OFFSET (both idxen and offen disabled), and we 66330b57cec5SDimitry Andric // support ADDR64 instructions, we can convert to ADDR64 and do the same as 66340b57cec5SDimitry Andric // above. 66350b57cec5SDimitry Andric // 66360b57cec5SDimitry Andric // Otherwise we are on non-ADDR64 hardware, and/or we have 66370b57cec5SDimitry Andric // idxen/offen/bothen and we fall back to a waterfall loop. 66380b57cec5SDimitry Andric 663906c3fb27SDimitry Andric MachineOperand *Rsrc = &MI.getOperand(RsrcIdx); 66400b57cec5SDimitry Andric MachineBasicBlock &MBB = *MI.getParent(); 66410b57cec5SDimitry Andric 66420b57cec5SDimitry Andric MachineOperand *VAddr = getNamedOperand(MI, AMDGPU::OpName::vaddr); 66430b57cec5SDimitry Andric if (VAddr && AMDGPU::getIfAddr64Inst(MI.getOpcode()) != -1) { 66440b57cec5SDimitry Andric // This is already an ADDR64 instruction so we need to add the pointer 66450b57cec5SDimitry Andric // extracted from the resource descriptor to the current value of VAddr. 66468bcb0991SDimitry Andric Register NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 66478bcb0991SDimitry Andric Register NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 66488bcb0991SDimitry Andric Register NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass); 66490b57cec5SDimitry Andric 66500b57cec5SDimitry Andric const auto *BoolXExecRC = RI.getRegClass(AMDGPU::SReg_1_XEXECRegClassID); 66518bcb0991SDimitry Andric Register CondReg0 = MRI.createVirtualRegister(BoolXExecRC); 66528bcb0991SDimitry Andric Register CondReg1 = MRI.createVirtualRegister(BoolXExecRC); 66530b57cec5SDimitry Andric 66540b57cec5SDimitry Andric unsigned RsrcPtr, NewSRsrc; 66550b57cec5SDimitry Andric std::tie(RsrcPtr, NewSRsrc) = extractRsrcPtr(*this, MI, *Rsrc); 66560b57cec5SDimitry Andric 66570b57cec5SDimitry Andric // NewVaddrLo = RsrcPtr:sub0 + VAddr:sub0 66580b57cec5SDimitry Andric const DebugLoc &DL = MI.getDebugLoc(); 6659e8d8bef9SDimitry Andric BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_CO_U32_e64), NewVAddrLo) 66600b57cec5SDimitry Andric .addDef(CondReg0) 66610b57cec5SDimitry Andric .addReg(RsrcPtr, 0, AMDGPU::sub0) 66620b57cec5SDimitry Andric .addReg(VAddr->getReg(), 0, AMDGPU::sub0) 66630b57cec5SDimitry Andric .addImm(0); 66640b57cec5SDimitry Andric 66650b57cec5SDimitry Andric // NewVaddrHi = RsrcPtr:sub1 + VAddr:sub1 66660b57cec5SDimitry Andric BuildMI(MBB, MI, DL, get(AMDGPU::V_ADDC_U32_e64), NewVAddrHi) 66670b57cec5SDimitry Andric .addDef(CondReg1, RegState::Dead) 66680b57cec5SDimitry Andric .addReg(RsrcPtr, 0, AMDGPU::sub1) 66690b57cec5SDimitry Andric .addReg(VAddr->getReg(), 0, AMDGPU::sub1) 66700b57cec5SDimitry Andric .addReg(CondReg0, RegState::Kill) 66710b57cec5SDimitry Andric .addImm(0); 66720b57cec5SDimitry Andric 66730b57cec5SDimitry Andric // NewVaddr = {NewVaddrHi, NewVaddrLo} 66740b57cec5SDimitry Andric BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewVAddr) 66750b57cec5SDimitry 
Andric .addReg(NewVAddrLo) 66760b57cec5SDimitry Andric .addImm(AMDGPU::sub0) 66770b57cec5SDimitry Andric .addReg(NewVAddrHi) 66780b57cec5SDimitry Andric .addImm(AMDGPU::sub1); 66790b57cec5SDimitry Andric 66800b57cec5SDimitry Andric VAddr->setReg(NewVAddr); 66810b57cec5SDimitry Andric Rsrc->setReg(NewSRsrc); 66820b57cec5SDimitry Andric } else if (!VAddr && ST.hasAddr64()) { 66830b57cec5SDimitry Andric // This instructions is the _OFFSET variant, so we need to convert it to 66840b57cec5SDimitry Andric // ADDR64. 6685e8d8bef9SDimitry Andric assert(ST.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS && 66860b57cec5SDimitry Andric "FIXME: Need to emit flat atomics here"); 66870b57cec5SDimitry Andric 66880b57cec5SDimitry Andric unsigned RsrcPtr, NewSRsrc; 66890b57cec5SDimitry Andric std::tie(RsrcPtr, NewSRsrc) = extractRsrcPtr(*this, MI, *Rsrc); 66900b57cec5SDimitry Andric 66918bcb0991SDimitry Andric Register NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass); 66920b57cec5SDimitry Andric MachineOperand *VData = getNamedOperand(MI, AMDGPU::OpName::vdata); 66930b57cec5SDimitry Andric MachineOperand *Offset = getNamedOperand(MI, AMDGPU::OpName::offset); 66940b57cec5SDimitry Andric MachineOperand *SOffset = getNamedOperand(MI, AMDGPU::OpName::soffset); 66950b57cec5SDimitry Andric unsigned Addr64Opcode = AMDGPU::getAddr64Inst(MI.getOpcode()); 66960b57cec5SDimitry Andric 669781ad6265SDimitry Andric // Atomics with return have an additional tied operand and are 66980b57cec5SDimitry Andric // missing some of the special bits. 66990b57cec5SDimitry Andric MachineOperand *VDataIn = getNamedOperand(MI, AMDGPU::OpName::vdata_in); 67000b57cec5SDimitry Andric MachineInstr *Addr64; 67010b57cec5SDimitry Andric 67020b57cec5SDimitry Andric if (!VDataIn) { 67030b57cec5SDimitry Andric // Regular buffer load / store. 67040b57cec5SDimitry Andric MachineInstrBuilder MIB = 67050b57cec5SDimitry Andric BuildMI(MBB, MI, MI.getDebugLoc(), get(Addr64Opcode)) 67060b57cec5SDimitry Andric .add(*VData) 67070b57cec5SDimitry Andric .addReg(NewVAddr) 67080b57cec5SDimitry Andric .addReg(NewSRsrc) 67090b57cec5SDimitry Andric .add(*SOffset) 67100b57cec5SDimitry Andric .add(*Offset); 67110b57cec5SDimitry Andric 6712fe6060f1SDimitry Andric if (const MachineOperand *CPol = 6713fe6060f1SDimitry Andric getNamedOperand(MI, AMDGPU::OpName::cpol)) { 6714fe6060f1SDimitry Andric MIB.addImm(CPol->getImm()); 67150b57cec5SDimitry Andric } 67160b57cec5SDimitry Andric 67170b57cec5SDimitry Andric if (const MachineOperand *TFE = 67180b57cec5SDimitry Andric getNamedOperand(MI, AMDGPU::OpName::tfe)) { 67190b57cec5SDimitry Andric MIB.addImm(TFE->getImm()); 67200b57cec5SDimitry Andric } 67210b57cec5SDimitry Andric 67228bcb0991SDimitry Andric MIB.addImm(getNamedImmOperand(MI, AMDGPU::OpName::swz)); 67238bcb0991SDimitry Andric 67240b57cec5SDimitry Andric MIB.cloneMemRefs(MI); 67250b57cec5SDimitry Andric Addr64 = MIB; 67260b57cec5SDimitry Andric } else { 67270b57cec5SDimitry Andric // Atomics with return. 
67280b57cec5SDimitry Andric Addr64 = BuildMI(MBB, MI, MI.getDebugLoc(), get(Addr64Opcode))
67290b57cec5SDimitry Andric .add(*VData)
67300b57cec5SDimitry Andric .add(*VDataIn)
67310b57cec5SDimitry Andric .addReg(NewVAddr)
67320b57cec5SDimitry Andric .addReg(NewSRsrc)
67330b57cec5SDimitry Andric .add(*SOffset)
67340b57cec5SDimitry Andric .add(*Offset)
6735fe6060f1SDimitry Andric .addImm(getNamedImmOperand(MI, AMDGPU::OpName::cpol))
67360b57cec5SDimitry Andric .cloneMemRefs(MI);
67370b57cec5SDimitry Andric }
67380b57cec5SDimitry Andric
67390b57cec5SDimitry Andric MI.removeFromParent();
67400b57cec5SDimitry Andric
67410b57cec5SDimitry Andric // NewVaddr = {NewVaddrHi, NewVaddrLo}
67420b57cec5SDimitry Andric BuildMI(MBB, Addr64, Addr64->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
67430b57cec5SDimitry Andric NewVAddr)
67440b57cec5SDimitry Andric .addReg(RsrcPtr, 0, AMDGPU::sub0)
67450b57cec5SDimitry Andric .addImm(AMDGPU::sub0)
67460b57cec5SDimitry Andric .addReg(RsrcPtr, 0, AMDGPU::sub1)
67470b57cec5SDimitry Andric .addImm(AMDGPU::sub1);
67480b57cec5SDimitry Andric } else {
674906c3fb27SDimitry Andric // Legalize a VGPR Rsrc and soffset together.
675006c3fb27SDimitry Andric if (!isSoffsetLegal) {
675106c3fb27SDimitry Andric MachineOperand *Soffset = getNamedOperand(MI, AMDGPU::OpName::soffset);
675206c3fb27SDimitry Andric CreatedBB =
675306c3fb27SDimitry Andric loadMBUFScalarOperandsFromVGPR(*this, MI, {Rsrc, Soffset}, MDT);
6754e8d8bef9SDimitry Andric return CreatedBB;
67550b57cec5SDimitry Andric }
675606c3fb27SDimitry Andric CreatedBB = loadMBUFScalarOperandsFromVGPR(*this, MI, {Rsrc}, MDT);
675706c3fb27SDimitry Andric return CreatedBB;
675806c3fb27SDimitry Andric }
675906c3fb27SDimitry Andric }
676006c3fb27SDimitry Andric
676106c3fb27SDimitry Andric // Legalize a VGPR soffset.
676206c3fb27SDimitry Andric if (!isSoffsetLegal) {
676306c3fb27SDimitry Andric MachineOperand *Soffset = getNamedOperand(MI, AMDGPU::OpName::soffset);
676406c3fb27SDimitry Andric CreatedBB = loadMBUFScalarOperandsFromVGPR(*this, MI, {Soffset}, MDT);
676506c3fb27SDimitry Andric return CreatedBB;
67660b57cec5SDimitry Andric }
6767e8d8bef9SDimitry Andric return CreatedBB;
67680b57cec5SDimitry Andric }
67690b57cec5SDimitry Andric
677006c3fb27SDimitry Andric void SIInstrWorklist::insert(MachineInstr *MI) {
677106c3fb27SDimitry Andric InstrList.insert(MI);
677206c3fb27SDimitry Andric // Add MBUF instructions to deferred list.
677306c3fb27SDimitry Andric int RsrcIdx =
677406c3fb27SDimitry Andric AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::srsrc);
677506c3fb27SDimitry Andric if (RsrcIdx != -1) {
677606c3fb27SDimitry Andric DeferredList.insert(MI);
677706c3fb27SDimitry Andric }
677806c3fb27SDimitry Andric }
677906c3fb27SDimitry Andric
678006c3fb27SDimitry Andric bool SIInstrWorklist::isDeferred(MachineInstr *MI) {
678106c3fb27SDimitry Andric return DeferredList.contains(MI);
678206c3fb27SDimitry Andric }
678306c3fb27SDimitry Andric
678406c3fb27SDimitry Andric void SIInstrInfo::moveToVALU(SIInstrWorklist &Worklist,
67850b57cec5SDimitry Andric MachineDominatorTree *MDT) const {
67860b57cec5SDimitry Andric
67870b57cec5SDimitry Andric while (!Worklist.empty()) {
678806c3fb27SDimitry Andric MachineInstr &Inst = *Worklist.top();
678906c3fb27SDimitry Andric Worklist.erase_top();
679006c3fb27SDimitry Andric // Skip MachineInstr in the deferred list.
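// (Instructions with an srsrc operand were placed on the deferred list by
// SIInstrWorklist::insert() above; they are only processed once the rest of
// the worklist has been drained, see the loop over getDeferredList() below.)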
679106c3fb27SDimitry Andric if (Worklist.isDeferred(&Inst)) 679206c3fb27SDimitry Andric continue; 679306c3fb27SDimitry Andric moveToVALUImpl(Worklist, MDT, Inst); 679406c3fb27SDimitry Andric } 67950b57cec5SDimitry Andric 679606c3fb27SDimitry Andric // Deferred list of instructions will be processed once 679706c3fb27SDimitry Andric // all the MachineInstr in the worklist are done. 679806c3fb27SDimitry Andric for (MachineInstr *Inst : Worklist.getDeferredList()) { 679906c3fb27SDimitry Andric moveToVALUImpl(Worklist, MDT, *Inst); 680006c3fb27SDimitry Andric assert(Worklist.empty() && 680106c3fb27SDimitry Andric "Deferred MachineInstr are not supposed to re-populate worklist"); 680206c3fb27SDimitry Andric } 680306c3fb27SDimitry Andric } 680406c3fb27SDimitry Andric 680506c3fb27SDimitry Andric void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist, 680606c3fb27SDimitry Andric MachineDominatorTree *MDT, 680706c3fb27SDimitry Andric MachineInstr &Inst) const { 680806c3fb27SDimitry Andric 680906c3fb27SDimitry Andric MachineBasicBlock *MBB = Inst.getParent(); 681006c3fb27SDimitry Andric if (!MBB) 681106c3fb27SDimitry Andric return; 681206c3fb27SDimitry Andric MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); 68130b57cec5SDimitry Andric unsigned Opcode = Inst.getOpcode(); 68140b57cec5SDimitry Andric unsigned NewOpcode = getVALUOp(Inst); 68150b57cec5SDimitry Andric // Handle some special cases 68160b57cec5SDimitry Andric switch (Opcode) { 68170b57cec5SDimitry Andric default: 68180b57cec5SDimitry Andric break; 68190b57cec5SDimitry Andric case AMDGPU::S_ADD_U64_PSEUDO: 6820*5f757f3fSDimitry Andric NewOpcode = AMDGPU::V_ADD_U64_PSEUDO; 6821*5f757f3fSDimitry Andric break; 68220b57cec5SDimitry Andric case AMDGPU::S_SUB_U64_PSEUDO: 6823*5f757f3fSDimitry Andric NewOpcode = AMDGPU::V_SUB_U64_PSEUDO; 6824*5f757f3fSDimitry Andric break; 68250b57cec5SDimitry Andric case AMDGPU::S_ADD_I32: 6826e8d8bef9SDimitry Andric case AMDGPU::S_SUB_I32: { 68270b57cec5SDimitry Andric // FIXME: The u32 versions currently selected use the carry. 
6828e8d8bef9SDimitry Andric bool Changed; 682906c3fb27SDimitry Andric MachineBasicBlock *CreatedBBTmp = nullptr; 6830e8d8bef9SDimitry Andric std::tie(Changed, CreatedBBTmp) = moveScalarAddSub(Worklist, Inst, MDT); 6831e8d8bef9SDimitry Andric if (Changed) 683206c3fb27SDimitry Andric return; 68330b57cec5SDimitry Andric 68340b57cec5SDimitry Andric // Default handling 68350b57cec5SDimitry Andric break; 6836e8d8bef9SDimitry Andric } 68370b57cec5SDimitry Andric case AMDGPU::S_AND_B64: 68380b57cec5SDimitry Andric splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32, MDT); 68390b57cec5SDimitry Andric Inst.eraseFromParent(); 684006c3fb27SDimitry Andric return; 68410b57cec5SDimitry Andric 68420b57cec5SDimitry Andric case AMDGPU::S_OR_B64: 68430b57cec5SDimitry Andric splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32, MDT); 68440b57cec5SDimitry Andric Inst.eraseFromParent(); 684506c3fb27SDimitry Andric return; 68460b57cec5SDimitry Andric 68470b57cec5SDimitry Andric case AMDGPU::S_XOR_B64: 68480b57cec5SDimitry Andric splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32, MDT); 68490b57cec5SDimitry Andric Inst.eraseFromParent(); 685006c3fb27SDimitry Andric return; 68510b57cec5SDimitry Andric 68520b57cec5SDimitry Andric case AMDGPU::S_NAND_B64: 68530b57cec5SDimitry Andric splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NAND_B32, MDT); 68540b57cec5SDimitry Andric Inst.eraseFromParent(); 685506c3fb27SDimitry Andric return; 68560b57cec5SDimitry Andric 68570b57cec5SDimitry Andric case AMDGPU::S_NOR_B64: 68580b57cec5SDimitry Andric splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NOR_B32, MDT); 68590b57cec5SDimitry Andric Inst.eraseFromParent(); 686006c3fb27SDimitry Andric return; 68610b57cec5SDimitry Andric 68620b57cec5SDimitry Andric case AMDGPU::S_XNOR_B64: 68630b57cec5SDimitry Andric if (ST.hasDLInsts()) 68640b57cec5SDimitry Andric splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XNOR_B32, MDT); 68650b57cec5SDimitry Andric else 68660b57cec5SDimitry Andric splitScalar64BitXnor(Worklist, Inst, MDT); 68670b57cec5SDimitry Andric Inst.eraseFromParent(); 686806c3fb27SDimitry Andric return; 68690b57cec5SDimitry Andric 68700b57cec5SDimitry Andric case AMDGPU::S_ANDN2_B64: 68710b57cec5SDimitry Andric splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ANDN2_B32, MDT); 68720b57cec5SDimitry Andric Inst.eraseFromParent(); 687306c3fb27SDimitry Andric return; 68740b57cec5SDimitry Andric 68750b57cec5SDimitry Andric case AMDGPU::S_ORN2_B64: 68760b57cec5SDimitry Andric splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ORN2_B32, MDT); 68770b57cec5SDimitry Andric Inst.eraseFromParent(); 687806c3fb27SDimitry Andric return; 68790b57cec5SDimitry Andric 6880fe6060f1SDimitry Andric case AMDGPU::S_BREV_B64: 6881fe6060f1SDimitry Andric splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_BREV_B32, true); 6882fe6060f1SDimitry Andric Inst.eraseFromParent(); 688306c3fb27SDimitry Andric return; 6884fe6060f1SDimitry Andric 68850b57cec5SDimitry Andric case AMDGPU::S_NOT_B64: 68860b57cec5SDimitry Andric splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32); 68870b57cec5SDimitry Andric Inst.eraseFromParent(); 688806c3fb27SDimitry Andric return; 68890b57cec5SDimitry Andric 68900b57cec5SDimitry Andric case AMDGPU::S_BCNT1_I32_B64: 68910b57cec5SDimitry Andric splitScalar64BitBCNT(Worklist, Inst); 68920b57cec5SDimitry Andric Inst.eraseFromParent(); 689306c3fb27SDimitry Andric return; 68940b57cec5SDimitry Andric 68950b57cec5SDimitry Andric case AMDGPU::S_BFE_I64: 68960b57cec5SDimitry Andric 
splitScalar64BitBFE(Worklist, Inst); 68970b57cec5SDimitry Andric Inst.eraseFromParent(); 689806c3fb27SDimitry Andric return; 68990b57cec5SDimitry Andric 69000b57cec5SDimitry Andric case AMDGPU::S_LSHL_B32: 69010b57cec5SDimitry Andric if (ST.hasOnlyRevVALUShifts()) { 69020b57cec5SDimitry Andric NewOpcode = AMDGPU::V_LSHLREV_B32_e64; 69030b57cec5SDimitry Andric swapOperands(Inst); 69040b57cec5SDimitry Andric } 69050b57cec5SDimitry Andric break; 69060b57cec5SDimitry Andric case AMDGPU::S_ASHR_I32: 69070b57cec5SDimitry Andric if (ST.hasOnlyRevVALUShifts()) { 69080b57cec5SDimitry Andric NewOpcode = AMDGPU::V_ASHRREV_I32_e64; 69090b57cec5SDimitry Andric swapOperands(Inst); 69100b57cec5SDimitry Andric } 69110b57cec5SDimitry Andric break; 69120b57cec5SDimitry Andric case AMDGPU::S_LSHR_B32: 69130b57cec5SDimitry Andric if (ST.hasOnlyRevVALUShifts()) { 69140b57cec5SDimitry Andric NewOpcode = AMDGPU::V_LSHRREV_B32_e64; 69150b57cec5SDimitry Andric swapOperands(Inst); 69160b57cec5SDimitry Andric } 69170b57cec5SDimitry Andric break; 69180b57cec5SDimitry Andric case AMDGPU::S_LSHL_B64: 69190b57cec5SDimitry Andric if (ST.hasOnlyRevVALUShifts()) { 6920*5f757f3fSDimitry Andric NewOpcode = ST.getGeneration() >= AMDGPUSubtarget::GFX12 6921*5f757f3fSDimitry Andric ? AMDGPU::V_LSHLREV_B64_pseudo_e64 6922*5f757f3fSDimitry Andric : AMDGPU::V_LSHLREV_B64_e64; 69230b57cec5SDimitry Andric swapOperands(Inst); 69240b57cec5SDimitry Andric } 69250b57cec5SDimitry Andric break; 69260b57cec5SDimitry Andric case AMDGPU::S_ASHR_I64: 69270b57cec5SDimitry Andric if (ST.hasOnlyRevVALUShifts()) { 6928e8d8bef9SDimitry Andric NewOpcode = AMDGPU::V_ASHRREV_I64_e64; 69290b57cec5SDimitry Andric swapOperands(Inst); 69300b57cec5SDimitry Andric } 69310b57cec5SDimitry Andric break; 69320b57cec5SDimitry Andric case AMDGPU::S_LSHR_B64: 69330b57cec5SDimitry Andric if (ST.hasOnlyRevVALUShifts()) { 6934e8d8bef9SDimitry Andric NewOpcode = AMDGPU::V_LSHRREV_B64_e64; 69350b57cec5SDimitry Andric swapOperands(Inst); 69360b57cec5SDimitry Andric } 69370b57cec5SDimitry Andric break; 69380b57cec5SDimitry Andric 69390b57cec5SDimitry Andric case AMDGPU::S_ABS_I32: 69400b57cec5SDimitry Andric lowerScalarAbs(Worklist, Inst); 69410b57cec5SDimitry Andric Inst.eraseFromParent(); 694206c3fb27SDimitry Andric return; 69430b57cec5SDimitry Andric 69440b57cec5SDimitry Andric case AMDGPU::S_CBRANCH_SCC0: 6945349cc55cSDimitry Andric case AMDGPU::S_CBRANCH_SCC1: { 69460b57cec5SDimitry Andric // Clear unused bits of vcc 6947349cc55cSDimitry Andric Register CondReg = Inst.getOperand(1).getReg(); 6948349cc55cSDimitry Andric bool IsSCC = CondReg == AMDGPU::SCC; 6949349cc55cSDimitry Andric Register VCC = RI.getVCC(); 6950349cc55cSDimitry Andric Register EXEC = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC; 6951349cc55cSDimitry Andric unsigned Opc = ST.isWave32() ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64; 6952349cc55cSDimitry Andric BuildMI(*MBB, Inst, Inst.getDebugLoc(), get(Opc), VCC) 6953349cc55cSDimitry Andric .addReg(EXEC) 6954349cc55cSDimitry Andric .addReg(IsSCC ? 
VCC : CondReg); 695581ad6265SDimitry Andric Inst.removeOperand(1); 695606c3fb27SDimitry Andric } break; 69570b57cec5SDimitry Andric 69580b57cec5SDimitry Andric case AMDGPU::S_BFE_U64: 69590b57cec5SDimitry Andric case AMDGPU::S_BFM_B64: 69600b57cec5SDimitry Andric llvm_unreachable("Moving this op to VALU not implemented"); 69610b57cec5SDimitry Andric 69620b57cec5SDimitry Andric case AMDGPU::S_PACK_LL_B32_B16: 69630b57cec5SDimitry Andric case AMDGPU::S_PACK_LH_B32_B16: 696481ad6265SDimitry Andric case AMDGPU::S_PACK_HL_B32_B16: 69650b57cec5SDimitry Andric case AMDGPU::S_PACK_HH_B32_B16: 69660b57cec5SDimitry Andric movePackToVALU(Worklist, MRI, Inst); 69670b57cec5SDimitry Andric Inst.eraseFromParent(); 696806c3fb27SDimitry Andric return; 69690b57cec5SDimitry Andric 69700b57cec5SDimitry Andric case AMDGPU::S_XNOR_B32: 69710b57cec5SDimitry Andric lowerScalarXnor(Worklist, Inst); 69720b57cec5SDimitry Andric Inst.eraseFromParent(); 697306c3fb27SDimitry Andric return; 69740b57cec5SDimitry Andric 69750b57cec5SDimitry Andric case AMDGPU::S_NAND_B32: 69760b57cec5SDimitry Andric splitScalarNotBinop(Worklist, Inst, AMDGPU::S_AND_B32); 69770b57cec5SDimitry Andric Inst.eraseFromParent(); 697806c3fb27SDimitry Andric return; 69790b57cec5SDimitry Andric 69800b57cec5SDimitry Andric case AMDGPU::S_NOR_B32: 69810b57cec5SDimitry Andric splitScalarNotBinop(Worklist, Inst, AMDGPU::S_OR_B32); 69820b57cec5SDimitry Andric Inst.eraseFromParent(); 698306c3fb27SDimitry Andric return; 69840b57cec5SDimitry Andric 69850b57cec5SDimitry Andric case AMDGPU::S_ANDN2_B32: 69860b57cec5SDimitry Andric splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_AND_B32); 69870b57cec5SDimitry Andric Inst.eraseFromParent(); 698806c3fb27SDimitry Andric return; 69890b57cec5SDimitry Andric 69900b57cec5SDimitry Andric case AMDGPU::S_ORN2_B32: 69910b57cec5SDimitry Andric splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_OR_B32); 69920b57cec5SDimitry Andric Inst.eraseFromParent(); 699306c3fb27SDimitry Andric return; 69945ffd83dbSDimitry Andric 69955ffd83dbSDimitry Andric // TODO: remove as soon as everything is ready 69965ffd83dbSDimitry Andric // to replace VGPR to SGPR copy with V_READFIRSTLANEs. 69975ffd83dbSDimitry Andric // S_ADD/SUB_CO_PSEUDO as well as S_UADDO/USUBO_PSEUDO 69985ffd83dbSDimitry Andric // can only be selected from the uniform SDNode. 69995ffd83dbSDimitry Andric case AMDGPU::S_ADD_CO_PSEUDO: 70005ffd83dbSDimitry Andric case AMDGPU::S_SUB_CO_PSEUDO: { 70015ffd83dbSDimitry Andric unsigned Opc = (Inst.getOpcode() == AMDGPU::S_ADD_CO_PSEUDO) 70025ffd83dbSDimitry Andric ? 
AMDGPU::V_ADDC_U32_e64 70035ffd83dbSDimitry Andric : AMDGPU::V_SUBB_U32_e64; 70045ffd83dbSDimitry Andric const auto *CarryRC = RI.getRegClass(AMDGPU::SReg_1_XEXECRegClassID); 70055ffd83dbSDimitry Andric 70065ffd83dbSDimitry Andric Register CarryInReg = Inst.getOperand(4).getReg(); 70075ffd83dbSDimitry Andric if (!MRI.constrainRegClass(CarryInReg, CarryRC)) { 70085ffd83dbSDimitry Andric Register NewCarryReg = MRI.createVirtualRegister(CarryRC); 700906c3fb27SDimitry Andric BuildMI(*MBB, Inst, Inst.getDebugLoc(), get(AMDGPU::COPY), NewCarryReg) 70105ffd83dbSDimitry Andric .addReg(CarryInReg); 70115ffd83dbSDimitry Andric } 70125ffd83dbSDimitry Andric 70135ffd83dbSDimitry Andric Register CarryOutReg = Inst.getOperand(1).getReg(); 70145ffd83dbSDimitry Andric 70155ffd83dbSDimitry Andric Register DestReg = MRI.createVirtualRegister(RI.getEquivalentVGPRClass( 70165ffd83dbSDimitry Andric MRI.getRegClass(Inst.getOperand(0).getReg()))); 70175ffd83dbSDimitry Andric MachineInstr *CarryOp = 70185ffd83dbSDimitry Andric BuildMI(*MBB, &Inst, Inst.getDebugLoc(), get(Opc), DestReg) 70195ffd83dbSDimitry Andric .addReg(CarryOutReg, RegState::Define) 70205ffd83dbSDimitry Andric .add(Inst.getOperand(2)) 70215ffd83dbSDimitry Andric .add(Inst.getOperand(3)) 70225ffd83dbSDimitry Andric .addReg(CarryInReg) 70235ffd83dbSDimitry Andric .addImm(0); 702406c3fb27SDimitry Andric legalizeOperands(*CarryOp); 70255ffd83dbSDimitry Andric MRI.replaceRegWith(Inst.getOperand(0).getReg(), DestReg); 70265ffd83dbSDimitry Andric addUsersToMoveToVALUWorklist(DestReg, MRI, Worklist); 70275ffd83dbSDimitry Andric Inst.eraseFromParent(); 70285ffd83dbSDimitry Andric } 702906c3fb27SDimitry Andric return; 70305ffd83dbSDimitry Andric case AMDGPU::S_UADDO_PSEUDO: 70315ffd83dbSDimitry Andric case AMDGPU::S_USUBO_PSEUDO: { 70325ffd83dbSDimitry Andric const DebugLoc &DL = Inst.getDebugLoc(); 70335ffd83dbSDimitry Andric MachineOperand &Dest0 = Inst.getOperand(0); 70345ffd83dbSDimitry Andric MachineOperand &Dest1 = Inst.getOperand(1); 70355ffd83dbSDimitry Andric MachineOperand &Src0 = Inst.getOperand(2); 70365ffd83dbSDimitry Andric MachineOperand &Src1 = Inst.getOperand(3); 70375ffd83dbSDimitry Andric 70385ffd83dbSDimitry Andric unsigned Opc = (Inst.getOpcode() == AMDGPU::S_UADDO_PSEUDO) 7039e8d8bef9SDimitry Andric ? 
AMDGPU::V_ADD_CO_U32_e64 7040e8d8bef9SDimitry Andric : AMDGPU::V_SUB_CO_U32_e64; 70415ffd83dbSDimitry Andric const TargetRegisterClass *NewRC = 70425ffd83dbSDimitry Andric RI.getEquivalentVGPRClass(MRI.getRegClass(Dest0.getReg())); 70435ffd83dbSDimitry Andric Register DestReg = MRI.createVirtualRegister(NewRC); 70445ffd83dbSDimitry Andric MachineInstr *NewInstr = BuildMI(*MBB, &Inst, DL, get(Opc), DestReg) 70455ffd83dbSDimitry Andric .addReg(Dest1.getReg(), RegState::Define) 70465ffd83dbSDimitry Andric .add(Src0) 70475ffd83dbSDimitry Andric .add(Src1) 70485ffd83dbSDimitry Andric .addImm(0); // clamp bit 70495ffd83dbSDimitry Andric 705006c3fb27SDimitry Andric legalizeOperands(*NewInstr, MDT); 70515ffd83dbSDimitry Andric MRI.replaceRegWith(Dest0.getReg(), DestReg); 70525ffd83dbSDimitry Andric addUsersToMoveToVALUWorklist(NewInstr->getOperand(0).getReg(), MRI, 70535ffd83dbSDimitry Andric Worklist); 70545ffd83dbSDimitry Andric Inst.eraseFromParent(); 70555ffd83dbSDimitry Andric } 705606c3fb27SDimitry Andric return; 70575ffd83dbSDimitry Andric 70585ffd83dbSDimitry Andric case AMDGPU::S_CSELECT_B32: 7059349cc55cSDimitry Andric case AMDGPU::S_CSELECT_B64: 706004eeddc0SDimitry Andric lowerSelect(Worklist, Inst, MDT); 7061349cc55cSDimitry Andric Inst.eraseFromParent(); 706206c3fb27SDimitry Andric return; 7063349cc55cSDimitry Andric case AMDGPU::S_CMP_EQ_I32: 7064349cc55cSDimitry Andric case AMDGPU::S_CMP_LG_I32: 7065349cc55cSDimitry Andric case AMDGPU::S_CMP_GT_I32: 7066349cc55cSDimitry Andric case AMDGPU::S_CMP_GE_I32: 7067349cc55cSDimitry Andric case AMDGPU::S_CMP_LT_I32: 7068349cc55cSDimitry Andric case AMDGPU::S_CMP_LE_I32: 7069349cc55cSDimitry Andric case AMDGPU::S_CMP_EQ_U32: 7070349cc55cSDimitry Andric case AMDGPU::S_CMP_LG_U32: 7071349cc55cSDimitry Andric case AMDGPU::S_CMP_GT_U32: 7072349cc55cSDimitry Andric case AMDGPU::S_CMP_GE_U32: 7073349cc55cSDimitry Andric case AMDGPU::S_CMP_LT_U32: 7074349cc55cSDimitry Andric case AMDGPU::S_CMP_LE_U32: 7075349cc55cSDimitry Andric case AMDGPU::S_CMP_EQ_U64: 7076*5f757f3fSDimitry Andric case AMDGPU::S_CMP_LG_U64: 7077*5f757f3fSDimitry Andric case AMDGPU::S_CMP_LT_F32: 7078*5f757f3fSDimitry Andric case AMDGPU::S_CMP_EQ_F32: 7079*5f757f3fSDimitry Andric case AMDGPU::S_CMP_LE_F32: 7080*5f757f3fSDimitry Andric case AMDGPU::S_CMP_GT_F32: 7081*5f757f3fSDimitry Andric case AMDGPU::S_CMP_LG_F32: 7082*5f757f3fSDimitry Andric case AMDGPU::S_CMP_GE_F32: 7083*5f757f3fSDimitry Andric case AMDGPU::S_CMP_O_F32: 7084*5f757f3fSDimitry Andric case AMDGPU::S_CMP_U_F32: 7085*5f757f3fSDimitry Andric case AMDGPU::S_CMP_NGE_F32: 7086*5f757f3fSDimitry Andric case AMDGPU::S_CMP_NLG_F32: 7087*5f757f3fSDimitry Andric case AMDGPU::S_CMP_NGT_F32: 7088*5f757f3fSDimitry Andric case AMDGPU::S_CMP_NLE_F32: 7089*5f757f3fSDimitry Andric case AMDGPU::S_CMP_NEQ_F32: 7090*5f757f3fSDimitry Andric case AMDGPU::S_CMP_NLT_F32: 7091*5f757f3fSDimitry Andric case AMDGPU::S_CMP_LT_F16: 7092*5f757f3fSDimitry Andric case AMDGPU::S_CMP_EQ_F16: 7093*5f757f3fSDimitry Andric case AMDGPU::S_CMP_LE_F16: 7094*5f757f3fSDimitry Andric case AMDGPU::S_CMP_GT_F16: 7095*5f757f3fSDimitry Andric case AMDGPU::S_CMP_LG_F16: 7096*5f757f3fSDimitry Andric case AMDGPU::S_CMP_GE_F16: 7097*5f757f3fSDimitry Andric case AMDGPU::S_CMP_O_F16: 7098*5f757f3fSDimitry Andric case AMDGPU::S_CMP_U_F16: 7099*5f757f3fSDimitry Andric case AMDGPU::S_CMP_NGE_F16: 7100*5f757f3fSDimitry Andric case AMDGPU::S_CMP_NLG_F16: 7101*5f757f3fSDimitry Andric case AMDGPU::S_CMP_NGT_F16: 7102*5f757f3fSDimitry Andric case AMDGPU::S_CMP_NLE_F16: 
7103*5f757f3fSDimitry Andric case AMDGPU::S_CMP_NEQ_F16: 7104*5f757f3fSDimitry Andric case AMDGPU::S_CMP_NLT_F16: { 7105349cc55cSDimitry Andric Register CondReg = MRI.createVirtualRegister(RI.getWaveMaskRegClass()); 7106*5f757f3fSDimitry Andric auto NewInstr = 7107*5f757f3fSDimitry Andric BuildMI(*MBB, Inst, Inst.getDebugLoc(), get(NewOpcode), CondReg) 7108*5f757f3fSDimitry Andric .setMIFlags(Inst.getFlags()); 7109*5f757f3fSDimitry Andric if (AMDGPU::getNamedOperandIdx(NewOpcode, 7110*5f757f3fSDimitry Andric AMDGPU::OpName::src0_modifiers) >= 0) { 7111*5f757f3fSDimitry Andric NewInstr 7112*5f757f3fSDimitry Andric .addImm(0) // src0_modifiers 7113*5f757f3fSDimitry Andric .add(Inst.getOperand(0)) // src0 7114*5f757f3fSDimitry Andric .addImm(0) // src1_modifiers 7115*5f757f3fSDimitry Andric .add(Inst.getOperand(1)) // src1 7116*5f757f3fSDimitry Andric .addImm(0); // clamp 7117*5f757f3fSDimitry Andric } else { 7118*5f757f3fSDimitry Andric NewInstr 7119349cc55cSDimitry Andric .add(Inst.getOperand(0)) 7120349cc55cSDimitry Andric .add(Inst.getOperand(1)); 7121*5f757f3fSDimitry Andric } 7122349cc55cSDimitry Andric legalizeOperands(*NewInstr, MDT); 7123349cc55cSDimitry Andric int SCCIdx = Inst.findRegisterDefOperandIdx(AMDGPU::SCC); 7124349cc55cSDimitry Andric MachineOperand SCCOp = Inst.getOperand(SCCIdx); 7125349cc55cSDimitry Andric addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg); 7126349cc55cSDimitry Andric Inst.eraseFromParent(); 712706c3fb27SDimitry Andric return; 7128349cc55cSDimitry Andric } 7129*5f757f3fSDimitry Andric case AMDGPU::S_CVT_HI_F32_F16: { 7130*5f757f3fSDimitry Andric const DebugLoc &DL = Inst.getDebugLoc(); 7131*5f757f3fSDimitry Andric Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 7132*5f757f3fSDimitry Andric Register NewDst = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 7133*5f757f3fSDimitry Andric BuildMI(*MBB, Inst, DL, get(AMDGPU::V_LSHRREV_B32_e64), TmpReg) 7134*5f757f3fSDimitry Andric .addImm(16) 7135*5f757f3fSDimitry Andric .add(Inst.getOperand(1)); 7136*5f757f3fSDimitry Andric BuildMI(*MBB, Inst, DL, get(NewOpcode), NewDst) 7137*5f757f3fSDimitry Andric .addImm(0) // src0_modifiers 7138*5f757f3fSDimitry Andric .addReg(TmpReg) 7139*5f757f3fSDimitry Andric .addImm(0) // clamp 7140*5f757f3fSDimitry Andric .addImm(0); // omod 7141*5f757f3fSDimitry Andric 7142*5f757f3fSDimitry Andric MRI.replaceRegWith(Inst.getOperand(0).getReg(), NewDst); 7143*5f757f3fSDimitry Andric addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist); 7144*5f757f3fSDimitry Andric Inst.eraseFromParent(); 7145*5f757f3fSDimitry Andric return; 7146*5f757f3fSDimitry Andric } 7147*5f757f3fSDimitry Andric case AMDGPU::S_MINIMUM_F32: 7148*5f757f3fSDimitry Andric case AMDGPU::S_MAXIMUM_F32: 7149*5f757f3fSDimitry Andric case AMDGPU::S_MINIMUM_F16: 7150*5f757f3fSDimitry Andric case AMDGPU::S_MAXIMUM_F16: { 7151*5f757f3fSDimitry Andric const DebugLoc &DL = Inst.getDebugLoc(); 7152*5f757f3fSDimitry Andric Register NewDst = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 7153*5f757f3fSDimitry Andric MachineInstr *NewInstr = BuildMI(*MBB, Inst, DL, get(NewOpcode), NewDst) 7154*5f757f3fSDimitry Andric .addImm(0) // src0_modifiers 7155*5f757f3fSDimitry Andric .add(Inst.getOperand(1)) 7156*5f757f3fSDimitry Andric .addImm(0) // src1_modifiers 7157*5f757f3fSDimitry Andric .add(Inst.getOperand(2)) 7158*5f757f3fSDimitry Andric .addImm(0) // clamp 7159*5f757f3fSDimitry Andric .addImm(0); // omod 7160*5f757f3fSDimitry Andric MRI.replaceRegWith(Inst.getOperand(0).getReg(), NewDst); 
7161*5f757f3fSDimitry Andric 7162*5f757f3fSDimitry Andric legalizeOperands(*NewInstr, MDT); 7163*5f757f3fSDimitry Andric addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist); 7164*5f757f3fSDimitry Andric Inst.eraseFromParent(); 7165*5f757f3fSDimitry Andric return; 7166*5f757f3fSDimitry Andric } 7167*5f757f3fSDimitry Andric } 7168349cc55cSDimitry Andric 71690b57cec5SDimitry Andric if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) { 71700b57cec5SDimitry Andric // We cannot move this instruction to the VALU, so we should try to 71710b57cec5SDimitry Andric // legalize its operands instead. 717206c3fb27SDimitry Andric legalizeOperands(Inst, MDT); 717306c3fb27SDimitry Andric return; 71740b57cec5SDimitry Andric } 7175bdd1243dSDimitry Andric // Handle converting generic instructions like COPY-to-SGPR into 7176bdd1243dSDimitry Andric // COPY-to-VGPR. 7177bdd1243dSDimitry Andric if (NewOpcode == Opcode) { 71788bcb0991SDimitry Andric Register DstReg = Inst.getOperand(0).getReg(); 71790b57cec5SDimitry Andric const TargetRegisterClass *NewDstRC = getDestEquivalentVGPRClass(Inst); 71800b57cec5SDimitry Andric 7181e8d8bef9SDimitry Andric if (Inst.isCopy() && Inst.getOperand(1).getReg().isVirtual() && 71820b57cec5SDimitry Andric NewDstRC == RI.getRegClassForReg(MRI, Inst.getOperand(1).getReg())) { 71830b57cec5SDimitry Andric // Instead of creating a copy where src and dst are the same register 71840b57cec5SDimitry Andric // class, we just replace all uses of dst with src. These kinds of 71850b57cec5SDimitry Andric // copies interfere with the heuristics MachineSink uses to decide 71860b57cec5SDimitry Andric // whether or not to split a critical edge. Since the pass assumes 71870b57cec5SDimitry Andric // that copies will end up as machine instructions and not be 71880b57cec5SDimitry Andric // eliminated. 71890b57cec5SDimitry Andric addUsersToMoveToVALUWorklist(DstReg, MRI, Worklist); 71900b57cec5SDimitry Andric MRI.replaceRegWith(DstReg, Inst.getOperand(1).getReg()); 71910b57cec5SDimitry Andric MRI.clearKillFlags(Inst.getOperand(1).getReg()); 71920b57cec5SDimitry Andric Inst.getOperand(0).setReg(DstReg); 71930b57cec5SDimitry Andric // Make sure we don't leave around a dead VGPR->SGPR copy. Normally 71940b57cec5SDimitry Andric // these are deleted later, but at -O0 it would leave a suspicious 71950b57cec5SDimitry Andric // looking illegal copy of an undef register. 71960b57cec5SDimitry Andric for (unsigned I = Inst.getNumOperands() - 1; I != 0; --I) 719781ad6265SDimitry Andric Inst.removeOperand(I); 71980b57cec5SDimitry Andric Inst.setDesc(get(AMDGPU::IMPLICIT_DEF)); 719906c3fb27SDimitry Andric return; 72000b57cec5SDimitry Andric } 7201bdd1243dSDimitry Andric Register NewDstReg = MRI.createVirtualRegister(NewDstRC); 7202bdd1243dSDimitry Andric MRI.replaceRegWith(DstReg, NewDstReg); 7203bdd1243dSDimitry Andric legalizeOperands(Inst, MDT); 7204bdd1243dSDimitry Andric addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist); 720506c3fb27SDimitry Andric return; 7206bdd1243dSDimitry Andric } 7207bdd1243dSDimitry Andric 7208bdd1243dSDimitry Andric // Use the new VALU Opcode. 7209bdd1243dSDimitry Andric auto NewInstr = BuildMI(*MBB, Inst, Inst.getDebugLoc(), get(NewOpcode)) 7210bdd1243dSDimitry Andric .setMIFlags(Inst.getFlags()); 7211*5f757f3fSDimitry Andric if (isVOP3(NewOpcode) && !isVOP3(Opcode)) { 7212*5f757f3fSDimitry Andric // Intersperse VOP3 modifiers among the SALU operands. 
7213*5f757f3fSDimitry Andric NewInstr->addOperand(Inst.getOperand(0)); 7214*5f757f3fSDimitry Andric if (AMDGPU::getNamedOperandIdx(NewOpcode, 7215*5f757f3fSDimitry Andric AMDGPU::OpName::src0_modifiers) >= 0) 7216*5f757f3fSDimitry Andric NewInstr.addImm(0); 7217*5f757f3fSDimitry Andric if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src0) >= 0) 7218*5f757f3fSDimitry Andric NewInstr->addOperand(Inst.getOperand(1)); 7219*5f757f3fSDimitry Andric 7220*5f757f3fSDimitry Andric if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) { 7221*5f757f3fSDimitry Andric // We are converting these to a BFE, so we need to add the missing 7222*5f757f3fSDimitry Andric // operands for the size and offset. 7223*5f757f3fSDimitry Andric unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16; 7224*5f757f3fSDimitry Andric NewInstr.addImm(0); 7225*5f757f3fSDimitry Andric NewInstr.addImm(Size); 7226*5f757f3fSDimitry Andric } else if (Opcode == AMDGPU::S_BCNT1_I32_B32) { 7227*5f757f3fSDimitry Andric // The VALU version adds the second operand to the result, so insert an 7228*5f757f3fSDimitry Andric // extra 0 operand. 7229*5f757f3fSDimitry Andric NewInstr.addImm(0); 7230*5f757f3fSDimitry Andric } else if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) { 7231*5f757f3fSDimitry Andric const MachineOperand &OffsetWidthOp = Inst.getOperand(2); 7232*5f757f3fSDimitry Andric // If we need to move this to VGPRs, we need to unpack the second 7233*5f757f3fSDimitry Andric // operand back into the 2 separate ones for bit offset and width. 7234*5f757f3fSDimitry Andric assert(OffsetWidthOp.isImm() && 7235*5f757f3fSDimitry Andric "Scalar BFE is only implemented for constant width and offset"); 7236*5f757f3fSDimitry Andric uint32_t Imm = OffsetWidthOp.getImm(); 7237*5f757f3fSDimitry Andric 7238*5f757f3fSDimitry Andric uint32_t Offset = Imm & 0x3f; // Extract bits [5:0]. 7239*5f757f3fSDimitry Andric uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16]. 7240*5f757f3fSDimitry Andric NewInstr.addImm(Offset); 7241*5f757f3fSDimitry Andric NewInstr.addImm(BitWidth); 7242*5f757f3fSDimitry Andric } else { 7243*5f757f3fSDimitry Andric if (AMDGPU::getNamedOperandIdx(NewOpcode, 7244*5f757f3fSDimitry Andric AMDGPU::OpName::src1_modifiers) >= 0) 7245*5f757f3fSDimitry Andric NewInstr.addImm(0); 7246*5f757f3fSDimitry Andric if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src1) >= 0) 7247*5f757f3fSDimitry Andric NewInstr->addOperand(Inst.getOperand(2)); 7248*5f757f3fSDimitry Andric if (AMDGPU::getNamedOperandIdx(NewOpcode, 7249*5f757f3fSDimitry Andric AMDGPU::OpName::src2_modifiers) >= 0) 7250*5f757f3fSDimitry Andric NewInstr.addImm(0); 7251*5f757f3fSDimitry Andric if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src2) >= 0) 7252*5f757f3fSDimitry Andric NewInstr->addOperand(Inst.getOperand(3)); 7253*5f757f3fSDimitry Andric if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::clamp) >= 0) 7254*5f757f3fSDimitry Andric NewInstr.addImm(0); 7255*5f757f3fSDimitry Andric if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::omod) >= 0) 7256*5f757f3fSDimitry Andric NewInstr.addImm(0); 7257*5f757f3fSDimitry Andric if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::op_sel) >= 0) 7258*5f757f3fSDimitry Andric NewInstr.addImm(0); 7259*5f757f3fSDimitry Andric } 7260*5f757f3fSDimitry Andric } else { 7261*5f757f3fSDimitry Andric // Just copy the SALU operands. 
7262bdd1243dSDimitry Andric for (const MachineOperand &Op : Inst.explicit_operands()) 7263bdd1243dSDimitry Andric NewInstr->addOperand(Op); 7264*5f757f3fSDimitry Andric } 7265*5f757f3fSDimitry Andric 7266bdd1243dSDimitry Andric // Remove any references to SCC. Vector instructions can't read from it, and 7267bdd1243dSDimitry Andric // We're just about to add the implicit use / defs of VCC, and we don't want 7268bdd1243dSDimitry Andric // both. 7269bdd1243dSDimitry Andric for (MachineOperand &Op : Inst.implicit_operands()) { 7270bdd1243dSDimitry Andric if (Op.getReg() == AMDGPU::SCC) { 7271bdd1243dSDimitry Andric // Only propagate through live-def of SCC. 7272bdd1243dSDimitry Andric if (Op.isDef() && !Op.isDead()) 7273bdd1243dSDimitry Andric addSCCDefUsersToVALUWorklist(Op, Inst, Worklist); 7274bdd1243dSDimitry Andric if (Op.isUse()) 7275bdd1243dSDimitry Andric addSCCDefsToVALUWorklist(NewInstr, Worklist); 7276bdd1243dSDimitry Andric } 7277bdd1243dSDimitry Andric } 7278bdd1243dSDimitry Andric Inst.eraseFromParent(); 7279bdd1243dSDimitry Andric Register NewDstReg; 7280bdd1243dSDimitry Andric if (NewInstr->getOperand(0).isReg() && NewInstr->getOperand(0).isDef()) { 7281bdd1243dSDimitry Andric Register DstReg = NewInstr->getOperand(0).getReg(); 7282bdd1243dSDimitry Andric assert(DstReg.isVirtual()); 7283bdd1243dSDimitry Andric // Update the destination register class. 728406c3fb27SDimitry Andric const TargetRegisterClass *NewDstRC = getDestEquivalentVGPRClass(*NewInstr); 7285bdd1243dSDimitry Andric assert(NewDstRC); 72860b57cec5SDimitry Andric NewDstReg = MRI.createVirtualRegister(NewDstRC); 72870b57cec5SDimitry Andric MRI.replaceRegWith(DstReg, NewDstReg); 72880b57cec5SDimitry Andric } 7289bdd1243dSDimitry Andric fixImplicitOperands(*NewInstr); 72900b57cec5SDimitry Andric // Legalize the operands 729106c3fb27SDimitry Andric legalizeOperands(*NewInstr, MDT); 7292bdd1243dSDimitry Andric if (NewDstReg) 72930b57cec5SDimitry Andric addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist); 72940b57cec5SDimitry Andric } 72950b57cec5SDimitry Andric 72960b57cec5SDimitry Andric // Add/sub require special handling to deal with carry outs. 7297e8d8bef9SDimitry Andric std::pair<bool, MachineBasicBlock *> 729806c3fb27SDimitry Andric SIInstrInfo::moveScalarAddSub(SIInstrWorklist &Worklist, MachineInstr &Inst, 72990b57cec5SDimitry Andric MachineDominatorTree *MDT) const { 73000b57cec5SDimitry Andric if (ST.hasAddNoCarry()) { 73010b57cec5SDimitry Andric // Assume there is no user of scc since we don't select this in that case. 73020b57cec5SDimitry Andric // Since scc isn't used, it doesn't really matter if the i32 or u32 variant 73030b57cec5SDimitry Andric // is used. 73040b57cec5SDimitry Andric 73050b57cec5SDimitry Andric MachineBasicBlock &MBB = *Inst.getParent(); 73060b57cec5SDimitry Andric MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 73070b57cec5SDimitry Andric 73088bcb0991SDimitry Andric Register OldDstReg = Inst.getOperand(0).getReg(); 73098bcb0991SDimitry Andric Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 73100b57cec5SDimitry Andric 73110b57cec5SDimitry Andric unsigned Opc = Inst.getOpcode(); 73120b57cec5SDimitry Andric assert(Opc == AMDGPU::S_ADD_I32 || Opc == AMDGPU::S_SUB_I32); 73130b57cec5SDimitry Andric 73140b57cec5SDimitry Andric unsigned NewOpc = Opc == AMDGPU::S_ADD_I32 ? 
73150b57cec5SDimitry Andric AMDGPU::V_ADD_U32_e64 : AMDGPU::V_SUB_U32_e64; 73160b57cec5SDimitry Andric 73170b57cec5SDimitry Andric assert(Inst.getOperand(3).getReg() == AMDGPU::SCC); 731881ad6265SDimitry Andric Inst.removeOperand(3); 73190b57cec5SDimitry Andric 73200b57cec5SDimitry Andric Inst.setDesc(get(NewOpc)); 73210b57cec5SDimitry Andric Inst.addOperand(MachineOperand::CreateImm(0)); // clamp bit 73220b57cec5SDimitry Andric Inst.addImplicitDefUseOperands(*MBB.getParent()); 73230b57cec5SDimitry Andric MRI.replaceRegWith(OldDstReg, ResultReg); 7324e8d8bef9SDimitry Andric MachineBasicBlock *NewBB = legalizeOperands(Inst, MDT); 73250b57cec5SDimitry Andric 73260b57cec5SDimitry Andric addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist); 7327bdd1243dSDimitry Andric return std::pair(true, NewBB); 73280b57cec5SDimitry Andric } 73290b57cec5SDimitry Andric 7330bdd1243dSDimitry Andric return std::pair(false, nullptr); 73310b57cec5SDimitry Andric } 73320b57cec5SDimitry Andric 733306c3fb27SDimitry Andric void SIInstrInfo::lowerSelect(SIInstrWorklist &Worklist, MachineInstr &Inst, 73345ffd83dbSDimitry Andric MachineDominatorTree *MDT) const { 73355ffd83dbSDimitry Andric 73365ffd83dbSDimitry Andric MachineBasicBlock &MBB = *Inst.getParent(); 73375ffd83dbSDimitry Andric MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 73385ffd83dbSDimitry Andric MachineBasicBlock::iterator MII = Inst; 73395ffd83dbSDimitry Andric DebugLoc DL = Inst.getDebugLoc(); 73405ffd83dbSDimitry Andric 73415ffd83dbSDimitry Andric MachineOperand &Dest = Inst.getOperand(0); 73425ffd83dbSDimitry Andric MachineOperand &Src0 = Inst.getOperand(1); 73435ffd83dbSDimitry Andric MachineOperand &Src1 = Inst.getOperand(2); 73445ffd83dbSDimitry Andric MachineOperand &Cond = Inst.getOperand(3); 73455ffd83dbSDimitry Andric 7346*5f757f3fSDimitry Andric Register CondReg = Cond.getReg(); 7347*5f757f3fSDimitry Andric bool IsSCC = (CondReg == AMDGPU::SCC); 7348349cc55cSDimitry Andric 7349349cc55cSDimitry Andric // If this is a trivial select where the condition is effectively not SCC 7350*5f757f3fSDimitry Andric // (CondReg is a source of copy to SCC), then the select is semantically 7351*5f757f3fSDimitry Andric // equivalent to copying CondReg. Hence, there is no need to create 7352349cc55cSDimitry Andric // V_CNDMASK, we can just use that and bail out. 
7353349cc55cSDimitry Andric if (!IsSCC && Src0.isImm() && (Src0.getImm() == -1) && Src1.isImm() && 7354349cc55cSDimitry Andric (Src1.getImm() == 0)) { 7355*5f757f3fSDimitry Andric MRI.replaceRegWith(Dest.getReg(), CondReg); 7356349cc55cSDimitry Andric return; 7357349cc55cSDimitry Andric } 7358349cc55cSDimitry Andric 7359*5f757f3fSDimitry Andric Register NewCondReg = CondReg; 7360*5f757f3fSDimitry Andric if (IsSCC) { 7361349cc55cSDimitry Andric const TargetRegisterClass *TC = 7362349cc55cSDimitry Andric RI.getRegClass(AMDGPU::SReg_1_XEXECRegClassID); 7363*5f757f3fSDimitry Andric NewCondReg = MRI.createVirtualRegister(TC); 7364349cc55cSDimitry Andric 7365349cc55cSDimitry Andric // Now look for the closest SCC def if it is a copy 7366*5f757f3fSDimitry Andric // replacing the CondReg with the COPY source register 7367349cc55cSDimitry Andric bool CopyFound = false; 73685ffd83dbSDimitry Andric for (MachineInstr &CandI : 73695ffd83dbSDimitry Andric make_range(std::next(MachineBasicBlock::reverse_iterator(Inst)), 73705ffd83dbSDimitry Andric Inst.getParent()->rend())) { 73715ffd83dbSDimitry Andric if (CandI.findRegisterDefOperandIdx(AMDGPU::SCC, false, false, &RI) != 73725ffd83dbSDimitry Andric -1) { 73735ffd83dbSDimitry Andric if (CandI.isCopy() && CandI.getOperand(0).getReg() == AMDGPU::SCC) { 7374*5f757f3fSDimitry Andric BuildMI(MBB, MII, DL, get(AMDGPU::COPY), NewCondReg) 7375349cc55cSDimitry Andric .addReg(CandI.getOperand(1).getReg()); 7376349cc55cSDimitry Andric CopyFound = true; 73775ffd83dbSDimitry Andric } 73785ffd83dbSDimitry Andric break; 73795ffd83dbSDimitry Andric } 73805ffd83dbSDimitry Andric } 7381349cc55cSDimitry Andric if (!CopyFound) { 7382349cc55cSDimitry Andric // SCC def is not a copy 73835ffd83dbSDimitry Andric // Insert a trivial select instead of creating a copy, because a copy from 73845ffd83dbSDimitry Andric // SCC would semantically mean just copying a single bit, but we may need 73855ffd83dbSDimitry Andric // the result to be a vector condition mask that needs preserving. 73865ffd83dbSDimitry Andric unsigned Opcode = (ST.getWavefrontSize() == 64) ? 
AMDGPU::S_CSELECT_B64 73875ffd83dbSDimitry Andric : AMDGPU::S_CSELECT_B32; 73885ffd83dbSDimitry Andric auto NewSelect = 7389*5f757f3fSDimitry Andric BuildMI(MBB, MII, DL, get(Opcode), NewCondReg).addImm(-1).addImm(0); 73905ffd83dbSDimitry Andric NewSelect->getOperand(3).setIsUndef(Cond.isUndef()); 7391349cc55cSDimitry Andric } 73925ffd83dbSDimitry Andric } 73935ffd83dbSDimitry Andric 7394*5f757f3fSDimitry Andric Register NewDestReg = MRI.createVirtualRegister( 7395*5f757f3fSDimitry Andric RI.getEquivalentVGPRClass(MRI.getRegClass(Dest.getReg()))); 7396*5f757f3fSDimitry Andric MachineInstr *NewInst; 7397*5f757f3fSDimitry Andric if (Inst.getOpcode() == AMDGPU::S_CSELECT_B32) { 7398*5f757f3fSDimitry Andric NewInst = BuildMI(MBB, MII, DL, get(AMDGPU::V_CNDMASK_B32_e64), NewDestReg) 73995ffd83dbSDimitry Andric .addImm(0) 74005ffd83dbSDimitry Andric .add(Src1) // False 74015ffd83dbSDimitry Andric .addImm(0) 74025ffd83dbSDimitry Andric .add(Src0) // True 7403*5f757f3fSDimitry Andric .addReg(NewCondReg); 7404*5f757f3fSDimitry Andric } else { 7405*5f757f3fSDimitry Andric NewInst = 7406*5f757f3fSDimitry Andric BuildMI(MBB, MII, DL, get(AMDGPU::V_CNDMASK_B64_PSEUDO), NewDestReg) 7407*5f757f3fSDimitry Andric .add(Src1) // False 7408*5f757f3fSDimitry Andric .add(Src0) // True 7409*5f757f3fSDimitry Andric .addReg(NewCondReg); 7410*5f757f3fSDimitry Andric } 7411*5f757f3fSDimitry Andric MRI.replaceRegWith(Dest.getReg(), NewDestReg); 7412*5f757f3fSDimitry Andric legalizeOperands(*NewInst, MDT); 7413*5f757f3fSDimitry Andric addUsersToMoveToVALUWorklist(NewDestReg, MRI, Worklist); 74145ffd83dbSDimitry Andric } 74155ffd83dbSDimitry Andric 741606c3fb27SDimitry Andric void SIInstrInfo::lowerScalarAbs(SIInstrWorklist &Worklist, 74170b57cec5SDimitry Andric MachineInstr &Inst) const { 74180b57cec5SDimitry Andric MachineBasicBlock &MBB = *Inst.getParent(); 74190b57cec5SDimitry Andric MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 74200b57cec5SDimitry Andric MachineBasicBlock::iterator MII = Inst; 74210b57cec5SDimitry Andric DebugLoc DL = Inst.getDebugLoc(); 74220b57cec5SDimitry Andric 74230b57cec5SDimitry Andric MachineOperand &Dest = Inst.getOperand(0); 74240b57cec5SDimitry Andric MachineOperand &Src = Inst.getOperand(1); 74258bcb0991SDimitry Andric Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 74268bcb0991SDimitry Andric Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 74270b57cec5SDimitry Andric 74280b57cec5SDimitry Andric unsigned SubOp = ST.hasAddNoCarry() ? 
7429e8d8bef9SDimitry Andric AMDGPU::V_SUB_U32_e32 : AMDGPU::V_SUB_CO_U32_e32; 74300b57cec5SDimitry Andric 74310b57cec5SDimitry Andric BuildMI(MBB, MII, DL, get(SubOp), TmpReg) 74320b57cec5SDimitry Andric .addImm(0) 74330b57cec5SDimitry Andric .addReg(Src.getReg()); 74340b57cec5SDimitry Andric 74350b57cec5SDimitry Andric BuildMI(MBB, MII, DL, get(AMDGPU::V_MAX_I32_e64), ResultReg) 74360b57cec5SDimitry Andric .addReg(Src.getReg()) 74370b57cec5SDimitry Andric .addReg(TmpReg); 74380b57cec5SDimitry Andric 74390b57cec5SDimitry Andric MRI.replaceRegWith(Dest.getReg(), ResultReg); 74400b57cec5SDimitry Andric addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist); 74410b57cec5SDimitry Andric } 74420b57cec5SDimitry Andric 744306c3fb27SDimitry Andric void SIInstrInfo::lowerScalarXnor(SIInstrWorklist &Worklist, 74440b57cec5SDimitry Andric MachineInstr &Inst) const { 74450b57cec5SDimitry Andric MachineBasicBlock &MBB = *Inst.getParent(); 74460b57cec5SDimitry Andric MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 74470b57cec5SDimitry Andric MachineBasicBlock::iterator MII = Inst; 74480b57cec5SDimitry Andric const DebugLoc &DL = Inst.getDebugLoc(); 74490b57cec5SDimitry Andric 74500b57cec5SDimitry Andric MachineOperand &Dest = Inst.getOperand(0); 74510b57cec5SDimitry Andric MachineOperand &Src0 = Inst.getOperand(1); 74520b57cec5SDimitry Andric MachineOperand &Src1 = Inst.getOperand(2); 74530b57cec5SDimitry Andric 74540b57cec5SDimitry Andric if (ST.hasDLInsts()) { 74558bcb0991SDimitry Andric Register NewDest = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 74560b57cec5SDimitry Andric legalizeGenericOperand(MBB, MII, &AMDGPU::VGPR_32RegClass, Src0, MRI, DL); 74570b57cec5SDimitry Andric legalizeGenericOperand(MBB, MII, &AMDGPU::VGPR_32RegClass, Src1, MRI, DL); 74580b57cec5SDimitry Andric 74590b57cec5SDimitry Andric BuildMI(MBB, MII, DL, get(AMDGPU::V_XNOR_B32_e64), NewDest) 74600b57cec5SDimitry Andric .add(Src0) 74610b57cec5SDimitry Andric .add(Src1); 74620b57cec5SDimitry Andric 74630b57cec5SDimitry Andric MRI.replaceRegWith(Dest.getReg(), NewDest); 74640b57cec5SDimitry Andric addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist); 74650b57cec5SDimitry Andric } else { 74660b57cec5SDimitry Andric // Using the identity !(x ^ y) == (!x ^ y) == (x ^ !y), we can 74670b57cec5SDimitry Andric // invert either source and then perform the XOR. If either source is a 74680b57cec5SDimitry Andric // scalar register, then we can leave the inversion on the scalar unit to 746981ad6265SDimitry Andric // achieve a better distribution of scalar and vector instructions. 74700b57cec5SDimitry Andric bool Src0IsSGPR = Src0.isReg() && 74710b57cec5SDimitry Andric RI.isSGPRClass(MRI.getRegClass(Src0.getReg())); 74720b57cec5SDimitry Andric bool Src1IsSGPR = Src1.isReg() && 74730b57cec5SDimitry Andric RI.isSGPRClass(MRI.getRegClass(Src1.getReg())); 74740b57cec5SDimitry Andric MachineInstr *Xor; 74758bcb0991SDimitry Andric Register Temp = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); 74768bcb0991SDimitry Andric Register NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); 74770b57cec5SDimitry Andric 74780b57cec5SDimitry Andric // Build a pair of scalar instructions and add them to the work list. 74790b57cec5SDimitry Andric // The next iteration over the work list will lower these to the vector 74800b57cec5SDimitry Andric // unit as necessary. 
74810b57cec5SDimitry Andric if (Src0IsSGPR) { 74820b57cec5SDimitry Andric BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), Temp).add(Src0); 74830b57cec5SDimitry Andric Xor = BuildMI(MBB, MII, DL, get(AMDGPU::S_XOR_B32), NewDest) 74840b57cec5SDimitry Andric .addReg(Temp) 74850b57cec5SDimitry Andric .add(Src1); 74860b57cec5SDimitry Andric } else if (Src1IsSGPR) { 74870b57cec5SDimitry Andric BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), Temp).add(Src1); 74880b57cec5SDimitry Andric Xor = BuildMI(MBB, MII, DL, get(AMDGPU::S_XOR_B32), NewDest) 74890b57cec5SDimitry Andric .add(Src0) 74900b57cec5SDimitry Andric .addReg(Temp); 74910b57cec5SDimitry Andric } else { 74920b57cec5SDimitry Andric Xor = BuildMI(MBB, MII, DL, get(AMDGPU::S_XOR_B32), Temp) 74930b57cec5SDimitry Andric .add(Src0) 74940b57cec5SDimitry Andric .add(Src1); 74950b57cec5SDimitry Andric MachineInstr *Not = 74960b57cec5SDimitry Andric BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), NewDest).addReg(Temp); 74970b57cec5SDimitry Andric Worklist.insert(Not); 74980b57cec5SDimitry Andric } 74990b57cec5SDimitry Andric 75000b57cec5SDimitry Andric MRI.replaceRegWith(Dest.getReg(), NewDest); 75010b57cec5SDimitry Andric 75020b57cec5SDimitry Andric Worklist.insert(Xor); 75030b57cec5SDimitry Andric 75040b57cec5SDimitry Andric addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist); 75050b57cec5SDimitry Andric } 75060b57cec5SDimitry Andric } 75070b57cec5SDimitry Andric 750806c3fb27SDimitry Andric void SIInstrInfo::splitScalarNotBinop(SIInstrWorklist &Worklist, 75090b57cec5SDimitry Andric MachineInstr &Inst, 75100b57cec5SDimitry Andric unsigned Opcode) const { 75110b57cec5SDimitry Andric MachineBasicBlock &MBB = *Inst.getParent(); 75120b57cec5SDimitry Andric MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 75130b57cec5SDimitry Andric MachineBasicBlock::iterator MII = Inst; 75140b57cec5SDimitry Andric const DebugLoc &DL = Inst.getDebugLoc(); 75150b57cec5SDimitry Andric 75160b57cec5SDimitry Andric MachineOperand &Dest = Inst.getOperand(0); 75170b57cec5SDimitry Andric MachineOperand &Src0 = Inst.getOperand(1); 75180b57cec5SDimitry Andric MachineOperand &Src1 = Inst.getOperand(2); 75190b57cec5SDimitry Andric 75208bcb0991SDimitry Andric Register NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); 75218bcb0991SDimitry Andric Register Interm = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); 75220b57cec5SDimitry Andric 75230b57cec5SDimitry Andric MachineInstr &Op = *BuildMI(MBB, MII, DL, get(Opcode), Interm) 75240b57cec5SDimitry Andric .add(Src0) 75250b57cec5SDimitry Andric .add(Src1); 75260b57cec5SDimitry Andric 75270b57cec5SDimitry Andric MachineInstr &Not = *BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), NewDest) 75280b57cec5SDimitry Andric .addReg(Interm); 75290b57cec5SDimitry Andric 75300b57cec5SDimitry Andric Worklist.insert(&Op); 75310b57cec5SDimitry Andric Worklist.insert(&Not); 75320b57cec5SDimitry Andric 75330b57cec5SDimitry Andric MRI.replaceRegWith(Dest.getReg(), NewDest); 75340b57cec5SDimitry Andric addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist); 75350b57cec5SDimitry Andric } 75360b57cec5SDimitry Andric 753706c3fb27SDimitry Andric void SIInstrInfo::splitScalarBinOpN2(SIInstrWorklist &Worklist, 75380b57cec5SDimitry Andric MachineInstr &Inst, 75390b57cec5SDimitry Andric unsigned Opcode) const { 75400b57cec5SDimitry Andric MachineBasicBlock &MBB = *Inst.getParent(); 75410b57cec5SDimitry Andric MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 75420b57cec5SDimitry Andric MachineBasicBlock::iterator MII = Inst; 
75430b57cec5SDimitry Andric const DebugLoc &DL = Inst.getDebugLoc(); 75440b57cec5SDimitry Andric 75450b57cec5SDimitry Andric MachineOperand &Dest = Inst.getOperand(0); 75460b57cec5SDimitry Andric MachineOperand &Src0 = Inst.getOperand(1); 75470b57cec5SDimitry Andric MachineOperand &Src1 = Inst.getOperand(2); 75480b57cec5SDimitry Andric 75498bcb0991SDimitry Andric Register NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); 75508bcb0991SDimitry Andric Register Interm = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); 75510b57cec5SDimitry Andric 75520b57cec5SDimitry Andric MachineInstr &Not = *BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), Interm) 75530b57cec5SDimitry Andric .add(Src1); 75540b57cec5SDimitry Andric 75550b57cec5SDimitry Andric MachineInstr &Op = *BuildMI(MBB, MII, DL, get(Opcode), NewDest) 75560b57cec5SDimitry Andric .add(Src0) 75570b57cec5SDimitry Andric .addReg(Interm); 75580b57cec5SDimitry Andric 75590b57cec5SDimitry Andric Worklist.insert(&Not); 75600b57cec5SDimitry Andric Worklist.insert(&Op); 75610b57cec5SDimitry Andric 75620b57cec5SDimitry Andric MRI.replaceRegWith(Dest.getReg(), NewDest); 75630b57cec5SDimitry Andric addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist); 75640b57cec5SDimitry Andric } 75650b57cec5SDimitry Andric 756606c3fb27SDimitry Andric void SIInstrInfo::splitScalar64BitUnaryOp(SIInstrWorklist &Worklist, 756706c3fb27SDimitry Andric MachineInstr &Inst, unsigned Opcode, 756806c3fb27SDimitry Andric bool Swap) const { 75690b57cec5SDimitry Andric MachineBasicBlock &MBB = *Inst.getParent(); 75700b57cec5SDimitry Andric MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 75710b57cec5SDimitry Andric 75720b57cec5SDimitry Andric MachineOperand &Dest = Inst.getOperand(0); 75730b57cec5SDimitry Andric MachineOperand &Src0 = Inst.getOperand(1); 75740b57cec5SDimitry Andric DebugLoc DL = Inst.getDebugLoc(); 75750b57cec5SDimitry Andric 75760b57cec5SDimitry Andric MachineBasicBlock::iterator MII = Inst; 75770b57cec5SDimitry Andric 75780b57cec5SDimitry Andric const MCInstrDesc &InstDesc = get(Opcode); 75790b57cec5SDimitry Andric const TargetRegisterClass *Src0RC = Src0.isReg() ? 
75800b57cec5SDimitry Andric MRI.getRegClass(Src0.getReg()) : 75810b57cec5SDimitry Andric &AMDGPU::SGPR_32RegClass; 75820b57cec5SDimitry Andric 7583bdd1243dSDimitry Andric const TargetRegisterClass *Src0SubRC = 7584bdd1243dSDimitry Andric RI.getSubRegisterClass(Src0RC, AMDGPU::sub0); 75850b57cec5SDimitry Andric 75860b57cec5SDimitry Andric MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, 75870b57cec5SDimitry Andric AMDGPU::sub0, Src0SubRC); 75880b57cec5SDimitry Andric 75890b57cec5SDimitry Andric const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg()); 75900b57cec5SDimitry Andric const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC); 7591bdd1243dSDimitry Andric const TargetRegisterClass *NewDestSubRC = 7592bdd1243dSDimitry Andric RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0); 75930b57cec5SDimitry Andric 75948bcb0991SDimitry Andric Register DestSub0 = MRI.createVirtualRegister(NewDestSubRC); 75950b57cec5SDimitry Andric MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0).add(SrcReg0Sub0); 75960b57cec5SDimitry Andric 75970b57cec5SDimitry Andric MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, 75980b57cec5SDimitry Andric AMDGPU::sub1, Src0SubRC); 75990b57cec5SDimitry Andric 76008bcb0991SDimitry Andric Register DestSub1 = MRI.createVirtualRegister(NewDestSubRC); 76010b57cec5SDimitry Andric MachineInstr &HiHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub1).add(SrcReg0Sub1); 76020b57cec5SDimitry Andric 7603fe6060f1SDimitry Andric if (Swap) 7604fe6060f1SDimitry Andric std::swap(DestSub0, DestSub1); 7605fe6060f1SDimitry Andric 76068bcb0991SDimitry Andric Register FullDestReg = MRI.createVirtualRegister(NewDestRC); 76070b57cec5SDimitry Andric BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg) 76080b57cec5SDimitry Andric .addReg(DestSub0) 76090b57cec5SDimitry Andric .addImm(AMDGPU::sub0) 76100b57cec5SDimitry Andric .addReg(DestSub1) 76110b57cec5SDimitry Andric .addImm(AMDGPU::sub1); 76120b57cec5SDimitry Andric 76130b57cec5SDimitry Andric MRI.replaceRegWith(Dest.getReg(), FullDestReg); 76140b57cec5SDimitry Andric 76150b57cec5SDimitry Andric Worklist.insert(&LoHalf); 76160b57cec5SDimitry Andric Worklist.insert(&HiHalf); 76170b57cec5SDimitry Andric 76180b57cec5SDimitry Andric // We don't need to legalizeOperands here because for a single operand, src0 76190b57cec5SDimitry Andric // will support any kind of input. 76200b57cec5SDimitry Andric 76210b57cec5SDimitry Andric // Move all users of this moved value. 
76220b57cec5SDimitry Andric addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist); 76230b57cec5SDimitry Andric } 76240b57cec5SDimitry Andric 762506c3fb27SDimitry Andric void SIInstrInfo::splitScalar64BitBinaryOp(SIInstrWorklist &Worklist, 76260b57cec5SDimitry Andric MachineInstr &Inst, unsigned Opcode, 76270b57cec5SDimitry Andric MachineDominatorTree *MDT) const { 76280b57cec5SDimitry Andric MachineBasicBlock &MBB = *Inst.getParent(); 76290b57cec5SDimitry Andric MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 76300b57cec5SDimitry Andric 76310b57cec5SDimitry Andric MachineOperand &Dest = Inst.getOperand(0); 76320b57cec5SDimitry Andric MachineOperand &Src0 = Inst.getOperand(1); 76330b57cec5SDimitry Andric MachineOperand &Src1 = Inst.getOperand(2); 76340b57cec5SDimitry Andric DebugLoc DL = Inst.getDebugLoc(); 76350b57cec5SDimitry Andric 76360b57cec5SDimitry Andric MachineBasicBlock::iterator MII = Inst; 76370b57cec5SDimitry Andric 76380b57cec5SDimitry Andric const MCInstrDesc &InstDesc = get(Opcode); 76390b57cec5SDimitry Andric const TargetRegisterClass *Src0RC = Src0.isReg() ? 76400b57cec5SDimitry Andric MRI.getRegClass(Src0.getReg()) : 76410b57cec5SDimitry Andric &AMDGPU::SGPR_32RegClass; 76420b57cec5SDimitry Andric 7643bdd1243dSDimitry Andric const TargetRegisterClass *Src0SubRC = 7644bdd1243dSDimitry Andric RI.getSubRegisterClass(Src0RC, AMDGPU::sub0); 76450b57cec5SDimitry Andric const TargetRegisterClass *Src1RC = Src1.isReg() ? 76460b57cec5SDimitry Andric MRI.getRegClass(Src1.getReg()) : 76470b57cec5SDimitry Andric &AMDGPU::SGPR_32RegClass; 76480b57cec5SDimitry Andric 7649bdd1243dSDimitry Andric const TargetRegisterClass *Src1SubRC = 7650bdd1243dSDimitry Andric RI.getSubRegisterClass(Src1RC, AMDGPU::sub0); 76510b57cec5SDimitry Andric 76520b57cec5SDimitry Andric MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, 76530b57cec5SDimitry Andric AMDGPU::sub0, Src0SubRC); 76540b57cec5SDimitry Andric MachineOperand SrcReg1Sub0 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC, 76550b57cec5SDimitry Andric AMDGPU::sub0, Src1SubRC); 76560b57cec5SDimitry Andric MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, 76570b57cec5SDimitry Andric AMDGPU::sub1, Src0SubRC); 76580b57cec5SDimitry Andric MachineOperand SrcReg1Sub1 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC, 76590b57cec5SDimitry Andric AMDGPU::sub1, Src1SubRC); 76600b57cec5SDimitry Andric 76610b57cec5SDimitry Andric const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg()); 76620b57cec5SDimitry Andric const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC); 7663bdd1243dSDimitry Andric const TargetRegisterClass *NewDestSubRC = 7664bdd1243dSDimitry Andric RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0); 76650b57cec5SDimitry Andric 76668bcb0991SDimitry Andric Register DestSub0 = MRI.createVirtualRegister(NewDestSubRC); 76670b57cec5SDimitry Andric MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0) 76680b57cec5SDimitry Andric .add(SrcReg0Sub0) 76690b57cec5SDimitry Andric .add(SrcReg1Sub0); 76700b57cec5SDimitry Andric 76718bcb0991SDimitry Andric Register DestSub1 = MRI.createVirtualRegister(NewDestSubRC); 76720b57cec5SDimitry Andric MachineInstr &HiHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub1) 76730b57cec5SDimitry Andric .add(SrcReg0Sub1) 76740b57cec5SDimitry Andric .add(SrcReg1Sub1); 76750b57cec5SDimitry Andric 76768bcb0991SDimitry Andric Register FullDestReg = MRI.createVirtualRegister(NewDestRC); 76770b57cec5SDimitry Andric BuildMI(MBB, 
MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg) 76780b57cec5SDimitry Andric .addReg(DestSub0) 76790b57cec5SDimitry Andric .addImm(AMDGPU::sub0) 76800b57cec5SDimitry Andric .addReg(DestSub1) 76810b57cec5SDimitry Andric .addImm(AMDGPU::sub1); 76820b57cec5SDimitry Andric 76830b57cec5SDimitry Andric MRI.replaceRegWith(Dest.getReg(), FullDestReg); 76840b57cec5SDimitry Andric 76850b57cec5SDimitry Andric Worklist.insert(&LoHalf); 76860b57cec5SDimitry Andric Worklist.insert(&HiHalf); 76870b57cec5SDimitry Andric 768881ad6265SDimitry Andric // Move all users of this moved value. 76890b57cec5SDimitry Andric addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist); 76900b57cec5SDimitry Andric } 76910b57cec5SDimitry Andric 769206c3fb27SDimitry Andric void SIInstrInfo::splitScalar64BitXnor(SIInstrWorklist &Worklist, 76930b57cec5SDimitry Andric MachineInstr &Inst, 76940b57cec5SDimitry Andric MachineDominatorTree *MDT) const { 76950b57cec5SDimitry Andric MachineBasicBlock &MBB = *Inst.getParent(); 76960b57cec5SDimitry Andric MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 76970b57cec5SDimitry Andric 76980b57cec5SDimitry Andric MachineOperand &Dest = Inst.getOperand(0); 76990b57cec5SDimitry Andric MachineOperand &Src0 = Inst.getOperand(1); 77000b57cec5SDimitry Andric MachineOperand &Src1 = Inst.getOperand(2); 77010b57cec5SDimitry Andric const DebugLoc &DL = Inst.getDebugLoc(); 77020b57cec5SDimitry Andric 77030b57cec5SDimitry Andric MachineBasicBlock::iterator MII = Inst; 77040b57cec5SDimitry Andric 77050b57cec5SDimitry Andric const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg()); 77060b57cec5SDimitry Andric 77078bcb0991SDimitry Andric Register Interm = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); 77080b57cec5SDimitry Andric 77090b57cec5SDimitry Andric MachineOperand* Op0; 77100b57cec5SDimitry Andric MachineOperand* Op1; 77110b57cec5SDimitry Andric 77120b57cec5SDimitry Andric if (Src0.isReg() && RI.isSGPRReg(MRI, Src0.getReg())) { 77130b57cec5SDimitry Andric Op0 = &Src0; 77140b57cec5SDimitry Andric Op1 = &Src1; 77150b57cec5SDimitry Andric } else { 77160b57cec5SDimitry Andric Op0 = &Src1; 77170b57cec5SDimitry Andric Op1 = &Src0; 77180b57cec5SDimitry Andric } 77190b57cec5SDimitry Andric 77200b57cec5SDimitry Andric BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B64), Interm) 77210b57cec5SDimitry Andric .add(*Op0); 77220b57cec5SDimitry Andric 77238bcb0991SDimitry Andric Register NewDest = MRI.createVirtualRegister(DestRC); 77240b57cec5SDimitry Andric 77250b57cec5SDimitry Andric MachineInstr &Xor = *BuildMI(MBB, MII, DL, get(AMDGPU::S_XOR_B64), NewDest) 77260b57cec5SDimitry Andric .addReg(Interm) 77270b57cec5SDimitry Andric .add(*Op1); 77280b57cec5SDimitry Andric 77290b57cec5SDimitry Andric MRI.replaceRegWith(Dest.getReg(), NewDest); 77300b57cec5SDimitry Andric 77310b57cec5SDimitry Andric Worklist.insert(&Xor); 77320b57cec5SDimitry Andric } 77330b57cec5SDimitry Andric 773406c3fb27SDimitry Andric void SIInstrInfo::splitScalar64BitBCNT(SIInstrWorklist &Worklist, 773506c3fb27SDimitry Andric MachineInstr &Inst) const { 77360b57cec5SDimitry Andric MachineBasicBlock &MBB = *Inst.getParent(); 77370b57cec5SDimitry Andric MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 77380b57cec5SDimitry Andric 77390b57cec5SDimitry Andric MachineBasicBlock::iterator MII = Inst; 77400b57cec5SDimitry Andric const DebugLoc &DL = Inst.getDebugLoc(); 77410b57cec5SDimitry Andric 77420b57cec5SDimitry Andric MachineOperand &Dest = Inst.getOperand(0); 77430b57cec5SDimitry Andric MachineOperand &Src = 
Inst.getOperand(1); 77440b57cec5SDimitry Andric 77450b57cec5SDimitry Andric const MCInstrDesc &InstDesc = get(AMDGPU::V_BCNT_U32_B32_e64); 77460b57cec5SDimitry Andric const TargetRegisterClass *SrcRC = Src.isReg() ? 77470b57cec5SDimitry Andric MRI.getRegClass(Src.getReg()) : 77480b57cec5SDimitry Andric &AMDGPU::SGPR_32RegClass; 77490b57cec5SDimitry Andric 77508bcb0991SDimitry Andric Register MidReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 77518bcb0991SDimitry Andric Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 77520b57cec5SDimitry Andric 7753bdd1243dSDimitry Andric const TargetRegisterClass *SrcSubRC = 7754bdd1243dSDimitry Andric RI.getSubRegisterClass(SrcRC, AMDGPU::sub0); 77550b57cec5SDimitry Andric 77560b57cec5SDimitry Andric MachineOperand SrcRegSub0 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC, 77570b57cec5SDimitry Andric AMDGPU::sub0, SrcSubRC); 77580b57cec5SDimitry Andric MachineOperand SrcRegSub1 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC, 77590b57cec5SDimitry Andric AMDGPU::sub1, SrcSubRC); 77600b57cec5SDimitry Andric 77610b57cec5SDimitry Andric BuildMI(MBB, MII, DL, InstDesc, MidReg).add(SrcRegSub0).addImm(0); 77620b57cec5SDimitry Andric 77630b57cec5SDimitry Andric BuildMI(MBB, MII, DL, InstDesc, ResultReg).add(SrcRegSub1).addReg(MidReg); 77640b57cec5SDimitry Andric 77650b57cec5SDimitry Andric MRI.replaceRegWith(Dest.getReg(), ResultReg); 77660b57cec5SDimitry Andric 776781ad6265SDimitry Andric // We don't need to legalize operands here. src0 for either instruction can be 77680b57cec5SDimitry Andric // an SGPR, and the second input is unused or determined here. 77690b57cec5SDimitry Andric addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist); 77700b57cec5SDimitry Andric } 77710b57cec5SDimitry Andric 777206c3fb27SDimitry Andric void SIInstrInfo::splitScalar64BitBFE(SIInstrWorklist &Worklist, 77730b57cec5SDimitry Andric MachineInstr &Inst) const { 77740b57cec5SDimitry Andric MachineBasicBlock &MBB = *Inst.getParent(); 77750b57cec5SDimitry Andric MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 77760b57cec5SDimitry Andric MachineBasicBlock::iterator MII = Inst; 77770b57cec5SDimitry Andric const DebugLoc &DL = Inst.getDebugLoc(); 77780b57cec5SDimitry Andric 77790b57cec5SDimitry Andric MachineOperand &Dest = Inst.getOperand(0); 77800b57cec5SDimitry Andric uint32_t Imm = Inst.getOperand(2).getImm(); 77810b57cec5SDimitry Andric uint32_t Offset = Imm & 0x3f; // Extract bits [5:0]. 77820b57cec5SDimitry Andric uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16]. 77830b57cec5SDimitry Andric 77840b57cec5SDimitry Andric (void) Offset; 77850b57cec5SDimitry Andric 77860b57cec5SDimitry Andric // Only sext_inreg cases handled. 
77870b57cec5SDimitry Andric assert(Inst.getOpcode() == AMDGPU::S_BFE_I64 && BitWidth <= 32 && 77880b57cec5SDimitry Andric Offset == 0 && "Not implemented"); 77890b57cec5SDimitry Andric 77900b57cec5SDimitry Andric if (BitWidth < 32) { 77918bcb0991SDimitry Andric Register MidRegLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 77928bcb0991SDimitry Andric Register MidRegHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 77938bcb0991SDimitry Andric Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass); 77940b57cec5SDimitry Andric 7795e8d8bef9SDimitry Andric BuildMI(MBB, MII, DL, get(AMDGPU::V_BFE_I32_e64), MidRegLo) 77960b57cec5SDimitry Andric .addReg(Inst.getOperand(1).getReg(), 0, AMDGPU::sub0) 77970b57cec5SDimitry Andric .addImm(0) 77980b57cec5SDimitry Andric .addImm(BitWidth); 77990b57cec5SDimitry Andric 78000b57cec5SDimitry Andric BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e32), MidRegHi) 78010b57cec5SDimitry Andric .addImm(31) 78020b57cec5SDimitry Andric .addReg(MidRegLo); 78030b57cec5SDimitry Andric 78040b57cec5SDimitry Andric BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg) 78050b57cec5SDimitry Andric .addReg(MidRegLo) 78060b57cec5SDimitry Andric .addImm(AMDGPU::sub0) 78070b57cec5SDimitry Andric .addReg(MidRegHi) 78080b57cec5SDimitry Andric .addImm(AMDGPU::sub1); 78090b57cec5SDimitry Andric 78100b57cec5SDimitry Andric MRI.replaceRegWith(Dest.getReg(), ResultReg); 78110b57cec5SDimitry Andric addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist); 78120b57cec5SDimitry Andric return; 78130b57cec5SDimitry Andric } 78140b57cec5SDimitry Andric 78150b57cec5SDimitry Andric MachineOperand &Src = Inst.getOperand(1); 78168bcb0991SDimitry Andric Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 78178bcb0991SDimitry Andric Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass); 78180b57cec5SDimitry Andric 78190b57cec5SDimitry Andric BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e64), TmpReg) 78200b57cec5SDimitry Andric .addImm(31) 78210b57cec5SDimitry Andric .addReg(Src.getReg(), 0, AMDGPU::sub0); 78220b57cec5SDimitry Andric 78230b57cec5SDimitry Andric BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg) 78240b57cec5SDimitry Andric .addReg(Src.getReg(), 0, AMDGPU::sub0) 78250b57cec5SDimitry Andric .addImm(AMDGPU::sub0) 78260b57cec5SDimitry Andric .addReg(TmpReg) 78270b57cec5SDimitry Andric .addImm(AMDGPU::sub1); 78280b57cec5SDimitry Andric 78290b57cec5SDimitry Andric MRI.replaceRegWith(Dest.getReg(), ResultReg); 78300b57cec5SDimitry Andric addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist); 78310b57cec5SDimitry Andric } 78320b57cec5SDimitry Andric 78330b57cec5SDimitry Andric void SIInstrInfo::addUsersToMoveToVALUWorklist( 783406c3fb27SDimitry Andric Register DstReg, MachineRegisterInfo &MRI, 783506c3fb27SDimitry Andric SIInstrWorklist &Worklist) const { 78360b57cec5SDimitry Andric for (MachineRegisterInfo::use_iterator I = MRI.use_begin(DstReg), 78370b57cec5SDimitry Andric E = MRI.use_end(); I != E;) { 78380b57cec5SDimitry Andric MachineInstr &UseMI = *I->getParent(); 78390b57cec5SDimitry Andric 78400b57cec5SDimitry Andric unsigned OpNo = 0; 78410b57cec5SDimitry Andric 78420b57cec5SDimitry Andric switch (UseMI.getOpcode()) { 78430b57cec5SDimitry Andric case AMDGPU::COPY: 78440b57cec5SDimitry Andric case AMDGPU::WQM: 78458bcb0991SDimitry Andric case AMDGPU::SOFT_WQM: 7846fe6060f1SDimitry Andric case AMDGPU::STRICT_WWM: 7847fe6060f1SDimitry Andric case AMDGPU::STRICT_WQM: 
78480b57cec5SDimitry Andric case AMDGPU::REG_SEQUENCE: 78490b57cec5SDimitry Andric case AMDGPU::PHI: 78500b57cec5SDimitry Andric case AMDGPU::INSERT_SUBREG: 78510b57cec5SDimitry Andric break; 78520b57cec5SDimitry Andric default: 78530b57cec5SDimitry Andric OpNo = I.getOperandNo(); 78540b57cec5SDimitry Andric break; 78550b57cec5SDimitry Andric } 78560b57cec5SDimitry Andric 78570b57cec5SDimitry Andric if (!RI.hasVectorRegisters(getOpRegClass(UseMI, OpNo))) { 78580b57cec5SDimitry Andric Worklist.insert(&UseMI); 78590b57cec5SDimitry Andric 78600b57cec5SDimitry Andric do { 78610b57cec5SDimitry Andric ++I; 78620b57cec5SDimitry Andric } while (I != E && I->getParent() == &UseMI); 78630b57cec5SDimitry Andric } else { 78640b57cec5SDimitry Andric ++I; 78650b57cec5SDimitry Andric } 78660b57cec5SDimitry Andric } 78670b57cec5SDimitry Andric } 78680b57cec5SDimitry Andric 786906c3fb27SDimitry Andric void SIInstrInfo::movePackToVALU(SIInstrWorklist &Worklist, 78700b57cec5SDimitry Andric MachineRegisterInfo &MRI, 78710b57cec5SDimitry Andric MachineInstr &Inst) const { 78728bcb0991SDimitry Andric Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 78730b57cec5SDimitry Andric MachineBasicBlock *MBB = Inst.getParent(); 78740b57cec5SDimitry Andric MachineOperand &Src0 = Inst.getOperand(1); 78750b57cec5SDimitry Andric MachineOperand &Src1 = Inst.getOperand(2); 78760b57cec5SDimitry Andric const DebugLoc &DL = Inst.getDebugLoc(); 78770b57cec5SDimitry Andric 78780b57cec5SDimitry Andric switch (Inst.getOpcode()) { 78790b57cec5SDimitry Andric case AMDGPU::S_PACK_LL_B32_B16: { 78808bcb0991SDimitry Andric Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 78818bcb0991SDimitry Andric Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 78820b57cec5SDimitry Andric 78830b57cec5SDimitry Andric // FIXME: Can do a lot better if we know the high bits of src0 or src1 are 78840b57cec5SDimitry Andric // 0. 
78850b57cec5SDimitry Andric BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), ImmReg) 78860b57cec5SDimitry Andric .addImm(0xffff); 78870b57cec5SDimitry Andric 78880b57cec5SDimitry Andric BuildMI(*MBB, Inst, DL, get(AMDGPU::V_AND_B32_e64), TmpReg) 78890b57cec5SDimitry Andric .addReg(ImmReg, RegState::Kill) 78900b57cec5SDimitry Andric .add(Src0); 78910b57cec5SDimitry Andric 7892e8d8bef9SDimitry Andric BuildMI(*MBB, Inst, DL, get(AMDGPU::V_LSHL_OR_B32_e64), ResultReg) 78930b57cec5SDimitry Andric .add(Src1) 78940b57cec5SDimitry Andric .addImm(16) 78950b57cec5SDimitry Andric .addReg(TmpReg, RegState::Kill); 78960b57cec5SDimitry Andric break; 78970b57cec5SDimitry Andric } 78980b57cec5SDimitry Andric case AMDGPU::S_PACK_LH_B32_B16: { 78998bcb0991SDimitry Andric Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 79000b57cec5SDimitry Andric BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), ImmReg) 79010b57cec5SDimitry Andric .addImm(0xffff); 7902e8d8bef9SDimitry Andric BuildMI(*MBB, Inst, DL, get(AMDGPU::V_BFI_B32_e64), ResultReg) 79030b57cec5SDimitry Andric .addReg(ImmReg, RegState::Kill) 79040b57cec5SDimitry Andric .add(Src0) 79050b57cec5SDimitry Andric .add(Src1); 79060b57cec5SDimitry Andric break; 79070b57cec5SDimitry Andric } 790881ad6265SDimitry Andric case AMDGPU::S_PACK_HL_B32_B16: { 790981ad6265SDimitry Andric Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 791081ad6265SDimitry Andric BuildMI(*MBB, Inst, DL, get(AMDGPU::V_LSHRREV_B32_e64), TmpReg) 791181ad6265SDimitry Andric .addImm(16) 791281ad6265SDimitry Andric .add(Src0); 791381ad6265SDimitry Andric BuildMI(*MBB, Inst, DL, get(AMDGPU::V_LSHL_OR_B32_e64), ResultReg) 791481ad6265SDimitry Andric .add(Src1) 791581ad6265SDimitry Andric .addImm(16) 791681ad6265SDimitry Andric .addReg(TmpReg, RegState::Kill); 791781ad6265SDimitry Andric break; 791881ad6265SDimitry Andric } 79190b57cec5SDimitry Andric case AMDGPU::S_PACK_HH_B32_B16: { 79208bcb0991SDimitry Andric Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 79218bcb0991SDimitry Andric Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 79220b57cec5SDimitry Andric BuildMI(*MBB, Inst, DL, get(AMDGPU::V_LSHRREV_B32_e64), TmpReg) 79230b57cec5SDimitry Andric .addImm(16) 79240b57cec5SDimitry Andric .add(Src0); 79250b57cec5SDimitry Andric BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), ImmReg) 79260b57cec5SDimitry Andric .addImm(0xffff0000); 7927e8d8bef9SDimitry Andric BuildMI(*MBB, Inst, DL, get(AMDGPU::V_AND_OR_B32_e64), ResultReg) 79280b57cec5SDimitry Andric .add(Src1) 79290b57cec5SDimitry Andric .addReg(ImmReg, RegState::Kill) 79300b57cec5SDimitry Andric .addReg(TmpReg, RegState::Kill); 79310b57cec5SDimitry Andric break; 79320b57cec5SDimitry Andric } 79330b57cec5SDimitry Andric default: 79340b57cec5SDimitry Andric llvm_unreachable("unhandled s_pack_* instruction"); 79350b57cec5SDimitry Andric } 79360b57cec5SDimitry Andric 79370b57cec5SDimitry Andric MachineOperand &Dest = Inst.getOperand(0); 79380b57cec5SDimitry Andric MRI.replaceRegWith(Dest.getReg(), ResultReg); 79390b57cec5SDimitry Andric addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist); 79400b57cec5SDimitry Andric } 79410b57cec5SDimitry Andric 79420b57cec5SDimitry Andric void SIInstrInfo::addSCCDefUsersToVALUWorklist(MachineOperand &Op, 79430b57cec5SDimitry Andric MachineInstr &SCCDefInst, 794406c3fb27SDimitry Andric SIInstrWorklist &Worklist, 7945349cc55cSDimitry Andric Register NewCond) const { 79465ffd83dbSDimitry Andric 
79470b57cec5SDimitry Andric // Ensure that def inst defines SCC, which is still live. 79480b57cec5SDimitry Andric assert(Op.isReg() && Op.getReg() == AMDGPU::SCC && Op.isDef() && 79490b57cec5SDimitry Andric !Op.isDead() && Op.getParent() == &SCCDefInst); 79505ffd83dbSDimitry Andric SmallVector<MachineInstr *, 4> CopyToDelete; 79510b57cec5SDimitry Andric // This assumes that all the users of SCC are in the same block 79520b57cec5SDimitry Andric // as the SCC def. 79530b57cec5SDimitry Andric for (MachineInstr &MI : // Skip the def inst itself. 79540b57cec5SDimitry Andric make_range(std::next(MachineBasicBlock::iterator(SCCDefInst)), 79550b57cec5SDimitry Andric SCCDefInst.getParent()->end())) { 79560b57cec5SDimitry Andric // Check if SCC is used first. 7957349cc55cSDimitry Andric int SCCIdx = MI.findRegisterUseOperandIdx(AMDGPU::SCC, false, &RI); 7958349cc55cSDimitry Andric if (SCCIdx != -1) { 79595ffd83dbSDimitry Andric if (MI.isCopy()) { 79605ffd83dbSDimitry Andric MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); 7961e8d8bef9SDimitry Andric Register DestReg = MI.getOperand(0).getReg(); 79625ffd83dbSDimitry Andric 7963349cc55cSDimitry Andric MRI.replaceRegWith(DestReg, NewCond); 79645ffd83dbSDimitry Andric CopyToDelete.push_back(&MI); 79655ffd83dbSDimitry Andric } else { 7966349cc55cSDimitry Andric 7967349cc55cSDimitry Andric if (NewCond.isValid()) 7968349cc55cSDimitry Andric MI.getOperand(SCCIdx).setReg(NewCond); 79695ffd83dbSDimitry Andric 79700b57cec5SDimitry Andric Worklist.insert(&MI); 79715ffd83dbSDimitry Andric } 79725ffd83dbSDimitry Andric } 79730b57cec5SDimitry Andric // Exit if we find another SCC def. 79740b57cec5SDimitry Andric if (MI.findRegisterDefOperandIdx(AMDGPU::SCC, false, false, &RI) != -1) 79755ffd83dbSDimitry Andric break; 79765ffd83dbSDimitry Andric } 79775ffd83dbSDimitry Andric for (auto &Copy : CopyToDelete) 79785ffd83dbSDimitry Andric Copy->eraseFromParent(); 79790b57cec5SDimitry Andric } 79800b57cec5SDimitry Andric 7981fe6060f1SDimitry Andric // Instructions that use SCC may be converted to VALU instructions. When that 7982fe6060f1SDimitry Andric // happens, the SCC register is changed to VCC_LO. The instruction that defines 7983fe6060f1SDimitry Andric // SCC must be changed to an instruction that defines VCC. This function makes 7984fe6060f1SDimitry Andric // sure that the instruction that defines SCC is added to the moveToVALU 7985fe6060f1SDimitry Andric // worklist. 7986bdd1243dSDimitry Andric void SIInstrInfo::addSCCDefsToVALUWorklist(MachineInstr *SCCUseInst, 798706c3fb27SDimitry Andric SIInstrWorklist &Worklist) const { 798881ad6265SDimitry Andric // Look for a preceding instruction that either defines VCC or SCC. If VCC 7989fe6060f1SDimitry Andric // then there is nothing to do because the defining instruction has been 7990fe6060f1SDimitry Andric // converted to a VALU already. If SCC then that instruction needs to be 7991fe6060f1SDimitry Andric // converted to a VALU. 
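// For example, when an S_CSELECT that reads SCC is rewritten as a V_CNDMASK
// reading VCC, the S_CMP that produced SCC must likewise become a V_CMP
// writing VCC, so the defining instruction is queued here.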
7992fe6060f1SDimitry Andric for (MachineInstr &MI : 7993fe6060f1SDimitry Andric make_range(std::next(MachineBasicBlock::reverse_iterator(SCCUseInst)), 7994fe6060f1SDimitry Andric SCCUseInst->getParent()->rend())) { 7995fe6060f1SDimitry Andric if (MI.modifiesRegister(AMDGPU::VCC, &RI)) 7996fe6060f1SDimitry Andric break; 7997fe6060f1SDimitry Andric if (MI.definesRegister(AMDGPU::SCC, &RI)) { 7998fe6060f1SDimitry Andric Worklist.insert(&MI); 7999fe6060f1SDimitry Andric break; 8000fe6060f1SDimitry Andric } 8001fe6060f1SDimitry Andric } 8002fe6060f1SDimitry Andric } 8003fe6060f1SDimitry Andric 80040b57cec5SDimitry Andric const TargetRegisterClass *SIInstrInfo::getDestEquivalentVGPRClass( 80050b57cec5SDimitry Andric const MachineInstr &Inst) const { 80060b57cec5SDimitry Andric const TargetRegisterClass *NewDstRC = getOpRegClass(Inst, 0); 80070b57cec5SDimitry Andric 80080b57cec5SDimitry Andric switch (Inst.getOpcode()) { 80090b57cec5SDimitry Andric // For target instructions, getOpRegClass just returns the virtual register 80100b57cec5SDimitry Andric // class associated with the operand, so we need to find an equivalent VGPR 80110b57cec5SDimitry Andric // register class in order to move the instruction to the VALU. 80120b57cec5SDimitry Andric case AMDGPU::COPY: 80130b57cec5SDimitry Andric case AMDGPU::PHI: 80140b57cec5SDimitry Andric case AMDGPU::REG_SEQUENCE: 80150b57cec5SDimitry Andric case AMDGPU::INSERT_SUBREG: 80160b57cec5SDimitry Andric case AMDGPU::WQM: 80178bcb0991SDimitry Andric case AMDGPU::SOFT_WQM: 8018fe6060f1SDimitry Andric case AMDGPU::STRICT_WWM: 8019fe6060f1SDimitry Andric case AMDGPU::STRICT_WQM: { 80200b57cec5SDimitry Andric const TargetRegisterClass *SrcRC = getOpRegClass(Inst, 1); 80214824e7fdSDimitry Andric if (RI.isAGPRClass(SrcRC)) { 80224824e7fdSDimitry Andric if (RI.isAGPRClass(NewDstRC)) 80230b57cec5SDimitry Andric return nullptr; 80240b57cec5SDimitry Andric 80258bcb0991SDimitry Andric switch (Inst.getOpcode()) { 80268bcb0991SDimitry Andric case AMDGPU::PHI: 80278bcb0991SDimitry Andric case AMDGPU::REG_SEQUENCE: 80288bcb0991SDimitry Andric case AMDGPU::INSERT_SUBREG: 80290b57cec5SDimitry Andric NewDstRC = RI.getEquivalentAGPRClass(NewDstRC); 80308bcb0991SDimitry Andric break; 80318bcb0991SDimitry Andric default: 80328bcb0991SDimitry Andric NewDstRC = RI.getEquivalentVGPRClass(NewDstRC); 80338bcb0991SDimitry Andric } 80348bcb0991SDimitry Andric 80350b57cec5SDimitry Andric if (!NewDstRC) 80360b57cec5SDimitry Andric return nullptr; 80370b57cec5SDimitry Andric } else { 80384824e7fdSDimitry Andric if (RI.isVGPRClass(NewDstRC) || NewDstRC == &AMDGPU::VReg_1RegClass) 80390b57cec5SDimitry Andric return nullptr; 80400b57cec5SDimitry Andric 80410b57cec5SDimitry Andric NewDstRC = RI.getEquivalentVGPRClass(NewDstRC); 80420b57cec5SDimitry Andric if (!NewDstRC) 80430b57cec5SDimitry Andric return nullptr; 80440b57cec5SDimitry Andric } 80450b57cec5SDimitry Andric 80460b57cec5SDimitry Andric return NewDstRC; 80470b57cec5SDimitry Andric } 80480b57cec5SDimitry Andric default: 80490b57cec5SDimitry Andric return NewDstRC; 80500b57cec5SDimitry Andric } 80510b57cec5SDimitry Andric } 80520b57cec5SDimitry Andric 80530b57cec5SDimitry Andric // Find the one SGPR operand we are allowed to use. 
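// Even when several source operands are SGPRs, the constant bus restriction
// described below means the VALU form can keep only one of them, so this
// helper picks the SGPR that is cheapest to keep (a statically required one,
// or the most frequently repeated one).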
80545ffd83dbSDimitry Andric Register SIInstrInfo::findUsedSGPR(const MachineInstr &MI, 80550b57cec5SDimitry Andric int OpIndices[3]) const { 80560b57cec5SDimitry Andric const MCInstrDesc &Desc = MI.getDesc(); 80570b57cec5SDimitry Andric 80580b57cec5SDimitry Andric // Find the one SGPR operand we are allowed to use. 80590b57cec5SDimitry Andric // 80600b57cec5SDimitry Andric // First we need to consider the instruction's operand requirements before 80610b57cec5SDimitry Andric // legalizing. Some operands are required to be SGPRs, such as implicit uses 80620b57cec5SDimitry Andric // of VCC, but we are still bound by the constant bus requirement to only use 80630b57cec5SDimitry Andric // one. 80640b57cec5SDimitry Andric // 80650b57cec5SDimitry Andric // If the operand's class is an SGPR, we can never move it. 80660b57cec5SDimitry Andric 80675ffd83dbSDimitry Andric Register SGPRReg = findImplicitSGPRRead(MI); 8068bdd1243dSDimitry Andric if (SGPRReg) 80690b57cec5SDimitry Andric return SGPRReg; 80700b57cec5SDimitry Andric 8071bdd1243dSDimitry Andric Register UsedSGPRs[3] = {Register()}; 80720b57cec5SDimitry Andric const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); 80730b57cec5SDimitry Andric 80740b57cec5SDimitry Andric for (unsigned i = 0; i < 3; ++i) { 80750b57cec5SDimitry Andric int Idx = OpIndices[i]; 80760b57cec5SDimitry Andric if (Idx == -1) 80770b57cec5SDimitry Andric break; 80780b57cec5SDimitry Andric 80790b57cec5SDimitry Andric const MachineOperand &MO = MI.getOperand(Idx); 80800b57cec5SDimitry Andric if (!MO.isReg()) 80810b57cec5SDimitry Andric continue; 80820b57cec5SDimitry Andric 80830b57cec5SDimitry Andric // Is this operand statically required to be an SGPR based on the operand 80840b57cec5SDimitry Andric // constraints? 8085bdd1243dSDimitry Andric const TargetRegisterClass *OpRC = 8086bdd1243dSDimitry Andric RI.getRegClass(Desc.operands()[Idx].RegClass); 80870b57cec5SDimitry Andric bool IsRequiredSGPR = RI.isSGPRClass(OpRC); 80880b57cec5SDimitry Andric if (IsRequiredSGPR) 80890b57cec5SDimitry Andric return MO.getReg(); 80900b57cec5SDimitry Andric 80910b57cec5SDimitry Andric // If this could be a VGPR or an SGPR, Check the dynamic register class. 80928bcb0991SDimitry Andric Register Reg = MO.getReg(); 80930b57cec5SDimitry Andric const TargetRegisterClass *RegRC = MRI.getRegClass(Reg); 80940b57cec5SDimitry Andric if (RI.isSGPRClass(RegRC)) 80950b57cec5SDimitry Andric UsedSGPRs[i] = Reg; 80960b57cec5SDimitry Andric } 80970b57cec5SDimitry Andric 80980b57cec5SDimitry Andric // We don't have a required SGPR operand, so we have a bit more freedom in 80990b57cec5SDimitry Andric // selecting operands to move. 81000b57cec5SDimitry Andric 81010b57cec5SDimitry Andric // Try to select the most used SGPR. If an SGPR is equal to one of the 81020b57cec5SDimitry Andric // others, we choose that. 81030b57cec5SDimitry Andric // 81040b57cec5SDimitry Andric // e.g. 81050b57cec5SDimitry Andric // V_FMA_F32 v0, s0, s0, s0 -> No moves 81060b57cec5SDimitry Andric // V_FMA_F32 v0, s0, s1, s0 -> Move s1 81070b57cec5SDimitry Andric 81080b57cec5SDimitry Andric // TODO: If some of the operands are 64-bit SGPRs and some 32, we should 81090b57cec5SDimitry Andric // prefer those. 
81100b57cec5SDimitry Andric 8111bdd1243dSDimitry Andric if (UsedSGPRs[0]) { 81120b57cec5SDimitry Andric if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2]) 81130b57cec5SDimitry Andric SGPRReg = UsedSGPRs[0]; 81140b57cec5SDimitry Andric } 81150b57cec5SDimitry Andric 8116bdd1243dSDimitry Andric if (!SGPRReg && UsedSGPRs[1]) { 81170b57cec5SDimitry Andric if (UsedSGPRs[1] == UsedSGPRs[2]) 81180b57cec5SDimitry Andric SGPRReg = UsedSGPRs[1]; 81190b57cec5SDimitry Andric } 81200b57cec5SDimitry Andric 81210b57cec5SDimitry Andric return SGPRReg; 81220b57cec5SDimitry Andric } 81230b57cec5SDimitry Andric 81240b57cec5SDimitry Andric MachineOperand *SIInstrInfo::getNamedOperand(MachineInstr &MI, 81250b57cec5SDimitry Andric unsigned OperandName) const { 81260b57cec5SDimitry Andric int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OperandName); 81270b57cec5SDimitry Andric if (Idx == -1) 81280b57cec5SDimitry Andric return nullptr; 81290b57cec5SDimitry Andric 81300b57cec5SDimitry Andric return &MI.getOperand(Idx); 81310b57cec5SDimitry Andric } 81320b57cec5SDimitry Andric 81330b57cec5SDimitry Andric uint64_t SIInstrInfo::getDefaultRsrcDataFormat() const { 81340b57cec5SDimitry Andric if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) { 8135bdd1243dSDimitry Andric int64_t Format = ST.getGeneration() >= AMDGPUSubtarget::GFX11 8136bdd1243dSDimitry Andric ? (int64_t)AMDGPU::UfmtGFX11::UFMT_32_FLOAT 8137bdd1243dSDimitry Andric : (int64_t)AMDGPU::UfmtGFX10::UFMT_32_FLOAT; 813881ad6265SDimitry Andric return (Format << 44) | 81390b57cec5SDimitry Andric (1ULL << 56) | // RESOURCE_LEVEL = 1 81400b57cec5SDimitry Andric (3ULL << 60); // OOB_SELECT = 3 81410b57cec5SDimitry Andric } 81420b57cec5SDimitry Andric 81430b57cec5SDimitry Andric uint64_t RsrcDataFormat = AMDGPU::RSRC_DATA_FORMAT; 81440b57cec5SDimitry Andric if (ST.isAmdHsaOS()) { 81450b57cec5SDimitry Andric // Set ATC = 1. GFX9 doesn't have this bit. 81460b57cec5SDimitry Andric if (ST.getGeneration() <= AMDGPUSubtarget::VOLCANIC_ISLANDS) 81470b57cec5SDimitry Andric RsrcDataFormat |= (1ULL << 56); 81480b57cec5SDimitry Andric 81490b57cec5SDimitry Andric // Set MTYPE = 2 (MTYPE_UC = uncached). GFX9 doesn't have this. 81500b57cec5SDimitry Andric // BTW, it disables TC L2 and therefore decreases performance. 81510b57cec5SDimitry Andric if (ST.getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS) 81520b57cec5SDimitry Andric RsrcDataFormat |= (2ULL << 59); 81530b57cec5SDimitry Andric } 81540b57cec5SDimitry Andric 81550b57cec5SDimitry Andric return RsrcDataFormat; 81560b57cec5SDimitry Andric } 81570b57cec5SDimitry Andric 81580b57cec5SDimitry Andric uint64_t SIInstrInfo::getScratchRsrcWords23() const { 81590b57cec5SDimitry Andric uint64_t Rsrc23 = getDefaultRsrcDataFormat() | 81600b57cec5SDimitry Andric AMDGPU::RSRC_TID_ENABLE | 81610b57cec5SDimitry Andric 0xffffffff; // Size; 81620b57cec5SDimitry Andric 81630b57cec5SDimitry Andric // GFX9 doesn't have ELEMENT_SIZE. 81640b57cec5SDimitry Andric if (ST.getGeneration() <= AMDGPUSubtarget::VOLCANIC_ISLANDS) { 8165e8d8bef9SDimitry Andric uint64_t EltSizeValue = Log2_32(ST.getMaxPrivateElementSize(true)) - 1; 81660b57cec5SDimitry Andric Rsrc23 |= EltSizeValue << AMDGPU::RSRC_ELEMENT_SIZE_SHIFT; 81670b57cec5SDimitry Andric } 81680b57cec5SDimitry Andric 81690b57cec5SDimitry Andric // IndexStride = 64 / 32. 81700b57cec5SDimitry Andric uint64_t IndexStride = ST.getWavefrontSize() == 64 ? 
3 : 2; 81710b57cec5SDimitry Andric Rsrc23 |= IndexStride << AMDGPU::RSRC_INDEX_STRIDE_SHIFT; 81720b57cec5SDimitry Andric 81730b57cec5SDimitry Andric // If TID_ENABLE is set, DATA_FORMAT specifies stride bits [14:17]. 81740b57cec5SDimitry Andric // Clear them unless we want a huge stride. 81750b57cec5SDimitry Andric if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS && 81760b57cec5SDimitry Andric ST.getGeneration() <= AMDGPUSubtarget::GFX9) 81770b57cec5SDimitry Andric Rsrc23 &= ~AMDGPU::RSRC_DATA_FORMAT; 81780b57cec5SDimitry Andric 81790b57cec5SDimitry Andric return Rsrc23; 81800b57cec5SDimitry Andric } 81810b57cec5SDimitry Andric 81820b57cec5SDimitry Andric bool SIInstrInfo::isLowLatencyInstruction(const MachineInstr &MI) const { 81830b57cec5SDimitry Andric unsigned Opc = MI.getOpcode(); 81840b57cec5SDimitry Andric 81850b57cec5SDimitry Andric return isSMRD(Opc); 81860b57cec5SDimitry Andric } 81870b57cec5SDimitry Andric 81885ffd83dbSDimitry Andric bool SIInstrInfo::isHighLatencyDef(int Opc) const { 81895ffd83dbSDimitry Andric return get(Opc).mayLoad() && 81905ffd83dbSDimitry Andric (isMUBUF(Opc) || isMTBUF(Opc) || isMIMG(Opc) || isFLAT(Opc)); 81910b57cec5SDimitry Andric } 81920b57cec5SDimitry Andric 81930b57cec5SDimitry Andric unsigned SIInstrInfo::isStackAccess(const MachineInstr &MI, 81940b57cec5SDimitry Andric int &FrameIndex) const { 81950b57cec5SDimitry Andric const MachineOperand *Addr = getNamedOperand(MI, AMDGPU::OpName::vaddr); 81960b57cec5SDimitry Andric if (!Addr || !Addr->isFI()) 8197bdd1243dSDimitry Andric return Register(); 81980b57cec5SDimitry Andric 81990b57cec5SDimitry Andric assert(!MI.memoperands_empty() && 82000b57cec5SDimitry Andric (*MI.memoperands_begin())->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS); 82010b57cec5SDimitry Andric 82020b57cec5SDimitry Andric FrameIndex = Addr->getIndex(); 82030b57cec5SDimitry Andric return getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg(); 82040b57cec5SDimitry Andric } 82050b57cec5SDimitry Andric 82060b57cec5SDimitry Andric unsigned SIInstrInfo::isSGPRStackAccess(const MachineInstr &MI, 82070b57cec5SDimitry Andric int &FrameIndex) const { 82080b57cec5SDimitry Andric const MachineOperand *Addr = getNamedOperand(MI, AMDGPU::OpName::addr); 82090b57cec5SDimitry Andric assert(Addr && Addr->isFI()); 82100b57cec5SDimitry Andric FrameIndex = Addr->getIndex(); 82110b57cec5SDimitry Andric return getNamedOperand(MI, AMDGPU::OpName::data)->getReg(); 82120b57cec5SDimitry Andric } 82130b57cec5SDimitry Andric 82140b57cec5SDimitry Andric unsigned SIInstrInfo::isLoadFromStackSlot(const MachineInstr &MI, 82150b57cec5SDimitry Andric int &FrameIndex) const { 82160b57cec5SDimitry Andric if (!MI.mayLoad()) 8217bdd1243dSDimitry Andric return Register(); 82180b57cec5SDimitry Andric 82190b57cec5SDimitry Andric if (isMUBUF(MI) || isVGPRSpill(MI)) 82200b57cec5SDimitry Andric return isStackAccess(MI, FrameIndex); 82210b57cec5SDimitry Andric 82220b57cec5SDimitry Andric if (isSGPRSpill(MI)) 82230b57cec5SDimitry Andric return isSGPRStackAccess(MI, FrameIndex); 82240b57cec5SDimitry Andric 8225bdd1243dSDimitry Andric return Register(); 82260b57cec5SDimitry Andric } 82270b57cec5SDimitry Andric 82280b57cec5SDimitry Andric unsigned SIInstrInfo::isStoreToStackSlot(const MachineInstr &MI, 82290b57cec5SDimitry Andric int &FrameIndex) const { 82300b57cec5SDimitry Andric if (!MI.mayStore()) 8231bdd1243dSDimitry Andric return Register(); 82320b57cec5SDimitry Andric 82330b57cec5SDimitry Andric if (isMUBUF(MI) || isVGPRSpill(MI)) 82340b57cec5SDimitry Andric return 
isStackAccess(MI, FrameIndex); 82350b57cec5SDimitry Andric 82360b57cec5SDimitry Andric if (isSGPRSpill(MI)) 82370b57cec5SDimitry Andric return isSGPRStackAccess(MI, FrameIndex); 82380b57cec5SDimitry Andric 8239bdd1243dSDimitry Andric return Register(); 82400b57cec5SDimitry Andric } 82410b57cec5SDimitry Andric 82420b57cec5SDimitry Andric unsigned SIInstrInfo::getInstBundleSize(const MachineInstr &MI) const { 82430b57cec5SDimitry Andric unsigned Size = 0; 82440b57cec5SDimitry Andric MachineBasicBlock::const_instr_iterator I = MI.getIterator(); 82450b57cec5SDimitry Andric MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end(); 82460b57cec5SDimitry Andric while (++I != E && I->isInsideBundle()) { 82470b57cec5SDimitry Andric assert(!I->isBundle() && "No nested bundle!"); 82480b57cec5SDimitry Andric Size += getInstSizeInBytes(*I); 82490b57cec5SDimitry Andric } 82500b57cec5SDimitry Andric 82510b57cec5SDimitry Andric return Size; 82520b57cec5SDimitry Andric } 82530b57cec5SDimitry Andric 82540b57cec5SDimitry Andric unsigned SIInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { 82550b57cec5SDimitry Andric unsigned Opc = MI.getOpcode(); 82560b57cec5SDimitry Andric const MCInstrDesc &Desc = getMCOpcodeFromPseudo(Opc); 82570b57cec5SDimitry Andric unsigned DescSize = Desc.getSize(); 82580b57cec5SDimitry Andric 82590b57cec5SDimitry Andric // If we have a definitive size, we can use it. Otherwise we need to inspect 82600b57cec5SDimitry Andric // the operands to know the size. 8261e8d8bef9SDimitry Andric if (isFixedSize(MI)) { 8262e8d8bef9SDimitry Andric unsigned Size = DescSize; 8263e8d8bef9SDimitry Andric 8264e8d8bef9SDimitry Andric // If we hit the buggy offset, an extra nop will be inserted in MC so 8265e8d8bef9SDimitry Andric // estimate the worst case. 8266e8d8bef9SDimitry Andric if (MI.isBranch() && ST.hasOffset3fBug()) 8267e8d8bef9SDimitry Andric Size += 4; 8268e8d8bef9SDimitry Andric 8269e8d8bef9SDimitry Andric return Size; 8270e8d8bef9SDimitry Andric } 82710b57cec5SDimitry Andric 8272349cc55cSDimitry Andric // Instructions may have a 32-bit literal encoded after them. Check 8273349cc55cSDimitry Andric // operands that could ever be literals. 82740b57cec5SDimitry Andric if (isVALU(MI) || isSALU(MI)) { 8275349cc55cSDimitry Andric if (isDPP(MI)) 82760b57cec5SDimitry Andric return DescSize; 8277349cc55cSDimitry Andric bool HasLiteral = false; 8278349cc55cSDimitry Andric for (int I = 0, E = MI.getNumExplicitOperands(); I != E; ++I) { 827981ad6265SDimitry Andric const MachineOperand &Op = MI.getOperand(I); 8280bdd1243dSDimitry Andric const MCOperandInfo &OpInfo = Desc.operands()[I]; 8281bdd1243dSDimitry Andric if (!Op.isReg() && !isInlineConstant(Op, OpInfo)) { 8282349cc55cSDimitry Andric HasLiteral = true; 8283349cc55cSDimitry Andric break; 8284349cc55cSDimitry Andric } 8285349cc55cSDimitry Andric } 8286349cc55cSDimitry Andric return HasLiteral ? DescSize + 4 : DescSize; 82870b57cec5SDimitry Andric } 82880b57cec5SDimitry Andric 82890b57cec5SDimitry Andric // Check whether we have extra NSA words. 
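// The arithmetic below counts the vaddr operands: the first address is held
// by the base 8-byte encoding and each extra NSA dword packs up to four more
// addresses, e.g. five address registers give 8 + 4 * ((5 + 2) / 4) = 12
// bytes.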
82900b57cec5SDimitry Andric if (isMIMG(MI)) { 82910b57cec5SDimitry Andric int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 82920b57cec5SDimitry Andric if (VAddr0Idx < 0) 82930b57cec5SDimitry Andric return 8; 82940b57cec5SDimitry Andric 82950b57cec5SDimitry Andric int RSrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 82960b57cec5SDimitry Andric return 8 + 4 * ((RSrcIdx - VAddr0Idx + 2) / 4); 82970b57cec5SDimitry Andric } 82980b57cec5SDimitry Andric 82990b57cec5SDimitry Andric switch (Opc) { 83000b57cec5SDimitry Andric case TargetOpcode::BUNDLE: 83010b57cec5SDimitry Andric return getInstBundleSize(MI); 83020b57cec5SDimitry Andric case TargetOpcode::INLINEASM: 83030b57cec5SDimitry Andric case TargetOpcode::INLINEASM_BR: { 83040b57cec5SDimitry Andric const MachineFunction *MF = MI.getParent()->getParent(); 83050b57cec5SDimitry Andric const char *AsmStr = MI.getOperand(0).getSymbolName(); 8306e8d8bef9SDimitry Andric return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo(), &ST); 83070b57cec5SDimitry Andric } 83080b57cec5SDimitry Andric default: 8309fe6060f1SDimitry Andric if (MI.isMetaInstruction()) 8310fe6060f1SDimitry Andric return 0; 83110b57cec5SDimitry Andric return DescSize; 83120b57cec5SDimitry Andric } 83130b57cec5SDimitry Andric } 83140b57cec5SDimitry Andric 83150b57cec5SDimitry Andric bool SIInstrInfo::mayAccessFlatAddressSpace(const MachineInstr &MI) const { 83160b57cec5SDimitry Andric if (!isFLAT(MI)) 83170b57cec5SDimitry Andric return false; 83180b57cec5SDimitry Andric 83190b57cec5SDimitry Andric if (MI.memoperands_empty()) 83200b57cec5SDimitry Andric return true; 83210b57cec5SDimitry Andric 83220b57cec5SDimitry Andric for (const MachineMemOperand *MMO : MI.memoperands()) { 83230b57cec5SDimitry Andric if (MMO->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS) 83240b57cec5SDimitry Andric return true; 83250b57cec5SDimitry Andric } 83260b57cec5SDimitry Andric return false; 83270b57cec5SDimitry Andric } 83280b57cec5SDimitry Andric 83290b57cec5SDimitry Andric bool SIInstrInfo::isNonUniformBranchInstr(MachineInstr &Branch) const { 83300b57cec5SDimitry Andric return Branch.getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO; 83310b57cec5SDimitry Andric } 83320b57cec5SDimitry Andric 83330b57cec5SDimitry Andric void SIInstrInfo::convertNonUniformIfRegion(MachineBasicBlock *IfEntry, 83340b57cec5SDimitry Andric MachineBasicBlock *IfEnd) const { 83350b57cec5SDimitry Andric MachineBasicBlock::iterator TI = IfEntry->getFirstTerminator(); 83360b57cec5SDimitry Andric assert(TI != IfEntry->end()); 83370b57cec5SDimitry Andric 83380b57cec5SDimitry Andric MachineInstr *Branch = &(*TI); 83390b57cec5SDimitry Andric MachineFunction *MF = IfEntry->getParent(); 83400b57cec5SDimitry Andric MachineRegisterInfo &MRI = IfEntry->getParent()->getRegInfo(); 83410b57cec5SDimitry Andric 83420b57cec5SDimitry Andric if (Branch->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) { 83438bcb0991SDimitry Andric Register DstReg = MRI.createVirtualRegister(RI.getBoolRC()); 83440b57cec5SDimitry Andric MachineInstr *SIIF = 83450b57cec5SDimitry Andric BuildMI(*MF, Branch->getDebugLoc(), get(AMDGPU::SI_IF), DstReg) 83460b57cec5SDimitry Andric .add(Branch->getOperand(0)) 83470b57cec5SDimitry Andric .add(Branch->getOperand(1)); 83480b57cec5SDimitry Andric MachineInstr *SIEND = 83490b57cec5SDimitry Andric BuildMI(*MF, Branch->getDebugLoc(), get(AMDGPU::SI_END_CF)) 83500b57cec5SDimitry Andric .addReg(DstReg); 83510b57cec5SDimitry Andric 83520b57cec5SDimitry Andric IfEntry->erase(TI); 
83530b57cec5SDimitry Andric IfEntry->insert(IfEntry->end(), SIIF); 83540b57cec5SDimitry Andric IfEnd->insert(IfEnd->getFirstNonPHI(), SIEND); 83550b57cec5SDimitry Andric } 83560b57cec5SDimitry Andric } 83570b57cec5SDimitry Andric 83580b57cec5SDimitry Andric void SIInstrInfo::convertNonUniformLoopRegion( 83590b57cec5SDimitry Andric MachineBasicBlock *LoopEntry, MachineBasicBlock *LoopEnd) const { 83600b57cec5SDimitry Andric MachineBasicBlock::iterator TI = LoopEnd->getFirstTerminator(); 83610b57cec5SDimitry Andric // We expect 2 terminators, one conditional and one unconditional. 83620b57cec5SDimitry Andric assert(TI != LoopEnd->end()); 83630b57cec5SDimitry Andric 83640b57cec5SDimitry Andric MachineInstr *Branch = &(*TI); 83650b57cec5SDimitry Andric MachineFunction *MF = LoopEnd->getParent(); 83660b57cec5SDimitry Andric MachineRegisterInfo &MRI = LoopEnd->getParent()->getRegInfo(); 83670b57cec5SDimitry Andric 83680b57cec5SDimitry Andric if (Branch->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) { 83690b57cec5SDimitry Andric 83708bcb0991SDimitry Andric Register DstReg = MRI.createVirtualRegister(RI.getBoolRC()); 83718bcb0991SDimitry Andric Register BackEdgeReg = MRI.createVirtualRegister(RI.getBoolRC()); 83720b57cec5SDimitry Andric MachineInstrBuilder HeaderPHIBuilder = 83730b57cec5SDimitry Andric BuildMI(*(MF), Branch->getDebugLoc(), get(TargetOpcode::PHI), DstReg); 8374349cc55cSDimitry Andric for (MachineBasicBlock *PMBB : LoopEntry->predecessors()) { 8375349cc55cSDimitry Andric if (PMBB == LoopEnd) { 83760b57cec5SDimitry Andric HeaderPHIBuilder.addReg(BackEdgeReg); 83770b57cec5SDimitry Andric } else { 83788bcb0991SDimitry Andric Register ZeroReg = MRI.createVirtualRegister(RI.getBoolRC()); 83790b57cec5SDimitry Andric materializeImmediate(*PMBB, PMBB->getFirstTerminator(), DebugLoc(), 83800b57cec5SDimitry Andric ZeroReg, 0); 83810b57cec5SDimitry Andric HeaderPHIBuilder.addReg(ZeroReg); 83820b57cec5SDimitry Andric } 8383349cc55cSDimitry Andric HeaderPHIBuilder.addMBB(PMBB); 83840b57cec5SDimitry Andric } 83850b57cec5SDimitry Andric MachineInstr *HeaderPhi = HeaderPHIBuilder; 83860b57cec5SDimitry Andric MachineInstr *SIIFBREAK = BuildMI(*(MF), Branch->getDebugLoc(), 83870b57cec5SDimitry Andric get(AMDGPU::SI_IF_BREAK), BackEdgeReg) 83880b57cec5SDimitry Andric .addReg(DstReg) 83890b57cec5SDimitry Andric .add(Branch->getOperand(0)); 83900b57cec5SDimitry Andric MachineInstr *SILOOP = 83910b57cec5SDimitry Andric BuildMI(*(MF), Branch->getDebugLoc(), get(AMDGPU::SI_LOOP)) 83920b57cec5SDimitry Andric .addReg(BackEdgeReg) 83930b57cec5SDimitry Andric .addMBB(LoopEntry); 83940b57cec5SDimitry Andric 83950b57cec5SDimitry Andric LoopEntry->insert(LoopEntry->begin(), HeaderPhi); 83960b57cec5SDimitry Andric LoopEnd->erase(TI); 83970b57cec5SDimitry Andric LoopEnd->insert(LoopEnd->end(), SIIFBREAK); 83980b57cec5SDimitry Andric LoopEnd->insert(LoopEnd->end(), SILOOP); 83990b57cec5SDimitry Andric } 84000b57cec5SDimitry Andric } 84010b57cec5SDimitry Andric 84020b57cec5SDimitry Andric ArrayRef<std::pair<int, const char *>> 84030b57cec5SDimitry Andric SIInstrInfo::getSerializableTargetIndices() const { 84040b57cec5SDimitry Andric static const std::pair<int, const char *> TargetIndices[] = { 84050b57cec5SDimitry Andric {AMDGPU::TI_CONSTDATA_START, "amdgpu-constdata-start"}, 84060b57cec5SDimitry Andric {AMDGPU::TI_SCRATCH_RSRC_DWORD0, "amdgpu-scratch-rsrc-dword0"}, 84070b57cec5SDimitry Andric {AMDGPU::TI_SCRATCH_RSRC_DWORD1, "amdgpu-scratch-rsrc-dword1"}, 84080b57cec5SDimitry Andric 
{AMDGPU::TI_SCRATCH_RSRC_DWORD2, "amdgpu-scratch-rsrc-dword2"}, 84090b57cec5SDimitry Andric {AMDGPU::TI_SCRATCH_RSRC_DWORD3, "amdgpu-scratch-rsrc-dword3"}}; 8410bdd1243dSDimitry Andric return ArrayRef(TargetIndices); 84110b57cec5SDimitry Andric } 84120b57cec5SDimitry Andric 84130b57cec5SDimitry Andric /// This is used by the post-RA scheduler (SchedulePostRAList.cpp). The 84140b57cec5SDimitry Andric /// post-RA version of misched uses CreateTargetMIHazardRecognizer. 84150b57cec5SDimitry Andric ScheduleHazardRecognizer * 84160b57cec5SDimitry Andric SIInstrInfo::CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, 84170b57cec5SDimitry Andric const ScheduleDAG *DAG) const { 84180b57cec5SDimitry Andric return new GCNHazardRecognizer(DAG->MF); 84190b57cec5SDimitry Andric } 84200b57cec5SDimitry Andric 84210b57cec5SDimitry Andric /// This is the hazard recognizer used at -O0 by the PostRAHazardRecognizer 84220b57cec5SDimitry Andric /// pass. 84230b57cec5SDimitry Andric ScheduleHazardRecognizer * 84240b57cec5SDimitry Andric SIInstrInfo::CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const { 84250b57cec5SDimitry Andric return new GCNHazardRecognizer(MF); 84260b57cec5SDimitry Andric } 84270b57cec5SDimitry Andric 8428349cc55cSDimitry Andric // Called during: 8429349cc55cSDimitry Andric // - pre-RA scheduling and post-RA scheduling 8430349cc55cSDimitry Andric ScheduleHazardRecognizer * 8431349cc55cSDimitry Andric SIInstrInfo::CreateTargetMIHazardRecognizer(const InstrItineraryData *II, 8432349cc55cSDimitry Andric const ScheduleDAGMI *DAG) const { 8433349cc55cSDimitry Andric // Borrowed from Arm Target 8434349cc55cSDimitry Andric // We would like to restrict this hazard recognizer to only 8435349cc55cSDimitry Andric // post-RA scheduling; we can tell that we're post-RA because we don't 8436349cc55cSDimitry Andric // track VRegLiveness. 
8437349cc55cSDimitry Andric if (!DAG->hasVRegLiveness()) 8438349cc55cSDimitry Andric return new GCNHazardRecognizer(DAG->MF); 8439349cc55cSDimitry Andric return TargetInstrInfo::CreateTargetMIHazardRecognizer(II, DAG); 8440349cc55cSDimitry Andric } 8441349cc55cSDimitry Andric 84420b57cec5SDimitry Andric std::pair<unsigned, unsigned> 84430b57cec5SDimitry Andric SIInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const { 8444bdd1243dSDimitry Andric return std::pair(TF & MO_MASK, TF & ~MO_MASK); 84450b57cec5SDimitry Andric } 84460b57cec5SDimitry Andric 84470b57cec5SDimitry Andric ArrayRef<std::pair<unsigned, const char *>> 84480b57cec5SDimitry Andric SIInstrInfo::getSerializableDirectMachineOperandTargetFlags() const { 84490b57cec5SDimitry Andric static const std::pair<unsigned, const char *> TargetFlags[] = { 84500b57cec5SDimitry Andric { MO_GOTPCREL, "amdgpu-gotprel" }, 84510b57cec5SDimitry Andric { MO_GOTPCREL32_LO, "amdgpu-gotprel32-lo" }, 84520b57cec5SDimitry Andric { MO_GOTPCREL32_HI, "amdgpu-gotprel32-hi" }, 84530b57cec5SDimitry Andric { MO_REL32_LO, "amdgpu-rel32-lo" }, 84540b57cec5SDimitry Andric { MO_REL32_HI, "amdgpu-rel32-hi" }, 84550b57cec5SDimitry Andric { MO_ABS32_LO, "amdgpu-abs32-lo" }, 84560b57cec5SDimitry Andric { MO_ABS32_HI, "amdgpu-abs32-hi" }, 84570b57cec5SDimitry Andric }; 84580b57cec5SDimitry Andric 8459bdd1243dSDimitry Andric return ArrayRef(TargetFlags); 84600b57cec5SDimitry Andric } 84610b57cec5SDimitry Andric 846281ad6265SDimitry Andric ArrayRef<std::pair<MachineMemOperand::Flags, const char *>> 846381ad6265SDimitry Andric SIInstrInfo::getSerializableMachineMemOperandTargetFlags() const { 846481ad6265SDimitry Andric static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] = 846581ad6265SDimitry Andric { 846681ad6265SDimitry Andric {MONoClobber, "amdgpu-noclobber"}, 846781ad6265SDimitry Andric }; 846881ad6265SDimitry Andric 8469bdd1243dSDimitry Andric return ArrayRef(TargetFlags); 847081ad6265SDimitry Andric } 847181ad6265SDimitry Andric 8472*5f757f3fSDimitry Andric unsigned SIInstrInfo::getLiveRangeSplitOpcode(Register SrcReg, 8473*5f757f3fSDimitry Andric const MachineFunction &MF) const { 8474*5f757f3fSDimitry Andric const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); 8475*5f757f3fSDimitry Andric assert(SrcReg.isVirtual()); 8476*5f757f3fSDimitry Andric if (MFI->checkFlag(SrcReg, AMDGPU::VirtRegFlag::WWM_REG)) 8477*5f757f3fSDimitry Andric return AMDGPU::WWM_COPY; 8478*5f757f3fSDimitry Andric 8479*5f757f3fSDimitry Andric return AMDGPU::COPY; 8480*5f757f3fSDimitry Andric } 8481*5f757f3fSDimitry Andric 8482*5f757f3fSDimitry Andric bool SIInstrInfo::isBasicBlockPrologue(const MachineInstr &MI, 8483*5f757f3fSDimitry Andric Register Reg) const { 8484*5f757f3fSDimitry Andric // We need to handle instructions which may be inserted during register 8485*5f757f3fSDimitry Andric // allocation to handle the prolog. The initial prolog instruction may have 8486*5f757f3fSDimitry Andric // been separated from the start of the block by spills and copies inserted 8487*5f757f3fSDimitry Andric // needed by the prolog. However, the insertions for scalar registers can 8488*5f757f3fSDimitry Andric // always be placed at the BB top as they are independent of the exec mask 8489*5f757f3fSDimitry Andric // value. 
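// Concretely, the check below accepts spill pseudos and any non-terminator,
// non-COPY instruction that modifies EXEC, provided the register being asked
// about is either null or not an SGPR.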
8490*5f757f3fSDimitry Andric bool IsNullOrVectorRegister = true; 8491*5f757f3fSDimitry Andric if (Reg) { 8492*5f757f3fSDimitry Andric const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); 8493*5f757f3fSDimitry Andric IsNullOrVectorRegister = !RI.isSGPRClass(RI.getRegClassForReg(MRI, Reg)); 8494*5f757f3fSDimitry Andric } 8495*5f757f3fSDimitry Andric 8496*5f757f3fSDimitry Andric uint16_t Opc = MI.getOpcode(); 8497*5f757f3fSDimitry Andric // FIXME: Copies inserted in the block prolog for live-range split should also 8498*5f757f3fSDimitry Andric // be included. 8499*5f757f3fSDimitry Andric return IsNullOrVectorRegister && 8500*5f757f3fSDimitry Andric (isSpillOpcode(Opc) || (!MI.isTerminator() && Opc != AMDGPU::COPY && 8501*5f757f3fSDimitry Andric MI.modifiesRegister(AMDGPU::EXEC, &RI))); 85020b57cec5SDimitry Andric } 85030b57cec5SDimitry Andric 85040b57cec5SDimitry Andric MachineInstrBuilder 85050b57cec5SDimitry Andric SIInstrInfo::getAddNoCarry(MachineBasicBlock &MBB, 85060b57cec5SDimitry Andric MachineBasicBlock::iterator I, 85070b57cec5SDimitry Andric const DebugLoc &DL, 85085ffd83dbSDimitry Andric Register DestReg) const { 85090b57cec5SDimitry Andric if (ST.hasAddNoCarry()) 85100b57cec5SDimitry Andric return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_U32_e64), DestReg); 85110b57cec5SDimitry Andric 85120b57cec5SDimitry Andric MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 85138bcb0991SDimitry Andric Register UnusedCarry = MRI.createVirtualRegister(RI.getBoolRC()); 85140b57cec5SDimitry Andric MRI.setRegAllocationHint(UnusedCarry, 0, RI.getVCC()); 85150b57cec5SDimitry Andric 8516e8d8bef9SDimitry Andric return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_CO_U32_e64), DestReg) 85170b57cec5SDimitry Andric .addReg(UnusedCarry, RegState::Define | RegState::Dead); 85180b57cec5SDimitry Andric } 85190b57cec5SDimitry Andric 85208bcb0991SDimitry Andric MachineInstrBuilder SIInstrInfo::getAddNoCarry(MachineBasicBlock &MBB, 85218bcb0991SDimitry Andric MachineBasicBlock::iterator I, 85228bcb0991SDimitry Andric const DebugLoc &DL, 85238bcb0991SDimitry Andric Register DestReg, 85248bcb0991SDimitry Andric RegScavenger &RS) const { 85258bcb0991SDimitry Andric if (ST.hasAddNoCarry()) 85268bcb0991SDimitry Andric return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_U32_e32), DestReg); 85278bcb0991SDimitry Andric 8528480093f4SDimitry Andric // If available, prefer to use vcc. 8529480093f4SDimitry Andric Register UnusedCarry = !RS.isRegUsed(AMDGPU::VCC) 8530480093f4SDimitry Andric ? Register(RI.getVCC()) 853106c3fb27SDimitry Andric : RS.scavengeRegisterBackwards( 853206c3fb27SDimitry Andric *RI.getBoolRC(), I, /* RestoreAfter */ false, 853306c3fb27SDimitry Andric 0, /* AllowSpill */ false); 8534480093f4SDimitry Andric 85358bcb0991SDimitry Andric // TODO: Users need to deal with this. 
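// A null UnusedCarry below means scavenging failed; the empty
// MachineInstrBuilder returned in that case is the caller's cue to handle it.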
85368bcb0991SDimitry Andric if (!UnusedCarry.isValid()) 85378bcb0991SDimitry Andric return MachineInstrBuilder(); 85388bcb0991SDimitry Andric 8539e8d8bef9SDimitry Andric return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_CO_U32_e64), DestReg) 85408bcb0991SDimitry Andric .addReg(UnusedCarry, RegState::Define | RegState::Dead); 85418bcb0991SDimitry Andric } 85428bcb0991SDimitry Andric 85430b57cec5SDimitry Andric bool SIInstrInfo::isKillTerminator(unsigned Opcode) { 85440b57cec5SDimitry Andric switch (Opcode) { 85450b57cec5SDimitry Andric case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR: 85460b57cec5SDimitry Andric case AMDGPU::SI_KILL_I1_TERMINATOR: 85470b57cec5SDimitry Andric return true; 85480b57cec5SDimitry Andric default: 85490b57cec5SDimitry Andric return false; 85500b57cec5SDimitry Andric } 85510b57cec5SDimitry Andric } 85520b57cec5SDimitry Andric 85530b57cec5SDimitry Andric const MCInstrDesc &SIInstrInfo::getKillTerminatorFromPseudo(unsigned Opcode) const { 85540b57cec5SDimitry Andric switch (Opcode) { 85550b57cec5SDimitry Andric case AMDGPU::SI_KILL_F32_COND_IMM_PSEUDO: 85560b57cec5SDimitry Andric return get(AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR); 85570b57cec5SDimitry Andric case AMDGPU::SI_KILL_I1_PSEUDO: 85580b57cec5SDimitry Andric return get(AMDGPU::SI_KILL_I1_TERMINATOR); 85590b57cec5SDimitry Andric default: 85600b57cec5SDimitry Andric llvm_unreachable("invalid opcode, expected SI_KILL_*_PSEUDO"); 85610b57cec5SDimitry Andric } 85620b57cec5SDimitry Andric } 85630b57cec5SDimitry Andric 8564*5f757f3fSDimitry Andric bool SIInstrInfo::isLegalMUBUFImmOffset(unsigned Imm) const { 8565*5f757f3fSDimitry Andric return Imm <= getMaxMUBUFImmOffset(ST); 8566*5f757f3fSDimitry Andric } 8567*5f757f3fSDimitry Andric 8568*5f757f3fSDimitry Andric unsigned SIInstrInfo::getMaxMUBUFImmOffset(const GCNSubtarget &ST) { 8569*5f757f3fSDimitry Andric // GFX12 field is non-negative 24-bit signed byte offset. 8570*5f757f3fSDimitry Andric const unsigned OffsetBits = 8571*5f757f3fSDimitry Andric ST.getGeneration() >= AMDGPUSubtarget::GFX12 ? 23 : 12; 8572*5f757f3fSDimitry Andric return (1 << OffsetBits) - 1; 8573*5f757f3fSDimitry Andric } 857406c3fb27SDimitry Andric 85750b57cec5SDimitry Andric void SIInstrInfo::fixImplicitOperands(MachineInstr &MI) const { 85760b57cec5SDimitry Andric if (!ST.isWave32()) 85770b57cec5SDimitry Andric return; 85780b57cec5SDimitry Andric 857906c3fb27SDimitry Andric if (MI.isInlineAsm()) 858006c3fb27SDimitry Andric return; 858106c3fb27SDimitry Andric 85820b57cec5SDimitry Andric for (auto &Op : MI.implicit_operands()) { 85830b57cec5SDimitry Andric if (Op.isReg() && Op.getReg() == AMDGPU::VCC) 85840b57cec5SDimitry Andric Op.setReg(AMDGPU::VCC_LO); 85850b57cec5SDimitry Andric } 85860b57cec5SDimitry Andric } 85870b57cec5SDimitry Andric 85880b57cec5SDimitry Andric bool SIInstrInfo::isBufferSMRD(const MachineInstr &MI) const { 85890b57cec5SDimitry Andric if (!isSMRD(MI)) 85900b57cec5SDimitry Andric return false; 85910b57cec5SDimitry Andric 85920b57cec5SDimitry Andric // Check that it is using a buffer resource. 85930b57cec5SDimitry Andric int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sbase); 85940b57cec5SDimitry Andric if (Idx == -1) // e.g. 
s_memtime 85950b57cec5SDimitry Andric return false; 85960b57cec5SDimitry Andric 8597bdd1243dSDimitry Andric const auto RCID = MI.getDesc().operands()[Idx].RegClass; 85988bcb0991SDimitry Andric return RI.getRegClass(RCID)->hasSubClassEq(&AMDGPU::SGPR_128RegClass); 85998bcb0991SDimitry Andric } 86008bcb0991SDimitry Andric 860106c3fb27SDimitry Andric // Given Imm, split it into the values to put into the SOffset and ImmOffset 860206c3fb27SDimitry Andric // fields in an MUBUF instruction. Return false if it is not possible (due to a 860306c3fb27SDimitry Andric // hardware bug needing a workaround). 860406c3fb27SDimitry Andric // 860506c3fb27SDimitry Andric // The required alignment ensures that individual address components remain 860606c3fb27SDimitry Andric // aligned if they are aligned to begin with. It also ensures that additional 860706c3fb27SDimitry Andric // offsets within the given alignment can be added to the resulting ImmOffset. 860806c3fb27SDimitry Andric bool SIInstrInfo::splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, 860906c3fb27SDimitry Andric uint32_t &ImmOffset, Align Alignment) const { 8610*5f757f3fSDimitry Andric const uint32_t MaxOffset = SIInstrInfo::getMaxMUBUFImmOffset(ST); 861106c3fb27SDimitry Andric const uint32_t MaxImm = alignDown(MaxOffset, Alignment.value()); 861206c3fb27SDimitry Andric uint32_t Overflow = 0; 861306c3fb27SDimitry Andric 861406c3fb27SDimitry Andric if (Imm > MaxImm) { 861506c3fb27SDimitry Andric if (Imm <= MaxImm + 64) { 861606c3fb27SDimitry Andric // Use an SOffset inline constant for 4..64 861706c3fb27SDimitry Andric Overflow = Imm - MaxImm; 861806c3fb27SDimitry Andric Imm = MaxImm; 861906c3fb27SDimitry Andric } else { 862006c3fb27SDimitry Andric // Try to keep the same value in SOffset for adjacent loads, so that 862106c3fb27SDimitry Andric // the corresponding register contents can be re-used. 862206c3fb27SDimitry Andric // 862306c3fb27SDimitry Andric // Load values with all low-bits (except for alignment bits) set into 862406c3fb27SDimitry Andric // SOffset, so that a larger range of values can be covered using 862506c3fb27SDimitry Andric // s_movk_i32. 862606c3fb27SDimitry Andric // 862706c3fb27SDimitry Andric // Atomic operations fail to work correctly when individual address 862806c3fb27SDimitry Andric // components are unaligned, even if their sum is aligned. 862906c3fb27SDimitry Andric uint32_t High = (Imm + Alignment.value()) & ~MaxOffset; 863006c3fb27SDimitry Andric uint32_t Low = (Imm + Alignment.value()) & MaxOffset; 863106c3fb27SDimitry Andric Imm = Low; 863206c3fb27SDimitry Andric Overflow = High - Alignment.value(); 863306c3fb27SDimitry Andric } 863406c3fb27SDimitry Andric } 863506c3fb27SDimitry Andric 8636*5f757f3fSDimitry Andric if (Overflow > 0) { 863706c3fb27SDimitry Andric // There is a hardware bug in SI and CI which prevents address clamping in 863806c3fb27SDimitry Andric // MUBUF instructions from working correctly with SOffsets. The immediate 863906c3fb27SDimitry Andric // offset is unaffected. 8640*5f757f3fSDimitry Andric if (ST.getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS) 864106c3fb27SDimitry Andric return false; 864206c3fb27SDimitry Andric 8643*5f757f3fSDimitry Andric // It is not possible to set immediate in SOffset field on some targets. 
8644*5f757f3fSDimitry Andric if (ST.hasRestrictedSOffset()) 8645*5f757f3fSDimitry Andric return false; 8646*5f757f3fSDimitry Andric } 8647*5f757f3fSDimitry Andric 864806c3fb27SDimitry Andric ImmOffset = Imm; 864906c3fb27SDimitry Andric SOffset = Overflow; 865006c3fb27SDimitry Andric return true; 865106c3fb27SDimitry Andric } 865206c3fb27SDimitry Andric 8653fe6060f1SDimitry Andric // Depending on the used address space and instructions, some immediate offsets 8654fe6060f1SDimitry Andric // are allowed and some are not. 8655fe6060f1SDimitry Andric // In general, flat instruction offsets can only be non-negative, global and 8656fe6060f1SDimitry Andric // scratch instruction offsets can also be negative. 8657fe6060f1SDimitry Andric // 8658fe6060f1SDimitry Andric // There are several bugs related to these offsets: 8659fe6060f1SDimitry Andric // On gfx10.1, flat instructions that go into the global address space cannot 8660fe6060f1SDimitry Andric // use an offset. 8661fe6060f1SDimitry Andric // 8662fe6060f1SDimitry Andric // For scratch instructions, the address can be either an SGPR or a VGPR. 8663fe6060f1SDimitry Andric // The following offsets can be used, depending on the architecture (x means 8664fe6060f1SDimitry Andric // cannot be used): 8665fe6060f1SDimitry Andric // +----------------------------+------+------+ 8666fe6060f1SDimitry Andric // | Address-Mode | SGPR | VGPR | 8667fe6060f1SDimitry Andric // +----------------------------+------+------+ 8668fe6060f1SDimitry Andric // | gfx9 | | | 8669fe6060f1SDimitry Andric // | negative, 4-aligned offset | x | ok | 8670fe6060f1SDimitry Andric // | negative, unaligned offset | x | ok | 8671fe6060f1SDimitry Andric // +----------------------------+------+------+ 8672fe6060f1SDimitry Andric // | gfx10 | | | 8673fe6060f1SDimitry Andric // | negative, 4-aligned offset | ok | ok | 8674fe6060f1SDimitry Andric // | negative, unaligned offset | ok | x | 8675fe6060f1SDimitry Andric // +----------------------------+------+------+ 8676fe6060f1SDimitry Andric // | gfx10.3 | | | 8677fe6060f1SDimitry Andric // | negative, 4-aligned offset | ok | ok | 8678fe6060f1SDimitry Andric // | negative, unaligned offset | ok | ok | 8679fe6060f1SDimitry Andric // +----------------------------+------+------+ 8680fe6060f1SDimitry Andric // 8681fe6060f1SDimitry Andric // This function ignores the addressing mode, so if an offset cannot be used in 8682fe6060f1SDimitry Andric // one addressing mode, it is considered illegal. 86830b57cec5SDimitry Andric bool SIInstrInfo::isLegalFLATOffset(int64_t Offset, unsigned AddrSpace, 8684fe6060f1SDimitry Andric uint64_t FlatVariant) const { 86850b57cec5SDimitry Andric // TODO: Should 0 be special cased? 
86860b57cec5SDimitry Andric if (!ST.hasFlatInstOffsets()) 86870b57cec5SDimitry Andric return false; 86880b57cec5SDimitry Andric 8689fe6060f1SDimitry Andric if (ST.hasFlatSegmentOffsetBug() && FlatVariant == SIInstrFlags::FLAT && 8690fe6060f1SDimitry Andric (AddrSpace == AMDGPUAS::FLAT_ADDRESS || 8691fe6060f1SDimitry Andric AddrSpace == AMDGPUAS::GLOBAL_ADDRESS)) 86920b57cec5SDimitry Andric return false; 86930b57cec5SDimitry Andric 8694fe6060f1SDimitry Andric if (ST.hasNegativeUnalignedScratchOffsetBug() && 8695fe6060f1SDimitry Andric FlatVariant == SIInstrFlags::FlatScratch && Offset < 0 && 8696fe6060f1SDimitry Andric (Offset % 4) != 0) { 8697fe6060f1SDimitry Andric return false; 8698fe6060f1SDimitry Andric } 8699fe6060f1SDimitry Andric 8700*5f757f3fSDimitry Andric bool AllowNegative = allowNegativeFlatOffset(FlatVariant); 8701bdd1243dSDimitry Andric unsigned N = AMDGPU::getNumFlatOffsetBits(ST); 8702bdd1243dSDimitry Andric return isIntN(N, Offset) && (AllowNegative || Offset >= 0); 87030b57cec5SDimitry Andric } 87040b57cec5SDimitry Andric 8705fe6060f1SDimitry Andric // See comment on SIInstrInfo::isLegalFLATOffset for what is legal and what not. 8706fe6060f1SDimitry Andric std::pair<int64_t, int64_t> 8707fe6060f1SDimitry Andric SIInstrInfo::splitFlatOffset(int64_t COffsetVal, unsigned AddrSpace, 8708fe6060f1SDimitry Andric uint64_t FlatVariant) const { 8709e8d8bef9SDimitry Andric int64_t RemainderOffset = COffsetVal; 8710e8d8bef9SDimitry Andric int64_t ImmField = 0; 8711fe6060f1SDimitry Andric 8712*5f757f3fSDimitry Andric bool AllowNegative = allowNegativeFlatOffset(FlatVariant); 8713bdd1243dSDimitry Andric const unsigned NumBits = AMDGPU::getNumFlatOffsetBits(ST) - 1; 8714*5f757f3fSDimitry Andric 8715bdd1243dSDimitry Andric if (AllowNegative) { 8716e8d8bef9SDimitry Andric // Use signed division by a power of two to truncate towards 0. 
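// A small worked example, assuming a 13-bit signed offset field so that
// NumBits == 12 and the divisor is 4096: COffsetVal == -5000 splits into
// RemainderOffset == -4096 (left for the caller to fold into the address) and
// ImmField == -904 (encoded in the instruction), because -5000 / 4096
// truncates towards zero to -1.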
8717bdd1243dSDimitry Andric int64_t D = 1LL << NumBits; 8718e8d8bef9SDimitry Andric RemainderOffset = (COffsetVal / D) * D; 8719e8d8bef9SDimitry Andric ImmField = COffsetVal - RemainderOffset; 8720fe6060f1SDimitry Andric 8721fe6060f1SDimitry Andric if (ST.hasNegativeUnalignedScratchOffsetBug() && 8722fe6060f1SDimitry Andric FlatVariant == SIInstrFlags::FlatScratch && ImmField < 0 && 8723fe6060f1SDimitry Andric (ImmField % 4) != 0) { 8724fe6060f1SDimitry Andric // Make ImmField a multiple of 4 8725fe6060f1SDimitry Andric RemainderOffset += ImmField % 4; 8726fe6060f1SDimitry Andric ImmField -= ImmField % 4; 8727fe6060f1SDimitry Andric } 8728e8d8bef9SDimitry Andric } else if (COffsetVal >= 0) { 8729e8d8bef9SDimitry Andric ImmField = COffsetVal & maskTrailingOnes<uint64_t>(NumBits); 8730e8d8bef9SDimitry Andric RemainderOffset = COffsetVal - ImmField; 87310b57cec5SDimitry Andric } 87320b57cec5SDimitry Andric 8733fe6060f1SDimitry Andric assert(isLegalFLATOffset(ImmField, AddrSpace, FlatVariant)); 8734e8d8bef9SDimitry Andric assert(RemainderOffset + ImmField == COffsetVal); 8735e8d8bef9SDimitry Andric return {ImmField, RemainderOffset}; 8736e8d8bef9SDimitry Andric } 87370b57cec5SDimitry Andric 8738*5f757f3fSDimitry Andric bool SIInstrInfo::allowNegativeFlatOffset(uint64_t FlatVariant) const { 8739*5f757f3fSDimitry Andric if (ST.hasNegativeScratchOffsetBug() && 8740*5f757f3fSDimitry Andric FlatVariant == SIInstrFlags::FlatScratch) 8741*5f757f3fSDimitry Andric return false; 8742*5f757f3fSDimitry Andric 8743*5f757f3fSDimitry Andric return FlatVariant != SIInstrFlags::FLAT || AMDGPU::isGFX12Plus(ST); 8744*5f757f3fSDimitry Andric } 8745*5f757f3fSDimitry Andric 874606c3fb27SDimitry Andric static unsigned subtargetEncodingFamily(const GCNSubtarget &ST) { 87470b57cec5SDimitry Andric switch (ST.getGeneration()) { 87480b57cec5SDimitry Andric default: 87490b57cec5SDimitry Andric break; 87500b57cec5SDimitry Andric case AMDGPUSubtarget::SOUTHERN_ISLANDS: 87510b57cec5SDimitry Andric case AMDGPUSubtarget::SEA_ISLANDS: 87520b57cec5SDimitry Andric return SIEncodingFamily::SI; 87530b57cec5SDimitry Andric case AMDGPUSubtarget::VOLCANIC_ISLANDS: 87540b57cec5SDimitry Andric case AMDGPUSubtarget::GFX9: 87550b57cec5SDimitry Andric return SIEncodingFamily::VI; 87560b57cec5SDimitry Andric case AMDGPUSubtarget::GFX10: 87570b57cec5SDimitry Andric return SIEncodingFamily::GFX10; 875881ad6265SDimitry Andric case AMDGPUSubtarget::GFX11: 875981ad6265SDimitry Andric return SIEncodingFamily::GFX11; 8760*5f757f3fSDimitry Andric case AMDGPUSubtarget::GFX12: 8761*5f757f3fSDimitry Andric return SIEncodingFamily::GFX12; 87620b57cec5SDimitry Andric } 87630b57cec5SDimitry Andric llvm_unreachable("Unknown subtarget generation!"); 87640b57cec5SDimitry Andric } 87650b57cec5SDimitry Andric 8766480093f4SDimitry Andric bool SIInstrInfo::isAsmOnlyOpcode(int MCOp) const { 8767480093f4SDimitry Andric switch(MCOp) { 8768480093f4SDimitry Andric // These opcodes use indirect register addressing so 8769480093f4SDimitry Andric // they need special handling by codegen (currently missing). 8770480093f4SDimitry Andric // Therefore it is too risky to allow these opcodes 8771480093f4SDimitry Andric // to be selected by dpp combiner or sdwa peepholer. 
8772480093f4SDimitry Andric case AMDGPU::V_MOVRELS_B32_dpp_gfx10: 8773480093f4SDimitry Andric case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 8774480093f4SDimitry Andric case AMDGPU::V_MOVRELD_B32_dpp_gfx10: 8775480093f4SDimitry Andric case AMDGPU::V_MOVRELD_B32_sdwa_gfx10: 8776480093f4SDimitry Andric case AMDGPU::V_MOVRELSD_B32_dpp_gfx10: 8777480093f4SDimitry Andric case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 8778480093f4SDimitry Andric case AMDGPU::V_MOVRELSD_2_B32_dpp_gfx10: 8779480093f4SDimitry Andric case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 8780480093f4SDimitry Andric return true; 8781480093f4SDimitry Andric default: 8782480093f4SDimitry Andric return false; 8783480093f4SDimitry Andric } 8784480093f4SDimitry Andric } 8785480093f4SDimitry Andric 87860b57cec5SDimitry Andric int SIInstrInfo::pseudoToMCOpcode(int Opcode) const { 8787*5f757f3fSDimitry Andric if (SIInstrInfo::isSoftWaitcnt(Opcode)) 8788*5f757f3fSDimitry Andric Opcode = SIInstrInfo::getNonSoftWaitcntOpcode(Opcode); 8789*5f757f3fSDimitry Andric 879006c3fb27SDimitry Andric unsigned Gen = subtargetEncodingFamily(ST); 87910b57cec5SDimitry Andric 87920b57cec5SDimitry Andric if ((get(Opcode).TSFlags & SIInstrFlags::renamedInGFX9) != 0 && 87930b57cec5SDimitry Andric ST.getGeneration() == AMDGPUSubtarget::GFX9) 87940b57cec5SDimitry Andric Gen = SIEncodingFamily::GFX9; 87950b57cec5SDimitry Andric 87960b57cec5SDimitry Andric // Adjust the encoding family to GFX80 for D16 buffer instructions when the 87970b57cec5SDimitry Andric // subtarget has UnpackedD16VMem feature. 87980b57cec5SDimitry Andric // TODO: remove this when we discard GFX80 encoding. 87990b57cec5SDimitry Andric if (ST.hasUnpackedD16VMem() && (get(Opcode).TSFlags & SIInstrFlags::D16Buf)) 88000b57cec5SDimitry Andric Gen = SIEncodingFamily::GFX80; 88010b57cec5SDimitry Andric 88020b57cec5SDimitry Andric if (get(Opcode).TSFlags & SIInstrFlags::SDWA) { 88030b57cec5SDimitry Andric switch (ST.getGeneration()) { 88040b57cec5SDimitry Andric default: 88050b57cec5SDimitry Andric Gen = SIEncodingFamily::SDWA; 88060b57cec5SDimitry Andric break; 88070b57cec5SDimitry Andric case AMDGPUSubtarget::GFX9: 88080b57cec5SDimitry Andric Gen = SIEncodingFamily::SDWA9; 88090b57cec5SDimitry Andric break; 88100b57cec5SDimitry Andric case AMDGPUSubtarget::GFX10: 88110b57cec5SDimitry Andric Gen = SIEncodingFamily::SDWA10; 88120b57cec5SDimitry Andric break; 88130b57cec5SDimitry Andric } 88140b57cec5SDimitry Andric } 88150b57cec5SDimitry Andric 881604eeddc0SDimitry Andric if (isMAI(Opcode)) { 881704eeddc0SDimitry Andric int MFMAOp = AMDGPU::getMFMAEarlyClobberOp(Opcode); 881804eeddc0SDimitry Andric if (MFMAOp != -1) 881904eeddc0SDimitry Andric Opcode = MFMAOp; 882004eeddc0SDimitry Andric } 882104eeddc0SDimitry Andric 88220b57cec5SDimitry Andric int MCOp = AMDGPU::getMCOpcode(Opcode, Gen); 88230b57cec5SDimitry Andric 8824*5f757f3fSDimitry Andric // TODO-GFX12: Remove this. 8825*5f757f3fSDimitry Andric // Hack to allow some GFX12 codegen tests to run before all the encodings are 8826*5f757f3fSDimitry Andric // implemented. 8827*5f757f3fSDimitry Andric if (MCOp == (uint16_t)-1 && Gen == SIEncodingFamily::GFX12) 8828*5f757f3fSDimitry Andric MCOp = AMDGPU::getMCOpcode(Opcode, SIEncodingFamily::GFX11); 8829*5f757f3fSDimitry Andric 88300b57cec5SDimitry Andric // -1 means that Opcode is already a native instruction. 
88310b57cec5SDimitry Andric if (MCOp == -1) 88320b57cec5SDimitry Andric return Opcode; 88330b57cec5SDimitry Andric 8834fe6060f1SDimitry Andric if (ST.hasGFX90AInsts()) { 8835fe6060f1SDimitry Andric uint16_t NMCOp = (uint16_t)-1; 883681ad6265SDimitry Andric if (ST.hasGFX940Insts()) 883781ad6265SDimitry Andric NMCOp = AMDGPU::getMCOpcode(Opcode, SIEncodingFamily::GFX940); 883881ad6265SDimitry Andric if (NMCOp == (uint16_t)-1) 8839fe6060f1SDimitry Andric NMCOp = AMDGPU::getMCOpcode(Opcode, SIEncodingFamily::GFX90A); 8840fe6060f1SDimitry Andric if (NMCOp == (uint16_t)-1) 8841fe6060f1SDimitry Andric NMCOp = AMDGPU::getMCOpcode(Opcode, SIEncodingFamily::GFX9); 8842fe6060f1SDimitry Andric if (NMCOp != (uint16_t)-1) 8843fe6060f1SDimitry Andric MCOp = NMCOp; 8844fe6060f1SDimitry Andric } 8845fe6060f1SDimitry Andric 88460b57cec5SDimitry Andric // (uint16_t)-1 means that Opcode is a pseudo instruction that has 88470b57cec5SDimitry Andric // no encoding in the given subtarget generation. 88480b57cec5SDimitry Andric if (MCOp == (uint16_t)-1) 88490b57cec5SDimitry Andric return -1; 88500b57cec5SDimitry Andric 8851480093f4SDimitry Andric if (isAsmOnlyOpcode(MCOp)) 8852480093f4SDimitry Andric return -1; 8853480093f4SDimitry Andric 88540b57cec5SDimitry Andric return MCOp; 88550b57cec5SDimitry Andric } 88560b57cec5SDimitry Andric 88570b57cec5SDimitry Andric static 88580b57cec5SDimitry Andric TargetInstrInfo::RegSubRegPair getRegOrUndef(const MachineOperand &RegOpnd) { 88590b57cec5SDimitry Andric assert(RegOpnd.isReg()); 88600b57cec5SDimitry Andric return RegOpnd.isUndef() ? TargetInstrInfo::RegSubRegPair() : 88610b57cec5SDimitry Andric getRegSubRegPair(RegOpnd); 88620b57cec5SDimitry Andric } 88630b57cec5SDimitry Andric 88640b57cec5SDimitry Andric TargetInstrInfo::RegSubRegPair 88650b57cec5SDimitry Andric llvm::getRegSequenceSubReg(MachineInstr &MI, unsigned SubReg) { 88660b57cec5SDimitry Andric assert(MI.isRegSequence()); 88670b57cec5SDimitry Andric for (unsigned I = 0, E = (MI.getNumOperands() - 1)/ 2; I < E; ++I) 88680b57cec5SDimitry Andric if (MI.getOperand(1 + 2 * I + 1).getImm() == SubReg) { 88690b57cec5SDimitry Andric auto &RegOp = MI.getOperand(1 + 2 * I); 88700b57cec5SDimitry Andric return getRegOrUndef(RegOp); 88710b57cec5SDimitry Andric } 88720b57cec5SDimitry Andric return TargetInstrInfo::RegSubRegPair(); 88730b57cec5SDimitry Andric } 88740b57cec5SDimitry Andric 88750b57cec5SDimitry Andric // Try to find the definition of reg:subreg in subreg-manipulation pseudos 88760b57cec5SDimitry Andric // Following a subreg of reg:subreg isn't supported 88770b57cec5SDimitry Andric static bool followSubRegDef(MachineInstr &MI, 88780b57cec5SDimitry Andric TargetInstrInfo::RegSubRegPair &RSR) { 88790b57cec5SDimitry Andric if (!RSR.SubReg) 88800b57cec5SDimitry Andric return false; 88810b57cec5SDimitry Andric switch (MI.getOpcode()) { 88820b57cec5SDimitry Andric default: break; 88830b57cec5SDimitry Andric case AMDGPU::REG_SEQUENCE: 88840b57cec5SDimitry Andric RSR = getRegSequenceSubReg(MI, RSR.SubReg); 88850b57cec5SDimitry Andric return true; 88860b57cec5SDimitry Andric // EXTRACT_SUBREG isn't supported as this would follow a subreg of subreg 88870b57cec5SDimitry Andric case AMDGPU::INSERT_SUBREG: 88880b57cec5SDimitry Andric if (RSR.SubReg == (unsigned)MI.getOperand(3).getImm()) 88890b57cec5SDimitry Andric // inserted the subreg we're looking for 88900b57cec5SDimitry Andric RSR = getRegOrUndef(MI.getOperand(2)); 88910b57cec5SDimitry Andric else { // the subreg in the rest of the reg 88920b57cec5SDimitry Andric
auto R1 = getRegOrUndef(MI.getOperand(1)); 88930b57cec5SDimitry Andric if (R1.SubReg) // subreg of subreg isn't supported 88940b57cec5SDimitry Andric return false; 88950b57cec5SDimitry Andric RSR.Reg = R1.Reg; 88960b57cec5SDimitry Andric } 88970b57cec5SDimitry Andric return true; 88980b57cec5SDimitry Andric } 88990b57cec5SDimitry Andric return false; 89000b57cec5SDimitry Andric } 89010b57cec5SDimitry Andric 89020b57cec5SDimitry Andric MachineInstr *llvm::getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P, 89030b57cec5SDimitry Andric MachineRegisterInfo &MRI) { 89040b57cec5SDimitry Andric assert(MRI.isSSA()); 8905e8d8bef9SDimitry Andric if (!P.Reg.isVirtual()) 89060b57cec5SDimitry Andric return nullptr; 89070b57cec5SDimitry Andric 89080b57cec5SDimitry Andric auto RSR = P; 89090b57cec5SDimitry Andric auto *DefInst = MRI.getVRegDef(RSR.Reg); 89100b57cec5SDimitry Andric while (auto *MI = DefInst) { 89110b57cec5SDimitry Andric DefInst = nullptr; 89120b57cec5SDimitry Andric switch (MI->getOpcode()) { 89130b57cec5SDimitry Andric case AMDGPU::COPY: 89140b57cec5SDimitry Andric case AMDGPU::V_MOV_B32_e32: { 89150b57cec5SDimitry Andric auto &Op1 = MI->getOperand(1); 8916e8d8bef9SDimitry Andric if (Op1.isReg() && Op1.getReg().isVirtual()) { 89170b57cec5SDimitry Andric if (Op1.isUndef()) 89180b57cec5SDimitry Andric return nullptr; 89190b57cec5SDimitry Andric RSR = getRegSubRegPair(Op1); 89200b57cec5SDimitry Andric DefInst = MRI.getVRegDef(RSR.Reg); 89210b57cec5SDimitry Andric } 89220b57cec5SDimitry Andric break; 89230b57cec5SDimitry Andric } 89240b57cec5SDimitry Andric default: 89250b57cec5SDimitry Andric if (followSubRegDef(*MI, RSR)) { 89260b57cec5SDimitry Andric if (!RSR.Reg) 89270b57cec5SDimitry Andric return nullptr; 89280b57cec5SDimitry Andric DefInst = MRI.getVRegDef(RSR.Reg); 89290b57cec5SDimitry Andric } 89300b57cec5SDimitry Andric } 89310b57cec5SDimitry Andric if (!DefInst) 89320b57cec5SDimitry Andric return MI; 89330b57cec5SDimitry Andric } 89340b57cec5SDimitry Andric return nullptr; 89350b57cec5SDimitry Andric } 89360b57cec5SDimitry Andric 89370b57cec5SDimitry Andric bool llvm::execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI, 89380b57cec5SDimitry Andric Register VReg, 89390b57cec5SDimitry Andric const MachineInstr &DefMI, 89400b57cec5SDimitry Andric const MachineInstr &UseMI) { 89410b57cec5SDimitry Andric assert(MRI.isSSA() && "Must be run on SSA"); 89420b57cec5SDimitry Andric 89430b57cec5SDimitry Andric auto *TRI = MRI.getTargetRegisterInfo(); 89440b57cec5SDimitry Andric auto *DefBB = DefMI.getParent(); 89450b57cec5SDimitry Andric 89460b57cec5SDimitry Andric // Don't bother searching between blocks, although it is possible this block 89470b57cec5SDimitry Andric // doesn't modify exec. 89480b57cec5SDimitry Andric if (UseMI.getParent() != DefBB) 89490b57cec5SDimitry Andric return true; 89500b57cec5SDimitry Andric 89510b57cec5SDimitry Andric const int MaxInstScan = 20; 89520b57cec5SDimitry Andric int NumInst = 0; 89530b57cec5SDimitry Andric 89540b57cec5SDimitry Andric // Stop scan at the use. 
89550b57cec5SDimitry Andric auto E = UseMI.getIterator(); 89560b57cec5SDimitry Andric for (auto I = std::next(DefMI.getIterator()); I != E; ++I) { 89570b57cec5SDimitry Andric if (I->isDebugInstr()) 89580b57cec5SDimitry Andric continue; 89590b57cec5SDimitry Andric 89600b57cec5SDimitry Andric if (++NumInst > MaxInstScan) 89610b57cec5SDimitry Andric return true; 89620b57cec5SDimitry Andric 89630b57cec5SDimitry Andric if (I->modifiesRegister(AMDGPU::EXEC, TRI)) 89640b57cec5SDimitry Andric return true; 89650b57cec5SDimitry Andric } 89660b57cec5SDimitry Andric 89670b57cec5SDimitry Andric return false; 89680b57cec5SDimitry Andric } 89690b57cec5SDimitry Andric 89700b57cec5SDimitry Andric bool llvm::execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI, 89710b57cec5SDimitry Andric Register VReg, 89720b57cec5SDimitry Andric const MachineInstr &DefMI) { 89730b57cec5SDimitry Andric assert(MRI.isSSA() && "Must be run on SSA"); 89740b57cec5SDimitry Andric 89750b57cec5SDimitry Andric auto *TRI = MRI.getTargetRegisterInfo(); 89760b57cec5SDimitry Andric auto *DefBB = DefMI.getParent(); 89770b57cec5SDimitry Andric 8978e8d8bef9SDimitry Andric const int MaxUseScan = 10; 8979e8d8bef9SDimitry Andric int NumUse = 0; 89800b57cec5SDimitry Andric 8981e8d8bef9SDimitry Andric for (auto &Use : MRI.use_nodbg_operands(VReg)) { 8982e8d8bef9SDimitry Andric auto &UseInst = *Use.getParent(); 89830b57cec5SDimitry Andric // Don't bother searching between blocks, although it is possible this block 89840b57cec5SDimitry Andric // doesn't modify exec. 898581ad6265SDimitry Andric if (UseInst.getParent() != DefBB || UseInst.isPHI()) 89860b57cec5SDimitry Andric return true; 89870b57cec5SDimitry Andric 8988e8d8bef9SDimitry Andric if (++NumUse > MaxUseScan) 89890b57cec5SDimitry Andric return true; 89900b57cec5SDimitry Andric } 89910b57cec5SDimitry Andric 8992e8d8bef9SDimitry Andric if (NumUse == 0) 8993e8d8bef9SDimitry Andric return false; 8994e8d8bef9SDimitry Andric 89950b57cec5SDimitry Andric const int MaxInstScan = 20; 89960b57cec5SDimitry Andric int NumInst = 0; 89970b57cec5SDimitry Andric 89980b57cec5SDimitry Andric // Stop scan when we have seen all the uses. 89990b57cec5SDimitry Andric for (auto I = std::next(DefMI.getIterator()); ; ++I) { 9000e8d8bef9SDimitry Andric assert(I != DefBB->end()); 9001e8d8bef9SDimitry Andric 90020b57cec5SDimitry Andric if (I->isDebugInstr()) 90030b57cec5SDimitry Andric continue; 90040b57cec5SDimitry Andric 90050b57cec5SDimitry Andric if (++NumInst > MaxInstScan) 90060b57cec5SDimitry Andric return true; 90070b57cec5SDimitry Andric 9008e8d8bef9SDimitry Andric for (const MachineOperand &Op : I->operands()) { 9009e8d8bef9SDimitry Andric // We don't check reg masks here as they're used only on calls: 9010e8d8bef9SDimitry Andric // 1. EXEC is only considered const within one BB 9011e8d8bef9SDimitry Andric // 2. 
Call should be a terminator instruction if present in a BB 90120b57cec5SDimitry Andric 9013e8d8bef9SDimitry Andric if (!Op.isReg()) 9014e8d8bef9SDimitry Andric continue; 9015e8d8bef9SDimitry Andric 9016e8d8bef9SDimitry Andric Register Reg = Op.getReg(); 9017e8d8bef9SDimitry Andric if (Op.isUse()) { 9018e8d8bef9SDimitry Andric if (Reg == VReg && --NumUse == 0) 9019e8d8bef9SDimitry Andric return false; 9020e8d8bef9SDimitry Andric } else if (TRI->regsOverlap(Reg, AMDGPU::EXEC)) 90210b57cec5SDimitry Andric return true; 90220b57cec5SDimitry Andric } 90230b57cec5SDimitry Andric } 9024e8d8bef9SDimitry Andric } 90258bcb0991SDimitry Andric 90268bcb0991SDimitry Andric MachineInstr *SIInstrInfo::createPHIDestinationCopy( 90278bcb0991SDimitry Andric MachineBasicBlock &MBB, MachineBasicBlock::iterator LastPHIIt, 90288bcb0991SDimitry Andric const DebugLoc &DL, Register Src, Register Dst) const { 90298bcb0991SDimitry Andric auto Cur = MBB.begin(); 90308bcb0991SDimitry Andric if (Cur != MBB.end()) 90318bcb0991SDimitry Andric do { 90328bcb0991SDimitry Andric if (!Cur->isPHI() && Cur->readsRegister(Dst)) 90338bcb0991SDimitry Andric return BuildMI(MBB, Cur, DL, get(TargetOpcode::COPY), Dst).addReg(Src); 90348bcb0991SDimitry Andric ++Cur; 90358bcb0991SDimitry Andric } while (Cur != MBB.end() && Cur != LastPHIIt); 90368bcb0991SDimitry Andric 90378bcb0991SDimitry Andric return TargetInstrInfo::createPHIDestinationCopy(MBB, LastPHIIt, DL, Src, 90388bcb0991SDimitry Andric Dst); 90398bcb0991SDimitry Andric } 90408bcb0991SDimitry Andric 90418bcb0991SDimitry Andric MachineInstr *SIInstrInfo::createPHISourceCopy( 90428bcb0991SDimitry Andric MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, 9043480093f4SDimitry Andric const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const { 90448bcb0991SDimitry Andric if (InsPt != MBB.end() && 90458bcb0991SDimitry Andric (InsPt->getOpcode() == AMDGPU::SI_IF || 90468bcb0991SDimitry Andric InsPt->getOpcode() == AMDGPU::SI_ELSE || 90478bcb0991SDimitry Andric InsPt->getOpcode() == AMDGPU::SI_IF_BREAK) && 90488bcb0991SDimitry Andric InsPt->definesRegister(Src)) { 90498bcb0991SDimitry Andric InsPt++; 9050480093f4SDimitry Andric return BuildMI(MBB, InsPt, DL, 90518bcb0991SDimitry Andric get(ST.isWave32() ? AMDGPU::S_MOV_B32_term 90528bcb0991SDimitry Andric : AMDGPU::S_MOV_B64_term), 90538bcb0991SDimitry Andric Dst) 90548bcb0991SDimitry Andric .addReg(Src, 0, SrcSubReg) 90558bcb0991SDimitry Andric .addReg(AMDGPU::EXEC, RegState::Implicit); 90568bcb0991SDimitry Andric } 90578bcb0991SDimitry Andric return TargetInstrInfo::createPHISourceCopy(MBB, InsPt, DL, Src, SrcSubReg, 90588bcb0991SDimitry Andric Dst); 90598bcb0991SDimitry Andric } 90608bcb0991SDimitry Andric 90618bcb0991SDimitry Andric bool llvm::SIInstrInfo::isWave32() const { return ST.isWave32(); } 9062480093f4SDimitry Andric 9063480093f4SDimitry Andric MachineInstr *SIInstrInfo::foldMemoryOperandImpl( 9064480093f4SDimitry Andric MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops, 9065480093f4SDimitry Andric MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS, 9066480093f4SDimitry Andric VirtRegMap *VRM) const { 9067480093f4SDimitry Andric // This is a bit of a hack (copied from AArch64). 
Consider this instruction: 9068480093f4SDimitry Andric // 9069480093f4SDimitry Andric // %0:sreg_32 = COPY $m0 9070480093f4SDimitry Andric // 9071480093f4SDimitry Andric // We explicitly chose SReg_32 for the virtual register so such a copy might 9072480093f4SDimitry Andric // be eliminated by RegisterCoalescer. However, that may not be possible, and 9073480093f4SDimitry Andric // %0 may even spill. We can't spill $m0 normally (it would require copying to 9074480093f4SDimitry Andric // a numbered SGPR anyway), and since it is in the SReg_32 register class, 9075480093f4SDimitry Andric // TargetInstrInfo::foldMemoryOperand() is going to try. 90765ffd83dbSDimitry Andric // A similar issue also exists with spilling and reloading $exec registers. 9077480093f4SDimitry Andric // 9078480093f4SDimitry Andric // To prevent that, constrain the %0 register class here. 9079*5f757f3fSDimitry Andric if (isFullCopyInstr(MI)) { 9080480093f4SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 9081480093f4SDimitry Andric Register SrcReg = MI.getOperand(1).getReg(); 90825ffd83dbSDimitry Andric if ((DstReg.isVirtual() || SrcReg.isVirtual()) && 90835ffd83dbSDimitry Andric (DstReg.isVirtual() != SrcReg.isVirtual())) { 90845ffd83dbSDimitry Andric MachineRegisterInfo &MRI = MF.getRegInfo(); 90855ffd83dbSDimitry Andric Register VirtReg = DstReg.isVirtual() ? DstReg : SrcReg; 90865ffd83dbSDimitry Andric const TargetRegisterClass *RC = MRI.getRegClass(VirtReg); 90875ffd83dbSDimitry Andric if (RC->hasSuperClassEq(&AMDGPU::SReg_32RegClass)) { 90885ffd83dbSDimitry Andric MRI.constrainRegClass(VirtReg, &AMDGPU::SReg_32_XM0_XEXECRegClass); 90895ffd83dbSDimitry Andric return nullptr; 90905ffd83dbSDimitry Andric } else if (RC->hasSuperClassEq(&AMDGPU::SReg_64RegClass)) { 90915ffd83dbSDimitry Andric MRI.constrainRegClass(VirtReg, &AMDGPU::SReg_64_XEXECRegClass); 9092480093f4SDimitry Andric return nullptr; 9093480093f4SDimitry Andric } 9094480093f4SDimitry Andric } 9095480093f4SDimitry Andric } 9096480093f4SDimitry Andric 9097480093f4SDimitry Andric return nullptr; 9098480093f4SDimitry Andric } 9099480093f4SDimitry Andric 9100480093f4SDimitry Andric unsigned SIInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, 9101480093f4SDimitry Andric const MachineInstr &MI, 9102480093f4SDimitry Andric unsigned *PredCost) const { 9103480093f4SDimitry Andric if (MI.isBundle()) { 9104480093f4SDimitry Andric MachineBasicBlock::const_instr_iterator I(MI.getIterator()); 9105480093f4SDimitry Andric MachineBasicBlock::const_instr_iterator E(MI.getParent()->instr_end()); 9106480093f4SDimitry Andric unsigned Lat = 0, Count = 0; 9107480093f4SDimitry Andric for (++I; I != E && I->isBundledWithPred(); ++I) { 9108480093f4SDimitry Andric ++Count; 9109480093f4SDimitry Andric Lat = std::max(Lat, SchedModel.computeInstrLatency(&*I)); 9110480093f4SDimitry Andric } 9111480093f4SDimitry Andric return Lat + Count - 1; 9112480093f4SDimitry Andric } 9113480093f4SDimitry Andric 9114480093f4SDimitry Andric return SchedModel.computeInstrLatency(&MI); 9115480093f4SDimitry Andric } 9116e8d8bef9SDimitry Andric 9117bdd1243dSDimitry Andric InstructionUniformity 9118bdd1243dSDimitry Andric SIInstrInfo::getGenericInstructionUniformity(const MachineInstr &MI) const { 9119bdd1243dSDimitry Andric unsigned opcode = MI.getOpcode(); 9120*5f757f3fSDimitry Andric if (auto *GI = dyn_cast<GIntrinsic>(&MI)) { 9121*5f757f3fSDimitry Andric auto IID = GI->getIntrinsicID(); 912206c3fb27SDimitry Andric if (AMDGPU::isIntrinsicSourceOfDivergence(IID)) 
912306c3fb27SDimitry Andric return InstructionUniformity::NeverUniform; 912406c3fb27SDimitry Andric if (AMDGPU::isIntrinsicAlwaysUniform(IID)) 912506c3fb27SDimitry Andric return InstructionUniformity::AlwaysUniform; 912606c3fb27SDimitry Andric 912706c3fb27SDimitry Andric switch (IID) { 912806c3fb27SDimitry Andric case Intrinsic::amdgcn_if: 912906c3fb27SDimitry Andric case Intrinsic::amdgcn_else: 913006c3fb27SDimitry Andric // FIXME: Uniform if second result 913106c3fb27SDimitry Andric break; 913206c3fb27SDimitry Andric } 913306c3fb27SDimitry Andric 913406c3fb27SDimitry Andric return InstructionUniformity::Default; 9135bdd1243dSDimitry Andric } 9136bdd1243dSDimitry Andric 9137bdd1243dSDimitry Andric // Loads from the private and flat address spaces are divergent, because 9138bdd1243dSDimitry Andric // threads can execute the load instruction with the same inputs and get 9139bdd1243dSDimitry Andric // different results. 9140bdd1243dSDimitry Andric // 9141bdd1243dSDimitry Andric // All other loads are not divergent, because if threads issue loads with the 9142bdd1243dSDimitry Andric // same arguments, they will always get the same result. 9143bdd1243dSDimitry Andric if (opcode == AMDGPU::G_LOAD) { 9144bdd1243dSDimitry Andric if (MI.memoperands_empty()) 9145bdd1243dSDimitry Andric return InstructionUniformity::NeverUniform; // conservative assumption 9146bdd1243dSDimitry Andric 9147bdd1243dSDimitry Andric if (llvm::any_of(MI.memoperands(), [](const MachineMemOperand *mmo) { 9148bdd1243dSDimitry Andric return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS || 9149bdd1243dSDimitry Andric mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS; 9150bdd1243dSDimitry Andric })) { 9151bdd1243dSDimitry Andric // At least one MMO in a non-global address space. 9152bdd1243dSDimitry Andric return InstructionUniformity::NeverUniform; 9153bdd1243dSDimitry Andric } 9154bdd1243dSDimitry Andric return InstructionUniformity::Default; 9155bdd1243dSDimitry Andric } 9156bdd1243dSDimitry Andric 9157bdd1243dSDimitry Andric if (SIInstrInfo::isGenericAtomicRMWOpcode(opcode) || 9158bdd1243dSDimitry Andric opcode == AMDGPU::G_ATOMIC_CMPXCHG || 9159*5f757f3fSDimitry Andric opcode == AMDGPU::G_ATOMIC_CMPXCHG_WITH_SUCCESS || 9160*5f757f3fSDimitry Andric AMDGPU::isGenericAtomic(opcode)) { 9161bdd1243dSDimitry Andric return InstructionUniformity::NeverUniform; 9162bdd1243dSDimitry Andric } 9163bdd1243dSDimitry Andric return InstructionUniformity::Default; 9164bdd1243dSDimitry Andric } 9165bdd1243dSDimitry Andric 9166bdd1243dSDimitry Andric InstructionUniformity 9167bdd1243dSDimitry Andric SIInstrInfo::getInstructionUniformity(const MachineInstr &MI) const { 916806c3fb27SDimitry Andric 916906c3fb27SDimitry Andric if (isNeverUniform(MI)) 917006c3fb27SDimitry Andric return InstructionUniformity::NeverUniform; 917106c3fb27SDimitry Andric 917206c3fb27SDimitry Andric unsigned opcode = MI.getOpcode(); 9173*5f757f3fSDimitry Andric if (opcode == AMDGPU::V_READLANE_B32 || 9174*5f757f3fSDimitry Andric opcode == AMDGPU::V_READFIRSTLANE_B32 || 9175*5f757f3fSDimitry Andric opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR) 917606c3fb27SDimitry Andric return InstructionUniformity::AlwaysUniform; 917706c3fb27SDimitry Andric 9178*5f757f3fSDimitry Andric if (isCopyInstr(MI)) { 917906c3fb27SDimitry Andric const MachineOperand &srcOp = MI.getOperand(1); 918006c3fb27SDimitry Andric if (srcOp.isReg() && srcOp.getReg().isPhysical()) { 918106c3fb27SDimitry Andric const TargetRegisterClass *regClass = 918206c3fb27SDimitry Andric 
RI.getPhysRegBaseClass(srcOp.getReg()); 918306c3fb27SDimitry Andric return RI.isSGPRClass(regClass) ? InstructionUniformity::AlwaysUniform 918406c3fb27SDimitry Andric : InstructionUniformity::NeverUniform; 918506c3fb27SDimitry Andric } 918606c3fb27SDimitry Andric return InstructionUniformity::Default; 918706c3fb27SDimitry Andric } 918806c3fb27SDimitry Andric 918906c3fb27SDimitry Andric // GMIR handling 919006c3fb27SDimitry Andric if (MI.isPreISelOpcode()) 919106c3fb27SDimitry Andric return SIInstrInfo::getGenericInstructionUniformity(MI); 919206c3fb27SDimitry Andric 9193bdd1243dSDimitry Andric // Atomics are divergent because they are executed sequentially: when an 9194bdd1243dSDimitry Andric // atomic operation refers to the same address in each thread, then each 9195bdd1243dSDimitry Andric // thread after the first sees the value written by the previous thread as 9196bdd1243dSDimitry Andric // original value. 9197bdd1243dSDimitry Andric 9198bdd1243dSDimitry Andric if (isAtomic(MI)) 9199bdd1243dSDimitry Andric return InstructionUniformity::NeverUniform; 9200bdd1243dSDimitry Andric 9201bdd1243dSDimitry Andric // Loads from the private and flat address spaces are divergent, because 9202bdd1243dSDimitry Andric // threads can execute the load instruction with the same inputs and get 9203bdd1243dSDimitry Andric // different results. 9204bdd1243dSDimitry Andric if (isFLAT(MI) && MI.mayLoad()) { 9205bdd1243dSDimitry Andric if (MI.memoperands_empty()) 9206bdd1243dSDimitry Andric return InstructionUniformity::NeverUniform; // conservative assumption 9207bdd1243dSDimitry Andric 9208bdd1243dSDimitry Andric if (llvm::any_of(MI.memoperands(), [](const MachineMemOperand *mmo) { 9209bdd1243dSDimitry Andric return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS || 9210bdd1243dSDimitry Andric mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS; 9211bdd1243dSDimitry Andric })) { 9212bdd1243dSDimitry Andric // At least one MMO in a non-global address space. 9213bdd1243dSDimitry Andric return InstructionUniformity::NeverUniform; 9214bdd1243dSDimitry Andric } 9215bdd1243dSDimitry Andric 9216bdd1243dSDimitry Andric return InstructionUniformity::Default; 9217bdd1243dSDimitry Andric } 9218bdd1243dSDimitry Andric 9219bdd1243dSDimitry Andric const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); 922006c3fb27SDimitry Andric const AMDGPURegisterBankInfo *RBI = ST.getRegBankInfo(); 922106c3fb27SDimitry Andric 922206c3fb27SDimitry Andric // FIXME: It's conceptually broken to report this for an instruction, and not 922306c3fb27SDimitry Andric // a specific def operand. For inline asm in particular, there could be mixed 922406c3fb27SDimitry Andric // uniform and divergent results. 922506c3fb27SDimitry Andric for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) { 922606c3fb27SDimitry Andric const MachineOperand &SrcOp = MI.getOperand(I); 922706c3fb27SDimitry Andric if (!SrcOp.isReg()) 9228bdd1243dSDimitry Andric continue; 9229bdd1243dSDimitry Andric 923006c3fb27SDimitry Andric Register Reg = SrcOp.getReg(); 923106c3fb27SDimitry Andric if (!Reg || !SrcOp.readsReg()) 923206c3fb27SDimitry Andric continue; 9233bdd1243dSDimitry Andric 923406c3fb27SDimitry Andric // If RegBank is null, this is unassigned or an unallocatable special 923506c3fb27SDimitry Andric // register, which are all scalars. 
// register, which are all scalars.
923606c3fb27SDimitry Andric     const RegisterBank *RegBank = RBI->getRegBank(Reg, MRI, RI);
923706c3fb27SDimitry Andric     if (RegBank && RegBank->getID() != AMDGPU::SGPRRegBankID)
9238bdd1243dSDimitry Andric       return InstructionUniformity::NeverUniform;
9239bdd1243dSDimitry Andric   }
9240bdd1243dSDimitry Andric 
9241bdd1243dSDimitry Andric   // TODO: Uniformity check conditions above can be rearranged for more
9242bdd1243dSDimitry Andric   // readability
9243bdd1243dSDimitry Andric 
9244bdd1243dSDimitry Andric   // TODO: amdgcn.{ballot, [if]cmp} should be AlwaysUniform, but they are
9245bdd1243dSDimitry Andric   // currently turned into no-op COPYs by SelectionDAG ISel and are
9246bdd1243dSDimitry Andric   // therefore no longer recognizable.
9247bdd1243dSDimitry Andric 
9248bdd1243dSDimitry Andric   return InstructionUniformity::Default;
9249bdd1243dSDimitry Andric }
9250bdd1243dSDimitry Andric 
9251e8d8bef9SDimitry Andric unsigned SIInstrInfo::getDSShaderTypeValue(const MachineFunction &MF) {
9252e8d8bef9SDimitry Andric   switch (MF.getFunction().getCallingConv()) {
9253e8d8bef9SDimitry Andric   case CallingConv::AMDGPU_PS:
9254e8d8bef9SDimitry Andric     return 1;
9255e8d8bef9SDimitry Andric   case CallingConv::AMDGPU_VS:
9256e8d8bef9SDimitry Andric     return 2;
9257e8d8bef9SDimitry Andric   case CallingConv::AMDGPU_GS:
9258e8d8bef9SDimitry Andric     return 3;
9259e8d8bef9SDimitry Andric   case CallingConv::AMDGPU_HS:
9260e8d8bef9SDimitry Andric   case CallingConv::AMDGPU_LS:
9261e8d8bef9SDimitry Andric   case CallingConv::AMDGPU_ES:
9262e8d8bef9SDimitry Andric     report_fatal_error("ds_ordered_count unsupported for this calling conv");
9263e8d8bef9SDimitry Andric   case CallingConv::AMDGPU_CS:
9264e8d8bef9SDimitry Andric   case CallingConv::AMDGPU_KERNEL:
9265e8d8bef9SDimitry Andric   case CallingConv::C:
9266e8d8bef9SDimitry Andric   case CallingConv::Fast:
9267e8d8bef9SDimitry Andric   default:
9268e8d8bef9SDimitry Andric     // Assume other calling conventions are various compute callable functions
9269e8d8bef9SDimitry Andric     return 0;
9270e8d8bef9SDimitry Andric   }
9271e8d8bef9SDimitry Andric }
9272349cc55cSDimitry Andric 
9273349cc55cSDimitry Andric bool SIInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
9274349cc55cSDimitry Andric                                  Register &SrcReg2, int64_t &CmpMask,
9275349cc55cSDimitry Andric                                  int64_t &CmpValue) const {
9276349cc55cSDimitry Andric   if (!MI.getOperand(0).isReg() || MI.getOperand(0).getSubReg())
9277349cc55cSDimitry Andric     return false;
9278349cc55cSDimitry Andric 
9279349cc55cSDimitry Andric   switch (MI.getOpcode()) {
9280349cc55cSDimitry Andric   default:
9281349cc55cSDimitry Andric     break;
9282349cc55cSDimitry Andric   case AMDGPU::S_CMP_EQ_U32:
9283349cc55cSDimitry Andric   case AMDGPU::S_CMP_EQ_I32:
9284349cc55cSDimitry Andric   case AMDGPU::S_CMP_LG_U32:
9285349cc55cSDimitry Andric   case AMDGPU::S_CMP_LG_I32:
9286349cc55cSDimitry Andric   case AMDGPU::S_CMP_LT_U32:
9287349cc55cSDimitry Andric   case AMDGPU::S_CMP_LT_I32:
9288349cc55cSDimitry Andric   case AMDGPU::S_CMP_GT_U32:
9289349cc55cSDimitry Andric   case AMDGPU::S_CMP_GT_I32:
9290349cc55cSDimitry Andric   case AMDGPU::S_CMP_LE_U32:
9291349cc55cSDimitry Andric   case AMDGPU::S_CMP_LE_I32:
9292349cc55cSDimitry Andric   case AMDGPU::S_CMP_GE_U32:
9293349cc55cSDimitry Andric   case AMDGPU::S_CMP_GE_I32:
9294349cc55cSDimitry Andric   case AMDGPU::S_CMP_EQ_U64:
9295349cc55cSDimitry Andric   case AMDGPU::S_CMP_LG_U64:
9296349cc55cSDimitry Andric     SrcReg = MI.getOperand(0).getReg();
9297349cc55cSDimitry Andric     if (MI.getOperand(1).isReg()) {
9298349cc55cSDimitry Andric       if
(MI.getOperand(1).getSubReg()) 9299349cc55cSDimitry Andric return false; 9300349cc55cSDimitry Andric SrcReg2 = MI.getOperand(1).getReg(); 9301349cc55cSDimitry Andric CmpValue = 0; 9302349cc55cSDimitry Andric } else if (MI.getOperand(1).isImm()) { 9303349cc55cSDimitry Andric SrcReg2 = Register(); 9304349cc55cSDimitry Andric CmpValue = MI.getOperand(1).getImm(); 9305349cc55cSDimitry Andric } else { 9306349cc55cSDimitry Andric return false; 9307349cc55cSDimitry Andric } 9308349cc55cSDimitry Andric CmpMask = ~0; 9309349cc55cSDimitry Andric return true; 9310349cc55cSDimitry Andric case AMDGPU::S_CMPK_EQ_U32: 9311349cc55cSDimitry Andric case AMDGPU::S_CMPK_EQ_I32: 9312349cc55cSDimitry Andric case AMDGPU::S_CMPK_LG_U32: 9313349cc55cSDimitry Andric case AMDGPU::S_CMPK_LG_I32: 9314349cc55cSDimitry Andric case AMDGPU::S_CMPK_LT_U32: 9315349cc55cSDimitry Andric case AMDGPU::S_CMPK_LT_I32: 9316349cc55cSDimitry Andric case AMDGPU::S_CMPK_GT_U32: 9317349cc55cSDimitry Andric case AMDGPU::S_CMPK_GT_I32: 9318349cc55cSDimitry Andric case AMDGPU::S_CMPK_LE_U32: 9319349cc55cSDimitry Andric case AMDGPU::S_CMPK_LE_I32: 9320349cc55cSDimitry Andric case AMDGPU::S_CMPK_GE_U32: 9321349cc55cSDimitry Andric case AMDGPU::S_CMPK_GE_I32: 9322349cc55cSDimitry Andric SrcReg = MI.getOperand(0).getReg(); 9323349cc55cSDimitry Andric SrcReg2 = Register(); 9324349cc55cSDimitry Andric CmpValue = MI.getOperand(1).getImm(); 9325349cc55cSDimitry Andric CmpMask = ~0; 9326349cc55cSDimitry Andric return true; 9327349cc55cSDimitry Andric } 9328349cc55cSDimitry Andric 9329349cc55cSDimitry Andric return false; 9330349cc55cSDimitry Andric } 9331349cc55cSDimitry Andric 9332349cc55cSDimitry Andric bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, 9333349cc55cSDimitry Andric Register SrcReg2, int64_t CmpMask, 9334349cc55cSDimitry Andric int64_t CmpValue, 9335349cc55cSDimitry Andric const MachineRegisterInfo *MRI) const { 9336349cc55cSDimitry Andric if (!SrcReg || SrcReg.isPhysical()) 9337349cc55cSDimitry Andric return false; 9338349cc55cSDimitry Andric 9339349cc55cSDimitry Andric if (SrcReg2 && !getFoldableImm(SrcReg2, *MRI, CmpValue)) 9340349cc55cSDimitry Andric return false; 9341349cc55cSDimitry Andric 9342349cc55cSDimitry Andric const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI, 9343349cc55cSDimitry Andric this](int64_t ExpectedValue, unsigned SrcSize, 934481ad6265SDimitry Andric bool IsReversible, bool IsSigned) -> bool { 9345349cc55cSDimitry Andric // s_cmp_eq_u32 (s_and_b32 $src, 1 << n), 1 << n => s_and_b32 $src, 1 << n 9346349cc55cSDimitry Andric // s_cmp_eq_i32 (s_and_b32 $src, 1 << n), 1 << n => s_and_b32 $src, 1 << n 9347349cc55cSDimitry Andric // s_cmp_ge_u32 (s_and_b32 $src, 1 << n), 1 << n => s_and_b32 $src, 1 << n 9348349cc55cSDimitry Andric // s_cmp_ge_i32 (s_and_b32 $src, 1 << n), 1 << n => s_and_b32 $src, 1 << n 9349349cc55cSDimitry Andric // s_cmp_eq_u64 (s_and_b64 $src, 1 << n), 1 << n => s_and_b64 $src, 1 << n 9350349cc55cSDimitry Andric // s_cmp_lg_u32 (s_and_b32 $src, 1 << n), 0 => s_and_b32 $src, 1 << n 9351349cc55cSDimitry Andric // s_cmp_lg_i32 (s_and_b32 $src, 1 << n), 0 => s_and_b32 $src, 1 << n 9352349cc55cSDimitry Andric // s_cmp_gt_u32 (s_and_b32 $src, 1 << n), 0 => s_and_b32 $src, 1 << n 9353349cc55cSDimitry Andric // s_cmp_gt_i32 (s_and_b32 $src, 1 << n), 0 => s_and_b32 $src, 1 << n 9354349cc55cSDimitry Andric // s_cmp_lg_u64 (s_and_b64 $src, 1 << n), 0 => s_and_b64 $src, 1 << n 9355349cc55cSDimitry Andric // 9356349cc55cSDimitry Andric // Signed ge/gt are not used 
for the sign bit. 9357349cc55cSDimitry Andric // 9358349cc55cSDimitry Andric // If result of the AND is unused except in the compare: 9359349cc55cSDimitry Andric // s_and_b(32|64) $src, 1 << n => s_bitcmp1_b(32|64) $src, n 9360349cc55cSDimitry Andric // 9361349cc55cSDimitry Andric // s_cmp_eq_u32 (s_and_b32 $src, 1 << n), 0 => s_bitcmp0_b32 $src, n 9362349cc55cSDimitry Andric // s_cmp_eq_i32 (s_and_b32 $src, 1 << n), 0 => s_bitcmp0_b32 $src, n 9363349cc55cSDimitry Andric // s_cmp_eq_u64 (s_and_b64 $src, 1 << n), 0 => s_bitcmp0_b64 $src, n 9364349cc55cSDimitry Andric // s_cmp_lg_u32 (s_and_b32 $src, 1 << n), 1 << n => s_bitcmp0_b32 $src, n 9365349cc55cSDimitry Andric // s_cmp_lg_i32 (s_and_b32 $src, 1 << n), 1 << n => s_bitcmp0_b32 $src, n 9366349cc55cSDimitry Andric // s_cmp_lg_u64 (s_and_b64 $src, 1 << n), 1 << n => s_bitcmp0_b64 $src, n 9367349cc55cSDimitry Andric 9368349cc55cSDimitry Andric MachineInstr *Def = MRI->getUniqueVRegDef(SrcReg); 9369349cc55cSDimitry Andric if (!Def || Def->getParent() != CmpInstr.getParent()) 9370349cc55cSDimitry Andric return false; 9371349cc55cSDimitry Andric 9372349cc55cSDimitry Andric if (Def->getOpcode() != AMDGPU::S_AND_B32 && 9373349cc55cSDimitry Andric Def->getOpcode() != AMDGPU::S_AND_B64) 9374349cc55cSDimitry Andric return false; 9375349cc55cSDimitry Andric 9376349cc55cSDimitry Andric int64_t Mask; 9377349cc55cSDimitry Andric const auto isMask = [&Mask, SrcSize](const MachineOperand *MO) -> bool { 9378349cc55cSDimitry Andric if (MO->isImm()) 9379349cc55cSDimitry Andric Mask = MO->getImm(); 9380349cc55cSDimitry Andric else if (!getFoldableImm(MO, Mask)) 9381349cc55cSDimitry Andric return false; 9382349cc55cSDimitry Andric Mask &= maxUIntN(SrcSize); 9383349cc55cSDimitry Andric return isPowerOf2_64(Mask); 9384349cc55cSDimitry Andric }; 9385349cc55cSDimitry Andric 9386349cc55cSDimitry Andric MachineOperand *SrcOp = &Def->getOperand(1); 9387349cc55cSDimitry Andric if (isMask(SrcOp)) 9388349cc55cSDimitry Andric SrcOp = &Def->getOperand(2); 9389349cc55cSDimitry Andric else if (isMask(&Def->getOperand(2))) 9390349cc55cSDimitry Andric SrcOp = &Def->getOperand(1); 9391349cc55cSDimitry Andric else 9392349cc55cSDimitry Andric return false; 9393349cc55cSDimitry Andric 939406c3fb27SDimitry Andric unsigned BitNo = llvm::countr_zero((uint64_t)Mask); 9395349cc55cSDimitry Andric if (IsSigned && BitNo == SrcSize - 1) 9396349cc55cSDimitry Andric return false; 9397349cc55cSDimitry Andric 9398349cc55cSDimitry Andric ExpectedValue <<= BitNo; 9399349cc55cSDimitry Andric 9400349cc55cSDimitry Andric bool IsReversedCC = false; 9401349cc55cSDimitry Andric if (CmpValue != ExpectedValue) { 940281ad6265SDimitry Andric if (!IsReversible) 9403349cc55cSDimitry Andric return false; 9404349cc55cSDimitry Andric IsReversedCC = CmpValue == (ExpectedValue ^ Mask); 9405349cc55cSDimitry Andric if (!IsReversedCC) 9406349cc55cSDimitry Andric return false; 9407349cc55cSDimitry Andric } 9408349cc55cSDimitry Andric 9409349cc55cSDimitry Andric Register DefReg = Def->getOperand(0).getReg(); 9410349cc55cSDimitry Andric if (IsReversedCC && !MRI->hasOneNonDBGUse(DefReg)) 9411349cc55cSDimitry Andric return false; 9412349cc55cSDimitry Andric 9413349cc55cSDimitry Andric for (auto I = std::next(Def->getIterator()), E = CmpInstr.getIterator(); 9414349cc55cSDimitry Andric I != E; ++I) { 9415349cc55cSDimitry Andric if (I->modifiesRegister(AMDGPU::SCC, &RI) || 9416349cc55cSDimitry Andric I->killsRegister(AMDGPU::SCC, &RI)) 9417349cc55cSDimitry Andric return false; 9418349cc55cSDimitry Andric } 
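// At this point SCC is neither modified nor killed between the S_AND and the
// compare, so the compare is redundant: mark the S_AND's SCC def as live and
// erase the compare. If the S_AND result has no other users, the S_AND itself
// is then rewritten into an S_BITCMP0/S_BITCMP1 on the tested bit.
//
// A hypothetical before/after sketch of the patterns listed above (virtual
// register numbers are made up, not compiler output):
//   %1:sreg_32 = S_AND_B32 %0:sreg_32, 4, implicit-def dead $scc
//   S_CMP_LG_U32 %1, 0, implicit-def $scc
// becomes, when %1 has no other uses,
//   S_BITCMP1_B32 %0:sreg_32, 2, implicit-def $scc
// where the bit number 2 is countr_zero of the mask 4.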
9419349cc55cSDimitry Andric 9420349cc55cSDimitry Andric MachineOperand *SccDef = Def->findRegisterDefOperand(AMDGPU::SCC); 9421349cc55cSDimitry Andric SccDef->setIsDead(false); 9422349cc55cSDimitry Andric CmpInstr.eraseFromParent(); 9423349cc55cSDimitry Andric 9424349cc55cSDimitry Andric if (!MRI->use_nodbg_empty(DefReg)) { 9425349cc55cSDimitry Andric assert(!IsReversedCC); 9426349cc55cSDimitry Andric return true; 9427349cc55cSDimitry Andric } 9428349cc55cSDimitry Andric 9429349cc55cSDimitry Andric // Replace AND with unused result with a S_BITCMP. 9430349cc55cSDimitry Andric MachineBasicBlock *MBB = Def->getParent(); 9431349cc55cSDimitry Andric 9432349cc55cSDimitry Andric unsigned NewOpc = (SrcSize == 32) ? IsReversedCC ? AMDGPU::S_BITCMP0_B32 9433349cc55cSDimitry Andric : AMDGPU::S_BITCMP1_B32 9434349cc55cSDimitry Andric : IsReversedCC ? AMDGPU::S_BITCMP0_B64 9435349cc55cSDimitry Andric : AMDGPU::S_BITCMP1_B64; 9436349cc55cSDimitry Andric 9437349cc55cSDimitry Andric BuildMI(*MBB, Def, Def->getDebugLoc(), get(NewOpc)) 9438349cc55cSDimitry Andric .add(*SrcOp) 9439349cc55cSDimitry Andric .addImm(BitNo); 9440349cc55cSDimitry Andric Def->eraseFromParent(); 9441349cc55cSDimitry Andric 9442349cc55cSDimitry Andric return true; 9443349cc55cSDimitry Andric }; 9444349cc55cSDimitry Andric 9445349cc55cSDimitry Andric switch (CmpInstr.getOpcode()) { 9446349cc55cSDimitry Andric default: 9447349cc55cSDimitry Andric break; 9448349cc55cSDimitry Andric case AMDGPU::S_CMP_EQ_U32: 9449349cc55cSDimitry Andric case AMDGPU::S_CMP_EQ_I32: 9450349cc55cSDimitry Andric case AMDGPU::S_CMPK_EQ_U32: 9451349cc55cSDimitry Andric case AMDGPU::S_CMPK_EQ_I32: 9452349cc55cSDimitry Andric return optimizeCmpAnd(1, 32, true, false); 9453349cc55cSDimitry Andric case AMDGPU::S_CMP_GE_U32: 9454349cc55cSDimitry Andric case AMDGPU::S_CMPK_GE_U32: 9455349cc55cSDimitry Andric return optimizeCmpAnd(1, 32, false, false); 9456349cc55cSDimitry Andric case AMDGPU::S_CMP_GE_I32: 9457349cc55cSDimitry Andric case AMDGPU::S_CMPK_GE_I32: 9458349cc55cSDimitry Andric return optimizeCmpAnd(1, 32, false, true); 9459349cc55cSDimitry Andric case AMDGPU::S_CMP_EQ_U64: 9460349cc55cSDimitry Andric return optimizeCmpAnd(1, 64, true, false); 9461349cc55cSDimitry Andric case AMDGPU::S_CMP_LG_U32: 9462349cc55cSDimitry Andric case AMDGPU::S_CMP_LG_I32: 9463349cc55cSDimitry Andric case AMDGPU::S_CMPK_LG_U32: 9464349cc55cSDimitry Andric case AMDGPU::S_CMPK_LG_I32: 9465349cc55cSDimitry Andric return optimizeCmpAnd(0, 32, true, false); 9466349cc55cSDimitry Andric case AMDGPU::S_CMP_GT_U32: 9467349cc55cSDimitry Andric case AMDGPU::S_CMPK_GT_U32: 9468349cc55cSDimitry Andric return optimizeCmpAnd(0, 32, false, false); 9469349cc55cSDimitry Andric case AMDGPU::S_CMP_GT_I32: 9470349cc55cSDimitry Andric case AMDGPU::S_CMPK_GT_I32: 9471349cc55cSDimitry Andric return optimizeCmpAnd(0, 32, false, true); 9472349cc55cSDimitry Andric case AMDGPU::S_CMP_LG_U64: 9473349cc55cSDimitry Andric return optimizeCmpAnd(0, 64, true, false); 9474349cc55cSDimitry Andric } 9475349cc55cSDimitry Andric 9476349cc55cSDimitry Andric return false; 9477349cc55cSDimitry Andric } 947881ad6265SDimitry Andric 947981ad6265SDimitry Andric void SIInstrInfo::enforceOperandRCAlignment(MachineInstr &MI, 948081ad6265SDimitry Andric unsigned OpName) const { 948181ad6265SDimitry Andric if (!ST.needsAlignedVGPRs()) 948281ad6265SDimitry Andric return; 948381ad6265SDimitry Andric 948481ad6265SDimitry Andric int OpNo = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpName); 948581ad6265SDimitry Andric if (OpNo < 0) 
948681ad6265SDimitry Andric return; 948781ad6265SDimitry Andric MachineOperand &Op = MI.getOperand(OpNo); 948881ad6265SDimitry Andric if (getOpSize(MI, OpNo) > 4) 948981ad6265SDimitry Andric return; 949081ad6265SDimitry Andric 949181ad6265SDimitry Andric // Add implicit aligned super-reg to force alignment on the data operand. 949281ad6265SDimitry Andric const DebugLoc &DL = MI.getDebugLoc(); 949381ad6265SDimitry Andric MachineBasicBlock *BB = MI.getParent(); 949481ad6265SDimitry Andric MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); 949581ad6265SDimitry Andric Register DataReg = Op.getReg(); 949681ad6265SDimitry Andric bool IsAGPR = RI.isAGPR(MRI, DataReg); 949781ad6265SDimitry Andric Register Undef = MRI.createVirtualRegister( 949881ad6265SDimitry Andric IsAGPR ? &AMDGPU::AGPR_32RegClass : &AMDGPU::VGPR_32RegClass); 949981ad6265SDimitry Andric BuildMI(*BB, MI, DL, get(AMDGPU::IMPLICIT_DEF), Undef); 950081ad6265SDimitry Andric Register NewVR = 950181ad6265SDimitry Andric MRI.createVirtualRegister(IsAGPR ? &AMDGPU::AReg_64_Align2RegClass 950281ad6265SDimitry Andric : &AMDGPU::VReg_64_Align2RegClass); 950381ad6265SDimitry Andric BuildMI(*BB, MI, DL, get(AMDGPU::REG_SEQUENCE), NewVR) 950481ad6265SDimitry Andric .addReg(DataReg, 0, Op.getSubReg()) 950581ad6265SDimitry Andric .addImm(AMDGPU::sub0) 950681ad6265SDimitry Andric .addReg(Undef) 950781ad6265SDimitry Andric .addImm(AMDGPU::sub1); 950881ad6265SDimitry Andric Op.setReg(NewVR); 950981ad6265SDimitry Andric Op.setSubReg(AMDGPU::sub0); 951081ad6265SDimitry Andric MI.addOperand(MachineOperand::CreateReg(NewVR, false, true)); 951181ad6265SDimitry Andric } 9512
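// Illustrative sketch of the rewrite performed by enforceOperandRCAlignment
// above, using a placeholder opcode and made-up virtual registers (this is
// not compiler output): on subtargets where needsAlignedVGPRs() is true, a
// 32-bit data operand
//   SOME_MI %0:vgpr_32, ...
// is wrapped so the data becomes sub0 of an even-aligned 64-bit tuple, with
// an implicit use of the whole tuple appended to the instruction:
//   %1:vgpr_32 = IMPLICIT_DEF
//   %2:vreg_64_align2 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1
//   SOME_MI %2.sub0, ..., implicit %2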