1 //===- SIInstrInfo.cpp - SI Instruction Information ----------------------===//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
12 //===----------------------------------------------------------------------===//
38 #define DEBUG_TYPE "si-instr-info"
54 BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16),
58 "amdgpu-fix-16-bit-physreg-copies",
69 //===----------------------------------------------------------------------===//
71 //===----------------------------------------------------------------------===//
74 unsigned N = Node->getNumOperands(); in getNumOperandsNoGlue()
75 while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue) in getNumOperandsNoGlue()
76 --N; in getNumOperandsNoGlue()
83 unsigned Opc0 = N0->getMachineOpcode(); in nodesHaveSameOperandValue()
84 unsigned Opc1 = N1->getMachineOpcode(); in nodesHaveSameOperandValue()
89 if (Op0Idx == -1 && Op1Idx == -1) in nodesHaveSameOperandValue()
93 if ((Op0Idx == -1 && Op1Idx != -1) || in nodesHaveSameOperandValue()
94 (Op1Idx == -1 && Op0Idx != -1)) in nodesHaveSameOperandValue()
101 --Op0Idx; in nodesHaveSameOperandValue()
102 --Op1Idx; in nodesHaveSameOperandValue()
104 return N0->getOperand(Op0Idx) == N1->getOperand(Op1Idx); in nodesHaveSameOperandValue()
117 return MMO->isLoad() && MMO->isInvariant(); in canRemat()
153 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); in resultDependsOnExec()
197 MachineRegisterInfo &MRI = MI.getMF()->getRegInfo(); in isSafeToSink()
205 MachineCycle *FromCycle = CI->getCycle(SgprDef->getParent()); in isSafeToSink()
209 MachineCycle *ToCycle = CI->getCycle(SuccToSinkTo); in isSafeToSink()
212 while (FromCycle && !FromCycle->contains(ToCycle)) { in isSafeToSink()
214 FromCycle->getExitingBlocks(ExitingBlocks); in isSafeToSink()
222 FromCycle = FromCycle->getParentCycle(); in isSafeToSink()
233 if (!Load0->isMachineOpcode() || !Load1->isMachineOpcode()) in areLoadsFromSameBasePtr()
236 unsigned Opc0 = Load0->getMachineOpcode(); in areLoadsFromSameBasePtr()
237 unsigned Opc1 = Load1->getMachineOpcode(); in areLoadsFromSameBasePtr()
254 if (Load0->getOperand(0) != Load1->getOperand(0)) in areLoadsFromSameBasePtr()
262 if (Offset0Idx == -1 || Offset1Idx == -1) in areLoadsFromSameBasePtr()
265 // XXX - be careful of dataless loads in areLoadsFromSameBasePtr()
269 Offset0Idx -= get(Opc0).NumDefs; in areLoadsFromSameBasePtr()
270 Offset1Idx -= get(Opc1).NumDefs; in areLoadsFromSameBasePtr()
271 Offset0 = Load0->getConstantOperandVal(Offset0Idx); in areLoadsFromSameBasePtr()
272 Offset1 = Load1->getConstantOperandVal(Offset1Idx); in areLoadsFromSameBasePtr()
287 if (Load0->getOperand(0) != Load1->getOperand(0)) in areLoadsFromSameBasePtr()
292 if (NumOps == 5 && Load0->getOperand(1) != Load1->getOperand(1)) in areLoadsFromSameBasePtr()
296 dyn_cast<ConstantSDNode>(Load0->getOperand(NumOps - 3)); in areLoadsFromSameBasePtr()
298 dyn_cast<ConstantSDNode>(Load1->getOperand(NumOps - 3)); in areLoadsFromSameBasePtr()
303 Offset0 = Load0Offset->getZExtValue(); in areLoadsFromSameBasePtr()
304 Offset1 = Load1Offset->getZExtValue(); in areLoadsFromSameBasePtr()
320 if (OffIdx0 == -1 || OffIdx1 == -1) in areLoadsFromSameBasePtr()
326 OffIdx0 -= get(Opc0).NumDefs; in areLoadsFromSameBasePtr()
327 OffIdx1 -= get(Opc1).NumDefs; in areLoadsFromSameBasePtr()
329 SDValue Off0 = Load0->getOperand(OffIdx0); in areLoadsFromSameBasePtr()
330 SDValue Off1 = Load1->getOperand(OffIdx1); in areLoadsFromSameBasePtr()
336 Offset0 = Off0->getAsZExtVal(); in areLoadsFromSameBasePtr()
337 Offset1 = Off1->getAsZExtVal(); in areLoadsFromSameBasePtr()
379 Offset = OffsetOp->getImm(); in getMemOperandsWithOffsetWidth()
382 if (DataOpIdx == -1) in getMemOperandsWithOffsetWidth()
394 unsigned Offset0 = Offset0Op->getImm() & 0xff; in getMemOperandsWithOffsetWidth()
395 unsigned Offset1 = Offset1Op->getImm() & 0xff; in getMemOperandsWithOffsetWidth()
404 EltSize = TRI->getRegSizeInBits(*getOpRegClass(LdSt, 0)) / 16; in getMemOperandsWithOffsetWidth()
408 EltSize = TRI->getRegSizeInBits(*getOpRegClass(LdSt, Data0Idx)) / 8; in getMemOperandsWithOffsetWidth()
418 if (DataOpIdx == -1) { in getMemOperandsWithOffsetWidth()
436 if (BaseOp && !BaseOp->isFI()) in getMemOperandsWithOffsetWidth()
440 Offset = OffsetImm->getImm(); in getMemOperandsWithOffsetWidth()
444 if (SOffset->isReg()) in getMemOperandsWithOffsetWidth()
447 Offset += SOffset->getImm(); in getMemOperandsWithOffsetWidth()
451 if (DataOpIdx == -1) in getMemOperandsWithOffsetWidth()
453 if (DataOpIdx == -1) // LDS DMA in getMemOperandsWithOffsetWidth()
475 if (DataOpIdx == -1) in getMemOperandsWithOffsetWidth()
487 Offset = OffsetOp ? OffsetOp->getImm() : 0; in getMemOperandsWithOffsetWidth()
490 if (DataOpIdx == -1) in getMemOperandsWithOffsetWidth()
504 Offset = getNamedOperand(LdSt, AMDGPU::OpName::offset)->getImm(); in getMemOperandsWithOffsetWidth()
507 if (DataOpIdx == -1) in getMemOperandsWithOffsetWidth()
509 if (DataOpIdx == -1) // LDS DMA in getMemOperandsWithOffsetWidth()
525 if (BaseOps1.front()->isIdenticalTo(*BaseOps2.front())) in memOpsHaveSameBasePtr()
533 if (MO1->getAddrSpace() != MO2->getAddrSpace()) in memOpsHaveSameBasePtr()
536 auto Base1 = MO1->getValue(); in memOpsHaveSameBasePtr()
537 auto Base2 = MO2->getValue(); in memOpsHaveSameBasePtr()
558 const MachineInstr &FirstLdSt = *BaseOps1.front()->getParent(); in shouldClusterMemOps()
559 const MachineInstr &SecondLdSt = *BaseOps2.front()->getParent(); in shouldClusterMemOps()
571 // The good thing about this heuristic is that it avoids clustering of too many in shouldClusterMemOps()
572 // sub-word loads, and also avoids clustering of wide loads. Below is the in shouldClusterMemOps()
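// Illustrative sketch of the budget idea behind the heuristic above, not the
// actual cut-off used by shouldClusterMemOps(); MaxClusterBytes is a
// hypothetical, target-tuned knob. Many sub-word loads and one wide load are
// both rejected by the same combined-byte cap.
constexpr unsigned MaxClusterBytes = 32;

static bool fitsClusterBudget(unsigned CombinedBytes) {
  return CombinedBytes <= MaxClusterBytes;
}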
603 return (NumLoads <= 16 && (Offset1 - Offset0) < 64); in shouldScheduleLoadsNear()
612 DiagnosticInfoUnsupported IllegalCopy(MF->getFunction(), Msg, DL, DS_Error); in reportIllegalCopy()
613 LLVMContext &C = MF->getFunction().getContext(); in reportIllegalCopy()
616 BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_ILLEGAL_COPY), DestReg) in reportIllegalCopy()
647 // an accvgpr_write used for this same copy due to implicit-defs in indirectCopyToAGPR()
650 --Def; in indirectCopyToAGPR()
652 if (!Def->modifiesRegister(SrcReg, &RI)) in indirectCopyToAGPR()
655 if (Def->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 || in indirectCopyToAGPR()
656 Def->getOperand(0).getReg() != SrcReg) in indirectCopyToAGPR()
659 MachineOperand &DefOp = Def->getOperand(1); in indirectCopyToAGPR()
667 if (I->modifiesRegister(DefOp.getReg(), &RI)) in indirectCopyToAGPR()
700 // use register number to pick one of three round-robin temps. in indirectCopyToAGPR()
701 unsigned RegNo = (DestReg - AMDGPU::AGPR0) % 3; in indirectCopyToAGPR()
703 MBB.getParent()->getInfo<SIMachineFunctionInfo>()->getVGPRForAGPRCopy(); in indirectCopyToAGPR()
704 assert(MBB.getParent()->getRegInfo().isReserved(Tmp) && in indirectCopyToAGPR()
709 while (RegNo--) { in indirectCopyToAGPR()
759 bool AlignedDest = ((DestSubReg - AMDGPU::SGPR0) % 2) == 0; in expandSGPRCopy()
760 bool AlignedSrc = ((SrcSubReg - AMDGPU::SGPR0) % 2) == 0; in expandSGPRCopy()
780 I--; in expandSGPRCopy()
787 FirstMI->addOperand( in expandSGPRCopy()
791 LastMI->addRegisterKilled(SrcReg, &RI); in expandSGPRCopy()
804 // TODO-GFX11_16BIT If all true 16 bit instruction patterns are completed can in copyPhysReg()
808 // Non-VGPR Src and Dst will later be expanded back to 32 bits. in copyPhysReg()
906 // Copying 64-bit or 32-bit sources to SCC barely makes sense, in copyPhysReg()
1024 MIB->tieOperands(0, MIB->getNumOperands() - 1); in copyPhysReg()
1077 // TODO: In 96-bit case, could do a 64-bit mov and then a 32-bit mov. in copyPhysReg()
1090 // FIXME: The pass should maintain this for us so we don't have to re-scan the in copyPhysReg()
1098 // If there is an overlap, we can't kill the super-register on the last in copyPhysReg()
1108 SubIdx = SubIndices[SubIndices.size() - Idx - 1]; in copyPhysReg()
1114 bool UseKill = CanKillSuperReg && Idx == SubIndices.size() - 1; in copyPhysReg()
1152 if (NewOpc != -1) in commuteOpcode()
1154 return pseudoToMCOpcode(NewOpc) != -1 ? NewOpc : -1; in commuteOpcode()
1158 if (NewOpc != -1) in commuteOpcode()
1159 // Check if the original (non-REV) opcode exists on the target. in commuteOpcode()
1160 return pseudoToMCOpcode(NewOpc) != -1 ? NewOpc : -1; in commuteOpcode()
1169 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); in materializeImmediate()
1193 if (RegClass->hasSuperClassEq(&AMDGPU::VReg_64RegClass)) { in materializeImmediate()
1232 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); in insertVectorSelect()
1356 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); in insertEQ()
1369 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); in insertNE()
1689 // Currently, only 32-bit WWM register spills are needed. in getWWMRegSpillSaveOpcode()
1722 SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); in storeRegToStackSlot()
1723 MachineFrameInfo &FrameInfo = MF->getFrameInfo(); in storeRegToStackSlot()
1728 MachineMemOperand *MMO = MF->getMachineMemOperand( in storeRegToStackSlot()
1731 unsigned SpillSize = TRI->getSpillSize(*RC); in storeRegToStackSlot()
1733 MachineRegisterInfo &MRI = MF->getRegInfo(); in storeRegToStackSlot()
1735 MFI->setHasSpilledSGPRs(); in storeRegToStackSlot()
1754 .addReg(MFI->getStackPtrOffsetReg(), RegState::Implicit); in storeRegToStackSlot()
1763 MFI->setHasSpilledVGPRs(); in storeRegToStackSlot()
1768 .addReg(MFI->getStackPtrOffsetReg()) // scratch_offset in storeRegToStackSlot()
1915 // Currently, only 32-bit WWM register spills are needed. in getWWMRegSpillRestoreOpcode()
1949 SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); in loadRegFromStackSlot()
1950 MachineFrameInfo &FrameInfo = MF->getFrameInfo(); in loadRegFromStackSlot()
1952 unsigned SpillSize = TRI->getSpillSize(*RC); in loadRegFromStackSlot()
1957 MachineMemOperand *MMO = MF->getMachineMemOperand( in loadRegFromStackSlot()
1962 MFI->setHasSpilledSGPRs(); in loadRegFromStackSlot()
1968 // lowered to non-memory instructions. in loadRegFromStackSlot()
1971 MachineRegisterInfo &MRI = MF->getRegInfo(); in loadRegFromStackSlot()
1980 .addReg(MFI->getStackPtrOffsetReg(), RegState::Implicit); in loadRegFromStackSlot()
1989 .addReg(MFI->getStackPtrOffsetReg()) // scratch_offset in loadRegFromStackSlot()
2005 Quantity -= Arg; in insertNoops()
2006 BuildMI(MBB, MI, DL, get(AMDGPU::S_NOP)).addImm(Arg - 1); in insertNoops()
2012 SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>(); in insertReturn()
2014 assert(Info->isEntryFunction()); in insertReturn()
2019 if (Info->returnsVoid()) { in insertReturn()
2038 MachineBasicBlock *HaltLoopBB = MF->CreateMachineBasicBlock(); in insertSimulatedTrap()
2042 TrapBB = MF->CreateMachineBasicBlock(); in insertSimulatedTrap()
2044 MF->push_back(TrapBB); in insertSimulatedTrap()
2050 BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_TRAP)) in insertSimulatedTrap()
2053 BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_SENDMSG_RTN_B32), in insertSimulatedTrap()
2056 BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::TTMP2) in insertSimulatedTrap()
2060 BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_AND_B32), DoorbellRegMasked) in insertSimulatedTrap()
2065 BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_OR_B32), SetWaveAbortBit) in insertSimulatedTrap()
2068 BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::M0) in insertSimulatedTrap()
2070 BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_SENDMSG)) in insertSimulatedTrap()
2072 BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::M0) in insertSimulatedTrap()
2074 BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_BRANCH)).addMBB(HaltLoopBB); in insertSimulatedTrap()
2075 TrapBB->addSuccessor(HaltLoopBB); in insertSimulatedTrap()
2077 BuildMI(*HaltLoopBB, HaltLoopBB->end(), DL, get(AMDGPU::S_SETHALT)).addImm(5); in insertSimulatedTrap()
2078 BuildMI(*HaltLoopBB, HaltLoopBB->end(), DL, get(AMDGPU::S_BRANCH)) in insertSimulatedTrap()
2080 MF->push_back(HaltLoopBB); in insertSimulatedTrap()
2081 HaltLoopBB->addSuccessor(HaltLoopBB); in insertSimulatedTrap()
2190 // FIXME: Will this work for 64-bit floating point immediates? in expandPostRAPseudo()
2224 !RI.isAGPR(MBB.getParent()->getRegInfo(), SrcOp.getReg())) { in expandPostRAPseudo()
2283 FirstNot->addRegisterDead(AMDGPU::SCC, TRI); // SCC is overwritten in expandPostRAPseudo()
2299 FirstNot->addRegisterDead(AMDGPU::SCC, TRI); // SCC is overwritten in expandPostRAPseudo()
2364 MIB->tieOperands(ImpDefIdx, ImpUseIdx); in expandPostRAPseudo()
2389 SetOn->getOperand(3).setIsUndef(); in expandPostRAPseudo()
2403 MIB->tieOperands(ImpDefIdx, ImpUseIdx); in expandPostRAPseudo()
2407 finalizeBundle(MBB, SetOn->getIterator(), std::next(SetOff->getIterator())); in expandPostRAPseudo()
2434 SetOn->getOperand(3).setIsUndef(); in expandPostRAPseudo()
2443 finalizeBundle(MBB, SetOn->getIterator(), std::next(SetOff->getIterator())); in expandPostRAPseudo()
2456 // Create a bundle so these instructions won't be re-ordered by the in expandPostRAPseudo()
2457 // post-RA scheduler. in expandPostRAPseudo()
2472 // Fix up hardware that does not sign-extend the 48-bit PC value by in expandPostRAPseudo()
2523 const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>(); in expandPostRAPseudo()
2526 // the function and missing live-ins. We are fine in practice because callee in expandPostRAPseudo()
2532 .addReg(TRI->getReturnAddressReg(*MF), RegState::Undef); in expandPostRAPseudo()
2549 // Fix up hardware that does not sign-extend the 48-bit PC value by in expandPostRAPseudo()
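// A minimal sketch of the fix-up mentioned above, assuming the program counter
// occupies the low 48 bits of a 64-bit register: shift the 48-bit sign bit up
// to bit 63, then arithmetic-shift back down. Host-side illustration only.
#include <cstdint>

static int64_t signExtend48(uint64_t PC) {
  return static_cast<int64_t>(PC << 16) >> 16;
}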
2578 if (I->isBundled()) in reMaterialize()
2584 for (auto &CandMO : I->operands()) { in reMaterialize()
2593 if (!UseMO || UseMO->getSubReg() == AMDGPU::NoSubRegister) in reMaterialize()
2596 unsigned Offset = RI.getSubRegIdxOffset(UseMO->getSubReg()); in reMaterialize()
2597 unsigned SubregSize = RI.getSubRegIdxSize(UseMO->getSubReg()); in reMaterialize()
2600 MachineRegisterInfo &MRI = MF->getRegInfo(); in reMaterialize()
2603 unsigned NewOpcode = -1; in reMaterialize()
2616 UseMO->setReg(DestReg); in reMaterialize()
2617 UseMO->setSubReg(AMDGPU::NoSubRegister); in reMaterialize()
2620 MachineInstr *MI = MF->CloneMachineInstr(&Orig); in reMaterialize()
2621 MI->setDesc(TID); in reMaterialize()
2622 MI->getOperand(0).setReg(DestReg); in reMaterialize()
2623 MI->getOperand(0).setSubReg(AMDGPU::NoSubRegister); in reMaterialize()
2626 int64_t FinalOffset = OffsetMO->getImm() + Offset / 8; in reMaterialize()
2627 OffsetMO->setImm(FinalOffset); in reMaterialize()
2631 NewMMOs.push_back(MF->getMachineMemOperand(MemOp, MemOp->getPointerInfo(), in reMaterialize()
2633 MI->setMemRefs(*MF, NewMMOs); in reMaterialize()
2652 getNamedOperand(MI, AMDGPU::OpName::dpp_ctrl)->getImm())) { in expandMovDPP64()
2660 MachineRegisterInfo &MRI = MF->getRegInfo(); in expandMovDPP64()
2701 .addReg(Split[0]->getOperand(0).getReg()) in expandMovDPP64()
2703 .addReg(Split[1]->getOperand(0).getReg()) in expandMovDPP64()
2731 int Src0ModsVal = Src0Mods->getImm(); in swapSourceModifiers()
2732 int Src1ModsVal = Src1Mods->getImm(); in swapSourceModifiers()
2734 Src1Mods->setImm(Src0ModsVal); in swapSourceModifiers()
2735 Src0Mods->setImm(Src1ModsVal); in swapSourceModifiers()
2775 if (CommutedOpcode == -1) in commuteInstructionImpl()
2814 CommutedMI->setDesc(get(CommutedOpcode)); in commuteInstructionImpl()
2837 if (Src0Idx == -1) in findCommutedOpIndices()
2841 if (Src1Idx == -1) in findCommutedOpIndices()
2858 BrOffset -= 1; in isBranchOffsetInRange()
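// With amdgpu-s-branch-bits defaulting to 16 (see the cl::opt near the top of
// the file), the range check amounts to asking whether the adjusted branch
// offset fits in a signed N-bit field. A standalone sketch of that test,
// independent of the LLVM MathExtras helpers:
#include <cstdint>

static bool fitsSignedBits(int64_t Offset, unsigned Bits) {
  const int64_t Limit = int64_t(1) << (Bits - 1);
  return Offset >= -Limit && Offset < Limit;
}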
2869 for (const MachineInstr &MI : MBB->terminators()) { in hasDivergentBranch()
2891 MachineRegisterInfo &MRI = MF->getRegInfo(); in insertIndirectBranch()
2892 const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); in insertIndirectBranch()
2904 auto &MCCtx = MF->getContext(); in insertIndirectBranch()
2907 GetPC->setPostInstrSymbol(*MF, PostGetPCLabel); in insertIndirectBranch()
2956 Register LongBranchReservedReg = MFI->getLongBranchReservedReg(); in insertIndirectBranch()
2962 RS->enterBasicBlock(MBB); in insertIndirectBranch()
2965 RS->enterBasicBlockEnd(MBB); in insertIndirectBranch()
2966 Scav = RS->scavengeRegisterBackwards( in insertIndirectBranch()
2971 RS->setRegUsed(Scav); in insertIndirectBranch()
2977 const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>(); in insertIndirectBranch()
2979 TRI->spillEmergencySGPR(GetPC, RestoreBB, AMDGPU::SGPR0_SGPR1, RS); in insertIndirectBranch()
2991 OffsetLo->setVariableValue(MCBinaryExpr::createAnd(Offset, Mask, MCCtx)); in insertIndirectBranch()
2993 OffsetHi->setVariableValue(MCBinaryExpr::createAShr(Offset, ShAmt, MCCtx)); in insertIndirectBranch()
3040 if (I->getOpcode() == AMDGPU::S_BRANCH) { in analyzeBranchImpl()
3042 TBB = I->getOperand(0).getMBB(); in analyzeBranchImpl()
3048 if (I->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) { in analyzeBranchImpl()
3049 CondBB = I->getOperand(1).getMBB(); in analyzeBranchImpl()
3050 Cond.push_back(I->getOperand(0)); in analyzeBranchImpl()
3052 BranchPredicate Pred = getBranchPredicate(I->getOpcode()); in analyzeBranchImpl()
3056 CondBB = I->getOperand(0).getMBB(); in analyzeBranchImpl()
3058 Cond.push_back(I->getOperand(1)); // Save the branch register. in analyzeBranchImpl()
3063 // Conditional branch followed by fall-through. in analyzeBranchImpl()
3068 if (I->getOpcode() == AMDGPU::S_BRANCH) { in analyzeBranchImpl()
3070 FBB = I->getOperand(0).getMBB(); in analyzeBranchImpl()
3088 while (I != E && !I->isBranch() && !I->isReturn()) { in analyzeBranch()
3089 switch (I->getOpcode()) { in analyzeBranch()
3110 llvm_unreachable("unexpected non-branch terminator inst"); in analyzeBranch()
3180 preserveCondRegFlags(CondBr->getOperand(1), Cond[1]); in insertBranch()
3197 MachineOperand &CondReg = CondBr->getOperand(1); in insertBranch()
3214 Cond[0].setImm(-Cond[0].getImm()); in reverseBranchCondition()
3229 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); in canInsertSelect()
3244 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); in canInsertSelect()
3269 Pred = static_cast<BranchPredicate>(-Pred); in insertSelect()
3273 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); in insertSelect()
3290 preserveCondRegFlags(Select->getOperand(3), Cond[1]); in insertSelect()
3300 preserveCondRegFlags(Select->getOperand(3), Cond[1]); in insertSelect()
3323 // 64-bit select is only available for SALU. in insertSelect()
3324 // TODO: Split 96-bit into 64-bit and 32-bit, not 3x 32-bit. in insertSelect()
3340 I = MIB->getIterator(); in insertSelect()
3362 preserveCondRegFlags(Select->getOperand(3), Cond[1]); in insertSelect()
3407 if (!MRI->hasOneNonDBGUse(Reg)) in foldImmediate()
3426 if (!ImmOp->isImm()) in foldImmediate()
3429 auto getImmFor = [ImmOp](const MachineOperand &UseOp) -> int64_t { in foldImmediate()
3430 int64_t Imm = ImmOp->getImm(); in foldImmediate()
3489 !RI.getRegClass(NewMCID.operands()[0].RegClass)->contains(DstReg)) in foldImmediate()
3494 UseMI.addImplicitDefUseOperands(*UseMI.getParent()->getParent()); in foldImmediate()
3527 if ((Src0->isReg() && Src0->getReg() == Reg) || in foldImmediate()
3528 (Src1->isReg() && Src1->getReg() == Reg)) { in foldImmediate()
3530 Src1->isReg() && Src1->getReg() == Reg ? Src0 : Src1; in foldImmediate()
3531 if (!RegSrc->isReg()) in foldImmediate()
3533 if (RI.isSGPRClass(MRI->getRegClass(RegSrc->getReg())) && in foldImmediate()
3537 if (!Src2->isReg() || RI.isSGPRClass(MRI->getRegClass(Src2->getReg()))) in foldImmediate()
3548 MachineInstr *Def = MRI->getUniqueVRegDef(Src2->getReg()); in foldImmediate()
3549 if (Def && Def->isMoveImmediate() && in foldImmediate()
3550 !isInlineConstant(Def->getOperand(1))) in foldImmediate()
3558 if (pseudoToMCOpcode(NewOpc) == -1) in foldImmediate()
3572 Register SrcReg = RegSrc->getReg(); in foldImmediate()
3573 unsigned SrcSubReg = RegSrc->getSubReg(); in foldImmediate()
3574 Src0->setReg(SrcReg); in foldImmediate()
3575 Src0->setSubReg(SrcSubReg); in foldImmediate()
3576 Src0->setIsKill(RegSrc->isKill()); in foldImmediate()
3584 Src1->ChangeToImmediate(Imm); in foldImmediate()
3589 bool DeleteDef = MRI->use_nodbg_empty(Reg); in foldImmediate()
3597 if (Src2->isReg() && Src2->getReg() == Reg) { in foldImmediate()
3602 if (Src0->isReg()) { in foldImmediate()
3606 MachineInstr *Def = MRI->getUniqueVRegDef(Src0->getReg()); in foldImmediate()
3607 if (Def && Def->isMoveImmediate() && in foldImmediate()
3608 isInlineConstant(Def->getOperand(1)) && in foldImmediate()
3609 MRI->hasOneUse(Src0->getReg())) { in foldImmediate()
3610 Src0->ChangeToImmediate(Def->getOperand(1).getImm()); in foldImmediate()
3613 RI.isSGPRReg(*MRI, Src0->getReg())) { in foldImmediate()
3616 // VGPR is okay as Src0 - fallthrough in foldImmediate()
3619 if (Src1->isReg() && !Src0Inlined) { in foldImmediate()
3620 // We have one slot for inlinable constant so far - try to fill it in foldImmediate()
3621 MachineInstr *Def = MRI->getUniqueVRegDef(Src1->getReg()); in foldImmediate()
3622 if (Def && Def->isMoveImmediate() && in foldImmediate()
3623 isInlineConstant(Def->getOperand(1)) && in foldImmediate()
3624 MRI->hasOneUse(Src1->getReg()) && commuteInstruction(UseMI)) in foldImmediate()
3625 Src0->ChangeToImmediate(Def->getOperand(1).getImm()); in foldImmediate()
3626 else if (RI.isSGPRReg(*MRI, Src1->getReg())) in foldImmediate()
3628 // VGPR is okay as Src1 - fallthrough in foldImmediate()
3637 if (pseudoToMCOpcode(NewOpc) == -1) in foldImmediate()
3656 Src2->ChangeToImmediate(getImmFor(*Src2)); in foldImmediate()
3666 bool DeleteDef = MRI->use_nodbg_empty(Reg); in foldImmediate()
3683 if (!BaseOps1[I]->isIdenticalTo(*BaseOps2[I])) in memOpsHaveSameBaseOperands()
3717 LocationSize Width0 = MIa.memoperands().front()->getSize(); in checkInstOffsetsDoNotOverlap()
3718 LocationSize Width1 = MIb.memoperands().front()->getSize(); in checkInstOffsetsDoNotOverlap()
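// The disjointness test behind checkInstOffsetsDoNotOverlap() is the usual
// half-open interval check; a sketch on plain integers, with the LocationSize
// bookkeeping from the lines above left out:
#include <cstdint>

static bool offsetsDoNotOverlap(int64_t Off0, uint64_t Width0,
                                int64_t Off1, uint64_t Width1) {
  // [Off0, Off0+Width0) and [Off1, Off1+Width1) are disjoint iff one access
  // ends at or before the other begins.
  return Off0 + int64_t(Width0) <= Off1 || Off1 + int64_t(Width1) <= Off0;
}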
3732 // XXX - Can we relax this between address spaces? in areMemAccessesTriviallyDisjoint()
3791 if (Def && SIInstrInfo::isFoldableCopy(*Def) && Def->getOperand(1).isImm()) { in getFoldableImm()
3792 Imm = Def->getOperand(1).getImm(); in getFoldableImm()
3802 if (!MO->isReg()) in getFoldableImm()
3804 const MachineFunction *MF = MO->getParent()->getParent()->getParent(); in getFoldableImm()
3805 const MachineRegisterInfo &MRI = MF->getRegInfo(); in getFoldableImm()
3806 return getFoldableImm(MO->getReg(), MRI, Imm, DefMI); in getFoldableImm()
3816 LV->replaceKillInstruction(Op.getReg(), MI, NewMI); in updateLiveVariables()
3829 if (NewMFMAOpc != -1) { in convertToThreeAddress()
3836 LIS->ReplaceMachineInstrInMaps(MI, *MIB); in convertToThreeAddress()
3837 // SlotIndex of defs needs to be updated when converting to early-clobber in convertToThreeAddress()
3838 MachineOperand &Def = MIB->getOperand(0); in convertToThreeAddress()
3840 LIS->hasInterval(Def.getReg())) { in convertToThreeAddress()
3841 SlotIndex OldIndex = LIS->getInstructionIndex(*MIB).getRegSlot(false); in convertToThreeAddress()
3842 SlotIndex NewIndex = LIS->getInstructionIndex(*MIB).getRegSlot(true); in convertToThreeAddress()
3843 auto &LI = LIS->getInterval(Def.getReg()); in convertToThreeAddress()
3846 if (S != LR.end() && S->start == OldIndex) { in convertToThreeAddress()
3847 assert(S->valno && S->valno->def == OldIndex); in convertToThreeAddress()
3848 S->start = NewIndex; in convertToThreeAddress()
3849 S->valno->def = NewIndex; in convertToThreeAddress()
3865 MIB->addOperand(MI.getOperand(I)); in convertToThreeAddress()
3869 LIS->ReplaceMachineInstrInMaps(MI, *MIB); in convertToThreeAddress()
3876 "pre-RA"); in convertToThreeAddress()
3917 if (!Src0->isReg() && !Src0->isImm()) in convertToThreeAddress()
3920 if (Src0->isImm() && !isInlineConstant(MI, Src0Idx, *Src0)) in convertToThreeAddress()
3945 (ST.getConstantBusLimit(Opc) > 1 || !Src0->isReg() || in convertToThreeAddress()
3946 !RI.isSGPRReg(MBB.getParent()->getRegInfo(), Src0->getReg()))) { in convertToThreeAddress()
3948 const auto killDef = [&]() -> void { in convertToThreeAddress()
3949 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); in convertToThreeAddress()
3951 Register DefReg = DefMI->getOperand(0).getReg(); in convertToThreeAddress()
3955 DefMI->setDesc(get(AMDGPU::IMPLICIT_DEF)); in convertToThreeAddress()
3956 for (unsigned I = DefMI->getNumOperands() - 1; I != 0; --I) in convertToThreeAddress()
3957 DefMI->removeOperand(I); in convertToThreeAddress()
3959 LV->getVarInfo(DefReg).AliveBlocks.clear(); in convertToThreeAddress()
3969 if (pseudoToMCOpcode(NewOpc) != -1) { in convertToThreeAddress()
3978 LIS->ReplaceMachineInstrInMaps(MI, *MIB); in convertToThreeAddress()
3989 if (pseudoToMCOpcode(NewOpc) != -1) { in convertToThreeAddress()
3998 LIS->ReplaceMachineInstrInMaps(MI, *MIB); in convertToThreeAddress()
4005 Imm = Src0->getImm(); in convertToThreeAddress()
4008 if (pseudoToMCOpcode(NewOpc) != -1 && in convertToThreeAddress()
4020 LIS->ReplaceMachineInstrInMaps(MI, *MIB); in convertToThreeAddress()
4041 if (pseudoToMCOpcode(NewOpc) == -1) in convertToThreeAddress()
4046 .addImm(Src0Mods ? Src0Mods->getImm() : 0) in convertToThreeAddress()
4048 .addImm(Src1Mods ? Src1Mods->getImm() : 0) in convertToThreeAddress()
4050 .addImm(Src2Mods ? Src2Mods->getImm() : 0) in convertToThreeAddress()
4052 .addImm(Clamp ? Clamp->getImm() : 0) in convertToThreeAddress()
4053 .addImm(Omod ? Omod->getImm() : 0) in convertToThreeAddress()
4056 MIB.addImm(OpSel ? OpSel->getImm() : 0); in convertToThreeAddress()
4059 LIS->ReplaceMachineInstrInMaps(MI, *MIB); in convertToThreeAddress()
4065 // XXX - Why isn't hasSideEffects sufficient for these?
4098 // Target-independent instructions do not have an implicit-use of EXEC, even in isSchedulingBoundary()
4170 // This won't read exec if this is an SGPR->SGPR copy. in mayReadEXEC()
4236 // records a 64-bit value. We need to know the size to determine if a 32-bit in isInlineConstant()
4238 // would be for any 32-bit integer operand, but would not be for a 64-bit one. in isInlineConstant()
4268 // distinction. However, in the case of 16-bit integer operations, the in isInlineConstant()
4269 // "floating point" values appear to not work. It seems read the low 16-bits in isInlineConstant()
4270 // of 32-bit immediates, which happens to always work for the integer in isInlineConstant()
4275 // TODO: Theoretically we could use op-sel to use the high bits of the in isInlineConstant()
4276 // 32-bit FP values. in isInlineConstant()
4295 // A few special case instructions have 16-bit operands on subtargets in isInlineConstant()
4296 // where 16-bit instructions are not legal. in isInlineConstant()
4297 // TODO: Do the 32-bit immediates work? We shouldn't really need to handle in isInlineConstant()
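// The size question discussed above matters because AMDGPU's inline integer
// constants cover only [-16, 64]: the bit pattern 0xFFFFFFFF is inline when
// read as the 32-bit value -1, but as the 64-bit value 4294967295 it must be
// emitted as a literal. A rough integer-only sketch, ignoring the inline
// floating-point encodings:
#include <cstdint>

static bool isInlineIntImm(int64_t Imm) {
  return Imm >= -16 && Imm <= 64;
}
// isInlineIntImm(-1)           -> true  (the 32-bit reading of 0xFFFFFFFF)
// isInlineIntImm(0xFFFFFFFFll) -> false (the 64-bit reading, 4294967295)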
4391 if (Op32 == -1) in hasVALU32BitEncoding()
4394 return pseudoToMCOpcode(Op32) != -1; in hasVALU32BitEncoding()
4407 return Mods && Mods->getImm(); in hasModifiersSet()
4428 if (!Src1->isReg() || !RI.isVGPR(MRI, Src1->getReg())) in canShrink()
4441 if (!Src2->isReg() || !RI.isVGPR(MRI, Src2->getReg()) || in canShrink()
4452 if (Src1 && (!Src1->isReg() || !RI.isVGPR(MRI, Src1->getReg()) || in canShrink()
4495 // Add the dst operand if the 32-bit encoding also has an explicit $vdst. in buildShrunkInst()
4499 // shrunk opcode loses the last def (SGPR def, in the VOP3->VOPC case). in buildShrunkInst()
4512 if (AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::src2) == -1) { in buildShrunkInst()
4618 const MachineFunction *MF = MI.getParent()->getParent(); in verifyInstruction()
4619 const MachineRegisterInfo &MRI = MF->getRegInfo(); in verifyInstruction()
4624 int Src3Idx = -1; in verifyInstruction()
4625 if (Src0Idx == -1) { in verifyInstruction()
4654 if (!Reg.isVirtual() && !RC->contains(Reg)) { in verifyInstruction()
4720 ErrInfo = "Expected immediate, but got non-immediate"; in verifyInstruction()
4755 if (RegClass != -1) { in verifyInstruction()
4760 if (!RC->contains(Reg)) { in verifyInstruction()
4777 if (OpIdx == -1) in verifyInstruction()
4801 (!OMod->isImm() || OMod->getImm() != 0)) { in verifyInstruction()
4813 unsigned Mods = Src0ModsMO->getImm(); in verifyInstruction()
4823 if (!ST.hasSDWASdst() && DstIdx != -1) { in verifyInstruction()
4833 if (Clamp && (!Clamp->isImm() || Clamp->getImm() != 0)) { in verifyInstruction()
4840 if (OMod && (!OMod->isImm() || OMod->getImm() != 0)) { in verifyInstruction()
4848 if (DstUnused && DstUnused->isImm() && in verifyInstruction()
4849 DstUnused->getImm() == AMDGPU::SDWA::UNUSED_PRESERVE) { in verifyInstruction()
4876 uint64_t DMaskImm = DMask->getImm(); in verifyInstruction()
4884 if (D16 && D16->getImm() && !ST.hasUnpackedD16VMem()) in verifyInstruction()
4888 if ((LWE && LWE->getImm()) || (TFE && TFE->getImm())) in verifyInstruction()
4913 if (ImmIdx != -1) { in verifyInstruction()
4923 // bus, and we don't want to check pseudo-operands like the source modifier in verifyInstruction()
4926 if (OpIdx == -1) in verifyInstruction()
4962 // vsrc0 can be sgpr, const or m0 and lane select sgpr, m0 or inline-const in verifyInstruction()
4975 // Special case for writelane - this can break the multiple constant bus rule, in verifyInstruction()
4982 if (OpIdx == -1) in verifyInstruction()
5014 if ((getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)->getImm() & in verifyInstruction()
5016 (getNamedOperand(MI, AMDGPU::OpName::src1_modifiers)->getImm() & in verifyInstruction()
5018 (getNamedOperand(MI, AMDGPU::OpName::src2_modifiers)->getImm() & in verifyInstruction()
5041 if (!Op->isMBB()) { in verifyInstruction()
5046 uint64_t Imm = Op->getImm(); in verifyInstruction()
5073 // RA scheduler where the main implicit operand is killed and implicit-defs in verifyInstruction()
5074 // are added for sub-registers that remain live after this instruction. in verifyInstruction()
5082 if (!Dst->isUse()) { in verifyInstruction()
5097 = MI.getOperand(StaticNumOps + NumImplicitOps - 1); in verifyInstruction()
5120 if (Soff && Soff->getReg() != AMDGPU::M0) { in verifyInstruction()
5129 if (Offset->getImm() != 0) { in verifyInstruction()
5137 if (GDSOp && GDSOp->getImm() != 0) { in verifyInstruction()
5153 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); in verifyInstruction()
5155 AMDGPU::getMIMGDimInfoByEncoding(DimOp->getImm()); in verifyInstruction()
5165 IsA16 = R128A16->getImm() != 0; in verifyInstruction()
5168 IsA16 = A16->getImm() != 0; in verifyInstruction()
5171 bool IsNSA = RsrcIdx - VAddr0Idx > 1; in verifyInstruction()
5178 VAddrWords = RsrcIdx - VAddr0Idx; in verifyInstruction()
5181 unsigned LastVAddrIdx = RsrcIdx - 1; in verifyInstruction()
5182 VAddrWords += getOpSize(MI, LastVAddrIdx) / 4 - 1; in verifyInstruction()
5203 unsigned DC = DppCt->getImm(); in verifyInstruction()
5257 if (Data && !Data->isReg()) in verifyInstruction()
5262 (RI.isAGPR(MRI, Dst->getReg()) != RI.isAGPR(MRI, Data->getReg()))) { in verifyInstruction()
5268 (RI.isAGPR(MRI, Data->getReg()) != RI.isAGPR(MRI, Data2->getReg()))) { in verifyInstruction()
5274 if ((Dst && RI.isAGPR(MRI, Dst->getReg())) || in verifyInstruction()
5275 (Data && RI.isAGPR(MRI, Data->getReg())) || in verifyInstruction()
5276 (Data2 && RI.isAGPR(MRI, Data2->getReg()))) { in verifyInstruction()
5285 const auto isAlignedReg = [&MI, &MRI, this](unsigned OpName) -> bool { in verifyInstruction()
5289 Register Reg = Op->getReg(); in verifyInstruction()
5294 !(RI.getChannelFromSubReg(Op->getSubReg()) & 1); in verifyInstruction()
5320 if (Src->isReg() && RI.isSGPRReg(MRI, Src->getReg())) { in verifyInstruction()
5339 // clang-format off
5353 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); in getVALUOp()
5500 // clang-format on
5517 auto StoreExecMI = BuildMI(MBB, MBBI, DL, TII->get(MovOpc), Reg) in insertScratchExecCopy()
5519 auto FlipExecMI = BuildMI(MBB, MBBI, DL, TII->get(MovOpc), Exec).addImm(-1); in insertScratchExecCopy()
5521 Indexes->insertMachineInstrInMaps(*StoreExecMI); in insertScratchExecCopy()
5522 Indexes->insertMachineInstrInMaps(*FlipExecMI); in insertScratchExecCopy()
5528 BuildMI(MBB, MBBI, DL, TII->get(OrSaveExec), Reg).addImm(-1); in insertScratchExecCopy()
5529 SaveExec->getOperand(3).setIsDead(); // Mark SCC as dead. in insertScratchExecCopy()
5531 Indexes->insertMachineInstrInMaps(*SaveExec); in insertScratchExecCopy()
5544 Indexes->insertMachineInstrInMaps(*ExecRestoreMI); in restoreExec()
5597 // The check is limited to FLAT and DS because atomics in non-flat encoding in getRegClass()
5604 if (DataIdx != -1) { in getRegClass()
5605 IsAllocatable = VDstIdx != -1 || AMDGPU::hasNamedOperand( in getRegClass()
5615 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); in getOpRegClass()
5618 Desc.operands()[OpNo].RegClass == -1) { in getOpRegClass()
5634 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); in legalizeOpWithMove()
5646 DebugLoc DL = MBB->findDebugLoc(I); in legalizeOpWithMove()
5655 MachineBasicBlock *MBB = MI->getParent(); in buildExtractSubReg()
5656 DebugLoc DL = MI->getDebugLoc(); in buildExtractSubReg()
5666 MachineBasicBlock::iterator MII, MachineRegisterInfo &MRI, in buildExtractSubRegOrImm() argument
5678 unsigned SubReg = buildExtractSubReg(MII, MRI, Op, SuperRC, in buildExtractSubRegOrImm()
5701 return DRC->contains(Reg); in isLegalRegOperand()
5706 const MachineFunction *MF = MO.getParent()->getParent()->getParent(); in isLegalRegOperand()
5715 return RC->hasSuperClassEq(DRC); in isLegalRegOperand()
5724 // Handle non-register types that are treated like immediates. in isLegalVSrcOperand()
5731 const MachineFunction &MF = *MI.getParent()->getParent(); in isOperandLegal()
5736 OpInfo.RegClass != -1 ? RI.getRegClass(OpInfo.RegClass) : nullptr; in isOperandLegal()
5743 if (!MO->isReg() && !isInlineConstant(*MO, OpInfo) && !LiteralLimit--) in isOperandLegal()
5747 if (MO->isReg()) in isOperandLegal()
5748 SGPRsUsed.insert(RegSubRegPair(MO->getReg(), MO->getSubReg())); in isOperandLegal()
5759 if (--ConstantBusLimit <= 0) in isOperandLegal()
5765 if (!LiteralLimit--) in isOperandLegal()
5767 if (--ConstantBusLimit <= 0) in isOperandLegal()
5771 } else if (ST.hasNoF16PseudoScalarTransInlineConstants() && !MO->isReg() && in isOperandLegal()
5777 if (MO->isReg()) { in isOperandLegal()
5782 bool IsAGPR = RI.isAGPR(MRI, MO->getReg()); in isOperandLegal()
5794 if ((int)OpIdx == VDstIdx && DataIdx != -1 && in isOperandLegal()
5799 if (VDstIdx != -1 && in isOperandLegal()
5805 if (Data1Idx != -1 && MI.getOperand(Data1Idx).isReg() && in isOperandLegal()
5811 RI.isSGPRReg(MRI, MO->getReg())) in isOperandLegal()
5816 if (MO->isImm()) { in isOperandLegal()
5817 uint64_t Imm = MO->getImm(); in isOperandLegal()
5828 // FIXME: We can use sign extended 64-bit literals, but only for signed in isOperandLegal()
5837 // Handle non-register types that are treated like immediates. in isOperandLegal()
5838 assert(MO->isImm() || MO->isTargetIndex() || MO->isFI() || MO->isGlobal()); in isOperandLegal()
5867 // both the value to write (src0) and lane select (src1). Fix up non-SGPR in legalizeOperandsVOP2()
5931 // TODO: Other immediate-like operand kinds could be commuted if there was a in legalizeOperandsVOP2()
5940 if (CommutedOpc == -1) { in legalizeOperandsVOP2()
6003 --ConstantBusLimit; in legalizeOperandsVOP3()
6007 if (Idx == -1) in legalizeOperandsVOP3()
6016 --LiteralLimit; in legalizeOperandsVOP3()
6017 --ConstantBusLimit; in legalizeOperandsVOP3()
6021 --LiteralLimit; in legalizeOperandsVOP3()
6022 --ConstantBusLimit; in legalizeOperandsVOP3()
6042 --ConstantBusLimit; in legalizeOperandsVOP3()
6107 if (SBase && !RI.isSGPRClass(MRI.getRegClass(SBase->getReg()))) { in legalizeOperandsSMRD()
6108 Register SGPR = readlaneVGPRToSGPR(SBase->getReg(), MI, MRI); in legalizeOperandsSMRD()
6109 SBase->setReg(SGPR); in legalizeOperandsSMRD()
6112 if (SOff && !RI.isSGPRClass(MRI.getRegClass(SOff->getReg()))) { in legalizeOperandsSMRD()
6113 Register SGPR = readlaneVGPRToSGPR(SOff->getReg(), MI, MRI); in legalizeOperandsSMRD()
6114 SOff->setReg(SGPR); in legalizeOperandsSMRD()
6132 MachineRegisterInfo &MRI = Inst.getMF()->getRegInfo(); in moveFlatAddrToVGPR()
6148 if (!VAddrDef || VAddrDef->getOpcode() != AMDGPU::V_MOV_B32_e32 || in moveFlatAddrToVGPR()
6149 !VAddrDef->getOperand(1).isImm() || in moveFlatAddrToVGPR()
6150 VAddrDef->getOperand(1).getImm() != 0) in moveFlatAddrToVGPR()
6178 if (NewVDstIn != -1) { in moveFlatAddrToVGPR()
6185 if (NewVDstIn != -1) { in moveFlatAddrToVGPR()
6192 if (VAddrDef && MRI.use_nodbg_empty(VAddrDef->getOperand(0).getReg())) in moveFlatAddrToVGPR()
6193 VAddrDef->eraseFromParent(); in moveFlatAddrToVGPR()
6207 if (!SAddr || RI.isSGPRClass(MRI.getRegClass(SAddr->getReg()))) in legalizeOperandsFLAT()
6213 Register ToSGPR = readlaneVGPRToSGPR(SAddr->getReg(), MI, MRI); in legalizeOperandsFLAT()
6214 SAddr->setReg(ToSGPR); in legalizeOperandsFLAT()
6244 if (Def->isMoveImmediate() && DstRC != &AMDGPU::VReg_1RegClass) in legalizeGenericOperand()
6247 bool ImpDef = Def->isImplicitDef(); in legalizeGenericOperand()
6248 while (!ImpDef && Def && Def->isCopy()) { in legalizeGenericOperand()
6249 if (Def->getOperand(1).getReg().isPhysical()) in legalizeGenericOperand()
6251 Def = MRI.getUniqueVRegDef(Def->getOperand(1).getReg()); in legalizeGenericOperand()
6252 ImpDef = Def && Def->isImplicitDef(); in legalizeGenericOperand()
6254 if (!RI.isSGPRClass(DstRC) && !Copy->readsRegister(AMDGPU::EXEC, &RI) && in legalizeGenericOperand()
6276 const auto *BoolXExecRC = TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID); in emitLoadScalarOpsFromVGPRLoop()
6284 unsigned RegSize = TRI->getRegSizeInBits(ScalarOp->getReg(), MRI); in emitLoadScalarOpsFromVGPRLoop()
6286 Register VScalarOp = ScalarOp->getReg(); in emitLoadScalarOpsFromVGPRLoop()
6312 ScalarOp->setReg(CurReg); in emitLoadScalarOpsFromVGPRLoop()
6313 ScalarOp->setIsKill(); in emitLoadScalarOpsFromVGPRLoop()
6315 unsigned VScalarOpUndef = getUndefRegState(ScalarOp->isUndef()); in emitLoadScalarOpsFromVGPRLoop()
6323 // Read the next variant <- also loop target. in emitLoadScalarOpsFromVGPRLoop()
6325 .addReg(VScalarOp, VScalarOpUndef, TRI->getSubRegFromChannel(Idx)); in emitLoadScalarOpsFromVGPRLoop()
6327 // Read the next variant <- also loop target. in emitLoadScalarOpsFromVGPRLoop()
6330 TRI->getSubRegFromChannel(Idx + 1)); in emitLoadScalarOpsFromVGPRLoop()
6335 // Comparison is to be done as 64-bit. in emitLoadScalarOpsFromVGPRLoop()
6351 TRI->getSubRegFromChannel(Idx, 2)); in emitLoadScalarOpsFromVGPRLoop()
6366 TRI->getEquivalentSGPRClass(MRI.getRegClass(VScalarOp)); in emitLoadScalarOpsFromVGPRLoop()
6374 Merge.addReg(Piece).addImm(TRI->getSubRegFromChannel(Channel++)); in emitLoadScalarOpsFromVGPRLoop()
6378 ScalarOp->setReg(SScalarOp); in emitLoadScalarOpsFromVGPRLoop()
6379 ScalarOp->setIsKill(); in emitLoadScalarOpsFromVGPRLoop()
6424 const auto *BoolXExecRC = TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID); in loadMBUFScalarOperandsFromVGPR()
6448 // incorrect due to the added control-flow. in loadMBUFScalarOperandsFromVGPR()
6452 for (auto &MO : I->all_uses()) in loadMBUFScalarOperandsFromVGPR()
6468 LoopBB->addSuccessor(BodyBB); in loadMBUFScalarOperandsFromVGPR()
6469 BodyBB->addSuccessor(LoopBB); in loadMBUFScalarOperandsFromVGPR()
6470 BodyBB->addSuccessor(RemainderBB); in loadMBUFScalarOperandsFromVGPR()
6474 RemainderBB->transferSuccessorsAndUpdatePHIs(&MBB); in loadMBUFScalarOperandsFromVGPR()
6475 RemainderBB->splice(RemainderBB->begin(), &MBB, End, MBB.end()); in loadMBUFScalarOperandsFromVGPR()
6476 BodyBB->splice(BodyBB->begin(), &MBB, Begin, MBB.end()); in loadMBUFScalarOperandsFromVGPR()
6485 MDT->addNewBlock(LoopBB, &MBB); in loadMBUFScalarOperandsFromVGPR()
6486 MDT->addNewBlock(BodyBB, LoopBB); in loadMBUFScalarOperandsFromVGPR()
6487 MDT->addNewBlock(RemainderBB, BodyBB); in loadMBUFScalarOperandsFromVGPR()
6488 for (auto &Succ : RemainderBB->successors()) { in loadMBUFScalarOperandsFromVGPR()
6489 if (MDT->properlyDominates(&MBB, Succ)) { in loadMBUFScalarOperandsFromVGPR()
6490 MDT->changeImmediateDominator(Succ, RemainderBB); in loadMBUFScalarOperandsFromVGPR()
6497 MachineBasicBlock::iterator First = RemainderBB->begin(); in loadMBUFScalarOperandsFromVGPR()
6510 // Extract pointer from Rsrc and return a zero-value Rsrc replacement.
6533 // SRsrcFormatLo = RSRC_DATA_FORMAT{31-0} in extractRsrcPtr()
6537 // SRsrcFormatHi = RSRC_DATA_FORMAT{63-32} in extractRsrcPtr()
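// The materialization described above splits the 64-bit RSRC_DATA_FORMAT
// constant into dwords so each half can be written with a 32-bit s_mov; the
// actual format value is subtarget-dependent and not shown here. A host-side
// sketch of just the split:
#include <cstdint>
#include <utility>

static std::pair<uint32_t, uint32_t> splitRsrcFormat(uint64_t Format) {
  return {static_cast<uint32_t>(Format),        // SRsrcFormatLo = bits 31-0
          static_cast<uint32_t>(Format >> 32)}; // SRsrcFormatHi = bits 63-32
}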
6556 MachineFunction &MF = *MI.getParent()->getParent(); in legalizeOperands()
6602 // otherwise we will create illegal VGPR->SGPR copies when legalizing in legalizeOperands()
6631 MachineBasicBlock::iterator Insert = InsertBB->getFirstTerminator(); in legalizeOperands()
6633 // Avoid creating no-op copies with the same src and dst reg class. These in legalizeOperands()
6714 if (SRsrc && !RI.isSGPRClass(MRI.getRegClass(SRsrc->getReg()))) in legalizeOperands()
6719 if (SSamp && !RI.isSGPRClass(MRI.getRegClass(SSamp->getReg()))) in legalizeOperands()
6728 if (!RI.isSGPRClass(MRI.getRegClass(Dest->getReg()))) { in legalizeOperands()
6738 while (Start->getOpcode() != FrameSetupOpcode) in legalizeOperands()
6739 --Start; in legalizeOperands()
6741 while (End->getOpcode() != FrameDestroyOpcode) in legalizeOperands()
6745 while (End != MBB.end() && End->isCopy() && End->getOperand(1).isReg() && in legalizeOperands()
6746 MI.definesRegister(End->getOperand(1).getReg(), /*TRI=*/nullptr)) in legalizeOperands()
6770 if (SoffsetIdx != -1) { in legalizeOperands()
6772 if (Soffset->isReg() && Soffset->getReg().isVirtual() && in legalizeOperands()
6773 !RI.isSGPRClass(MRI.getRegClass(Soffset->getReg()))) { in legalizeOperands()
6781 if (RsrcIdx != -1) { in legalizeOperands()
6783 if (Rsrc->isReg() && !RI.isSGPRClass(MRI.getRegClass(Rsrc->getReg()))) { in legalizeOperands()
6797 // a zero-value SRsrc. in legalizeOperands()
6803 // Otherwise we are on non-ADDR64 hardware, and/or we have in legalizeOperands()
6810 if (VAddr && AMDGPU::getIfAddr64Inst(MI.getOpcode()) != -1) { in legalizeOperands()
6829 .addReg(VAddr->getReg(), 0, AMDGPU::sub0) in legalizeOperands()
6836 .addReg(VAddr->getReg(), 0, AMDGPU::sub1) in legalizeOperands()
6847 VAddr->setReg(NewVAddr); in legalizeOperands()
6848 Rsrc->setReg(NewSRsrc); in legalizeOperands()
6881 MIB.addImm(CPol->getImm()); in legalizeOperands()
6886 MIB.addImm(TFE->getImm()); in legalizeOperands()
6909 BuildMI(MBB, Addr64, Addr64->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), in legalizeOperands()
6941 AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::srsrc); in insert()
6942 if (RsrcIdx != -1) { in insert()
6968 "Deferred MachineInstr are not supposed to re-populate worklist"); in moveToVALU()
6979 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); in moveToVALUImpl()
7006 // Split s_mul_u64 into 32-bit vector multiplications. in moveToVALUImpl()
7243 addUsersToMoveToVALUWorklist(NewInstr->getOperand(0).getReg(), MRI, in moveToVALUImpl()
7366 // Handle converting generic instructions like COPY-to-SGPR into in moveToVALUImpl()
7367 // COPY-to-VGPR. in moveToVALUImpl()
7396 // Make sure we don't leave around a dead VGPR->SGPR copy. Normally in moveToVALUImpl()
7397 // these are deleted later, but at -O0 it would leave a suspicious in moveToVALUImpl()
7399 for (unsigned I = Inst.getNumOperands() - 1; I != 0; --I) in moveToVALUImpl()
7416 NewInstr->addOperand(Inst.getOperand(0)); in moveToVALUImpl()
7426 NewInstr->addOperand(Src); in moveToVALUImpl()
7456 NewInstr->addOperand(Inst.getOperand(2)); in moveToVALUImpl()
7461 NewInstr->addOperand(Inst.getOperand(3)); in moveToVALUImpl()
7472 NewInstr->addOperand(Op); in moveToVALUImpl()
7480 // Only propagate through live-def of SCC. in moveToVALUImpl()
7489 if (NewInstr->getOperand(0).isReg() && NewInstr->getOperand(0).isDef()) { in moveToVALUImpl()
7490 Register DstReg = NewInstr->getOperand(0).getReg(); in moveToVALUImpl()
7515 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); in moveScalarAddSub()
7546 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); in lowerSelect()
7547 MachineBasicBlock::iterator MII = Inst; in lowerSelect() local
7562 if (!IsSCC && Src0.isImm() && (Src0.getImm() == -1) && Src1.isImm() && in lowerSelect()
7579 Inst.getParent()->rend())) { in lowerSelect()
7581 -1) { in lowerSelect()
7583 BuildMI(MBB, MII, DL, get(AMDGPU::COPY), NewCondReg) in lowerSelect()
7598 BuildMI(MBB, MII, DL, get(Opcode), NewCondReg).addImm(-1).addImm(0); in lowerSelect()
7599 NewSelect->getOperand(3).setIsUndef(Cond.isUndef()); in lowerSelect()
7607 NewInst = BuildMI(MBB, MII, DL, get(AMDGPU::V_CNDMASK_B32_e64), NewDestReg) in lowerSelect()
7615 BuildMI(MBB, MII, DL, get(AMDGPU::V_CNDMASK_B64_PSEUDO), NewDestReg) in lowerSelect()
7628 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); in lowerScalarAbs()
7629 MachineBasicBlock::iterator MII = Inst; in lowerScalarAbs() local
7640 BuildMI(MBB, MII, DL, get(SubOp), TmpReg) in lowerScalarAbs()
7644 BuildMI(MBB, MII, DL, get(AMDGPU::V_MAX_I32_e64), ResultReg) in lowerScalarAbs()
7655 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); in lowerScalarXnor()
7656 MachineBasicBlock::iterator MII = Inst; in lowerScalarXnor() local
7665 legalizeGenericOperand(MBB, MII, &AMDGPU::VGPR_32RegClass, Src0, MRI, DL); in lowerScalarXnor()
7666 legalizeGenericOperand(MBB, MII, &AMDGPU::VGPR_32RegClass, Src1, MRI, DL); in lowerScalarXnor()
7668 BuildMI(MBB, MII, DL, get(AMDGPU::V_XNOR_B32_e64), NewDest) in lowerScalarXnor()
7691 BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), Temp).add(Src0); in lowerScalarXnor()
7692 Xor = BuildMI(MBB, MII, DL, get(AMDGPU::S_XOR_B32), NewDest) in lowerScalarXnor()
7696 BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), Temp).add(Src1); in lowerScalarXnor()
7697 Xor = BuildMI(MBB, MII, DL, get(AMDGPU::S_XOR_B32), NewDest) in lowerScalarXnor()
7701 Xor = BuildMI(MBB, MII, DL, get(AMDGPU::S_XOR_B32), Temp) in lowerScalarXnor()
7705 BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), NewDest).addReg(Temp); in lowerScalarXnor()
7721 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); in splitScalarNotBinop()
7722 MachineBasicBlock::iterator MII = Inst; in splitScalarNotBinop() local
7732 MachineInstr &Op = *BuildMI(MBB, MII, DL, get(Opcode), Interm) in splitScalarNotBinop()
7736 MachineInstr &Not = *BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), NewDest) in splitScalarNotBinop()
7750 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); in splitScalarBinOpN2()
7751 MachineBasicBlock::iterator MII = Inst; in splitScalarBinOpN2() local
7761 MachineInstr &Not = *BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), Interm) in splitScalarBinOpN2()
7764 MachineInstr &Op = *BuildMI(MBB, MII, DL, get(Opcode), NewDest) in splitScalarBinOpN2()
7779 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); in splitScalar64BitUnaryOp()
7785 MachineBasicBlock::iterator MII = Inst; in splitScalar64BitUnaryOp() local
7795 MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, in splitScalar64BitUnaryOp()
7804 MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0).add(SrcReg0Sub0); in splitScalar64BitUnaryOp()
7806 MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, in splitScalar64BitUnaryOp()
7810 MachineInstr &HiHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub1).add(SrcReg0Sub1); in splitScalar64BitUnaryOp()
7816 BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg) in splitScalar64BitUnaryOp()
7835 // split the s_mul_u64 into 32-bit vector multiplications.
7840 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); in splitScalarSMulU64()
7850 MachineBasicBlock::iterator MII = Inst; in splitScalarSMulU64() local
7863 // First, we extract the low 32-bit and high 32-bit values from each of the in splitScalarSMulU64()
7866 buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, AMDGPU::sub0, Src0SubRC); in splitScalarSMulU64()
7868 buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC, AMDGPU::sub0, Src1SubRC); in splitScalarSMulU64()
7870 buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, AMDGPU::sub1, Src0SubRC); in splitScalarSMulU64()
7872 buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC, AMDGPU::sub1, Src1SubRC); in splitScalarSMulU64()
7878 //   Op1H:Op1L * Op0H:Op0L, written out as 32-bit partial products: in splitScalarSMulU64()
7881 //   (Op1L*Op0L) + ((Op1H*Op0L + Op1L*Op0H) << 32) + ((Op1H*Op0H) << 64) in splitScalarSMulU64()
7884 // We drop Op1H*Op0H because the result of the multiplication is a 64-bit in splitScalarSMulU64()
7886 // The low 32-bit value is Op1L*Op0L. in splitScalarSMulU64()
7887 // The high 32-bit value is Op1H*Op0L + Op1L*Op0H + carry (from Op1L*Op0L). in splitScalarSMulU64()
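// A plain-integer cross-check of the decomposition described above: the low
// dword of the product is Op1L*Op0L, the high dword is Op1H*Op0L + Op1L*Op0H
// plus the carry (high half) of Op1L*Op0L, and Op1H*Op0H is dropped because
// it only affects bits beyond 64. Host-side reference, not compiler code.
#include <cstdint>

static uint64_t mul64Via32(uint64_t Op0, uint64_t Op1) {
  const uint32_t Op0L = uint32_t(Op0), Op0H = uint32_t(Op0 >> 32);
  const uint32_t Op1L = uint32_t(Op1), Op1H = uint32_t(Op1 >> 32);

  const uint64_t LoProd = uint64_t(Op1L) * Op0L; // V_MUL_LO_U32 + V_MUL_HI_U32
  const uint32_t Carry = uint32_t(LoProd >> 32);
  const uint32_t Lo = uint32_t(LoProd);
  const uint32_t Hi = Op1H * Op0L + Op1L * Op0H + Carry; // wraps mod 2^32
  return (uint64_t(Hi) << 32) | Lo;
}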
7891 BuildMI(MBB, MII, DL, get(AMDGPU::V_MUL_LO_U32_e64), Op1L_Op0H_Reg) in splitScalarSMulU64()
7897 BuildMI(MBB, MII, DL, get(AMDGPU::V_MUL_LO_U32_e64), Op1H_Op0L_Reg) in splitScalarSMulU64()
7903 BuildMI(MBB, MII, DL, get(AMDGPU::V_MUL_HI_U32_e64), CarryReg) in splitScalarSMulU64()
7908 BuildMI(MBB, MII, DL, get(AMDGPU::V_MUL_LO_U32_e64), DestSub0) in splitScalarSMulU64()
7913 MachineInstr *Add = BuildMI(MBB, MII, DL, get(AMDGPU::V_ADD_U32_e32), AddReg) in splitScalarSMulU64()
7918 BuildMI(MBB, MII, DL, get(AMDGPU::V_ADD_U32_e32), DestSub1) in splitScalarSMulU64()
7922 BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg) in splitScalarSMulU64()
7943 // Lower S_MUL_U64_U32_PSEUDO/S_MUL_I64_I32_PSEUDO into two 32-bit vector
7949 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); in splitScalarSMulPseudo()
7959 MachineBasicBlock::iterator MII = Inst; in splitScalarSMulPseudo() local
7972 // First, we extract the low 32-bit and high 32-bit values from each of the in splitScalarSMulPseudo()
7975 buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, AMDGPU::sub0, Src0SubRC); in splitScalarSMulPseudo()
7977 buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC, AMDGPU::sub0, Src1SubRC); in splitScalarSMulPseudo()
7984 BuildMI(MBB, MII, DL, get(NewOpc), DestSub1).add(Op1L).add(Op0L); in splitScalarSMulPseudo()
7987 BuildMI(MBB, MII, DL, get(AMDGPU::V_MUL_LO_U32_e64), DestSub0) in splitScalarSMulPseudo()
7991 BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg) in splitScalarSMulPseudo()
8012 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); in splitScalar64BitBinaryOp()
8019 MachineBasicBlock::iterator MII = Inst; in splitScalar64BitBinaryOp() local
8035 MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, in splitScalar64BitBinaryOp()
8037 MachineOperand SrcReg1Sub0 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC, in splitScalar64BitBinaryOp()
8039 MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, in splitScalar64BitBinaryOp()
8041 MachineOperand SrcReg1Sub1 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC, in splitScalar64BitBinaryOp()
8050 MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0) in splitScalar64BitBinaryOp()
8055 MachineInstr &HiHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub1) in splitScalar64BitBinaryOp()
8060 BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg) in splitScalar64BitBinaryOp()
8079 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); in splitScalar64BitXnor()
8086 MachineBasicBlock::iterator MII = Inst; in splitScalar64BitXnor() local
8103 BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B64), Interm) in splitScalar64BitXnor()
8108 MachineInstr &Xor = *BuildMI(MBB, MII, DL, get(AMDGPU::S_XOR_B64), NewDest) in splitScalar64BitXnor()
8120 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); in splitScalar64BitBCNT()
8122 MachineBasicBlock::iterator MII = Inst; in splitScalar64BitBCNT() local
8139 MachineOperand SrcRegSub0 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC, in splitScalar64BitBCNT()
8141 MachineOperand SrcRegSub1 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC, in splitScalar64BitBCNT()
8144 BuildMI(MBB, MII, DL, InstDesc, MidReg).add(SrcRegSub0).addImm(0); in splitScalar64BitBCNT()
8146 BuildMI(MBB, MII, DL, InstDesc, ResultReg).add(SrcRegSub1).addReg(MidReg); in splitScalar64BitBCNT()
8158 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); in splitScalar64BitBFE()
8159 MachineBasicBlock::iterator MII = Inst; in splitScalar64BitBFE() local
8178 BuildMI(MBB, MII, DL, get(AMDGPU::V_BFE_I32_e64), MidRegLo) in splitScalar64BitBFE()
8183 BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e32), MidRegHi) in splitScalar64BitBFE()
8187 BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg) in splitScalar64BitBFE()
8202 BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e64), TmpReg) in splitScalar64BitBFE()
8206 BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg) in splitScalar64BitBFE()
8219 // (S_FLBIT_I32_B64 hi:lo) -> in splitScalar64BitCountOp()
8220 //   (umin (V_FFBH_U32_e32 hi), (uaddsat (V_FFBH_U32_e32 lo), 32)) in splitScalar64BitCountOp()
8221 // (S_FF1_I32_B64 hi:lo) -> in splitScalar64BitCountOp()
8222 //   (umin (uaddsat (V_FFBL_B32_e32 hi), 32), (V_FFBL_B32_e32 lo)) in splitScalar64BitCountOp()
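// Host-side sketch of the identity in the comment above, for the ffbh (count
// leading zeros) case; ffbh32 mimics V_FFBH_U32's "no bit found" result of
// all-ones, which is what makes the umin/uaddsat form collapse correctly when
// either half (or the whole 64-bit source) is zero. Illustration only.
#include <algorithm>
#include <cstdint>

static uint32_t ffbh32(uint32_t V) {
  if (V == 0)
    return UINT32_MAX; // "not found", as V_FFBH_U32 reports it
  uint32_t N = 0;
  for (uint32_t Bit = 1u << 31; !(V & Bit); Bit >>= 1)
    ++N;
  return N;
}

static uint32_t uaddsat32(uint32_t A, uint32_t B) {
  const uint32_t S = A + B;
  return S < A ? UINT32_MAX : S; // clamp on wrap-around
}

// (S_FLBIT_I32_B64 hi:lo) == umin(ffbh(hi), uaddsat(ffbh(lo), 32))
static uint32_t flbit64(uint32_t Hi, uint32_t Lo) {
  return std::min(ffbh32(Hi), uaddsat32(ffbh32(Lo), 32));
}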
8225 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); in splitScalar64BitCountOp()
8226 MachineBasicBlock::iterator MII = Inst; in splitScalar64BitCountOp() local
8244 buildExtractSubRegOrImm(MII, MRI, Src, SrcRC, AMDGPU::sub0, SrcSubRC); in splitScalar64BitCountOp()
8246 buildExtractSubRegOrImm(MII, MRI, Src, SrcRC, AMDGPU::sub1, SrcSubRC); in splitScalar64BitCountOp()
8253 BuildMI(MBB, MII, DL, InstDesc, MidReg1).add(SrcRegSub0); in splitScalar64BitCountOp()
8255 BuildMI(MBB, MII, DL, InstDesc, MidReg2).add(SrcRegSub1); in splitScalar64BitCountOp()
8257 BuildMI(MBB, MII, DL, get(OpcodeAdd), MidReg3) in splitScalar64BitCountOp()
8262 BuildMI(MBB, MII, DL, get(AMDGPU::V_MIN_U32_e64), MidReg4) in splitScalar64BitCountOp()
8276 MachineInstr &UseMI = *I->getParent(); in addUsersToMoveToVALUWorklist()
8300 } while (I != E && I->getParent() == &UseMI); in addUsersToMoveToVALUWorklist()
8393 SCCDefInst.getParent()->end())) { in addSCCDefUsersToVALUWorklist()
8396 if (SCCIdx != -1) { in addSCCDefUsersToVALUWorklist()
8398 MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); in addSCCDefUsersToVALUWorklist()
8412 if (MI.findRegisterDefOperandIdx(AMDGPU::SCC, &RI, false, false) != -1) in addSCCDefUsersToVALUWorklist()
8416 Copy->eraseFromParent(); in addSCCDefUsersToVALUWorklist()
8432 SCCUseInst->getParent()->rend())) { in addSCCDefsToVALUWorklist()
8510 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); in findUsedSGPR()
8514 if (Idx == -1) in findUsedSGPR()
8543 // V_FMA_F32 v0, s0, s0, s0 -> No moves in findUsedSGPR()
8544 // V_FMA_F32 v0, s0, s1, s0 -> Move s1 in findUsedSGPR()
8546 // TODO: If some of the operands are 64-bit SGPRs and some 32, we should in findUsedSGPR()
8565 if (Idx == -1) in getNamedOperand()
8603 uint64_t EltSizeValue = Log2_32(ST.getMaxPrivateElementSize(true)) - 1; in getScratchRsrcWords23()
8634 if (!Addr || !Addr->isFI()) in isStackAccess()
8638 (*MI.memoperands_begin())->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS); in isStackAccess()
8640 FrameIndex = Addr->getIndex(); in isStackAccess()
8641 return getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg(); in isStackAccess()
8647 assert(Addr && Addr->isFI()); in isSGPRStackAccess()
8648 FrameIndex = Addr->getIndex(); in isSGPRStackAccess()
8649 return getNamedOperand(MI, AMDGPU::OpName::data)->getReg(); in isSGPRStackAccess()
8683 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end(); in getInstBundleSize()
8684 while (++I != E && I->isInsideBundle()) { in getInstBundleSize()
8685 assert(!I->isBundle() && "No nested bundle!"); in getInstBundleSize()
8710 // Instructions may have a 32-bit literal encoded after them. Check in getInstSizeInBytes()
8734 return 8 + 4 * ((RSrcIdx - VAddr0Idx + 2) / 4); in getInstSizeInBytes()
8742 const MachineFunction *MF = MI.getParent()->getParent(); in getInstSizeInBytes()
8744 return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo(), &ST); in getInstSizeInBytes()
8761 if (MMO->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS) in mayAccessFlatAddressSpace()
8773 MachineBasicBlock::iterator TI = IfEntry->getFirstTerminator(); in convertNonUniformIfRegion()
8774 assert(TI != IfEntry->end()); in convertNonUniformIfRegion()
8777 MachineFunction *MF = IfEntry->getParent(); in convertNonUniformIfRegion()
8778 MachineRegisterInfo &MRI = IfEntry->getParent()->getRegInfo(); in convertNonUniformIfRegion()
8780 if (Branch->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) { in convertNonUniformIfRegion()
8783 BuildMI(*MF, Branch->getDebugLoc(), get(AMDGPU::SI_IF), DstReg) in convertNonUniformIfRegion()
8784 .add(Branch->getOperand(0)) in convertNonUniformIfRegion()
8785 .add(Branch->getOperand(1)); in convertNonUniformIfRegion()
8787 BuildMI(*MF, Branch->getDebugLoc(), get(AMDGPU::SI_END_CF)) in convertNonUniformIfRegion()
8790 IfEntry->erase(TI); in convertNonUniformIfRegion()
8791 IfEntry->insert(IfEntry->end(), SIIF); in convertNonUniformIfRegion()
8792 IfEnd->insert(IfEnd->getFirstNonPHI(), SIEND); in convertNonUniformIfRegion()
8798 MachineBasicBlock::iterator TI = LoopEnd->getFirstTerminator(); in convertNonUniformLoopRegion()
8800 assert(TI != LoopEnd->end()); in convertNonUniformLoopRegion()
8803 MachineFunction *MF = LoopEnd->getParent(); in convertNonUniformLoopRegion()
8804 MachineRegisterInfo &MRI = LoopEnd->getParent()->getRegInfo(); in convertNonUniformLoopRegion()
8806 if (Branch->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) { in convertNonUniformLoopRegion()
8811 BuildMI(*(MF), Branch->getDebugLoc(), get(TargetOpcode::PHI), DstReg); in convertNonUniformLoopRegion()
8812 for (MachineBasicBlock *PMBB : LoopEntry->predecessors()) { in convertNonUniformLoopRegion()
8817 materializeImmediate(*PMBB, PMBB->getFirstTerminator(), DebugLoc(), in convertNonUniformLoopRegion()
8824 MachineInstr *SIIFBREAK = BuildMI(*(MF), Branch->getDebugLoc(), in convertNonUniformLoopRegion()
8827 .add(Branch->getOperand(0)); in convertNonUniformLoopRegion()
8829 BuildMI(*(MF), Branch->getDebugLoc(), get(AMDGPU::SI_LOOP)) in convertNonUniformLoopRegion()
8833 LoopEntry->insert(LoopEntry->begin(), HeaderPhi); in convertNonUniformLoopRegion()
8834 LoopEnd->erase(TI); in convertNonUniformLoopRegion()
8835 LoopEnd->insert(LoopEnd->end(), SIIFBREAK); in convertNonUniformLoopRegion()
8836 LoopEnd->insert(LoopEnd->end(), SILOOP); in convertNonUniformLoopRegion()
8843 {AMDGPU::TI_CONSTDATA_START, "amdgpu-constdata-start"}, in getSerializableTargetIndices()
8844 {AMDGPU::TI_SCRATCH_RSRC_DWORD0, "amdgpu-scratch-rsrc-dword0"}, in getSerializableTargetIndices()
8845 {AMDGPU::TI_SCRATCH_RSRC_DWORD1, "amdgpu-scratch-rsrc-dword1"}, in getSerializableTargetIndices()
8846 {AMDGPU::TI_SCRATCH_RSRC_DWORD2, "amdgpu-scratch-rsrc-dword2"}, in getSerializableTargetIndices()
8847 {AMDGPU::TI_SCRATCH_RSRC_DWORD3, "amdgpu-scratch-rsrc-dword3"}}; in getSerializableTargetIndices()
8851 /// This is used by the post-RA scheduler (PostRASchedulerList.cpp). The
8852 /// post-RA version of misched uses CreateTargetMIHazardRecognizer.
8856 return new GCNHazardRecognizer(DAG->MF); in CreateTargetPostRAHazardRecognizer()
8859 /// This is the hazard recognizer used at -O0 by the PostRAHazardRecognizer
8867 // - pre-RA scheduling and post-RA scheduling
8873 // post-RA scheduling; we can tell that we're post-RA because we don't in CreateTargetMIHazardRecognizer()
8875 if (!DAG->hasVRegLiveness()) in CreateTargetMIHazardRecognizer()
8876 return new GCNHazardRecognizer(DAG->MF); in CreateTargetMIHazardRecognizer()
8888 { MO_GOTPCREL, "amdgpu-gotprel" }, in getSerializableDirectMachineOperandTargetFlags()
8889 { MO_GOTPCREL32_LO, "amdgpu-gotprel32-lo" }, in getSerializableDirectMachineOperandTargetFlags()
8890 { MO_GOTPCREL32_HI, "amdgpu-gotprel32-hi" }, in getSerializableDirectMachineOperandTargetFlags()
8891 { MO_REL32_LO, "amdgpu-rel32-lo" }, in getSerializableDirectMachineOperandTargetFlags()
8892 { MO_REL32_HI, "amdgpu-rel32-hi" }, in getSerializableDirectMachineOperandTargetFlags()
8893 { MO_ABS32_LO, "amdgpu-abs32-lo" }, in getSerializableDirectMachineOperandTargetFlags()
8894 { MO_ABS32_HI, "amdgpu-abs32-hi" }, in getSerializableDirectMachineOperandTargetFlags()
8904 {MONoClobber, "amdgpu-noclobber"}, in getSerializableMachineMemOperandTargetFlags()
8905 {MOLastUse, "amdgpu-last-use"}, in getSerializableMachineMemOperandTargetFlags()
8915 if (MFI->checkFlag(SrcReg, AMDGPU::VirtRegFlag::WWM_REG)) in getLiveRangeSplitOpcode()
8931 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); in isBasicBlockPrologue()
8936 // FIXME: Copies inserted in the block prolog for live-range split should also in isBasicBlockPrologue()
8951 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); in getAddNoCarry()
9008 // The GFX12 field is a 24-bit signed byte offset; only non-negative values are used here. in getMaxMUBUFImmOffset()
9011 return (1 << OffsetBits) - 1; in getMaxMUBUFImmOffset()
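// Worked values for the formula above (a sketch; the bit widths are taken
// from the comment and from the pre-GFX12 12-bit MUBUF offset field, not
// from code shown here): GFX12 uses the non-negative half of a 24-bit
// signed field, earlier targets a 12-bit unsigned field.
static_assert((1u << 23) - 1 == 8388607u, "GFX12 maximum immediate offset");
static_assert((1u << 12) - 1 == 4095u, "pre-GFX12 maximum immediate offset");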
9033 if (Idx == -1) // e.g. s_memtime in isBufferSMRD()
9037 return RI.getRegClass(RCID)->hasSubClassEq(&AMDGPU::SGPR_128RegClass); in isBufferSMRD()
9056 Overflow = Imm - MaxImm; in splitMUBUFOffset()
9060 // the corresponding register contents can be re-used. in splitMUBUFOffset()
9062 // Load values with all low-bits (except for alignment bits) set into in splitMUBUFOffset()
9071 Overflow = High - Alignment.value(); in splitMUBUFOffset()
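// A simplified, standalone sketch of the splitting branch above (the inline
// SOffset-constant case and the overflow checks are omitted; the 4095 mask
// assumes the pre-GFX12 12-bit immediate field): the offset is split so that
// SOffset ends up with all low bits set except the alignment bits, letting
// neighbouring accesses reuse one s_movk_i32 value, while SOffset + ImmOffset
// still equals the original offset.
#include <cstdint>

struct MUBUFSplit { uint32_t SOffset, ImmOffset; };

MUBUFSplit splitMUBUFOffsetSketch(uint32_t Imm, uint32_t MaxImm,
                                  uint32_t Alignment) {
  if (Imm <= MaxImm)
    return {0, Imm};                // fits the immediate field as-is
  uint32_t High = (Imm + Alignment) & ~4095u;
  uint32_t Low = (Imm + Alignment) & 4095u;
  // (High - Alignment) + Low == Imm, so the address is unchanged.
  return {High - Alignment, Low};
}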
9094 // Pre-GFX12, flat instruction offsets can only be non-negative; global and scratch instruction offsets can also be negative.
9105 // +----------------------------+------+------+
9106 // | Address-Mode               | SGPR | VGPR |
9107 // +----------------------------+------+------+
9108 // | gfx9                       |      |      |
9109 // | negative, 4-aligned offset | x    | ok   |
9110 // | negative, unaligned offset | x    | ok   |
9111 // +----------------------------+------+------+
9112 // | gfx10                      |      |      |
9113 // | negative, 4-aligned offset | ok   | ok   |
9114 // | negative, unaligned offset | ok   | ok   |
9115 // +----------------------------+------+------+
9116 // | gfx10.3                    |      |      |
9117 // | negative, 4-aligned offset | ok   | ok   |
9118 // | negative, unaligned offset | ok   | x    |
9119 // +----------------------------+------+------+
9153 const unsigned NumBits = AMDGPU::getNumFlatOffsetBits(ST) - 1; in splitFlatOffset()
9159 ImmField = COffsetVal - RemainderOffset; in splitFlatOffset()
9166 ImmField -= ImmField % 4; in splitFlatOffset()
9170 RemainderOffset = COffsetVal - ImmField; in splitFlatOffset()
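// Standalone sketch of the arithmetic above (NumBits is assumed to be the
// immediate-field width minus the sign bit; the alignment fix-up mirrors the
// negative, unaligned case the table above marks as unusable): split a
// constant byte offset into an immediate field plus a remainder for the
// address register, keeping their sum equal to the original offset.
#include <cstdint>
#include <utility>

// Returns {ImmField, RemainderOffset}.
std::pair<int64_t, int64_t> splitFlatOffsetSketch(int64_t COffsetVal,
                                                  unsigned NumBits,
                                                  bool NeedAligned) {
  const int64_t D = int64_t(1) << NumBits;
  int64_t Remainder = (COffsetVal / D) * D; // signed division truncates
                                            // toward zero
  int64_t ImmField = COffsetVal - Remainder;
  if (NeedAligned && ImmField < 0 && (ImmField % 4) != 0) {
    ImmField -= ImmField % 4;          // e.g. -6 - (-2) == -4
    Remainder = COffsetVal - ImmField; // invariant: ImmField + Remainder
                                       // == COffsetVal
  }
  return {ImmField, Remainder};
}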
9257 if (MFMAOp != -1) in pseudoToMCOpcode()
9263 // -1 means that Opcode is already a native instruction. in pseudoToMCOpcode()
9264 if (MCOp == -1) in pseudoToMCOpcode()
9268 uint16_t NMCOp = (uint16_t)-1; in pseudoToMCOpcode()
9271 if (NMCOp == (uint16_t)-1) in pseudoToMCOpcode()
9273 if (NMCOp == (uint16_t)-1) in pseudoToMCOpcode()
9275 if (NMCOp != (uint16_t)-1) in pseudoToMCOpcode()
9279 // (uint16_t)-1 means that Opcode is a pseudo instruction that has in pseudoToMCOpcode()
9281 if (MCOp == (uint16_t)-1) in pseudoToMCOpcode()
9282 return -1; in pseudoToMCOpcode()
9285 return -1; in pseudoToMCOpcode()
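// A small standalone illustration (names hypothetical) of the two sentinels
// used above: the remapping tables hold uint16_t, so "no mapping" is stored
// as 0xFFFF and must be compared as (uint16_t)-1, while the public query
// returns a plain int where -1 means no real MC encoding exists.
#include <cstdint>

int lookupMCOpcodeSketch(uint16_t TableEntry) {
  // Without the cast, TableEntry == -1 would promote TableEntry to int
  // (0..65535) and the comparison could never be true.
  if (TableEntry == uint16_t(-1))
    return -1;        // pseudo instruction with no hardware encoding
  return TableEntry;  // a real MC opcode
}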
9300 for (unsigned I = 0, E = (MI.getNumOperands() - 1)/ 2; I < E; ++I) in getRegSequenceSubReg()
9308 // Try to find the definition of reg:subreg in subreg-manipulation pseudos
9345 switch (MI->getOpcode()) { in getVRegSubRegDef()
9348 auto &Op1 = MI->getOperand(1); in getVRegSubRegDef()
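// Sketch of the REG_SEQUENCE operand layout that the (NumOperands - 1) / 2
// loop bound above relies on: operand 0 is the def, followed by
// (source register, subregister index) pairs. The container and helper are
// illustrative, not LLVM types.
#include <cstdint>
#include <vector>

// Operands = {Def, Reg0, SubIdx0, Reg1, SubIdx1, ...}; returns the source
// register feeding WantedSubIdx, or 0 if no pair uses that index.
uint32_t regSequenceSourceFor(const std::vector<uint32_t> &Operands,
                              uint32_t WantedSubIdx) {
  for (size_t I = 0, E = (Operands.size() - 1) / 2; I < E; ++I)
    if (Operands[2 + 2 * I] == WantedSubIdx)
      return Operands[1 + 2 * I];
  return 0;
}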
9390 if (I->isDebugInstr()) in execMayBeModifiedBeforeUse()
9396 if (I->modifiesRegister(AMDGPU::EXEC, TRI)) in execMayBeModifiedBeforeUse()
9433 assert(I != DefBB->end()); in execMayBeModifiedBeforeAnyUse()
9435 if (I->isDebugInstr()) in execMayBeModifiedBeforeAnyUse()
9441 for (const MachineOperand &Op : I->operands()) { in execMayBeModifiedBeforeAnyUse()
9451 if (Reg == VReg && --NumUse == 0) in execMayBeModifiedBeforeAnyUse()
9453 } else if (TRI->regsOverlap(Reg, AMDGPU::EXEC)) in execMayBeModifiedBeforeAnyUse()
9465 if (!Cur->isPHI() && Cur->readsRegister(Dst, /*TRI=*/nullptr)) in createPHIDestinationCopy()
9478 (InsPt->getOpcode() == AMDGPU::SI_IF || in createPHISourceCopy()
9479 InsPt->getOpcode() == AMDGPU::SI_ELSE || in createPHISourceCopy()
9480 InsPt->getOpcode() == AMDGPU::SI_IF_BREAK) && in createPHISourceCopy()
9481 InsPt->definesRegister(Src, /*TRI=*/nullptr)) { in createPHISourceCopy()
9520 if (RC->hasSuperClassEq(&AMDGPU::SReg_32RegClass)) { in foldMemoryOperandImpl()
9524 if (RC->hasSuperClassEq(&AMDGPU::SReg_64RegClass)) { in foldMemoryOperandImpl()
9539 MachineBasicBlock::const_instr_iterator E(MI.getParent()->instr_end()); in getInstrLatency()
9541 for (++I; I != E && I->isBundledWithPred(); ++I) { in getInstrLatency()
9545 return Lat + Count - 1; in getInstrLatency()
9555 auto IID = GI->getIntrinsicID(); in getGenericInstructionUniformity()
9582 return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS || in getGenericInstructionUniformity()
9583 mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS; in getGenericInstructionUniformity()
9585 // At least one MMO in a non-global address space. in getGenericInstructionUniformity()
9643 return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS || in getInstructionUniformity()
9644 mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS; in getInstructionUniformity()
9646 // At least one MMO in a non-global address space. in getInstructionUniformity()
9653 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); in getInstructionUniformity()
9670 const RegisterBank *RegBank = RBI->getRegBank(Reg, MRI, RI); in getInstructionUniformity()
9671 if (RegBank && RegBank->getID() != AMDGPU::SGPRRegBankID) in getInstructionUniformity()
9679 // currently turned into no-op COPYs by SelectionDAG ISel and are in getInstructionUniformity()
9696 report_fatal_error("ds_ordered_count unsupported for this calling conv"); in getDSShaderTypeValue()
9778 bool IsReversible, bool IsSigned) -> bool { in optimizeCompareInstr()
9802 MachineInstr *Def = MRI->getUniqueVRegDef(SrcReg); in optimizeCompareInstr()
9803 if (!Def || Def->getParent() != CmpInstr.getParent()) in optimizeCompareInstr()
9806 if (Def->getOpcode() != AMDGPU::S_AND_B32 && in optimizeCompareInstr()
9807 Def->getOpcode() != AMDGPU::S_AND_B64) in optimizeCompareInstr()
9811 const auto isMask = [&Mask, SrcSize](const MachineOperand *MO) -> bool { in optimizeCompareInstr()
9812 if (MO->isImm()) in optimizeCompareInstr()
9813 Mask = MO->getImm(); in optimizeCompareInstr()
9820 MachineOperand *SrcOp = &Def->getOperand(1); in optimizeCompareInstr()
9822 SrcOp = &Def->getOperand(2); in optimizeCompareInstr()
9823 else if (isMask(&Def->getOperand(2))) in optimizeCompareInstr()
9824 SrcOp = &Def->getOperand(1); in optimizeCompareInstr()
9829 if (IsSigned && BitNo == SrcSize - 1) in optimizeCompareInstr()
9843 Register DefReg = Def->getOperand(0).getReg(); in optimizeCompareInstr()
9844 if (IsReversedCC && !MRI->hasOneNonDBGUse(DefReg)) in optimizeCompareInstr()
9847 for (auto I = std::next(Def->getIterator()), E = CmpInstr.getIterator(); in optimizeCompareInstr()
9849 if (I->modifiesRegister(AMDGPU::SCC, &RI) || in optimizeCompareInstr()
9850 I->killsRegister(AMDGPU::SCC, &RI)) in optimizeCompareInstr()
9855 Def->findRegisterDefOperand(AMDGPU::SCC, /*TRI=*/nullptr); in optimizeCompareInstr()
9856 SccDef->setIsDead(false); in optimizeCompareInstr()
9859 if (!MRI->use_nodbg_empty(DefReg)) { in optimizeCompareInstr()
9865 MachineBasicBlock *MBB = Def->getParent(); in optimizeCompareInstr()
9872 BuildMI(*MBB, Def, Def->getDebugLoc(), get(NewOpc)) in optimizeCompareInstr()
9875 Def->eraseFromParent(); in optimizeCompareInstr()
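// Standalone check (plain C++, not MIR) of the identity this fold relies on:
// with a single-bit mask, the s_cmp against 0 or against the mask adds
// nothing beyond "the AND result is non-zero", which is exactly what SCC
// already holds after s_and_b32, so the compare can be folded away and SCC
// produced by the AND (or a rewritten form of it). The signed predicates
// break when the mask is the sign bit, which is why the
// BitNo == SrcSize - 1 case bails out above.
#include <cassert>
#include <cstdint>

void checkBitTestFold(uint32_t X, unsigned N) {
  const uint32_t Mask = 1u << N;  // single-bit mask, as isMask() requires
  const uint32_t And = X & Mask;  // what s_and_b32 computes
  const bool SCC = And != 0;      // SCC after the AND: result was non-zero
  assert((And != 0) == SCC);      // s_cmp_lg_u32 %and, 0
  assert((And == Mask) == SCC);   // s_cmp_eq_u32 %and, 1 << n
  assert((And >= Mask) == SCC);   // s_cmp_ge_u32 %and, 1 << n
}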
9926 // Add implicit aligned super-reg to force alignment on the data operand. in enforceOperandRCAlignment()
9929 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); in enforceOperandRCAlignment()