Lines Matching +full:tri +full:- +full:state

1 //===- SIFixSGPRCopies.cpp - Remove potential VGPR => SGPR copies ---------===//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
65 //===----------------------------------------------------------------------===//
77 #define DEBUG_TYPE "si-fix-sgpr-copies"
80 "amdgpu-enable-merge-m0",
100 // Current score state. To speedup selection V2SCopyInfos for processing
135 const SIRegisterInfo *TRI; member in __anon8637e2150111::SIFixSGPRCopies
190 const SIRegisterInfo &TRI, in getCopyRegClasses() argument
197 : TRI.getPhysRegBaseClass(SrcReg); in getCopyRegClasses()
200 // SrcRC = TRI.getSubRegClass(SrcRC, Copy.getOperand(1).getSubReg()); in getCopyRegClasses()
204 : TRI.getPhysRegBaseClass(DstReg); in getCopyRegClasses()
211 const SIRegisterInfo &TRI) { in isVGPRToSGPRCopy() argument
212 return SrcRC != &AMDGPU::VReg_1RegClass && TRI.isSGPRClass(DstRC) && in isVGPRToSGPRCopy()
213 TRI.hasVectorRegisters(SrcRC); in isVGPRToSGPRCopy()
218 const SIRegisterInfo &TRI) { in isSGPRToVGPRCopy() argument
219 return DstRC != &AMDGPU::VReg_1RegClass && TRI.isSGPRClass(SrcRC) && in isSGPRToVGPRCopy()
220 TRI.hasVectorRegisters(DstRC); in isSGPRToVGPRCopy()
224 const SIRegisterInfo *TRI, in tryChangeVGPRtoSGPRinCopy() argument
226 MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); in tryChangeVGPRtoSGPRinCopy()
237 if (MO.isDef() || UseMI->getParent() != MI.getParent() || in tryChangeVGPRtoSGPRinCopy()
238 UseMI->getOpcode() <= TargetOpcode::GENERIC_OP_END) in tryChangeVGPRtoSGPRinCopy()
242 if (OpIdx >= UseMI->getDesc().getNumOperands() || in tryChangeVGPRtoSGPRinCopy()
243 !TII->isOperandLegal(*UseMI, OpIdx, &Src)) in tryChangeVGPRtoSGPRinCopy()
247 MRI.setRegClass(DstReg, TRI->getEquivalentSGPRClass(MRI.getRegClass(DstReg))); in tryChangeVGPRtoSGPRinCopy()
251 // Distribute an SGPR->VGPR copy of a REG_SEQUENCE into a VGPR REG_SEQUENCE.
262 // This exposes immediate folding opportunities when materializing 64-bit
265 const SIRegisterInfo *TRI, in foldVGPRCopyIntoRegSequence() argument
271 if (!TRI->isSGPRClass(MRI.getRegClass(DstReg))) in foldVGPRCopyIntoRegSequence()
286 std::tie(SrcRC, DstRC) = getCopyRegClasses(CopyUse, *TRI, MRI); in foldVGPRCopyIntoRegSequence()
288 if (!isSGPRToVGPRCopy(SrcRC, DstRC, *TRI)) in foldVGPRCopyIntoRegSequence()
291 if (tryChangeVGPRtoSGPRinCopy(CopyUse, TRI, TII)) in foldVGPRCopyIntoRegSequence()
310 bool IsAGPR = TRI->isAGPRClass(DstRC); in foldVGPRCopyIntoRegSequence()
314 TRI->getRegClassForOperandReg(MRI, MI.getOperand(I)); in foldVGPRCopyIntoRegSequence()
315 assert(TRI->isSGPRClass(SrcRC) && in foldVGPRCopyIntoRegSequence()
317 const TargetRegisterClass *NewSrcRC = TRI->getEquivalentVGPRClass(SrcRC); in foldVGPRCopyIntoRegSequence()
321 BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(), TII->get(AMDGPU::COPY), in foldVGPRCopyIntoRegSequence()
326 const TargetRegisterClass *NewSrcRC = TRI->getEquivalentAGPRClass(SrcRC); in foldVGPRCopyIntoRegSequence()
330 BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(), TII->get(Opc), in foldVGPRCopyIntoRegSequence()
348 if (Copy->getOpcode() != AMDGPU::COPY) in isSafeToFoldImmIntoCopy()
351 if (!MoveImm->isMoveImmediate()) in isSafeToFoldImmIntoCopy()
355 TII->getNamedOperand(*MoveImm, AMDGPU::OpName::src0); in isSafeToFoldImmIntoCopy()
356 if (!ImmOp->isImm()) in isSafeToFoldImmIntoCopy()
359 // FIXME: Handle copies with sub-regs. in isSafeToFoldImmIntoCopy()
360 if (Copy->getOperand(1).getSubReg()) in isSafeToFoldImmIntoCopy()
363 switch (MoveImm->getOpcode()) { in isSafeToFoldImmIntoCopy()
373 Imm = ImmOp->getImm(); in isSafeToFoldImmIntoCopy()
385 SmallVector<MachineBasicBlock *, 4> Worklist(MBB->predecessors()); in searchPredecessors()
397 Worklist.append(MBB->pred_begin(), MBB->pred_end()); in searchPredecessors()
413 const MachineBasicBlock *MBBFrom = From->getParent(); in isReachable()
414 const MachineBasicBlock *MBBTo = To->getParent(); in isReachable()
418 // other than -1. in isReachable()
423 // Return the first non-prologue instruction in the block.
426 MachineBasicBlock::iterator I = MBB->getFirstNonPHI(); in getFirstNonPrologue()
427 while (I != MBB->end() && TII->isBasicBlockPrologue(*I)) in getFirstNonPrologue()
439 const TargetRegisterInfo *TRI, in hoistAndMergeSGPRInits() argument
464 Inits[Imm->getImm()].push_front(&MI); in hoistAndMergeSGPRInits()
480 MachineBasicBlock::iterator To) -> bool { in hoistAndMergeSGPRInits()
484 auto interferes = [&MDT, From, To](MachineInstr* &Clobber) -> bool { in hoistAndMergeSGPRInits()
485 const MachineBasicBlock *MBBFrom = From->getParent(); in hoistAndMergeSGPRInits()
486 const MachineBasicBlock *MBBTo = To->getParent(); in hoistAndMergeSGPRInits()
501 MDT.properlyDominates(Clobber->getParent(), MBBTo)); in hoistAndMergeSGPRInits()
515 << printMBBReference(*MI2->getParent()) << " " << *MI2); in hoistAndMergeSGPRInits()
525 << printMBBReference(*MI1->getParent()) << " " << *MI1); in hoistAndMergeSGPRInits()
532 auto *MBB = MDT.findNearestCommonDominator(MI1->getParent(), in hoistAndMergeSGPRInits()
533 MI2->getParent()); in hoistAndMergeSGPRInits()
543 << printMBBReference(*MI1->getParent()) << " " << *MI1 in hoistAndMergeSGPRInits()
545 << printMBBReference(*MI2->getParent()) << " to " in hoistAndMergeSGPRInits()
546 << printMBBReference(*I->getParent()) << " " << *MI2); in hoistAndMergeSGPRInits()
547 I->getParent()->splice(I, MI2->getParent(), MI2); in hoistAndMergeSGPRInits()
566 (*I)->eraseFromParent(); in hoistAndMergeSGPRInits()
577 auto MBB = MI->getParent(); in hoistAndMergeSGPRInits()
582 if (!TII->isBasicBlockPrologue(*B)) in hoistAndMergeSGPRInits()
585 auto R = std::next(MI->getReverseIterator()); in hoistAndMergeSGPRInits()
589 if (R->readsRegister(Reg, TRI) || R->definesRegister(Reg, TRI) || in hoistAndMergeSGPRInits()
590 TII->isSchedulingBoundary(*R, MBB, *MBB->getParent())) in hoistAndMergeSGPRInits()
594 if (&*--R != MI) in hoistAndMergeSGPRInits()
595 MBB->splice(*R, MBB, MI); in hoistAndMergeSGPRInits()
613 TRI = ST.getRegisterInfo(); in runOnMachineFunction()
631 std::tie(SrcRC, DstRC) = getCopyRegClasses(MI, *TRI, *MRI); in runOnMachineFunction()
633 if (isSGPRToVGPRCopy(SrcRC, DstRC, *TRI)) { in runOnMachineFunction()
637 if (tryChangeVGPRtoSGPRinCopy(MI, TRI, TII)) in runOnMachineFunction()
644 if (!isVGPRToSGPRCopy(SrcRC, DstRC, *TRI)) in runOnMachineFunction()
656 if (TRI->isSGPRClass(TII->getOpRegClass(MI, 0))) { in runOnMachineFunction()
660 const TargetRegisterClass *SrcRC = MRI->getRegClass(MO.getReg()); in runOnMachineFunction()
661 if (TRI->hasVectorRegisters(SrcRC)) { in runOnMachineFunction()
663 TRI->getEquivalentSGPRClass(SrcRC); in runOnMachineFunction()
664 Register NewDst = MRI->createVirtualRegister(DestRC); in runOnMachineFunction()
669 MI.isPHI() ? BlockToInsertCopy->getFirstInstrTerminator() : I; in runOnMachineFunction()
675 PointToInsertCopy->getDebugLoc(), in runOnMachineFunction()
676 TII->get(AMDGPU::COPY), NewDst) in runOnMachineFunction()
699 // normally count as using the constant bus twice - but in this case it in runOnMachineFunction()
712 if ((Src0.isReg() && TRI->isSGPRReg(*MRI, Src0.getReg()) && in runOnMachineFunction()
714 (Src1.isReg() && TRI->isSGPRReg(*MRI, Src1.getReg()) && in runOnMachineFunction()
723 if (MO->getReg().isVirtual()) { in runOnMachineFunction()
724 MachineInstr *DefMI = MRI->getVRegDef(MO->getReg()); in runOnMachineFunction()
725 if (DefMI && TII->isFoldableCopy(*DefMI)) { in runOnMachineFunction()
726 const MachineOperand &Def = DefMI->getOperand(0); in runOnMachineFunction()
728 MO->getReg() == Def.getReg() && in runOnMachineFunction()
729 MO->getSubReg() == Def.getSubReg()) { in runOnMachineFunction()
730 const MachineOperand &Copied = DefMI->getOperand(1); in runOnMachineFunction()
732 TII->isInlineConstant(APInt(64, Copied.getImm(), true))) { in runOnMachineFunction()
733 MO->ChangeToImmediate(Copied.getImm()); in runOnMachineFunction()
746 TII->get(AMDGPU::COPY), AMDGPU::M0) in runOnMachineFunction()
762 if (MI->isCopy()) { in runOnMachineFunction()
764 std::tie(SrcRC, DstRC) = getCopyRegClasses(*MI, *TRI, *MRI); in runOnMachineFunction()
765 if (isSGPRToVGPRCopy(SrcRC, DstRC, *TRI)) in runOnMachineFunction()
766 tryChangeVGPRtoSGPRinCopy(*MI, TRI, TII); in runOnMachineFunction()
771 if (MI->isRegSequence()) in runOnMachineFunction()
772 foldVGPRCopyIntoRegSequence(*MI, TRI, TII, *MRI); in runOnMachineFunction()
778 hoistAndMergeSGPRInits(AMDGPU::M0, *MRI, TRI, *MDT, TII); in runOnMachineFunction()
801 Register Reg = Instr->getOperand(0).getReg(); in processPHINode()
802 for (const auto &Use : MRI->use_operands(Reg)) { in processPHINode()
805 AllAGPRUses &= (UseMI->isCopy() && in processPHINode()
806 TRI->isAGPR(*MRI, UseMI->getOperand(0).getReg())) || in processPHINode()
807 TRI->isAGPR(*MRI, Use.getReg()); in processPHINode()
808 if (UseMI->isCopy() || UseMI->isRegSequence()) { in processPHINode()
818 const TargetRegisterClass *RC0 = MRI->getRegClass(PHIRes); in processPHINode()
819 if (HasUses && AllAGPRUses && !TRI->isAGPRClass(RC0)) { in processPHINode()
821 MRI->setRegClass(PHIRes, TRI->getEquivalentAGPRClass(RC0)); in processPHINode()
823 MachineInstr *DefMI = MRI->getVRegDef(MI.getOperand(I).getReg()); in processPHINode()
824 if (DefMI && DefMI->isPHI()) in processPHINode()
829 if (TRI->isVectorRegister(*MRI, PHIRes) || in processPHINode()
832 TII->legalizeOperands(MI, MDT); in processPHINode()
846 MachineInstr *DefMI = MRI->getVRegDef(MaybeVGPRConstMO.getReg()); in tryMoveVGPRConstToSGPR()
847 if (!DefMI || !DefMI->isMoveImmediate()) in tryMoveVGPRConstToSGPR()
850 MachineOperand *SrcConst = TII->getNamedOperand(*DefMI, AMDGPU::OpName::src0); in tryMoveVGPRConstToSGPR()
851 if (SrcConst->isReg()) in tryMoveVGPRConstToSGPR()
855 MRI->getRegClass(MaybeVGPRConstMO.getReg()); in tryMoveVGPRConstToSGPR()
856 unsigned MoveSize = TRI->getRegSizeInBits(*SrcRC); in tryMoveVGPRConstToSGPR()
858 BuildMI(*BlockToInsertTo, PointToInsertTo, PointToInsertTo->getDebugLoc(), in tryMoveVGPRConstToSGPR()
859 TII->get(MoveOp), DstReg) in tryMoveVGPRConstToSGPR()
861 if (MRI->hasOneUse(MaybeVGPRConstMO.getReg())) in tryMoveVGPRConstToSGPR()
862 DefMI->eraseFromParent(); in tryMoveVGPRConstToSGPR()
877 TRI->hasVectorRegisters(MRI->getRegClass(SrcReg))) { in lowerSpecialCase()
879 MRI->createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); in lowerSpecialCase()
881 TII->get(AMDGPU::V_READFIRSTLANE_B32), TmpReg) in lowerSpecialCase()
891 if (!SrcReg.isVirtual() || TRI->isAGPR(*MRI, SrcReg)) { in lowerSpecialCase()
894 TII->moveToVALU(worklist, MDT); in lowerSpecialCase()
902 if (isSafeToFoldImmIntoCopy(&MI, MRI->getVRegDef(SrcReg), TII, SMovOp, Imm)) { in lowerSpecialCase()
904 MI.addImplicitDefUseOperands(*MI.getParent()->getParent()); in lowerSpecialCase()
905 MI.setDesc(TII->get(SMovOp)); in lowerSpecialCase()
912 Register DstReg = MI->getOperand(0).getReg(); in analyzeVGPRToSGPRCopy()
913 const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg); in analyzeVGPRToSGPRCopy()
916 TRI->getRegSizeInBits(*DstRC)); in analyzeVGPRToSGPRCopy()
931 if (Inst->isCopy() || Inst->isRegSequence()) { in analyzeVGPRToSGPRCopy()
932 if (TRI->isVGPR(*MRI, Inst->getOperand(0).getReg())) { in analyzeVGPRToSGPRCopy()
933 if (!Inst->isCopy() || in analyzeVGPRToSGPRCopy()
934 !tryChangeVGPRtoSGPRinCopy(*Inst, TRI, TII)) { in analyzeVGPRToSGPRCopy()
944 if ((TII->isSALU(*Inst) && Inst->isCompare()) || in analyzeVGPRToSGPRCopy()
945 (Inst->isCopy() && Inst->getOperand(0).getReg() == AMDGPU::SCC)) { in analyzeVGPRToSGPRCopy()
946 auto I = Inst->getIterator(); in analyzeVGPRToSGPRCopy()
947 auto E = Inst->getParent()->end(); in analyzeVGPRToSGPRCopy()
949 !I->findRegisterDefOperand(AMDGPU::SCC, /*TRI=*/nullptr)) { in analyzeVGPRToSGPRCopy()
950 if (I->readsRegister(AMDGPU::SCC, /*TRI=*/nullptr)) in analyzeVGPRToSGPRCopy()
953 } else if (Inst->getNumExplicitDefs() != 0) { in analyzeVGPRToSGPRCopy()
954 Register Reg = Inst->getOperand(0).getReg(); in analyzeVGPRToSGPRCopy()
955 if (TRI->isSGPRReg(*MRI, Reg) && !TII->isVALU(*Inst)) in analyzeVGPRToSGPRCopy()
956 for (auto &U : MRI->use_instructions(Reg)) in analyzeVGPRToSGPRCopy()
960 if (TII->isSALU(*U)) in analyzeVGPRToSGPRCopy()
971 if (Info->SChain.empty()) { in needToBeConvertedToVALU()
972 Info->Score = 0; in needToBeConvertedToVALU()
975 Info->Siblings = SiblingPenalty[*llvm::max_element( in needToBeConvertedToVALU()
976 Info->SChain, [&](MachineInstr *A, MachineInstr *B) -> bool { in needToBeConvertedToVALU()
979 Info->Siblings.remove_if([&](unsigned ID) { return ID == Info->ID; }); in needToBeConvertedToVALU()
986 for (auto J : Info->Siblings) { in needToBeConvertedToVALU()
989 MachineInstr *SiblingCopy = InfoIt->second.Copy; in needToBeConvertedToVALU()
990 if (SiblingCopy->isImplicitDef()) in needToBeConvertedToVALU()
994 SrcRegs.insert(std::pair(SiblingCopy->getOperand(1).getReg(), in needToBeConvertedToVALU()
995 SiblingCopy->getOperand(1).getSubReg())); in needToBeConvertedToVALU()
998 Info->SiblingPenalty = SrcRegs.size(); in needToBeConvertedToVALU()
1001 Info->NumSVCopies + Info->SiblingPenalty + Info->NumReadfirstlanes; in needToBeConvertedToVALU()
1002 unsigned Profit = Info->SChain.size(); in needToBeConvertedToVALU()
1003 Info->Score = Penalty > Profit ? 0 : Profit - Penalty; in needToBeConvertedToVALU()
1004 Info->NeedToBeConvertedToVALU = Info->Score < 3; in needToBeConvertedToVALU()
1005 return Info->NeedToBeConvertedToVALU; in needToBeConvertedToVALU()
1024 V2SCopyInfo C = CurInfoIt->second; in lowerVGPR2SGPRCopies()
1029 V2SCopyInfo &SI = SibInfoIt->second; in lowerVGPR2SGPRCopies()
1048 TII->moveToVALU(Copies, MDT); in lowerVGPR2SGPRCopies()
1054 MachineBasicBlock *MBB = MI->getParent(); in lowerVGPR2SGPRCopies()
1060 Register DstReg = MI->getOperand(0).getReg(); in lowerVGPR2SGPRCopies()
1061 Register SrcReg = MI->getOperand(1).getReg(); in lowerVGPR2SGPRCopies()
1062 unsigned SubReg = MI->getOperand(1).getSubReg(); in lowerVGPR2SGPRCopies()
1064 TRI->getRegClassForOperandReg(*MRI, MI->getOperand(1)); in lowerVGPR2SGPRCopies()
1065 size_t SrcSize = TRI->getRegSizeInBits(*SrcRC); in lowerVGPR2SGPRCopies()
1068 auto MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), in lowerVGPR2SGPRCopies()
1069 TII->get(AMDGPU::V_READFIRSTLANE_B32), DstReg); in lowerVGPR2SGPRCopies()
1072 auto MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), in lowerVGPR2SGPRCopies()
1073 TII->get(AMDGPU::V_READFIRSTLANE_B32), DstReg); in lowerVGPR2SGPRCopies()
1076 auto Result = BuildMI(*MBB, MI, MI->getDebugLoc(), in lowerVGPR2SGPRCopies()
1077 TII->get(AMDGPU::REG_SEQUENCE), DstReg); in lowerVGPR2SGPRCopies()
1078 int N = TRI->getRegSizeInBits(*SrcRC) / 32; in lowerVGPR2SGPRCopies()
1080 Register PartialSrc = TII->buildExtractSubReg( in lowerVGPR2SGPRCopies()
1081 Result, *MRI, MI->getOperand(1), SrcRC, in lowerVGPR2SGPRCopies()
1082 TRI->getSubRegFromChannel(i), &AMDGPU::VGPR_32RegClass); in lowerVGPR2SGPRCopies()
1084 MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass); in lowerVGPR2SGPRCopies()
1085 BuildMI(*MBB, *Result, Result->getDebugLoc(), in lowerVGPR2SGPRCopies()
1086 TII->get(AMDGPU::V_READFIRSTLANE_B32), PartialDst) in lowerVGPR2SGPRCopies()
1088 Result.addReg(PartialDst).addImm(TRI->getSubRegFromChannel(i)); in lowerVGPR2SGPRCopies()
1091 MI->eraseFromParent(); in lowerVGPR2SGPRCopies()
1107 Register SCCCopy = MRI->createVirtualRegister( in fixSCCCopies()
1108 TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID)); in fixSCCCopies()
1111 TII->get(IsWave32 ? AMDGPU::S_CSELECT_B32 in fixSCCCopies()
1114 .addImm(-1) in fixSCCCopies()
1116 I = BuildMI(*MI.getParent(), std::next(I), I->getDebugLoc(), in fixSCCCopies()
1117 TII->get(AMDGPU::COPY), DstReg) in fixSCCCopies()
1125 Register Tmp = MRI->createVirtualRegister(TRI->getBoolRC()); in fixSCCCopies()
1127 MI.getDebugLoc(), TII->get(Opcode)) in fixSCCCopies()