//===-- SILowerI1Copies.cpp - Lower I1 Copies -----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass lowers all occurrences of i1 values (with a vreg_1 register class)
// to lane masks (32 / 64-bit scalar registers). The pass assumes machine SSA
// form and a wave-level control flow graph.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "si-i1-copies"
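
// Illustrative sketch, not part of the pass: on a wave32 target, a "vector"
// i1 value is represented as a 32-bit scalar lane mask, one bit per lane
// (64-bit on wave64). Reading lane N's boolean out of such a mask is a simple
// bit test; the helper below is an invented name, for illustration only.
#include <cstdint>

static inline bool laneBit(uint32_t LaneMask, unsigned Lane) {
  return (LaneMask >> Lane) & 1u; // bit N holds lane N's i1 value
}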
  return Reg.isVirtual() && MRI->getRegClass(Reg) == &AMDGPU::VReg_1RegClass;

  MRI->constrainRegClass(Reg, &AMDGPU::SReg_1_XEXECRegClass);
/// Helper class that determines the relationship of incoming values of a phi
/// in the control flow graph to determine where an incoming value can simply
/// be taken as a scalar lane mask as-is, and where it needs to be merged with
/// another, previously defined lane mask.
///
/// The approach is as follows:
///  - Determine all basic blocks which, starting from the incoming blocks,
///    a wave may reach before entering the def block (the block containing
///    the phi).
///  - If an incoming block has no predecessors in this set, we can take the
///    incoming value as a scalar lane mask as-is.
///  -- A special case of this is when the def block has a self-loop.
///  - Otherwise, the incoming value needs to be merged with a previously
///    defined lane mask.
///  - If there is a path into the set of reachable blocks that does _not_ go
///    through an incoming block where we can take the scalar lane mask as-is,
///    an available value must be invented for the SSAUpdater.
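
// A minimal, self-contained sketch of the reachability rule described above
// (invented helper and integer block indices, not the pass's actual
// PhiIncomingAnalysis): starting from the incoming blocks, mark every block a
// wave may visit before re-entering the def block; an incoming block with no
// predecessor inside the marked set may be used as a scalar lane mask as-is.
#include <vector>

static std::vector<bool>
analyzeIncomingsSketch(const std::vector<std::vector<int>> &Succs,
                       const std::vector<std::vector<int>> &Preds,
                       int DefBlock, const std::vector<int> &Incomings) {
  std::vector<bool> Reachable(Succs.size(), false);
  std::vector<int> Stack(Incomings);
  while (!Stack.empty()) {
    int B = Stack.back();
    Stack.pop_back();
    if (B == DefBlock || Reachable[B])
      continue; // the walk stops at the def block and visits blocks once
    Reachable[B] = true;
    for (int S : Succs[B])
      Stack.push_back(S);
  }
  std::vector<bool> UseAsIs;
  for (int In : Incomings) {
    bool ReachablePred = false;
    for (int P : Preds[In])
      ReachablePred |= Reachable[P];
    UseAsIs.push_back(!ReachablePred); // no predecessor in the set => as-is
  }
  return UseAsIs;
}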
    return ReachableMap.find(&MBB)->second;
    for (auto Incoming : Incomings) {

        ReachableMap[&DefBlock] = true; // self-loop on DefBlock

      // If this block has a divergent branch and the def block is its
      // post-dominator, the wave may first visit the other successors.
      if (TII->hasDivergentBranch(MBB) && PDT.dominates(&DefBlock, MBB))
        append_range(Stack, MBB->successors());

        append_range(Stack, MBB->successors());

      for (MachineBasicBlock *Pred : MBB->predecessors()) {
/// Helper class that detects loops which require us to lower an i1 COPY into
/// bitwise manipulation. Consider this CFG, where an i1 def in B that is
/// used in C must combine values from different loop iterations:
///
///  A-+-+
///  | | |
///  B-+ |
///  |   |
///  C---+
///
/// Bitwise lowering is required for a def in B if a backward edge to B is
/// reachable without going through the nearest common post-dominator of B
/// and all uses of the def. The class is designed to cache the CFG traversal
/// so that it can be re-used by multiple queries.
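
// Minimal sketch of the rule above (invented helper on integer block indices,
// not the pass's actual LoopFinder): report whether a backward edge into
// DefBlock is reachable from DefBlock without passing through the given
// post-dominator. If it is, an i1 COPY in DefBlock needs bitwise lowering.
#include <vector>

static bool backEdgeAvoidsPostDom(const std::vector<std::vector<int>> &Succs,
                                  int DefBlock, int PostDom) {
  std::vector<bool> Visited(Succs.size(), false);
  std::vector<int> Stack(Succs[DefBlock].begin(), Succs[DefBlock].end());
  while (!Stack.empty()) {
    int B = Stack.back();
    Stack.pop_back();
    if (B == DefBlock)
      return true; // found a path back to DefBlock that avoided PostDom
    if (B == PostDom || Visited[B])
      continue; // never walk through the post-dominator; visit blocks once
    Visited[B] = true;
    for (int S : Succs[B])
      Stack.push_back(S);
  }
  return false;
}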
  // Post-dominator of all visited blocks.
    while (PDNode->getBlock() != PostDom) {
      if (PDNode->getBlock() == VisitedPostDom)
        advanceLevel();
      PDNode = PDNode->getIDom();
    for (auto &Incoming : Incomings)

    for (MachineBasicBlock *Pred : Dom->predecessors()) {
    auto DomIt = Visited.find(&MBB);
    if (DomIt != Visited.end() && DomIt->second <= LoopLevel)
      return true;
    for (auto &Incoming : Incomings)

    VisitedPostDom = PDT.getNode(VisitedPostDom)->getIDom()->getBlock();

    for (MachineBasicBlock *Succ : MBB->successors()) {
  return MRI->createVirtualRegister(LaneMaskRegAttrs);

  MachineFunction &MF = *MBB->getParent();
  BuildMI(*MBB, MBB->getFirstTerminator(), {}, TII->get(AMDGPU::IMPLICIT_DEF),
          UndefReg);
      // Copy into a 32-bit vector register.
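      // V_CNDMASK_B32 selects between two 32-bit operands per lane based on
      // the lane mask, so each active lane receives -1 (true) or 0 (false).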
      assert(isVRegCompatibleReg(TII->getRegisterInfo(), *MRI, DstReg));
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
          .addImm(0)
          .addImm(0)
          .addImm(0)
          .addImm(-1)
          .addReg(SrcReg);
      MI->eraseFromParent();
  MRI = &MF->getRegInfo();

  ST = &MF->getSubtarget<GCNSubtarget>();
  TII = ST->getInstrInfo();
  IsWave32 = ST->isWave32();

  DT->getBase().updateDFSNumbers();

    MachineBasicBlock &MBB = *MI->getParent();

    Register DstReg = MI->getOperand(0).getReg();
      // Sort incomings so that values which dominate other incoming values
      // come first; this enables some on-the-fly constant folding.
      llvm::sort(Incomings, [this](Incoming LHS, Incoming RHS) {
        return DT->getNode(LHS.Block)->getDFSNumIn() <
               DT->getNode(RHS.Block)->getDFSNumIn();
      });
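      // (In a dominator tree a node is numbered before everything it
      // dominates, since dominators are tree ancestors; sorting by DFSNumIn
      // therefore places dominating incoming values first.)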
    for (MachineInstr &Use : MRI->use_instructions(DstReg))
      DomBlocks.push_back(Use.getParent());

    MachineBasicBlock *PostDomBound =
        PDT->findNearestCommonDominator(DomBlocks);

    for (auto &Incoming : Incomings) {

    for (auto &Incoming : Incomings) {

    for (auto &Incoming : Incomings) {

    for (auto &Incoming : Incomings) {

    MI->eraseFromParent();
    if (MRI->use_empty(DstReg)) {

      assert(TII->getRegisterInfo().getRegSizeInBits(SrcReg, *MRI) == 32);
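      // V_CMP_NE_U32 compares each active lane's 32-bit value against zero,
      // producing a lane mask whose bit is set for every non-zero lane.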
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_CMP_NE_U32_e64), TmpReg)
          .addImm(0)
          .addReg(SrcReg);
    for (MachineInstr &Use : MRI->use_instructions(DstReg))
      DomBlocks.push_back(Use.getParent());
    MachineBasicBlock *PostDomBound =
        PDT->findNearestCommonDominator(DomBlocks);

    MI->eraseFromParent();
    MI = MRI->getUniqueVRegDef(Reg);
    if (MI->getOpcode() == AMDGPU::IMPLICIT_DEF)
      return true;
    if (MI->getOpcode() != AMDGPU::COPY)
      break;
    Reg = MI->getOperand(1).getReg();

  if (MI->getOpcode() != MovOp)
    return false;
  if (!MI->getOperand(1).isImm())
    return false;
  int64_t Imm = MI->getOperand(1).getImm();
  if (Imm == -1) {
    Val = true;
    return true;
  }
  auto InsertionPt = MBB.getFirstTerminator();
  for (auto I = InsertionPt, E = MBB.end(); I != E; ++I) {

      InsertionPt--;
  // VReg_1 -> SReg_32 or SReg_64
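  // getBoolRC() picks the wave-size-appropriate boolean register class:
  // SReg_32 on wave32 targets, SReg_64 on wave64.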
  MRI->setRegClass(DstReg, ST->getBoolRC());
  for (unsigned i = 1; i < MI->getNumOperands(); i += 2) {
    assert(i + 1 < MI->getNumOperands());
    Register IncomingReg = MI->getOperand(i).getReg();
    MachineBasicBlock *IncomingMBB = MI->getOperand(i + 1).getMBB();
    MachineInstr *IncomingDef = MRI->getUniqueVRegDef(IncomingReg);

    if (IncomingDef->getOpcode() == AMDGPU::COPY) {
      IncomingReg = IncomingDef->getOperand(1).getReg();
      assert(!IncomingDef->getOperand(1).getSubReg());
    } else if (IncomingDef->getOpcode() == AMDGPU::IMPLICIT_DEF) {
      continue;
    } else {
      assert(IncomingDef->isPHI() || PhiRegisters.count(IncomingReg));
    }
  MRI->replaceRegWith(NewReg, OldReg);
      BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), DstReg).addReg(CurReg);
      BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), DstReg).addReg(ExecReg);
      BuildMI(MBB, I, DL, TII->get(XorOp), DstReg)
          .addReg(ExecReg)
          .addImm(-1);

    BuildMI(MBB, I, DL, TII->get(AndN2Op), PrevMaskedReg)
        .addReg(PrevReg).addReg(ExecReg);
    BuildMI(MBB, I, DL, TII->get(AndOp), CurMaskedReg)
        .addReg(CurReg).addReg(ExecReg);

    BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), DstReg)
        .addReg(CurMaskedReg);
    BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), DstReg)
        .addReg(PrevMaskedReg);
    BuildMI(MBB, I, DL, TII->get(OrN2Op), DstReg)
        .addReg(CurMaskedReg).addReg(ExecReg);
    BuildMI(MBB, I, DL, TII->get(OrOp), DstReg)
        .addReg(PrevMaskedReg).addReg(CurMaskedReg);
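
// A scalar model of the merge built above (illustrative only; mergeLaneMask
// is an invented name). For a wave32 mask the emitted SALU sequence computes
//   Dst = (Prev & ~Exec) | (Cur & Exec)
// so lanes active in EXEC take the new value while inactive lanes keep the
// old one; the constant-operand branches above are folded forms of this.
#include <cstdint>

static inline uint32_t mergeLaneMask(uint32_t Prev, uint32_t Cur,
                                     uint32_t Exec) {
  return (Prev & ~Exec) | (Cur & Exec);
}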