Lines Matching +full:vcc +full:- +full:p

1 //===-- SIOptimizeExecMaskingPreRA.cpp ------------------------------------===//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
13 //===----------------------------------------------------------------------===//
24 #define DEBUG_TYPE "si-optimize-exec-masking-pre-ra"
55 return "SI optimize exec mask operations pre-RA"; in getPassName()
68 "SI optimize exec mask operations pre-RA", false, false)
71 "SI optimize exec mask operations pre-RA", false, false)
81 // See if there is a def between \p AndIdx and \p SelIdx that needs to live
82 // beyond \p AndIdx.
93 SlotIndex AndIdx = LIS->getInstructionIndex(And).getRegSlot(); in isDefBetween()
94 SlotIndex SelIdx = LIS->getInstructionIndex(Sel).getRegSlot(); in isDefBetween()
97 return isDefBetween(LIS->getInterval(Reg), AndIdx, SelIdx); in isDefBetween()
100 if (isDefBetween(LIS->getRegUnit(Unit), AndIdx, SelIdx)) in isDefBetween()
110 // $vcc = S_AND_B64 $exec, %cmp
113 // $vcc = S_ANDN2_B64 $exec, %cc
132 TRI->findReachingDef(CondReg, AMDGPU::NoSubRegister, *I, *MRI, LIS); in optimizeVcndVcmpPair()
133 if (!And || And->getOpcode() != AndOpc || in optimizeVcndVcmpPair()
134 !And->getOperand(1).isReg() || !And->getOperand(2).isReg()) in optimizeVcndVcmpPair()
137 MachineOperand *AndCC = &And->getOperand(1); in optimizeVcndVcmpPair()
138 Register CmpReg = AndCC->getReg(); in optimizeVcndVcmpPair()
139 unsigned CmpSubReg = AndCC->getSubReg(); in optimizeVcndVcmpPair()
141 AndCC = &And->getOperand(2); in optimizeVcndVcmpPair()
142 CmpReg = AndCC->getReg(); in optimizeVcndVcmpPair()
143 CmpSubReg = AndCC->getSubReg(); in optimizeVcndVcmpPair()
144 } else if (And->getOperand(2).getReg() != Register(ExecReg)) { in optimizeVcndVcmpPair()
148 auto *Cmp = TRI->findReachingDef(CmpReg, CmpSubReg, *And, *MRI, LIS); in optimizeVcndVcmpPair()
149 if (!Cmp || !(Cmp->getOpcode() == AMDGPU::V_CMP_NE_U32_e32 || in optimizeVcndVcmpPair()
150 Cmp->getOpcode() == AMDGPU::V_CMP_NE_U32_e64) || in optimizeVcndVcmpPair()
151 Cmp->getParent() != And->getParent()) in optimizeVcndVcmpPair()
154 MachineOperand *Op1 = TII->getNamedOperand(*Cmp, AMDGPU::OpName::src0); in optimizeVcndVcmpPair()
155 MachineOperand *Op2 = TII->getNamedOperand(*Cmp, AMDGPU::OpName::src1); in optimizeVcndVcmpPair()
156 if (Op1->isImm() && Op2->isReg()) in optimizeVcndVcmpPair()
158 if (!Op1->isReg() || !Op2->isImm() || Op2->getImm() != 1) in optimizeVcndVcmpPair()
161 Register SelReg = Op1->getReg(); in optimizeVcndVcmpPair()
165 auto *Sel = TRI->findReachingDef(SelReg, Op1->getSubReg(), *Cmp, *MRI, LIS); in optimizeVcndVcmpPair()
166 if (!Sel || Sel->getOpcode() != AMDGPU::V_CNDMASK_B32_e64) in optimizeVcndVcmpPair()
169 if (TII->hasModifiersSet(*Sel, AMDGPU::OpName::src0_modifiers) || in optimizeVcndVcmpPair()
170 TII->hasModifiersSet(*Sel, AMDGPU::OpName::src1_modifiers)) in optimizeVcndVcmpPair()
173 Op1 = TII->getNamedOperand(*Sel, AMDGPU::OpName::src0); in optimizeVcndVcmpPair()
174 Op2 = TII->getNamedOperand(*Sel, AMDGPU::OpName::src1); in optimizeVcndVcmpPair()
175 MachineOperand *CC = TII->getNamedOperand(*Sel, AMDGPU::OpName::src2); in optimizeVcndVcmpPair()
176 if (!Op1->isImm() || !Op2->isImm() || !CC->isReg() || in optimizeVcndVcmpPair()
177 Op1->getImm() != 0 || Op2->getImm() != 1) in optimizeVcndVcmpPair()
180 Register CCReg = CC->getReg(); in optimizeVcndVcmpPair()
189 SlotIndex SelIdx = LIS->getInstructionIndex(*Sel); in optimizeVcndVcmpPair()
190 LiveInterval *SelLI = &LIS->getInterval(SelReg); in optimizeVcndVcmpPair()
191 if (llvm::any_of(SelLI->vnis(), in optimizeVcndVcmpPair()
193 return VNI->isPHIDef(); in optimizeVcndVcmpPair()
202 BuildMI(MBB, *And, And->getDebugLoc(), TII->get(Andn2Opc), in optimizeVcndVcmpPair()
203 And->getOperand(0).getReg()) in optimizeVcndVcmpPair()
205 .addReg(CCReg, getUndefRegState(CC->isUndef()), CC->getSubReg()); in optimizeVcndVcmpPair()
206 MachineOperand &AndSCC = And->getOperand(3); in optimizeVcndVcmpPair()
208 MachineOperand &Andn2SCC = Andn2->getOperand(3); in optimizeVcndVcmpPair()
212 SlotIndex AndIdx = LIS->ReplaceMachineInstrInMaps(*And, *Andn2); in optimizeVcndVcmpPair()
213 And->eraseFromParent(); in optimizeVcndVcmpPair()
219 SlotIndex CmpIdx = LIS->getInstructionIndex(*Cmp); in optimizeVcndVcmpPair()
221 LiveInterval &CCLI = LIS->getInterval(CCReg); in optimizeVcndVcmpPair()
224 LIS->removeInterval(CCReg); in optimizeVcndVcmpPair()
225 LIS->createAndComputeVirtRegInterval(CCReg); in optimizeVcndVcmpPair()
228 LIS->removeAllRegUnitsForPhysReg(CCReg); in optimizeVcndVcmpPair()
231 // and s_and_b64 if VCC or just unused if any other register. in optimizeVcndVcmpPair()
232 LiveInterval *CmpLI = CmpReg.isVirtual() ? &LIS->getInterval(CmpReg) : nullptr; in optimizeVcndVcmpPair()
233 if ((CmpLI && CmpLI->Query(AndIdx.getRegSlot()).isKill()) || in optimizeVcndVcmpPair()
235 std::none_of(std::next(Cmp->getIterator()), Andn2->getIterator(), in optimizeVcndVcmpPair()
241 LIS->removeVRegDefAt(*CmpLI, CmpIdx.getRegSlot()); in optimizeVcndVcmpPair()
242 LIS->RemoveMachineInstrFromMaps(*Cmp); in optimizeVcndVcmpPair()
243 Cmp->eraseFromParent(); in optimizeVcndVcmpPair()
247 bool IsKill = SelLI->Query(CmpIdx.getRegSlot()).isKill(); in optimizeVcndVcmpPair()
248 LIS->shrinkToUses(SelLI); in optimizeVcndVcmpPair()
249 bool IsDead = SelLI->Query(SelIdx.getRegSlot()).isDeadDef(); in optimizeVcndVcmpPair()
250 if (MRI->use_nodbg_empty(SelReg) && (IsKill || IsDead)) { in optimizeVcndVcmpPair()
253 LIS->removeVRegDefAt(*SelLI, SelIdx.getRegSlot()); in optimizeVcndVcmpPair()
254 LIS->RemoveMachineInstrFromMaps(*Sel); in optimizeVcndVcmpPair()
255 bool ShrinkSel = Sel->getOperand(0).readsReg(); in optimizeVcndVcmpPair()
256 Sel->eraseFromParent(); in optimizeVcndVcmpPair()
260 LIS->shrinkToUses(SelLI); in optimizeVcndVcmpPair()
307 I--; in optimizeElseBranch()
309 if (I->getOpcode() == AndOpc && I->getOperand(0).getReg() == DstReg && in optimizeElseBranch()
310 I->getOperand(1).getReg() == Register(ExecReg)) in optimizeElseBranch()
312 I--; in optimizeElseBranch()
321 SlotIndex StartIdx = LIS->getInstructionIndex(SaveExecMI); in optimizeElseBranch()
322 SlotIndex EndIdx = LIS->getInstructionIndex(*AndExecMI); in optimizeElseBranch()
323 for (MCRegUnit Unit : TRI->regunits(ExecReg)) { in optimizeElseBranch()
324 LiveRange &RegUnit = LIS->getRegUnit(Unit); in optimizeElseBranch()
330 LIS->removeInterval(SavedExecReg); in optimizeElseBranch()
331 LIS->removeInterval(DstReg); in optimizeElseBranch()
335 LIS->RemoveMachineInstrFromMaps(*AndExecMI); in optimizeElseBranch()
336 AndExecMI->eraseFromParent(); in optimizeElseBranch()
338 LIS->createAndComputeVirtRegInterval(DstReg); in optimizeElseBranch()
359 CondReg = MCRegister::from(Wave32 ? AMDGPU::VCC_LO : AMDGPU::VCC); in runOnMachineFunction()
396 auto I = CurBB->rbegin(), E = CurBB->rend(); in runOnMachineFunction()
398 if (I->isUnconditionalBranch() || I->getOpcode() == AMDGPU::S_ENDPGM) in runOnMachineFunction()
400 else if (I->isBranch()) in runOnMachineFunction()
405 if (I->isDebugInstr()) { in runOnMachineFunction()
410 if (I->mayStore() || I->isBarrier() || I->isCall() || in runOnMachineFunction()
411 I->hasUnmodeledSideEffects() || I->hasOrderedMemoryRef()) in runOnMachineFunction()
417 for (auto &Op : I->operands()) { in runOnMachineFunction()
423 LIS->RemoveMachineInstrFromMaps(*I); in runOnMachineFunction()
424 I->eraseFromParent(); in runOnMachineFunction()
434 for (auto *Pred : CurBB->predecessors()) { in runOnMachineFunction()
435 if (Pred->succ_size() == 1) in runOnMachineFunction()
451 && ScanThreshold--; ++I) { in runOnMachineFunction()
453 if (!(I->isFullCopy() && I->getOperand(1).getReg() == Register(ExecReg))) in runOnMachineFunction()
456 Register SavedExec = I->getOperand(0).getReg(); in runOnMachineFunction()
457 if (SavedExec.isVirtual() && MRI->hasOneNonDBGUse(SavedExec)) { in runOnMachineFunction()
458 MachineInstr *SingleExecUser = &*MRI->use_instr_nodbg_begin(SavedExec); in runOnMachineFunction()
459 int Idx = SingleExecUser->findRegisterUseOperandIdx(SavedExec, in runOnMachineFunction()
461 assert(Idx != -1); in runOnMachineFunction()
462 if (SingleExecUser->getParent() == I->getParent() && in runOnMachineFunction()
463 !SingleExecUser->getOperand(Idx).isImplicit() && in runOnMachineFunction()
464 TII->isOperandLegal(*SingleExecUser, Idx, &I->getOperand(1))) { in runOnMachineFunction()
466 LIS->RemoveMachineInstrFromMaps(*I); in runOnMachineFunction()
467 I->eraseFromParent(); in runOnMachineFunction()
468 MRI->replaceRegWith(SavedExec, ExecReg); in runOnMachineFunction()
469 LIS->removeInterval(SavedExec); in runOnMachineFunction()
480 LIS->removeInterval(Reg); in runOnMachineFunction()
481 if (!MRI->reg_empty(Reg)) in runOnMachineFunction()
482 LIS->createAndComputeVirtRegInterval(Reg); in runOnMachineFunction()
484 LIS->removeAllRegUnitsForPhysReg(Reg); in runOnMachineFunction()