Lines Matching +full:get +full:- +full:only
1 //===-- SIOptimizeExecMasking.cpp -----------------------------------------===//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
22 #define DEBUG_TYPE "si-optimize-exec-masking"
211 // These are only terminators to get correct spill code placement during
217 MI.setDesc(TII->get(RegSrc ? AMDGPU::COPY : AMDGPU::S_MOV_B32)); in removeTerminatorBit()
222 MI.setDesc(TII->get(RegSrc ? AMDGPU::COPY : AMDGPU::S_MOV_B64)); in removeTerminatorBit()
226 // This is only a terminator to get the correct spill code placement during in removeTerminatorBit()
228 MI.setDesc(TII->get(AMDGPU::S_XOR_B64)); in removeTerminatorBit()
232 // This is only a terminator to get the correct spill code placement during in removeTerminatorBit()
234 MI.setDesc(TII->get(AMDGPU::S_XOR_B32)); in removeTerminatorBit()
238 // This is only a terminator to get the correct spill code placement during in removeTerminatorBit()
240 MI.setDesc(TII->get(AMDGPU::S_OR_B64)); in removeTerminatorBit()
244 // This is only a terminator to get the correct spill code placement during in removeTerminatorBit()
246 MI.setDesc(TII->get(AMDGPU::S_OR_B32)); in removeTerminatorBit()
250 // This is only a terminator to get the correct spill code placement during in removeTerminatorBit()
252 MI.setDesc(TII->get(AMDGPU::S_ANDN2_B64)); in removeTerminatorBit()
256 // This is only a terminator to get the correct spill code placement during in removeTerminatorBit()
258 MI.setDesc(TII->get(AMDGPU::S_ANDN2_B32)); in removeTerminatorBit()
262 // This is only a terminator to get the correct spill code placement during in removeTerminatorBit()
264 MI.setDesc(TII->get(AMDGPU::S_AND_B64)); in removeTerminatorBit()
268 // This is only a terminator to get the correct spill code placement during in removeTerminatorBit()
270 MI.setDesc(TII->get(AMDGPU::S_AND_B32)); in removeTerminatorBit()
278 // Turn all pseudoterminators in the block into their equivalent non-terminator
279 // instructions. Returns the reverse iterator to the first non-terminator
288 if (!I->isTerminator()) in fixTerminators()
316 // XXX - Seems LiveRegUnits doesn't work correctly since it will incorrectly
317 // report the register as unavailable because a super-register with a lane mask
321 if (Succ->isLiveIn(Reg)) in isLiveOut()
328 // Backwards-iterate from Origin (for n=MaxInstructions iterations) until either
329 // the beginning of the BB is reached or Pred evaluates to true - which can be
339 E = Origin.getParent()->rend(); in findInstrBackwards()
343 if (A->isDebugInstr()) in findInstrBackwards()
350 if (A->modifiesRegister(Reg, TRI)) in findInstrBackwards()
358 A->killsRegister(Reg, TRI)) { in findInstrBackwards()
359 for (MachineOperand &MO : A->operands()) { in findInstrBackwards()
362 if (Candidate != Reg && TRI->regsOverlap(Candidate, Reg)) in findInstrBackwards()
363 KillFlagCandidates->push_back(&MO); in findInstrBackwards()
375 // Determine if a register Reg is not re-defined and still in use
395 for (; A != Stop.getParent()->rend() && A != Stop; ++A) { in isRegisterInUseBetween()
399 return !LR.available(Reg) || MRI->isReserved(Reg); in isRegisterInUseBetween()
402 // Determine if a register Reg is not re-defined and still in use
406 return isRegisterInUseBetween(Stop, *Stop.getParent()->rbegin(), Reg, true); in isRegisterInUseAfter()
450 // Fold exec = COPY (S_AND_B64 reg, exec) -> exec = S_AND_B64 reg, exec in optimizeExecSequence()
451 if (CopyToExecInst->getOperand(1).isKill() && in optimizeExecSequence()
455 PrepareExecInst->getOperand(0).setReg(Exec); in optimizeExecSequence()
459 CopyToExecInst->eraseFromParent(); in optimizeExecSequence()
472 Register CopyFromExec = CopyFromExecInst->getOperand(0).getReg(); in optimizeExecSequence()
477 J = std::next(CopyFromExecInst->getIterator()), in optimizeExecSequence()
478 JE = I->getIterator(); in optimizeExecSequence()
480 if (SaveExecInst && J->readsRegister(Exec, TRI)) { in optimizeExecSequence()
488 bool ReadsCopyFromExec = J->readsRegister(CopyFromExec, TRI); in optimizeExecSequence()
490 if (J->modifiesRegister(CopyToExec, TRI)) { in optimizeExecSequence()
498 unsigned SaveExecOp = getSaveExecOp(J->getOpcode()); in optimizeExecSequence()
525 if (SaveExecInst && J->readsRegister(CopyToExec, TRI)) { in optimizeExecSequence()
536 MachineOperand &Src0 = SaveExecInst->getOperand(1); in optimizeExecSequence()
537 MachineOperand &Src1 = SaveExecInst->getOperand(2); in optimizeExecSequence()
544 if (!SaveExecInst->isCommutable()) in optimizeExecSequence()
551 CopyFromExecInst->eraseFromParent(); in optimizeExecSequence()
553 auto InsPt = SaveExecInst->getIterator(); in optimizeExecSequence()
554 const DebugLoc &DL = SaveExecInst->getDebugLoc(); in optimizeExecSequence()
556 BuildMI(MBB, InsPt, DL, TII->get(getSaveExecOp(SaveExecInst->getOpcode())), in optimizeExecSequence()
558 .addReg(OtherOp->getReg()); in optimizeExecSequence()
559 SaveExecInst->eraseFromParent(); in optimizeExecSequence()
561 CopyToExecInst->eraseFromParent(); in optimizeExecSequence()
564 OtherInst->substituteRegister(CopyToExec, Exec, AMDGPU::NoSubRegister, in optimizeExecSequence()
580 if (NewOpcode == -1) in optimizeVCMPSaveExecSequence()
583 MachineOperand *Src0 = TII->getNamedOperand(VCmp, AMDGPU::OpName::src0); in optimizeVCMPSaveExecSequence()
584 MachineOperand *Src1 = TII->getNamedOperand(VCmp, AMDGPU::OpName::src1); in optimizeVCMPSaveExecSequence()
590 bool IsSGPR32 = TRI->getRegSizeInBits(MoveDest, *MRI) == 32; in optimizeVCMPSaveExecSequence()
593 SaveExecInstr.getDebugLoc(), TII->get(MovOpcode), MoveDest) in optimizeVCMPSaveExecSequence()
600 VCmp.getDebugLoc(), TII->get(NewOpcode)); in optimizeVCMPSaveExecSequence()
603 [&](unsigned OperandName) -> void { in optimizeVCMPSaveExecSequence()
604 if (auto *Mod = TII->getNamedOperand(VCmp, OperandName)) in optimizeVCMPSaveExecSequence()
605 Builder.addImm(Mod->getImm()); in optimizeVCMPSaveExecSequence()
617 if (Src0->isReg()) in optimizeVCMPSaveExecSequence()
618 MRI->clearKillFlags(Src0->getReg()); in optimizeVCMPSaveExecSequence()
619 if (Src1->isReg()) in optimizeVCMPSaveExecSequence()
620 MRI->clearKillFlags(Src1->getReg()); in optimizeVCMPSaveExecSequence()
623 MO->setIsKill(false); in optimizeVCMPSaveExecSequence()
640 if (!ST->hasGFX10_3Insts()) in tryRecordVCmpxAndSaveexecSequence()
644 ST->isWave32() ? AMDGPU::S_AND_SAVEEXEC_B32 : AMDGPU::S_AND_SAVEEXEC_B64; in tryRecordVCmpxAndSaveexecSequence()
650 if (!TRI->isSGPRReg(*MRI, SaveExecDest)) in tryRecordVCmpxAndSaveexecSequence()
653 MachineOperand *SaveExecSrc0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0); in tryRecordVCmpxAndSaveexecSequence()
654 if (!SaveExecSrc0->isReg()) in tryRecordVCmpxAndSaveexecSequence()
669 return AMDGPU::getVCMPXOpFromVCMP(Check->getOpcode()) != -1 && in tryRecordVCmpxAndSaveexecSequence()
670 Check->modifiesRegister(SaveExecSrc0->getReg(), TRI); in tryRecordVCmpxAndSaveexecSequence()
672 {Exec, SaveExecSrc0->getReg()}); in tryRecordVCmpxAndSaveexecSequence()
677 MachineOperand *VCmpDest = TII->getNamedOperand(*VCmp, AMDGPU::OpName::sdst); in tryRecordVCmpxAndSaveexecSequence()
681 MachineOperand *Src0 = TII->getNamedOperand(*VCmp, AMDGPU::OpName::src0); in tryRecordVCmpxAndSaveexecSequence()
682 if (Src0->isReg() && TRI->isSGPRReg(*MRI, Src0->getReg()) && in tryRecordVCmpxAndSaveexecSequence()
683 MI.modifiesRegister(Src0->getReg(), TRI)) in tryRecordVCmpxAndSaveexecSequence()
686 MachineOperand *Src1 = TII->getNamedOperand(*VCmp, AMDGPU::OpName::src1); in tryRecordVCmpxAndSaveexecSequence()
687 if (Src1->isReg() && TRI->isSGPRReg(*MRI, Src1->getReg()) && in tryRecordVCmpxAndSaveexecSequence()
688 MI.modifiesRegister(Src1->getReg(), TRI)) in tryRecordVCmpxAndSaveexecSequence()
692 // its MBB Live-outs, meaning it's used in any of its successors, leading in tryRecordVCmpxAndSaveexecSequence()
695 if (isLiveOut(*VCmp->getParent(), VCmpDest->getReg())) in tryRecordVCmpxAndSaveexecSequence()
700 if (isRegisterInUseBetween(*VCmp, MI, VCmpDest->getReg(), false, true) || in tryRecordVCmpxAndSaveexecSequence()
701 isRegisterInUseAfter(MI, VCmpDest->getReg())) in tryRecordVCmpxAndSaveexecSequence()
709 if (Src0->isReg()) in tryRecordVCmpxAndSaveexecSequence()
710 NonDefRegs.push_back(Src0->getReg()); in tryRecordVCmpxAndSaveexecSequence()
712 if (Src1->isReg()) in tryRecordVCmpxAndSaveexecSequence()
713 NonDefRegs.push_back(Src1->getReg()); in tryRecordVCmpxAndSaveexecSequence()
731 ST->isWave32() ? AMDGPU::S_XOR_B32 : AMDGPU::S_XOR_B64; in tryRecordOrSaveexecXorSequence()
733 if (MI.getOpcode() == XorOpcode && &MI != &MI.getParent()->front()) { in tryRecordOrSaveexecXorSequence()
741 const unsigned OrSaveexecOpcode = ST->isWave32() in tryRecordOrSaveexecXorSequence()
769 const unsigned Andn2Opcode = ST->isWave32() ? AMDGPU::S_ANDN2_SAVEEXEC_B32 in optimizeOrSaveexecXorSequences()
776 BuildMI(*Or->getParent(), Or->getIterator(), Or->getDebugLoc(), in optimizeOrSaveexecXorSequences()
777 TII->get(Andn2Opcode), Or->getOperand(0).getReg()) in optimizeOrSaveexecXorSequences()
778 .addReg(Or->getOperand(1).getReg()); in optimizeOrSaveexecXorSequences()
780 Or->eraseFromParent(); in optimizeOrSaveexecXorSequences()
781 Xor->eraseFromParent(); in optimizeOrSaveexecXorSequences()
793 this->MF = &MF; in runOnMachineFunction()
795 TRI = ST->getRegisterInfo(); in runOnMachineFunction()
796 TII = ST->getInstrInfo(); in runOnMachineFunction()
798 Exec = TRI->getExec(); in runOnMachineFunction()