Lines Matching +full:vcc +full:- +full:p
1 //===-- SIShrinkInstructions.cpp - Shrink Instructions --------------------===//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 /// The pass tries to use the 32-bit encoding for instructions when possible.
8 //===----------------------------------------------------------------------===//
18 #define DEBUG_TYPE "si-shrink-instructions"
21 "Number of 64-bit instruction reduced to 32-bit.");
23 "Number of literal constants folded into 32-bit instructions.");
86 /// This function checks \p MI for operands defined by a move immediate
88 /// can. This function assumes that \p MI is a VOP1, VOP2, or VOPC instruction.
91 assert(TII->isVOP1(MI) || TII->isVOP2(MI) || TII->isVOPC(MI)); in foldImmediates()
100 MachineInstr *Def = MRI->getUniqueVRegDef(Reg); in foldImmediates()
101 if (Def && Def->isMoveImmediate()) { in foldImmediates()
102 MachineOperand &MovSrc = Def->getOperand(1); in foldImmediates()
105 if (TII->isOperandLegal(MI, Src0Idx, &MovSrc)) { in foldImmediates()
120 if (MRI->use_nodbg_empty(Reg)) in foldImmediates()
121 Def->eraseFromParent(); in foldImmediates()
131 if (TII->commuteInstruction(MI)) { in foldImmediates()
136 TII->commuteInstruction(MI); in foldImmediates()
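
The foldImmediates() fragments above show the basic fold: when a VOP1/VOP2/VOPC source register has a unique move-immediate def, the literal is substituted directly if TII->isOperandLegal() accepts it, the now-dead V_MOV is erased once the register has no remaining non-debug uses, and a failed fold is retried after commuting the instruction. Below is a minimal standalone sketch of that flow; the Inst/Operand structs, the MovImmMap, and acceptsLiteral() are invented stand-ins for illustration, not the LLVM MachineInstr/SIInstrInfo API.

#include <cstdint>
#include <unordered_map>
#include <utility>
#include <vector>

// Illustrative operand/instruction model (not the LLVM API).
struct Operand { bool IsReg; unsigned Reg; int64_t Imm; };
struct Inst { std::vector<Operand> Ops; bool Commutable; };  // Ops[0]=dst,
                                                             // Ops[1]=src0, Ops[2]=src1
// Registers known to be defined by a single move-immediate.
using MovImmMap = std::unordered_map<unsigned, int64_t>;

// Toy legality rule standing in for TII->isOperandLegal(): in this model only
// the src0 slot (operand index 1) of the 32-bit encoding may hold a literal.
static bool acceptsLiteral(const Inst &, unsigned OpIdx) { return OpIdx == 1; }

// Fold a move-immediate feeding src0; if that fails, commute src0/src1 and try
// once more, undoing the commute on failure (mirrors the retry above).
static bool foldImmediates(Inst &MI, const MovImmMap &Defs,
                           bool TryToCommute = true) {
  Operand &Src0 = MI.Ops[1];
  if (Src0.IsReg) {
    auto It = Defs.find(Src0.Reg);
    if (It != Defs.end() && acceptsLiteral(MI, 1)) {
      Src0 = {false, 0, It->second};      // register use becomes a literal
      return true;                        // caller erases the mov if now dead
    }
  }
  if (TryToCommute && MI.Commutable) {
    std::swap(MI.Ops[1], MI.Ops[2]);
    if (foldImmediates(MI, Defs, /*TryToCommute=*/false))
      return true;
    std::swap(MI.Ops[1], MI.Ops[2]);      // commute back
  }
  return false;
}
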
151 "True16 Instructions post-RA"); in shouldShrinkTrue16()
162 !TII->isInlineConstant(*Src.getParent(), Src.getOperandNo()); in isKImmOperand()
167 !TII->isInlineConstant(*Src.getParent(), Src.getOperandNo()); in isKUImmOperand()
174 return !TII->isInlineConstant(Src); in isKImmOrKUImmOperand()
179 return !TII->isInlineConstant(Src); in isKImmOrKUImmOperand()
185 /// \returns the opcode of an instruction that a move immediate of the constant
186 /// \p Src can be replaced with, if the constant is replaced with \p ModifiedImm.
197 if (TII->isInlineConstant(Src)) in canModifyToInlineImmOp32()
206 if (TII->isInlineConstant(APInt(32, ModifiedImm))) in canModifyToInlineImmOp32()
211 if (TII->isInlineConstant(APInt(32, ModifiedImm))) in canModifyToInlineImmOp32()
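
Per its doc comment, canModifyToInlineImmOp32() returns the opcode that a move of the literal \p Src can be rewritten to once the literal is replaced with \p ModifiedImm. Only the isInlineConstant() checks are visible in the matched lines; the sketch below assumes the two candidate rewrites are the bitwise complement (the mov becomes a NOT of an inline constant) and the bit reversal (a scalar/vector bit-reverse such as S_BREV_B32 / V_BFREV_B32_e32 of an inline constant). The helper names (isInlineConstant32, reverseBits32, canModifyToInlineImm32) are illustrative, and the simplified inline-constant rule is integers -16..64; the real check also accepts selected float bit patterns.

#include <cstdint>
#include <optional>

// Simplified inline-constant test: integers -16..64 only.
static bool isInlineConstant32(uint32_t Imm) {
  int32_t S = static_cast<int32_t>(Imm);
  return S >= -16 && S <= 64;
}

// Portable bit reversal (the real code uses llvm::reverseBits).
static uint32_t reverseBits32(uint32_t V) {
  uint32_t R = 0;
  for (int I = 0; I < 32; ++I)
    R |= ((V >> I) & 1u) << (31 - I);
  return R;
}

enum class MovRewrite { Not, BitReverse };       // which op replaces the mov
struct InlineRewrite { MovRewrite Kind; uint32_t ModifiedImm; };

// If the literal itself is not inlinable, check whether its complement or its
// bit reversal is; the mov then becomes a NOT or a bit-reverse of the
// modified value, and the literal dword disappears.
static std::optional<InlineRewrite> canModifyToInlineImm32(uint32_t Imm) {
  if (isInlineConstant32(Imm))
    return std::nullopt;                         // already encodes for free
  uint32_t NotImm = ~Imm;
  if (isInlineConstant32(NotImm))
    return InlineRewrite{MovRewrite::Not, NotImm};
  uint32_t RevImm = reverseBits32(Imm);
  if (isInlineConstant32(RevImm))
    return InlineRewrite{MovRewrite::BitReverse, RevImm};
  return std::nullopt;
}
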
234 if (!ST->hasSCmpK()) in shrinkScalarCompare()
240 TII->commuteInstruction(MI, false, 0, 1); in shrinkScalarCompare()
252 if (SOPKOpc == -1) in shrinkScalarCompare()
266 MI.setDesc(TII->get(SOPKOpc)); in shrinkScalarCompare()
272 const MCInstrDesc &NewDesc = TII->get(SOPKOpc); in shrinkScalarCompare()
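
shrinkScalarCompare() (guarded by hasSCmpK() above) turns s_cmp_* with a 16-bit literal into the SOPK form s_cmpk_*, which carries the immediate in the instruction word instead of a trailing literal dword; the isKImmOperand()/isKUImmOperand() fragments further up supply the "fits 16 bits and is not already a free inline constant" test. A standalone sketch of that condition; canShrinkToSOPK and the simplified helpers are illustrative names, and the inline-constant rule is the same simplification as above.

#include <cstdint>

// Simplified inline-constant test: integers -16..64 only. The real
// TII->isInlineConstant() also accepts selected float bit patterns.
static bool isInlineConstant32(int64_t Imm) { return Imm >= -16 && Imm <= 64; }

// isKImmOperand(): fits the signed 16-bit SIMM16 field and is not already free.
static bool isKImm(int64_t Imm) {
  return Imm >= INT16_MIN && Imm <= INT16_MAX && !isInlineConstant32(Imm);
}

// isKUImmOperand(): unsigned 16-bit variant, used by the unsigned compares.
static bool isKUImm(int64_t Imm) {
  return Imm >= 0 && Imm <= UINT16_MAX && !isInlineConstant32(Imm);
}

// Shrink condition for s_cmp_* -> s_cmpk_*: the SOPK form has no room for a
// trailing literal dword, so the constant must fit its 16-bit immediate field
// (signed for the *_i32 compares, unsigned for the *_u32 compares).
static bool canShrinkToSOPK(bool SignedCmp, int64_t Imm) {
  return SignedCmp ? isKImm(Imm) : isKUImm(Imm);
}

The real pass also commutes the compare first when the literal sits in src0, and for eq/ne it may switch between the signed and unsigned SOPK opcodes depending on which 16-bit range the literal fits.
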
282 // Shrink NSA encoded instructions with contiguous VGPRs to non-NSA encoding.
289 switch (Info->MIMGEncoding) { in shrinkMIMG()
302 unsigned NewAddrDwords = Info->VAddrDwords; in shrinkMIMG()
305 if (Info->VAddrDwords == 2) { in shrinkMIMG()
307 } else if (Info->VAddrDwords == 3) { in shrinkMIMG()
309 } else if (Info->VAddrDwords == 4) { in shrinkMIMG()
311 } else if (Info->VAddrDwords == 5) { in shrinkMIMG()
313 } else if (Info->VAddrDwords == 6) { in shrinkMIMG()
315 } else if (Info->VAddrDwords == 7) { in shrinkMIMG()
317 } else if (Info->VAddrDwords == 8) { in shrinkMIMG()
319 } else if (Info->VAddrDwords == 9) { in shrinkMIMG()
321 } else if (Info->VAddrDwords == 10) { in shrinkMIMG()
323 } else if (Info->VAddrDwords == 11) { in shrinkMIMG()
325 } else if (Info->VAddrDwords == 12) { in shrinkMIMG()
335 bool IsKill = NewAddrDwords == Info->VAddrDwords; in shrinkMIMG()
336 const unsigned NSAMaxSize = ST->getNSAMaxSize(); in shrinkMIMG()
338 const unsigned EndVAddr = IsPartialNSA ? NSAMaxSize : Info->VAddrOperands; in shrinkMIMG()
341 unsigned Vgpr = TRI->getHWRegIndex(Op.getReg()); in shrinkMIMG()
342 unsigned Dwords = TRI->getRegSizeInBits(Op.getReg(), *MRI) / 32; in shrinkMIMG()
343 assert(Dwords > 0 && "Unimplemented for less than 32 bit regs"); in shrinkMIMG()
363 // Further check for implicit tied operands - this may be present if TFE is in shrinkMIMG()
367 unsigned TFEVal = (TFEIdx == -1) ? 0 : MI.getOperand(TFEIdx).getImm(); in shrinkMIMG()
368 unsigned LWEVal = (LWEIdx == -1) ? 0 : MI.getOperand(LWEIdx).getImm(); in shrinkMIMG()
369 int ToUntie = -1; in shrinkMIMG()
377 ToUntie == -1 && in shrinkMIMG()
385 unsigned NewOpcode = AMDGPU::getMIMGOpcode(Info->BaseOpcode, NewEncoding, in shrinkMIMG()
386 Info->VDataDwords, NewAddrDwords); in shrinkMIMG()
387 MI.setDesc(TII->get(NewOpcode)); in shrinkMIMG()
388 MI.getOperand(VAddr0Idx).setReg(RC->getRegister(VgprBase)); in shrinkMIMG()
398 ToUntie - (EndVAddr - 1)); in shrinkMIMG()
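
shrinkMIMG() implements the comment above: an NSA-encoded image instruction names each address VGPR individually, which costs extra encoding dwords, but if the address registers were allocated as one ascending contiguous block the same operation fits the plain (or partial-NSA) encoding with a single starting register. A standalone sketch of the core contiguity test over hardware register indices; addrRegsAreContiguous is an illustrative helper, and multi-dword address operands, the partial-NSA limit (getNSAMaxSize()), and the TFE/LWE tied-operand fix-up visible above are left out.

#include <cstddef>
#include <vector>

// Given the hardware VGPR index of each NSA address operand (vaddr0, vaddr1,
// ...), the non-NSA encoding is usable only if they form one ascending
// contiguous run, i.e. vaddrN lives in VGPR base+N.
static bool addrRegsAreContiguous(const std::vector<unsigned> &VgprIdx) {
  if (VgprIdx.empty())
    return false;
  unsigned Base = VgprIdx.front();
  for (std::size_t I = 1; I < VgprIdx.size(); ++I)
    if (VgprIdx[I] != Base + I)
      return false;
  return true;
}

For example, addresses living in v10, v11, v12, v13 pass the test; v10, v11, v13, v14 do not.
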
404 // Pre-GFX10 VOP3 instructions like MAD/FMA cannot take a literal operand so in shrinkMadFma()
406 if (!ST->hasVOP3Literal()) in shrinkMadFma()
409 // There is no advantage to doing this pre-RA. in shrinkMadFma()
410 if (!MF->getProperties().hasProperty( in shrinkMadFma()
414 if (TII->hasAnyModifiersSet(MI)) in shrinkMadFma()
418 MachineOperand &Src0 = *TII->getNamedOperand(MI, AMDGPU::OpName::src0); in shrinkMadFma()
419 MachineOperand &Src1 = *TII->getNamedOperand(MI, AMDGPU::OpName::src1); in shrinkMadFma()
420 MachineOperand &Src2 = *TII->getNamedOperand(MI, AMDGPU::OpName::src2); in shrinkMadFma()
426 if (Src2.isImm() && !TII->isInlineConstant(Src2)) { in shrinkMadFma()
427 if (Src1.isReg() && TRI->isVGPR(*MRI, Src1.getReg())) in shrinkMadFma()
429 else if (Src0.isReg() && TRI->isVGPR(*MRI, Src0.getReg())) in shrinkMadFma()
448 NewOpcode = ST->hasTrue16BitInsts() ? AMDGPU::V_FMAAK_F16_t16 in shrinkMadFma()
455 if (Src2.isReg() && TRI->isVGPR(*MRI, Src2.getReg())) { in shrinkMadFma()
456 if (Src1.isImm() && !TII->isInlineConstant(Src1)) in shrinkMadFma()
458 else if (Src0.isImm() && !TII->isInlineConstant(Src0)) in shrinkMadFma()
477 NewOpcode = ST->hasTrue16BitInsts() ? AMDGPU::V_FMAMK_F16_t16 in shrinkMadFma()
491 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(NewOpcode), in shrinkMadFma()
499 TII->removeModOperands(MI); in shrinkMadFma()
500 MI.setDesc(TII->get(NewOpcode)); in shrinkMadFma()
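
shrinkMadFma() runs only post-RA and only on subtargets whose VOP3 encoding can carry a literal (the hasVOP3Literal() guard above): a three-address mad/fma with one costly 32-bit literal can then be re-encoded as the shorter two-operand-plus-literal form, V_FMAAK_* when the addend src2 is the literal (d = s0*s1 + K) or V_FMAMK_* when src2 is a VGPR and a multiplicand is the literal (d = s0*K + s2). A standalone sketch of the operand-pattern selection; the SrcOp struct and pickMadShrink are illustrative, and modifier checks, True16 handling, and the F16 vs F32 opcode choice from the fragments above are omitted.

#include <cstdint>

// Illustrative source-operand model (not the LLVM MachineOperand API).
struct SrcOp {
  bool IsImm;      // literal operand
  bool IsVGPR;     // meaningful only when !IsImm
  int64_t Imm;     // meaningful only when IsImm
};

static bool isInlineConstant32(int64_t Imm) { return Imm >= -16 && Imm <= 64; }
static bool isCostlyLiteral(const SrcOp &S) {
  return S.IsImm && !isInlineConstant32(S.Imm);
}
static bool isVGPR(const SrcOp &S) { return !S.IsImm && S.IsVGPR; }

enum class MadShrink { None, FMAAK, FMAMK };

// Original operation d = s0*s1 + s2 with at most one costly literal:
//  - literal addend s2 and a VGPR multiplicand   -> FMAAK (d = s0*s1 + K)
//  - VGPR addend s2 and a literal multiplicand   -> FMAMK (d = s0*K + s2)
// The pass commutes s0/s1 afterwards so the VGPR lands where the shrunk
// encoding needs it.
static MadShrink pickMadShrink(const SrcOp &S0, const SrcOp &S1,
                               const SrcOp &S2) {
  if (isCostlyLiteral(S2) && (isVGPR(S1) || isVGPR(S0)))
    return MadShrink::FMAAK;
  if (isVGPR(S2) && (isCostlyLiteral(S1) || isCostlyLiteral(S0)))
    return MadShrink::FMAMK;
  return MadShrink::None;
}
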
504 /// Attempt to shrink AND/OR/XOR operations requiring non-inlineable literals.
517 if (!SrcImm->isImm() || in shrinkScalarLogicOp()
518 AMDGPU::isInlinableLiteral32(SrcImm->getImm(), ST->hasInv2PiInlineImm())) in shrinkScalarLogicOp()
521 uint32_t Imm = static_cast<uint32_t>(SrcImm->getImm()); in shrinkScalarLogicOp()
528 } else if (AMDGPU::isInlinableLiteral32(~Imm, ST->hasInv2PiInlineImm())) { in shrinkScalarLogicOp()
536 } else if (AMDGPU::isInlinableLiteral32(~Imm, ST->hasInv2PiInlineImm())) { in shrinkScalarLogicOp()
541 if (AMDGPU::isInlinableLiteral32(~Imm, ST->hasInv2PiInlineImm())) { in shrinkScalarLogicOp()
550 if (Dest->getReg().isVirtual() && SrcReg->isReg()) { in shrinkScalarLogicOp()
551 MRI->setRegAllocationHint(Dest->getReg(), 0, SrcReg->getReg()); in shrinkScalarLogicOp()
552 MRI->setRegAllocationHint(SrcReg->getReg(), 0, Dest->getReg()); in shrinkScalarLogicOp()
556 if (SrcReg->isReg() && SrcReg->getReg() == Dest->getReg()) { in shrinkScalarLogicOp()
557 const bool IsUndef = SrcReg->isUndef(); in shrinkScalarLogicOp()
558 const bool IsKill = SrcReg->isKill(); in shrinkScalarLogicOp()
559 MI.setDesc(TII->get(Opc)); in shrinkScalarLogicOp()
562 Src0->ChangeToImmediate(NewImm); in shrinkScalarLogicOp()
564 MI.getOperand(2).ChangeToRegister(Dest->getReg(), /*IsDef*/ false, in shrinkScalarLogicOp()
569 SrcImm->setImm(NewImm); in shrinkScalarLogicOp()
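
shrinkScalarLogicOp() realizes the doc comment above: a scalar AND/OR/XOR whose mask is a costly 32-bit literal can often be rewritten so the constant operand becomes cheap, either by complementing the mask (s_and_b32 -> s_andn2_b32, s_or_b32 -> s_orn2_b32, s_xor_b32 -> s_xnor_b32, matching the three ~Imm checks above) or, when the mask touches a single bit, by switching to s_bitset0_b32/s_bitset1_b32 with the bit index (the Src0->ChangeToImmediate plus tied-register rewrite above). A standalone sketch of the selection; the string opcodes are the real mnemonics, while the helpers and the simplified inline-constant rule are illustrative.

#include <cstdint>
#include <optional>
#include <string>

static bool isInlineConstant32(uint32_t Imm) {
  int32_t S = static_cast<int32_t>(Imm);
  return S >= -16 && S <= 64;              // simplified; real check adds floats
}
static bool isPowerOf2(uint32_t V) { return V && !(V & (V - 1)); }
static unsigned lowestSetBit(uint32_t V) {  // only called on nonzero values
  unsigned I = 0;
  while (!((V >> I) & 1u)) ++I;
  return I;
}

struct LogicRewrite { std::string Opcode; uint32_t NewImm; };

// Rewrite a scalar logic op with a costly literal mask so the operand becomes
// either a 5-bit bit index (s_bitset0/1) or an inline constant (~mask).
static std::optional<LogicRewrite> shrinkScalarLogicOp(const std::string &Opc,
                                                       uint32_t Imm) {
  if (isInlineConstant32(Imm))
    return std::nullopt;                    // literal is already free
  if (Opc == "s_and_b32") {
    if (isPowerOf2(~Imm))                   // mask clears exactly one bit
      return LogicRewrite{"s_bitset0_b32", lowestSetBit(~Imm)};
    if (isInlineConstant32(~Imm))
      return LogicRewrite{"s_andn2_b32", ~Imm};
  } else if (Opc == "s_or_b32") {
    if (isPowerOf2(Imm))                    // mask sets exactly one bit
      return LogicRewrite{"s_bitset1_b32", lowestSetBit(Imm)};
    if (isInlineConstant32(~Imm))
      return LogicRewrite{"s_orn2_b32", ~Imm};
  } else if (Opc == "s_xor_b32") {
    if (isInlineConstant32(~Imm))
      return LogicRewrite{"s_xnor_b32", ~Imm};
  }
  return std::nullopt;
}

As the fragments show, with virtual registers the pass only records allocation hints pairing the destination with the register source and defers the rewrite; after allocation it rewrites in place only when source and destination are the same register.
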
587 if (TRI->regsOverlap(Reg, MO.getReg())) in instAccessReg()
590 LaneBitmask Overlap = TRI->getSubRegIndexLaneMask(SubReg) & in instAccessReg()
591 TRI->getSubRegIndexLaneMask(MO.getSubReg()); in instAccessReg()
601 return instAccessReg(MI->uses(), Reg, SubReg); in instReadsReg()
606 return instAccessReg(MI->defs(), Reg, SubReg); in instModifiesReg()
612 if (TRI->getRegSizeInBits(Reg, *MRI) != 32) { in getSubRegForIndex()
614 Reg = TRI->getSubReg(Reg, TRI->getSubRegFromChannel(I)); in getSubRegForIndex()
616 Sub = TRI->getSubRegFromChannel(I + TRI->getChannelFromSubReg(Sub)); in getSubRegForIndex()
633 TII->get(AMDGPU::IMPLICIT_DEF), Op.getReg()); in dropInstructionKeepingImpDefs()
671 unsigned Size = TII->getOpSize(MovT, 0) / 4; in matchSwap()
673 if (!TRI->isVGPR(*MRI, X)) in matchSwap()
680 E = MovT.getParent()->instr_end(); in matchSwap()
684 KilledT = MovY->killsRegister(T, TRI); in matchSwap()
686 if ((MovY->getOpcode() != AMDGPU::V_MOV_B32_e32 && in matchSwap()
687 MovY->getOpcode() != AMDGPU::COPY) || in matchSwap()
688 !MovY->getOperand(1).isReg() || in matchSwap()
689 MovY->getOperand(1).getReg() != T || in matchSwap()
690 MovY->getOperand(1).getSubReg() != Tsub) in matchSwap()
693 Register Y = MovY->getOperand(0).getReg(); in matchSwap()
694 unsigned Ysub = MovY->getOperand(0).getSubReg(); in matchSwap()
696 if (!TRI->isVGPR(*MRI, Y)) in matchSwap()
700 for (auto IY = MovY->getIterator(), I = std::next(MovT.getIterator()); in matchSwap()
716 (I->getOpcode() != AMDGPU::V_MOV_B32_e32 && in matchSwap()
717 I->getOpcode() != AMDGPU::COPY) || in matchSwap()
718 I->getOperand(0).getReg() != X || in matchSwap()
719 I->getOperand(0).getSubReg() != Xsub) { in matchSwap()
724 if (Size > 1 && (I->getNumImplicitOperands() > (I->isCopy() ? 0U : 1U))) in matchSwap()
740 auto MIB = BuildMI(MBB, MovX->getIterator(), MovT.getDebugLoc(), in matchSwap()
741 TII->get(AMDGPU::V_SWAP_B32)) in matchSwap()
746 if (MovX->hasRegisterImplicitUseOperand(AMDGPU::EXEC)) { in matchSwap()
748 MIB->removeOperand(MIB->getNumExplicitOperands()); in matchSwap()
749 MIB->copyImplicitOps(*MBB.getParent(), *MovX); in matchSwap()
752 MovX->eraseFromParent(); in matchSwap()
756 if (T.isVirtual() && MRI->use_nodbg_empty(T)) { in matchSwap()
760 for (int I = MovT.getNumImplicitOperands() - 1; I >= 0; --I ) { in matchSwap()
763 if (Op.isKill() && TRI->regsOverlap(X, Op.getReg())) in matchSwap()
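
matchSwap() looks for the three-mov exchange through a temporary — t = x; x = y; y = t — and, on subtargets with V_SWAP_B32 (the hasSwap() guard later in the listing), replaces the last two movs with a single swap, keeping or erasing the t = x copy depending on whether t has remaining uses. A standalone sketch of the pattern match over a simplified mov list; the Mov struct and this matcher are invented, and the real code also walks per-dword subregisters for wide copies, stops at any intervening read or clobber of x, y, or t, and preserves implicit operands.

#include <cstddef>
#include <optional>
#include <vector>

// Simplified register-to-register mov; immediates, other instructions, and
// interference in between are not modeled.
struct Mov { unsigned Dst; unsigned Src; };

struct SwapMatch { std::size_t MovT, MovX, MovY; };  // indices into the list

// Find t = x; x = y; y = t. The rewrite would keep (or drop) the first mov and
// replace the other two with v_swap_b32 x, y.
static std::optional<SwapMatch> matchSwap(const std::vector<Mov> &Movs) {
  for (std::size_t I = 0; I + 2 < Movs.size(); ++I) {
    unsigned T = Movs[I].Dst, X = Movs[I].Src;       // candidate t = x
    for (std::size_t J = I + 1; J + 1 < Movs.size(); ++J) {
      if (Movs[J].Dst != X)
        continue;                                    // looking for x = y
      unsigned Y = Movs[J].Src;
      for (std::size_t K = J + 1; K < Movs.size(); ++K)
        if (Movs[K].Dst == Y && Movs[K].Src == T)
          return SwapMatch{I, J, K};                 // y = t completes it
      break;   // a later redefinition of x ends the search for this triple
    }
  }
  return std::nullopt;
}
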
776 if (!ST->hasGFX10_3Insts()) in tryReplaceDeadSDST()
779 MachineOperand *Op = TII->getNamedOperand(MI, AMDGPU::OpName::sdst); in tryReplaceDeadSDST()
782 Register SDstReg = Op->getReg(); in tryReplaceDeadSDST()
783 if (SDstReg.isPhysical() || !MRI->use_nodbg_empty(SDstReg)) in tryReplaceDeadSDST()
786 Op->setReg(ST->isWave32() ? AMDGPU::SGPR_NULL : AMDGPU::SGPR_NULL64); in tryReplaceDeadSDST()
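
tryReplaceDeadSDST() is a small GFX10.3+ cleanup: when the still-virtual scalar destination of a VALU instruction has no non-debug uses, it is redirected to the architectural null register (SGPR_NULL in wave32, SGPR_NULL64 in wave64), so the register allocator never has to hand a real SGPR to a dead result. A tiny standalone sketch of the decision only; the struct fields are descriptive stand-ins for the checks visible above.

#include <optional>

struct SDstInfo {
  bool SubtargetHasNullDst;   // ST->hasGFX10_3Insts()
  bool IsVirtualReg;          // not yet allocated to a physical register
  bool HasNonDebugUses;       // !MRI->use_nodbg_empty(Reg)
  bool Wave32;                // selects SGPR_NULL vs SGPR_NULL64
};

// Returns the null-register name to substitute, or nothing if the sdst must
// stay as it is.
static std::optional<const char *> pickNullSDst(const SDstInfo &I) {
  if (!I.SubtargetHasNullDst || !I.IsVirtualReg || I.HasNonDebugUses)
    return std::nullopt;
  return I.Wave32 ? "SGPR_NULL" : "SGPR_NULL64";
}
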
794 this->MF = &MF; in runOnMachineFunction()
797 TII = ST->getInstrInfo(); in runOnMachineFunction()
798 TRI = &TII->getRegisterInfo(); in runOnMachineFunction()
800 unsigned VCCReg = ST->isWave32() ? AMDGPU::VCC_LO : AMDGPU::VCC; in runOnMachineFunction()
821 // XXX - not exactly a check for post-regalloc run. in runOnMachineFunction()
828 MI.setDesc(TII->get(ModOpcode)); in runOnMachineFunction()
835 if (ST->hasSwap() && (MI.getOpcode() == AMDGPU::V_MOV_B32_e32 || in runOnMachineFunction()
838 Next = NextMI->getIterator(); in runOnMachineFunction()
850 if (!Src0->isReg() && Src1->isReg()) { in runOnMachineFunction()
851 if (TII->commuteInstruction(MI, false, 1, 2)) in runOnMachineFunction()
858 if (Dest->getReg().isVirtual() && Src0->isReg()) { in runOnMachineFunction()
859 MRI->setRegAllocationHint(Dest->getReg(), 0, Src0->getReg()); in runOnMachineFunction()
860 MRI->setRegAllocationHint(Src0->getReg(), 0, Dest->getReg()); in runOnMachineFunction()
864 if (Src0->isReg() && Src0->getReg() == Dest->getReg()) { in runOnMachineFunction()
865 if (Src1->isImm() && isKImmOperand(*Src1)) { in runOnMachineFunction()
869 Src1->setImm(SignExtend64(Src1->getImm(), 32)); in runOnMachineFunction()
870 MI.setDesc(TII->get(Opc)); in runOnMachineFunction()
877 if (MI.isCompare() && TII->isSOPC(MI)) { in runOnMachineFunction()
891 MI.setDesc(TII->get(AMDGPU::S_MOVK_I32)); in runOnMachineFunction()
895 MI.setDesc(TII->get(ModOpc)); in runOnMachineFunction()
911 if (TII->isMIMG(MI.getOpcode()) && in runOnMachineFunction()
912 ST->getGeneration() >= AMDGPUSubtarget::GFX10 && in runOnMachineFunction()
919 if (!TII->isVOP3(MI)) in runOnMachineFunction()
931 if (!TII->hasVALU32BitEncoding(MI.getOpcode())) { in runOnMachineFunction()
932 // If there is no chance we will shrink it and use VCC as sdst to get in runOnMachineFunction()
938 if (!TII->canShrink(MI, *MRI)) { in runOnMachineFunction()
941 if (!MI.isCommutable() || !TII->commuteInstruction(MI) || in runOnMachineFunction()
942 !TII->canShrink(MI, *MRI)) { in runOnMachineFunction()
950 if (TII->isVOPC(Op32)) { in runOnMachineFunction()
957 // VOPC instructions can only write to the VCC register. We can't in runOnMachineFunction()
958 // force them to use VCC here, because this is only one register and in runOnMachineFunction()
960 // VCC, e.g. S_AND_B64 (vcc = V_CMP_...), (vcc = V_CMP_...) in runOnMachineFunction()
962 // So, instead of forcing the instruction to write to VCC, we in runOnMachineFunction()
963 // provide a hint to the register allocator to use VCC and then we in runOnMachineFunction()
965 // VCC. in runOnMachineFunction()
966 MRI->setRegAllocationHint(DstReg, 0, VCCReg); in runOnMachineFunction()
978 TII->getNamedOperand(MI, AMDGPU::OpName::src2); in runOnMachineFunction()
979 if (!Src2->isReg()) in runOnMachineFunction()
981 Register SReg = Src2->getReg(); in runOnMachineFunction()
983 MRI->setRegAllocationHint(SReg, 0, VCCReg); in runOnMachineFunction()
991 const MachineOperand *SDst = TII->getNamedOperand(MI, in runOnMachineFunction()
997 if (SDst->getReg() != VCCReg) { in runOnMachineFunction()
998 if (SDst->getReg().isVirtual()) in runOnMachineFunction()
999 MRI->setRegAllocationHint(SDst->getReg(), 0, VCCReg); in runOnMachineFunction()
1005 const MachineOperand *Src2 = TII->getNamedOperand(MI, in runOnMachineFunction()
1007 if (Src2 && Src2->getReg() != VCCReg) { in runOnMachineFunction()
1008 if (Src2->getReg().isVirtual()) in runOnMachineFunction()
1009 MRI->setRegAllocationHint(Src2->getReg(), 0, VCCReg); in runOnMachineFunction()
1017 // Pre-GFX10, shrinking VOP3 instructions pre-RA gave us the chance to in runOnMachineFunction()
1021 if (ST->hasVOP3Literal() && in runOnMachineFunction()
1026 if (ST->hasTrue16BitInsts() && AMDGPU::isTrue16Inst(MI.getOpcode()) && in runOnMachineFunction()
1033 MachineInstr *Inst32 = TII->buildShrunkInst(MI, Op32); in runOnMachineFunction()
1039 // Copy deadness from the old explicit vcc def to the new implicit def. in runOnMachineFunction()
1040 if (SDst && SDst->isDead()) in runOnMachineFunction()
1041 Inst32->findRegisterDefOperand(VCCReg, /*TRI=*/nullptr)->setIsDead(); in runOnMachineFunction()
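
The closing fragments explain the VCC coupling that gives this listing its matches: the 32-bit VOPC encoding has no sdst field, so a shrunk compare always writes VCC implicitly, and the e32 carry forms likewise read or write VCC through implicit operands. With virtual registers the pass therefore only records VCC as an allocation hint and keeps the 64-bit encoding; after allocation it shrinks only when sdst (and the carry-in src2, where present) actually landed in VCC, and it copies a dead flag from the old explicit def onto the new implicit VCC def. A standalone sketch of that gate for instructions with a scalar condition operand; the structs model the decision only, not the buildShrunkInst() rewrite.

#include <optional>

enum class RegAllocState { BeforeRA, AfterRA };

// Descriptive stand-ins for the checks in the fragments above.
struct ShrinkCandidate {
  bool HasScalarCondOutput;          // VOPC result or carry-out (sdst)
  bool HasScalarCondInput;           // carry-in read through src2
  std::optional<unsigned> SDstReg;   // assigned sdst register, if any
  std::optional<unsigned> Src2Reg;   // assigned carry-in register, if any
};

// The 32-bit encoding may be used only once the condition registers are VCC;
// before register allocation the pass just records VCC as an allocation hint
// and keeps the 64-bit encoding for a later run.
static bool mayUseE32(const ShrinkCandidate &C, RegAllocState State,
                      unsigned VCCReg) {
  if (!C.HasScalarCondOutput && !C.HasScalarCondInput)
    return true;                     // no VCC coupling; other checks decide
  if (State == RegAllocState::BeforeRA)
    return false;                    // only hint VCC, shrink on the post-RA run
  if (C.HasScalarCondOutput && (!C.SDstReg || *C.SDstReg != VCCReg))
    return false;
  if (C.HasScalarCondInput && (!C.Src2Reg || *C.Src2Reg != VCCReg))
    return false;
  return true;
}
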