Lines Matching +full:un +full:- +full:masked

1 //===-- ARMLowOverheadLoops.cpp - CodeGen Low-overhead Loops ---*- C++ -*-===//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 /// Finalize v8.1-m low-overhead loops by converting the associated pseudo
12 /// - t2*LoopStart - placed in the preheader or pre-preheader. The do-loop
15 /// - t2LoopDec - placed within the loop body.
16 /// - t2LoopEnd - the loop latch terminator.
19 /// which determines whether we can generate the tail-predicated low-overhead
23 /// Low-overhead loops are constructed and executed using a setup instruction:
40 /// "VPT Active" context (which includes low-overhead loops and vpt blocks).
49 /// When we're inside the low-overhead loop (between DLSTP and LETP), we always
52 //===----------------------------------------------------------------------===//
75 #define DEBUG_TYPE "arm-low-overhead-loops"
79 DisableTailPredication("arm-loloops-disable-tailpred", cl::Hidden,
80 cl::desc("Disable tail-predication in the ARM LowOverheadLoop pass"),
84 DisableOmitDLS("arm-disable-omit-dls", cl::Hidden,
90 return PIdx != -1 && MI->getOperand(PIdx + 1).getReg() == ARM::VPR; in isVectorPredicated()
94 return MI->findRegisterDefOperandIdx(ARM::VPR, /*TRI=*/nullptr) != -1; in isVectorPredicate()
98 return MI.findRegisterUseOperandIdx(ARM::VPR, /*TRI=*/nullptr) != -1; in hasVPRUse()
102 uint64_t Domain = MI->getDesc().TSFlags & ARMII::DomainMask; in isDomainMVE()
146 (MachineBasicBlock *MBB) -> void { in ProcessLoop()
151 for (auto *Succ : MBB->successors()) { in ProcessLoop()
169 [this, &GetPredecessor] (MachineBasicBlock *MBB) -> void { in ProcessLoop()
171 if (MBB->pred_size() == 1) in ProcessLoop()
172 GetPredecessor(*MBB->pred_begin()); in ProcessLoop()
229 assert((CurrentPredicates.size() || MI->getParent()->isLiveIn(ARM::VPR)) in CreateVPTBlock()
278 assert(isVPTOpcode(VPT->getOpcode()) && in hasImplicitlyValidVPT()
281 if (VPT->getOpcode() == ARM::MVE_VPST) in hasImplicitlyValidVPT()
285 // the tail-predicated version will just perform a subset of the original in hasImplicitlyValidVPT()
287 if (isVPTOpcode(VPT->getOpcode()) && in hasImplicitlyValidVPT()
289 return !MI->mayStore() && !MI->mayLoad() && in hasImplicitlyValidVPT()
295 MachineInstr *Op = RDA.getMIOperand(MI, MI->getOperand(Idx)); in hasImplicitlyValidVPT()
300 MachineOperand &MO = MI->getOperand(Idx); in hasImplicitlyValidVPT()
310 if (Def->getParent() == VPT->getParent()) in hasImplicitlyValidVPT()
338 assert(isVPTOpcode(Insts.front()->getOpcode()) && in isValid()
340 if (Insts.size() == 2 && Insts.front()->getOpcode() != ARM::MVE_VPST && in isValid()
349 if (MI->getOpcode() == ARM::MVE_VPST || isVectorPredicate(MI)) in isValid()
393 MF = ML.getHeader()->getParent(); in LowOverheadLoop()
444 return Start->getOperand(1); in getLoopStartOperand()
452 return VCTPOpcodeToLSTP(VCTPs.back()->getOpcode(), IsDo); in getStartOpcode()
462 dbgs() << " - " << *MI; in dump()
465 dbgs() << "ARM Loops: Not a low-overhead loop.\n"; in dump()
542 BasicBlocks.insert(Dead->getParent()); in INITIALIZE_PASS()
561 Dead->findRegisterUseOperand(ARM::ITSTATE, /*TRI=*/nullptr)) { in INITIALIZE_PASS()
585 << " - can also remove:\n"; in INITIALIZE_PASS()
587 dbgs() << " - " << *Use); in INITIALIZE_PASS()
594 dbgs() << " - " << *Dead); in INITIALIZE_PASS()
605 dbgs() << "ARM Loops: Tail-predication is not valid.\n"); in ValidateTailPredicate()
614 LLVM_DEBUG(dbgs() << "ARM Loops: tail-predication is disabled\n"); in ValidateTailPredicate()
633 if (Start->getOpcode() == ARM::t2DoLoopStartTP || in ValidateTailPredicate()
634 Start->getOpcode() == ARM::t2WhileLoopStartTP) { in ValidateTailPredicate()
635 TPNumElements = Start->getOperand(2); in ValidateTailPredicate()
637 StartInsertBB = Start->getParent(); in ValidateTailPredicate()
639 TPNumElements = VCTP->getOperand(1); in ValidateTailPredicate()
654 if (StartInsertPt != StartInsertBB->end() && in ValidateTailPredicate()
659 ElemDef->removeFromParent(); in ValidateTailPredicate()
660 StartInsertBB->insert(StartInsertPt, ElemDef); in ValidateTailPredicate()
664 StartInsertPt->removeFromParent(); in ValidateTailPredicate()
665 StartInsertBB->insertAfter(MachineBasicBlock::iterator(ElemDef), in ValidateTailPredicate()
672 MachineOperand Operand = ElemDef->getOperand(1); in ValidateTailPredicate()
673 if (isMovRegOpcode(ElemDef->getOpcode()) && in ValidateTailPredicate()
694 if (MBB->empty()) in ValidateTailPredicate()
697 if (RDA.hasLocalDefBefore(&MBB->back(), NumElements)) in ValidateTailPredicate()
701 if (MBB->pred_size() > 1) in ValidateTailPredicate()
714 MBB = *MBB->pred_begin(); in ValidateTailPredicate()
727 if (std::any_of(StartInsertPt, StartInsertBB->end(), shouldInspect)) { in ValidateTailPredicate()
734 // on. The VecSize of the DoubleWidthResult is the larger vector size - the in ValidateTailPredicate()
748 // NumElements = NumElements - VectorWidth. The sub will be a sub immediate in ValidateTailPredicate()
751 return -getAddSubImmediate(*MI) == ExpectedVecWidth; in ValidateTailPredicate()
754 MachineBasicBlock *MBB = VCTP->getParent(); in ValidateTailPredicate()
760 &MBB->back(), VCTP->getOperand(1).getReg().asMCReg())) { in ValidateTailPredicate()
763 unsigned ExpectedVectorWidth = getTailPredVectorWidth(VCTP->getOpcode()); in ValidateTailPredicate()
771 if (isMovRegOpcode(MI->getOpcode())) in ValidateTailPredicate()
774 if (isSubImmOpcode(MI->getOpcode())) { in ValidateTailPredicate()
794 if ((Start->getOpcode() == ARM::t2DoLoopStartTP || in ValidateTailPredicate()
795 Start->getOpcode() == ARM::t2WhileLoopStartTP) && in ValidateTailPredicate()
796 Preheader && !Preheader->empty() && in ValidateTailPredicate()
797 !RDA.hasLocalDefBefore(VCTP, VCTP->getOperand(1).getReg())) { in ValidateTailPredicate()
799 &Preheader->back(), VCTP->getOperand(1).getReg().asMCReg())) { in ValidateTailPredicate()
811 return MO.isReg() && MO.getReg() && Class->contains(MO.getReg()); in isRegInClass()
832 // Can this instruction generate a non-zero result when given only zeroed
834 // zeroed by masked loads, that the result will also contain zeros in those
846 // FIXME: VNEG FP and -0? I think we'll need to handle this once we allow in canGenerateNonZeros()
847 // fp16 -> fp32 vector conversions. in canGenerateNonZeros()
863 // so that if tail-predication happens, the lanes that aren't updated will
880 Def->getOpcode() == ARM::MVE_VMOVimmi32 && in producesFalseLanesZero()
881 Def->getOperand(1).getImm() == 0; in producesFalseLanesZero()
892 if (PIdx != -1 && (int)MO.getOperandNo() == PIdx + 2) in producesFalseLanesZero()
896 // - If it only consumes false lanes zero or constant 0 (vmov #0) in producesFalseLanesZero()
897 // - If it's predicated, it only matters that its def register already has in producesFalseLanesZero()
916 // We want to find out if the tail-predicated version of this loop will in ValidateLiveOuts()
921 // predicated and so the conversion from VPT predication to tail-predication in ValidateLiveOuts()
922 // can result in different values being produced; due to the tail-predication in ValidateLiveOuts()
924 // lanes. This analysis assumes that all the instructions perform lane-wise in ValidateLiveOuts()
926 // A masked load, whether through VPT or tail predication, will write zeros in ValidateLiveOuts()
929 // lanes remain zero, or where they change, the differences are masked away in ValidateLiveOuts()
934 // loop and the tail-predicated form too. Because of this, we can insert in ValidateLiveOuts()
995 // all the unknown values have to be found to be masked by predicated user(s). in ValidateLiveOuts()
1000 for (auto &MO : MI->operands()) { in ValidateLiveOuts()
1010 // Any unknown false lanes have been masked away by the user(s). in ValidateLiveOuts()
1021 for (const MachineBasicBlock::RegisterMaskPair &RegMask : ExitBB->liveins()) { in ValidateLiveOuts()
1028 // Check Q-regs that are live in the exit blocks. We don't collect scalars in ValidateLiveOuts()
1030 if (QPRs->contains(RegMask.PhysReg)) in ValidateLiveOuts()
1037 // any VPT predicated instruction is predicated upon VCTP. Any live-out in ValidateLiveOuts()
1045 if (MI->getOpcode() == ARM::MQPRCopy) { in ValidateLiveOuts()
1048 RDA.getUniqueReachingMIDef(MI, MI->getOperand(1).getReg()); in ValidateLiveOuts()
1069 MachineBasicBlock *TgtBB = End->getOpcode() == ARM::t2LoopEnd in Validate()
1070 ? End->getOperand(1).getMBB() in Validate()
1071 : End->getOperand(2).getMBB(); in Validate()
1079 // The WLS and LE instructions have 12-bits for the label offset. WLS in Validate()
1081 if (BBUtils->getOffsetOf(End) < BBUtils->getOffsetOf(ML.getHeader()) || in Validate()
1082 !BBUtils->isBBInRange(End, ML.getHeader(), 4094)) { in Validate()
1083 LLVM_DEBUG(dbgs() << "ARM Loops: LE offset is out-of-range\n"); in Validate()
1089 if (BBUtils->getOffsetOf(Start) > BBUtils->getOffsetOf(TargetBB) || in Validate()
1090 !BBUtils->isBBInRange(Start, TargetBB, 4094)) { in Validate()
1091 LLVM_DEBUG(dbgs() << "ARM Loops: WLS offset is out-of-range!\n"); in Validate()
1099 StartInsertBB = Start->getParent(); in Validate()
1117 if (!Prev->getOperand(1).isIdenticalTo(MI->getOperand(1)) || in AddVCTP()
1118 !RDA.hasSameReachingDef(Prev, MI, MI->getOperand(1).getReg().asMCReg())) { in AddVCTP()
1130 const PseudoSourceValue *PseudoValue = Operand->getPseudoValue(); in ValidateMVEStore()
1131 if (PseudoValue && PseudoValue->kind() == PseudoSourceValue::FixedStack) { in ValidateMVEStore()
1133 return FS->getFrameIndex(); in ValidateMVEStore()
1136 return -1; in ValidateMVEStore()
1140 switch (I->getOpcode()) { in ValidateMVEStore()
1143 return I->getOperand(1).getReg() == ARM::SP && in ValidateMVEStore()
1144 I->memoperands().size() == 1 && in ValidateMVEStore()
1145 GetFrameIndex(I->memoperands().front()) >= 0; in ValidateMVEStore()
1154 if (MI->getOpcode() != ARM::MVE_VSTRWU32 || !IsStackOp(MI)) in ValidateMVEStore()
1159 // live-out (which sp never is) to know what blocks to look in in ValidateMVEStore()
1160 if (MI->memoperands().size() == 0) in ValidateMVEStore()
1162 int FI = GetFrameIndex(MI->memoperands().front()); in ValidateMVEStore()
1164 auto &FrameInfo = MI->getParent()->getParent()->getFrameInfo(); in ValidateMVEStore()
1165 if (FI == -1 || !FrameInfo.isSpillSlotObjectIndex(FI)) in ValidateMVEStore()
1169 ML->getExitBlocks(Frontier); in ValidateMVEStore()
1170 SmallPtrSet<MachineBasicBlock *, 4> Visited{MI->getParent()}; in ValidateMVEStore()
1193 for (auto *Succ : BB->successors()) { in ValidateMVEStore()
1212 if (MI->getOpcode() == ARM::MVE_VPSEL || in ValidateMVEInst()
1213 MI->getOpcode() == ARM::MVE_VPNOT) { in ValidateMVEInst()
1231 const MCInstrDesc &MCID = MI->getDesc(); in ValidateMVEInst()
1233 unsigned LastOpIdx = MI->getNumOperands() - 1; in ValidateMVEInst()
1235 const MachineOperand &MO = MI->getOperand(LastOpIdx - Op.index()); in ValidateMVEInst()
1242 } else if (MI->getOpcode() != ARM::MVE_VPST) { in ValidateMVEInst()
1254 if (MI->getOpcode() == ARM::MQPRCopy) in ValidateMVEInst()
1268 if (MI->mayStore() && !ValidateMVEStore(MI, &ML)) in ValidateMVEInst()
1284 if (isVPTOpcode(MI->getOpcode())) in ValidateMVEInst()
1296 LLVM_DEBUG(dbgs() << "ARM Loops on " << MF->getName() << " ------------- \n"); in runOnMachineFunction()
1300 MF->getProperties().set(MachineFunctionProperties::Property::TracksLiveness); in runOnMachineFunction()
1301 MRI = &MF->getRegInfo(); in runOnMachineFunction()
1305 BBUtils->computeAllBlockSizes(); in runOnMachineFunction()
1306 BBUtils->adjustBBOffsetsAfter(&MF->front()); in runOnMachineFunction()
1310 if (ML->isOutermost()) in runOnMachineFunction()
1326 if (auto *Preheader = ML->getLoopPreheader()) in ProcessLoop()
1327 dbgs() << " - Preheader: " << printMBBReference(*Preheader) << "\n"; in ProcessLoop()
1328 else if (auto *Preheader = MLI->findLoopPreheader(ML, true, true)) in ProcessLoop()
1329 dbgs() << " - Preheader: " << printMBBReference(*Preheader) << "\n"; in ProcessLoop()
1330 for (auto *MBB : ML->getBlocks()) in ProcessLoop()
1331 dbgs() << " - Block: " << printMBBReference(*MBB) << "\n"; in ProcessLoop()
1337 [&SearchForStart](MachineBasicBlock *MBB) -> MachineInstr* { in ProcessLoop()
1342 if (MBB->pred_size() == 1) in ProcessLoop()
1343 return SearchForStart(*MBB->pred_begin()); in ProcessLoop()
1356 // Find the low-overhead loop components and decide whether or not to fall in ProcessLoop()
1359 for (auto *MBB : reverse(ML->getBlocks())) { in ProcessLoop()
1391 assert(LoLoop.Start->getOpcode() != ARM::t2WhileLoopStart && in ProcessLoop()
1399 RDA->getReachingLocalUses(LoLoop.Dec, MCRegister::from(ARM::LR), Uses); in ProcessLoop()
1417 unsigned BrOpc = BBUtils->isBBInRange(MI, DestBB, 254) ? in RevertWhile()
1430 MachineBasicBlock *MBB = MI->getParent(); in RevertLoopDec()
1432 for (auto I = MachineBasicBlock::iterator(MI), E = MBB->end(); I != E; ++I) { in RevertLoopDec()
1433 if (I->getOpcode() == ARM::t2LoopEnd) { in RevertLoopDec()
1441 RDA->isSafeToDefRegAt(MI, MCRegister::from(ARM::CPSR), Ignore); in RevertLoopDec()
1451 MachineBasicBlock *DestBB = MI->getOperand(1).getMBB(); in RevertLoopEnd()
1452 unsigned BrOpc = BBUtils->isBBInRange(MI, DestBB, 254) ? in RevertLoopEnd()
1461 assert(MI->getOpcode() == ARM::t2LoopEndDec && "Expected a t2LoopEndDec!"); in RevertLoopEndDec()
1462 MachineBasicBlock *MBB = MI->getParent(); in RevertLoopEndDec()
1465 BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2SUBri)); in RevertLoopEndDec()
1467 MIB.add(MI->getOperand(1)); in RevertLoopEndDec()
1472 MIB->getOperand(5).setIsDef(true); in RevertLoopEndDec()
1474 MachineBasicBlock *DestBB = MI->getOperand(2).getMBB(); in RevertLoopEndDec()
1476 BBUtils->isBBInRange(MI, DestBB, 254) ? ARM::tBcc : ARM::t2Bcc; in RevertLoopEndDec()
1479 MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(BrOpc)); in RevertLoopEndDec()
1480 MIB.add(MI->getOperand(2)); // branch target in RevertLoopEndDec()
1484 MI->eraseFromParent(); in RevertLoopEndDec()
1488 // If we are tail-predicating, the number of elements to be processed is the
1492 // $lr = big-itercount-expression
1502 // What we would like to achieve here is to replace the do-loop start pseudo
1517 MachineInstr *Def = RDA->getMIOperand(LoLoop.Start, 1); in IterationCountDCE()
1532 // When using tail-predication, try to delete the dead code that was used to in ExpandLoopStart()
1550 BuildMI(*MBB, InsertPt, Start->getDebugLoc(), TII->get(Opc)); in ExpandLoopStart()
1567 if (MI->isDebugInstr()) in ConvertVPTBlocks()
1571 assert(PIdx >= 1 && "Trying to unpredicate a non-predicated instruction"); in ConvertVPTBlocks()
1572 assert(MI->getOperand(PIdx).getImm() == ARMVCC::Then && in ConvertVPTBlocks()
1574 MI->getOperand(PIdx).setImm(ARMVCC::None); in ConvertVPTBlocks()
1575 MI->getOperand(PIdx + 1).setReg(0); in ConvertVPTBlocks()
1582 assert(TheVCMP && "Replacing a removed or non-existent VCMP"); in ConvertVPTBlocks()
1585 BuildMI(*At->getParent(), At, At->getDebugLoc(), in ConvertVPTBlocks()
1586 TII->get(VCMPOpcodeToVPT(TheVCMP->getOpcode()))); in ConvertVPTBlocks()
1589 MIB.add(TheVCMP->getOperand(1)); in ConvertVPTBlocks()
1591 MIB.add(TheVCMP->getOperand(2)); in ConvertVPTBlocks()
1593 MIB.add(TheVCMP->getOperand(3)); in ConvertVPTBlocks()
1605 // - Remove vpst. in ConvertVPTBlocks()
1606 // - Unpredicate the remaining instructions. in ConvertVPTBlocks()
1611 // The VPT block has a non-uniform predicate but it uses a vpst and its in ConvertVPTBlocks()
1613 // - Need to remove the original vpst. in ConvertVPTBlocks()
1614 // - Then need to unpredicate any following instructions, until in ConvertVPTBlocks()
1616 // - Insert a new vpst to predicate the instruction(s) that follow in ConvertVPTBlocks()
1619 MachineBasicBlock *MBB = Divergent->getParent(); in ConvertVPTBlocks()
1621 while (DivergentNext != MBB->end() && DivergentNext->isDebugInstr()) in ConvertVPTBlocks()
1625 DivergentNext != MBB->end() && in ConvertVPTBlocks()
1635 VCMPOpcodeToVPT(Divergent->getOpcode()) != 0 ? Divergent : nullptr; in ConvertVPTBlocks()
1645 BuildMI(*Divergent->getParent(), Divergent, in ConvertVPTBlocks()
1646 Divergent->getDebugLoc(), TII->get(ARM::MVE_VPST)); in ConvertVPTBlocks()
1664 assert(VPST->getOpcode() == ARM::MVE_VPST && in ConvertVPTBlocks()
1669 } else if (Insts.front()->getOpcode() == ARM::MVE_VPST) { in ConvertVPTBlocks()
1671 // preceding un-merged VCMP into a VPT. This VCMP comes from a VPT in ConvertVPTBlocks()
1678 MachineInstr *VprDef = RDA->getUniqueReachingMIDef(VPST, ARM::VPR); in ConvertVPTBlocks()
1679 if (VprDef && VCMPOpcodeToVPT(VprDef->getOpcode()) && in ConvertVPTBlocks()
1689 RDA->hasSameReachingDef(VCMP, VPST, VCMP->getOperand(1).getReg()) && in ConvertVPTBlocks()
1690 RDA->hasSameReachingDef(VCMP, VPST, VCMP->getOperand(2).getReg())) { in ConvertVPTBlocks()
1707 MachineBasicBlock *MBB = End->getParent(); in Expand()
1710 MachineInstrBuilder MIB = BuildMI(*MBB, End, End->getDebugLoc(), in Expand()
1711 TII->get(Opc)); in Expand()
1714 MIB.add(End->getOperand(Off + 0)); in Expand()
1715 MIB.add(End->getOperand(Off + 1)); in Expand()
1723 // get here - probably by teaching analyzeBranch about the pseudo in Expand()
1728 MachineBasicBlock *BB = I->getParent(); in Expand()
1729 MachineInstr *Terminator = &BB->instr_back(); in Expand()
1730 if (Terminator->isUnconditionalBranch() && I != Terminator) { in Expand()
1731 MachineBasicBlock *Succ = Terminator->getOperand(0).getMBB(); in Expand()
1732 if (BB->isLayoutSuccessor(Succ)) { in Expand()
1734 Terminator->eraseFromParent(); in Expand()
1744 assert(MI->getOpcode() == ARM::MQPRCopy && "Only expected MQPRCOPY!"); in Expand()
1745 MachineBasicBlock *MBB = MI->getParent(); in Expand()
1746 Register Dst = MI->getOperand(0).getReg(); in Expand()
1747 Register Src = MI->getOperand(1).getReg(); in Expand()
1748 auto MIB1 = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::VMOVD), in Expand()
1749 ARM::D0 + (Dst - ARM::Q0) * 2) in Expand()
1750 .addReg(ARM::D0 + (Src - ARM::Q0) * 2) in Expand()
1754 auto MIB2 = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::VMOVD), in Expand()
1755 ARM::D0 + (Dst - ARM::Q0) * 2 + 1) in Expand()
1756 .addReg(ARM::D0 + (Src - ARM::Q0) * 2 + 1) in Expand()
1760 MI->eraseFromParent(); in Expand()
1784 I->eraseFromParent(); in Expand()
1802 RDA->reset(); in Expand()
1850 assert(MI->getOpcode() == ARM::MQPRCopy && "Only expected MQPRCOPY!"); in RevertNonLoops()
1851 MachineBasicBlock *MBB = MI->getParent(); in RevertNonLoops()
1852 auto MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::MVE_VORR), in RevertNonLoops()
1853 MI->getOperand(0).getReg()) in RevertNonLoops()
1854 .add(MI->getOperand(1)) in RevertNonLoops()
1855 .add(MI->getOperand(1)); in RevertNonLoops()
1856 addUnpredicatedMveVpredROp(MIB, MI->getOperand(0).getReg()); in RevertNonLoops()
1857 MI->eraseFromParent(); in RevertNonLoops()