//===-- MVETPAndVPTOptimisationsPass.cpp ----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This pass does a few optimisations related to Tail predicated loops
/// and MVE VPT blocks before register allocation is performed. For VPT blocks
/// the goal is to maximize the sizes of the blocks that will be created by the
/// MVE VPT Block Insertion pass (which runs after register allocation). For
/// tail predicated loops we transform the loop into something that will
/// hopefully make the backend ARMLowOverheadLoops pass's job easier.
15*fe6060f1SDimitry Andric /// 16*fe6060f1SDimitry Andric //===----------------------------------------------------------------------===// 17*fe6060f1SDimitry Andric 18*fe6060f1SDimitry Andric #include "ARM.h" 19*fe6060f1SDimitry Andric #include "ARMSubtarget.h" 20*fe6060f1SDimitry Andric #include "MCTargetDesc/ARMBaseInfo.h" 21*fe6060f1SDimitry Andric #include "MVETailPredUtils.h" 22*fe6060f1SDimitry Andric #include "Thumb2InstrInfo.h" 23*fe6060f1SDimitry Andric #include "llvm/ADT/SmallVector.h" 24*fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineBasicBlock.h" 25*fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineDominators.h" 26*fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineFunction.h" 27*fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h" 28*fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineInstr.h" 29*fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineLoopInfo.h" 30*fe6060f1SDimitry Andric #include "llvm/InitializePasses.h" 31*fe6060f1SDimitry Andric #include "llvm/Support/Debug.h" 32*fe6060f1SDimitry Andric #include <cassert> 33*fe6060f1SDimitry Andric 34*fe6060f1SDimitry Andric using namespace llvm; 35*fe6060f1SDimitry Andric 36*fe6060f1SDimitry Andric #define DEBUG_TYPE "arm-mve-vpt-opts" 37*fe6060f1SDimitry Andric 38*fe6060f1SDimitry Andric static cl::opt<bool> 39*fe6060f1SDimitry Andric MergeEndDec("arm-enable-merge-loopenddec", cl::Hidden, 40*fe6060f1SDimitry Andric cl::desc("Enable merging Loop End and Dec instructions."), 41*fe6060f1SDimitry Andric cl::init(true)); 42*fe6060f1SDimitry Andric 43*fe6060f1SDimitry Andric namespace { 44*fe6060f1SDimitry Andric class MVETPAndVPTOptimisations : public MachineFunctionPass { 45*fe6060f1SDimitry Andric public: 46*fe6060f1SDimitry Andric static char ID; 47*fe6060f1SDimitry Andric const Thumb2InstrInfo *TII; 48*fe6060f1SDimitry Andric MachineRegisterInfo *MRI; 49*fe6060f1SDimitry Andric 50*fe6060f1SDimitry Andric MVETPAndVPTOptimisations() : MachineFunctionPass(ID) {} 
51*fe6060f1SDimitry Andric 52*fe6060f1SDimitry Andric bool runOnMachineFunction(MachineFunction &Fn) override; 53*fe6060f1SDimitry Andric 54*fe6060f1SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override { 55*fe6060f1SDimitry Andric AU.addRequired<MachineLoopInfo>(); 56*fe6060f1SDimitry Andric AU.addPreserved<MachineLoopInfo>(); 57*fe6060f1SDimitry Andric AU.addRequired<MachineDominatorTree>(); 58*fe6060f1SDimitry Andric AU.addPreserved<MachineDominatorTree>(); 59*fe6060f1SDimitry Andric MachineFunctionPass::getAnalysisUsage(AU); 60*fe6060f1SDimitry Andric } 61*fe6060f1SDimitry Andric 62*fe6060f1SDimitry Andric StringRef getPassName() const override { 63*fe6060f1SDimitry Andric return "ARM MVE TailPred and VPT Optimisation Pass"; 64*fe6060f1SDimitry Andric } 65*fe6060f1SDimitry Andric 66*fe6060f1SDimitry Andric private: 67*fe6060f1SDimitry Andric bool LowerWhileLoopStart(MachineLoop *ML); 68*fe6060f1SDimitry Andric bool MergeLoopEnd(MachineLoop *ML); 69*fe6060f1SDimitry Andric bool ConvertTailPredLoop(MachineLoop *ML, MachineDominatorTree *DT); 70*fe6060f1SDimitry Andric MachineInstr &ReplaceRegisterUseWithVPNOT(MachineBasicBlock &MBB, 71*fe6060f1SDimitry Andric MachineInstr &Instr, 72*fe6060f1SDimitry Andric MachineOperand &User, 73*fe6060f1SDimitry Andric Register Target); 74*fe6060f1SDimitry Andric bool ReduceOldVCCRValueUses(MachineBasicBlock &MBB); 75*fe6060f1SDimitry Andric bool ReplaceVCMPsByVPNOTs(MachineBasicBlock &MBB); 76*fe6060f1SDimitry Andric bool ReplaceConstByVPNOTs(MachineBasicBlock &MBB, MachineDominatorTree *DT); 77*fe6060f1SDimitry Andric bool ConvertVPSEL(MachineBasicBlock &MBB); 78*fe6060f1SDimitry Andric bool HintDoLoopStartReg(MachineBasicBlock &MBB); 79*fe6060f1SDimitry Andric MachineInstr *CheckForLRUseInPredecessors(MachineBasicBlock *PreHeader, 80*fe6060f1SDimitry Andric MachineInstr *LoopStart); 81*fe6060f1SDimitry Andric }; 82*fe6060f1SDimitry Andric 83*fe6060f1SDimitry Andric char MVETPAndVPTOptimisations::ID = 0; 
84*fe6060f1SDimitry Andric 85*fe6060f1SDimitry Andric } // end anonymous namespace 86*fe6060f1SDimitry Andric 87*fe6060f1SDimitry Andric INITIALIZE_PASS_BEGIN(MVETPAndVPTOptimisations, DEBUG_TYPE, 88*fe6060f1SDimitry Andric "ARM MVE TailPred and VPT Optimisations pass", false, 89*fe6060f1SDimitry Andric false) 90*fe6060f1SDimitry Andric INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) 91*fe6060f1SDimitry Andric INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) 92*fe6060f1SDimitry Andric INITIALIZE_PASS_END(MVETPAndVPTOptimisations, DEBUG_TYPE, 93*fe6060f1SDimitry Andric "ARM MVE TailPred and VPT Optimisations pass", false, false) 94*fe6060f1SDimitry Andric 95*fe6060f1SDimitry Andric static MachineInstr *LookThroughCOPY(MachineInstr *MI, 96*fe6060f1SDimitry Andric MachineRegisterInfo *MRI) { 97*fe6060f1SDimitry Andric while (MI && MI->getOpcode() == TargetOpcode::COPY && 98*fe6060f1SDimitry Andric MI->getOperand(1).getReg().isVirtual()) 99*fe6060f1SDimitry Andric MI = MRI->getVRegDef(MI->getOperand(1).getReg()); 100*fe6060f1SDimitry Andric return MI; 101*fe6060f1SDimitry Andric } 102*fe6060f1SDimitry Andric 103*fe6060f1SDimitry Andric // Given a loop ML, this attempts to find the t2LoopEnd, t2LoopDec and 104*fe6060f1SDimitry Andric // corresponding PHI that make up a low overhead loop. Only handles 'do' loops 105*fe6060f1SDimitry Andric // at the moment, returning a t2DoLoopStart in LoopStart. 
106*fe6060f1SDimitry Andric static bool findLoopComponents(MachineLoop *ML, MachineRegisterInfo *MRI, 107*fe6060f1SDimitry Andric MachineInstr *&LoopStart, MachineInstr *&LoopPhi, 108*fe6060f1SDimitry Andric MachineInstr *&LoopDec, MachineInstr *&LoopEnd) { 109*fe6060f1SDimitry Andric MachineBasicBlock *Header = ML->getHeader(); 110*fe6060f1SDimitry Andric MachineBasicBlock *Latch = ML->getLoopLatch(); 111*fe6060f1SDimitry Andric if (!Header || !Latch) { 112*fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << " no Loop Latch or Header\n"); 113*fe6060f1SDimitry Andric return false; 114*fe6060f1SDimitry Andric } 115*fe6060f1SDimitry Andric 116*fe6060f1SDimitry Andric // Find the loop end from the terminators. 117*fe6060f1SDimitry Andric LoopEnd = nullptr; 118*fe6060f1SDimitry Andric for (auto &T : Latch->terminators()) { 119*fe6060f1SDimitry Andric if (T.getOpcode() == ARM::t2LoopEnd && T.getOperand(1).getMBB() == Header) { 120*fe6060f1SDimitry Andric LoopEnd = &T; 121*fe6060f1SDimitry Andric break; 122*fe6060f1SDimitry Andric } 123*fe6060f1SDimitry Andric if (T.getOpcode() == ARM::t2LoopEndDec && 124*fe6060f1SDimitry Andric T.getOperand(2).getMBB() == Header) { 125*fe6060f1SDimitry Andric LoopEnd = &T; 126*fe6060f1SDimitry Andric break; 127*fe6060f1SDimitry Andric } 128*fe6060f1SDimitry Andric } 129*fe6060f1SDimitry Andric if (!LoopEnd) { 130*fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << " no LoopEnd\n"); 131*fe6060f1SDimitry Andric return false; 132*fe6060f1SDimitry Andric } 133*fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << " found loop end: " << *LoopEnd); 134*fe6060f1SDimitry Andric 135*fe6060f1SDimitry Andric // Find the dec from the use of the end. There may be copies between 136*fe6060f1SDimitry Andric // instructions. We expect the loop to loop like: 137*fe6060f1SDimitry Andric // $vs = t2DoLoopStart ... 138*fe6060f1SDimitry Andric // loop: 139*fe6060f1SDimitry Andric // $vp = phi [ $vs ], [ $vd ] 140*fe6060f1SDimitry Andric // ... 
141*fe6060f1SDimitry Andric // $vd = t2LoopDec $vp 142*fe6060f1SDimitry Andric // ... 143*fe6060f1SDimitry Andric // t2LoopEnd $vd, loop 144*fe6060f1SDimitry Andric if (LoopEnd->getOpcode() == ARM::t2LoopEndDec) 145*fe6060f1SDimitry Andric LoopDec = LoopEnd; 146*fe6060f1SDimitry Andric else { 147*fe6060f1SDimitry Andric LoopDec = 148*fe6060f1SDimitry Andric LookThroughCOPY(MRI->getVRegDef(LoopEnd->getOperand(0).getReg()), MRI); 149*fe6060f1SDimitry Andric if (!LoopDec || LoopDec->getOpcode() != ARM::t2LoopDec) { 150*fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << " didn't find LoopDec where we expected!\n"); 151*fe6060f1SDimitry Andric return false; 152*fe6060f1SDimitry Andric } 153*fe6060f1SDimitry Andric } 154*fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << " found loop dec: " << *LoopDec); 155*fe6060f1SDimitry Andric 156*fe6060f1SDimitry Andric LoopPhi = 157*fe6060f1SDimitry Andric LookThroughCOPY(MRI->getVRegDef(LoopDec->getOperand(1).getReg()), MRI); 158*fe6060f1SDimitry Andric if (!LoopPhi || LoopPhi->getOpcode() != TargetOpcode::PHI || 159*fe6060f1SDimitry Andric LoopPhi->getNumOperands() != 5 || 160*fe6060f1SDimitry Andric (LoopPhi->getOperand(2).getMBB() != Latch && 161*fe6060f1SDimitry Andric LoopPhi->getOperand(4).getMBB() != Latch)) { 162*fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << " didn't find PHI where we expected!\n"); 163*fe6060f1SDimitry Andric return false; 164*fe6060f1SDimitry Andric } 165*fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << " found loop phi: " << *LoopPhi); 166*fe6060f1SDimitry Andric 167*fe6060f1SDimitry Andric Register StartReg = LoopPhi->getOperand(2).getMBB() == Latch 168*fe6060f1SDimitry Andric ? 
LoopPhi->getOperand(3).getReg() 169*fe6060f1SDimitry Andric : LoopPhi->getOperand(1).getReg(); 170*fe6060f1SDimitry Andric LoopStart = LookThroughCOPY(MRI->getVRegDef(StartReg), MRI); 171*fe6060f1SDimitry Andric if (!LoopStart || (LoopStart->getOpcode() != ARM::t2DoLoopStart && 172*fe6060f1SDimitry Andric LoopStart->getOpcode() != ARM::t2WhileLoopSetup && 173*fe6060f1SDimitry Andric LoopStart->getOpcode() != ARM::t2WhileLoopStartLR)) { 174*fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << " didn't find Start where we expected!\n"); 175*fe6060f1SDimitry Andric return false; 176*fe6060f1SDimitry Andric } 177*fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << " found loop start: " << *LoopStart); 178*fe6060f1SDimitry Andric 179*fe6060f1SDimitry Andric return true; 180*fe6060f1SDimitry Andric } 181*fe6060f1SDimitry Andric 182*fe6060f1SDimitry Andric static void RevertWhileLoopSetup(MachineInstr *MI, const TargetInstrInfo *TII) { 183*fe6060f1SDimitry Andric MachineBasicBlock *MBB = MI->getParent(); 184*fe6060f1SDimitry Andric assert(MI->getOpcode() == ARM::t2WhileLoopSetup && 185*fe6060f1SDimitry Andric "Only expected a t2WhileLoopSetup in RevertWhileLoopStart!"); 186*fe6060f1SDimitry Andric 187*fe6060f1SDimitry Andric // Subs 188*fe6060f1SDimitry Andric MachineInstrBuilder MIB = 189*fe6060f1SDimitry Andric BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2SUBri)); 190*fe6060f1SDimitry Andric MIB.add(MI->getOperand(0)); 191*fe6060f1SDimitry Andric MIB.add(MI->getOperand(1)); 192*fe6060f1SDimitry Andric MIB.addImm(0); 193*fe6060f1SDimitry Andric MIB.addImm(ARMCC::AL); 194*fe6060f1SDimitry Andric MIB.addReg(ARM::NoRegister); 195*fe6060f1SDimitry Andric MIB.addReg(ARM::CPSR, RegState::Define); 196*fe6060f1SDimitry Andric 197*fe6060f1SDimitry Andric // Attempt to find a t2WhileLoopStart and revert to a t2Bcc. 
198*fe6060f1SDimitry Andric for (MachineInstr &I : MBB->terminators()) { 199*fe6060f1SDimitry Andric if (I.getOpcode() == ARM::t2WhileLoopStart) { 200*fe6060f1SDimitry Andric MachineInstrBuilder MIB = 201*fe6060f1SDimitry Andric BuildMI(*MBB, &I, I.getDebugLoc(), TII->get(ARM::t2Bcc)); 202*fe6060f1SDimitry Andric MIB.add(MI->getOperand(1)); // branch target 203*fe6060f1SDimitry Andric MIB.addImm(ARMCC::EQ); 204*fe6060f1SDimitry Andric MIB.addReg(ARM::CPSR); 205*fe6060f1SDimitry Andric I.eraseFromParent(); 206*fe6060f1SDimitry Andric break; 207*fe6060f1SDimitry Andric } 208*fe6060f1SDimitry Andric } 209*fe6060f1SDimitry Andric 210*fe6060f1SDimitry Andric MI->eraseFromParent(); 211*fe6060f1SDimitry Andric } 212*fe6060f1SDimitry Andric 213*fe6060f1SDimitry Andric // The Hardware Loop insertion and ISel Lowering produce the pseudos for the 214*fe6060f1SDimitry Andric // start of a while loop: 215*fe6060f1SDimitry Andric // %a:gprlr = t2WhileLoopSetup %Cnt 216*fe6060f1SDimitry Andric // t2WhileLoopStart %a, %BB 217*fe6060f1SDimitry Andric // We want to convert those to a single instruction which, like t2LoopEndDec and 218*fe6060f1SDimitry Andric // t2DoLoopStartTP is both a terminator and produces a value: 219*fe6060f1SDimitry Andric // %a:grplr: t2WhileLoopStartLR %Cnt, %BB 220*fe6060f1SDimitry Andric // 221*fe6060f1SDimitry Andric // Otherwise if we can't, we revert the loop. t2WhileLoopSetup and 222*fe6060f1SDimitry Andric // t2WhileLoopStart are not valid past regalloc. 
223*fe6060f1SDimitry Andric bool MVETPAndVPTOptimisations::LowerWhileLoopStart(MachineLoop *ML) { 224*fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "LowerWhileLoopStart on loop " 225*fe6060f1SDimitry Andric << ML->getHeader()->getName() << "\n"); 226*fe6060f1SDimitry Andric 227*fe6060f1SDimitry Andric MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec; 228*fe6060f1SDimitry Andric if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd)) 229*fe6060f1SDimitry Andric return false; 230*fe6060f1SDimitry Andric 231*fe6060f1SDimitry Andric if (LoopStart->getOpcode() != ARM::t2WhileLoopSetup) 232*fe6060f1SDimitry Andric return false; 233*fe6060f1SDimitry Andric 234*fe6060f1SDimitry Andric Register LR = LoopStart->getOperand(0).getReg(); 235*fe6060f1SDimitry Andric auto WLSIt = find_if(MRI->use_nodbg_instructions(LR), [](auto &MI) { 236*fe6060f1SDimitry Andric return MI.getOpcode() == ARM::t2WhileLoopStart; 237*fe6060f1SDimitry Andric }); 238*fe6060f1SDimitry Andric if (!MergeEndDec || WLSIt == MRI->use_instr_nodbg_end()) { 239*fe6060f1SDimitry Andric RevertWhileLoopSetup(LoopStart, TII); 240*fe6060f1SDimitry Andric RevertLoopDec(LoopStart, TII); 241*fe6060f1SDimitry Andric RevertLoopEnd(LoopStart, TII); 242*fe6060f1SDimitry Andric return true; 243*fe6060f1SDimitry Andric } 244*fe6060f1SDimitry Andric 245*fe6060f1SDimitry Andric MachineInstrBuilder MI = 246*fe6060f1SDimitry Andric BuildMI(*WLSIt->getParent(), *WLSIt, WLSIt->getDebugLoc(), 247*fe6060f1SDimitry Andric TII->get(ARM::t2WhileLoopStartLR), LR) 248*fe6060f1SDimitry Andric .add(LoopStart->getOperand(1)) 249*fe6060f1SDimitry Andric .add(WLSIt->getOperand(1)); 250*fe6060f1SDimitry Andric (void)MI; 251*fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "Lowered WhileLoopStart into: " << *MI.getInstr()); 252*fe6060f1SDimitry Andric 253*fe6060f1SDimitry Andric WLSIt->eraseFromParent(); 254*fe6060f1SDimitry Andric LoopStart->eraseFromParent(); 255*fe6060f1SDimitry Andric return true; 256*fe6060f1SDimitry Andric 
} 257*fe6060f1SDimitry Andric 258*fe6060f1SDimitry Andric // Return true if this instruction is invalid in a low overhead loop, usually 259*fe6060f1SDimitry Andric // because it clobbers LR. 260*fe6060f1SDimitry Andric static bool IsInvalidTPInstruction(MachineInstr &MI) { 261*fe6060f1SDimitry Andric return MI.isCall() || isLoopStart(MI); 262*fe6060f1SDimitry Andric } 263*fe6060f1SDimitry Andric 264*fe6060f1SDimitry Andric // Starting from PreHeader, search for invalid instructions back until the 265*fe6060f1SDimitry Andric // LoopStart block is reached. If invalid instructions are found, the loop start 266*fe6060f1SDimitry Andric // is reverted from a WhileLoopStart to a DoLoopStart on the same loop. Will 267*fe6060f1SDimitry Andric // return the new DLS LoopStart if updated. 268*fe6060f1SDimitry Andric MachineInstr *MVETPAndVPTOptimisations::CheckForLRUseInPredecessors( 269*fe6060f1SDimitry Andric MachineBasicBlock *PreHeader, MachineInstr *LoopStart) { 270*fe6060f1SDimitry Andric SmallVector<MachineBasicBlock *> Worklist; 271*fe6060f1SDimitry Andric SmallPtrSet<MachineBasicBlock *, 4> Visited; 272*fe6060f1SDimitry Andric Worklist.push_back(PreHeader); 273*fe6060f1SDimitry Andric Visited.insert(LoopStart->getParent()); 274*fe6060f1SDimitry Andric 275*fe6060f1SDimitry Andric while (!Worklist.empty()) { 276*fe6060f1SDimitry Andric MachineBasicBlock *MBB = Worklist.pop_back_val(); 277*fe6060f1SDimitry Andric if (Visited.count(MBB)) 278*fe6060f1SDimitry Andric continue; 279*fe6060f1SDimitry Andric 280*fe6060f1SDimitry Andric for (MachineInstr &MI : *MBB) { 281*fe6060f1SDimitry Andric if (!IsInvalidTPInstruction(MI)) 282*fe6060f1SDimitry Andric continue; 283*fe6060f1SDimitry Andric 284*fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "Found LR use in predecessors, reverting: " << MI); 285*fe6060f1SDimitry Andric 286*fe6060f1SDimitry Andric // Create a t2DoLoopStart at the end of the preheader. 
287*fe6060f1SDimitry Andric MachineInstrBuilder MIB = 288*fe6060f1SDimitry Andric BuildMI(*PreHeader, PreHeader->getFirstTerminator(), 289*fe6060f1SDimitry Andric LoopStart->getDebugLoc(), TII->get(ARM::t2DoLoopStart)); 290*fe6060f1SDimitry Andric MIB.add(LoopStart->getOperand(0)); 291*fe6060f1SDimitry Andric MIB.add(LoopStart->getOperand(1)); 292*fe6060f1SDimitry Andric 293*fe6060f1SDimitry Andric // Make sure to remove the kill flags, to prevent them from being invalid. 294*fe6060f1SDimitry Andric LoopStart->getOperand(1).setIsKill(false); 295*fe6060f1SDimitry Andric 296*fe6060f1SDimitry Andric // Revert the t2WhileLoopStartLR to a CMP and Br. 297*fe6060f1SDimitry Andric RevertWhileLoopStartLR(LoopStart, TII, ARM::t2Bcc, true); 298*fe6060f1SDimitry Andric return MIB; 299*fe6060f1SDimitry Andric } 300*fe6060f1SDimitry Andric 301*fe6060f1SDimitry Andric Visited.insert(MBB); 302*fe6060f1SDimitry Andric for (auto *Pred : MBB->predecessors()) 303*fe6060f1SDimitry Andric Worklist.push_back(Pred); 304*fe6060f1SDimitry Andric } 305*fe6060f1SDimitry Andric return LoopStart; 306*fe6060f1SDimitry Andric } 307*fe6060f1SDimitry Andric 308*fe6060f1SDimitry Andric // This function converts loops with t2LoopEnd and t2LoopEnd instructions into 309*fe6060f1SDimitry Andric // a single t2LoopEndDec instruction. To do that it needs to make sure that LR 310*fe6060f1SDimitry Andric // will be valid to be used for the low overhead loop, which means nothing else 311*fe6060f1SDimitry Andric // is using LR (especially calls) and there are no superfluous copies in the 312*fe6060f1SDimitry Andric // loop. The t2LoopEndDec is a branching terminator that produces a value (the 313*fe6060f1SDimitry Andric // decrement) around the loop edge, which means we need to be careful that they 314*fe6060f1SDimitry Andric // will be valid to allocate without any spilling. 
bool MVETPAndVPTOptimisations::MergeLoopEnd(MachineLoop *ML) {
  // Nothing to do when merging has been disabled on the command line.
  if (!MergeEndDec)
    return false;

  LLVM_DEBUG(dbgs() << "MergeLoopEnd on loop " << ML->getHeader()->getName()
                    << "\n");

  MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec;
  if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd))
    return false;

  // Check if there is an illegal instruction (a call) in the low overhead loop
  // and if so revert it now before we get any further. While loops also need to
  // check the preheaders, but can be reverted to a DLS loop if needed.
  auto *PreHeader = ML->getLoopPreheader();
  if (LoopStart->getOpcode() == ARM::t2WhileLoopStartLR && PreHeader)
    LoopStart = CheckForLRUseInPredecessors(PreHeader, LoopStart);

  // Any invalid instruction inside the loop body forces a full revert of the
  // start, decrement and end. This still counts as a change, hence true.
  for (MachineBasicBlock *MBB : ML->blocks()) {
    for (MachineInstr &MI : *MBB) {
      if (IsInvalidTPInstruction(MI)) {
        LLVM_DEBUG(dbgs() << "Found LR use in loop, reverting: " << MI);
        if (LoopStart->getOpcode() == ARM::t2DoLoopStart)
          RevertDoLoopStart(LoopStart, TII);
        else
          RevertWhileLoopStartLR(LoopStart, TII);
        RevertLoopDec(LoopDec, TII);
        RevertLoopEnd(LoopEnd, TII);
        return true;
      }
    }
  }

  // Remove any copies from the loop, to ensure the phi that remains is both
  // simpler and contains no extra uses. Because t2LoopEndDec is a terminator
  // that cannot spill, we need to be careful what remains in the loop.
  Register PhiReg = LoopPhi->getOperand(0).getReg();
  Register DecReg = LoopDec->getOperand(0).getReg();
  Register StartReg = LoopStart->getOperand(0).getReg();
  // Ensure the uses are expected, and collect any copies we want to remove.
  SmallVector<MachineInstr *, 4> Copies;
  // Walks the non-debug users of BaseReg, following COPYs into virtual
  // registers transitively. Every user must either be listed in ExpectedUsers
  // or be such a COPY (which is recorded in Copies). Returns false as soon as
  // any other user is seen.
  auto CheckUsers = [&Copies](Register BaseReg,
                              ArrayRef<MachineInstr *> ExpectedUsers,
                              MachineRegisterInfo *MRI) {
    SmallVector<Register, 4> Worklist;
    Worklist.push_back(BaseReg);
    while (!Worklist.empty()) {
      Register Reg = Worklist.pop_back_val();
      for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
        if (count(ExpectedUsers, &MI))
          continue;
        if (MI.getOpcode() != TargetOpcode::COPY ||
            !MI.getOperand(0).getReg().isVirtual()) {
          LLVM_DEBUG(dbgs() << "Extra users of register found: " << MI);
          return false;
        }
        Worklist.push_back(MI.getOperand(0).getReg());
        Copies.push_back(&MI);
      }
    }
    return true;
  };
  if (!CheckUsers(PhiReg, {LoopDec}, MRI) ||
      !CheckUsers(DecReg, {LoopPhi, LoopEnd}, MRI) ||
      !CheckUsers(StartReg, {LoopPhi}, MRI)) {
    // Don't leave a t2WhileLoopStartLR without the LoopDecEnd.
    if (LoopStart->getOpcode() == ARM::t2WhileLoopStartLR) {
      RevertWhileLoopStartLR(LoopStart, TII);
      RevertLoopDec(LoopDec, TII);
      RevertLoopEnd(LoopEnd, TII);
      return true;
    }
    return false;
  }

  // All three values of the loop counter chain are constrained to GPRlr.
  MRI->constrainRegClass(StartReg, &ARM::GPRlrRegClass);
  MRI->constrainRegClass(PhiReg, &ARM::GPRlrRegClass);
  MRI->constrainRegClass(DecReg, &ARM::GPRlrRegClass);

  // Point the PHI's incoming values directly at StartReg and DecReg so that
  // the copies collected above become dead. Which operand pair belongs to the
  // latch edge depends on the PHI's operand order.
  if (LoopPhi->getOperand(2).getMBB() == ML->getLoopLatch()) {
    LoopPhi->getOperand(3).setReg(StartReg);
    LoopPhi->getOperand(1).setReg(DecReg);
  } else {
    LoopPhi->getOperand(1).setReg(StartReg);
    LoopPhi->getOperand(3).setReg(DecReg);
  }

  // Replace the loop dec and loop end as a single instruction.
  MachineInstrBuilder MI =
      BuildMI(*LoopEnd->getParent(), *LoopEnd, LoopEnd->getDebugLoc(),
              TII->get(ARM::t2LoopEndDec), DecReg)
          .addReg(PhiReg)
          .add(LoopEnd->getOperand(1));
  (void)MI;
  LLVM_DEBUG(dbgs() << "Merged LoopDec and End into: " << *MI.getInstr());

  LoopDec->eraseFromParent();
  LoopEnd->eraseFromParent();
  for (auto *MI : Copies)
    MI->eraseFromParent();
  return true;
}

// Convert t2DoLoopStart to t2DoLoopStartTP (or t2WhileLoopStartLR to
// t2WhileLoopStartTP) if the loop contains VCTP instructions. This keeps the
// VCTP count reg operand on the new start instruction, making the backend
// ARMLowOverheadLoops pass's job of finding the VCTP operand much simpler.
bool MVETPAndVPTOptimisations::ConvertTailPredLoop(MachineLoop *ML,
                                                   MachineDominatorTree *DT) {
  LLVM_DEBUG(dbgs() << "ConvertTailPredLoop on loop "
                    << ML->getHeader()->getName() << "\n");

  // Find some loop components including the LoopEnd/Dec/Start, and any VCTP's
  // in the loop.
  MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec;
  if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd))
    return false;
  // Only loops whose dec/end have already been merged (findLoopComponents
  // reports LoopDec == LoopEnd for a t2LoopEndDec) and that start with a
  // t2DoLoopStart or t2WhileLoopStartLR are converted.
  if (LoopDec != LoopEnd || (LoopStart->getOpcode() != ARM::t2DoLoopStart &&
                             LoopStart->getOpcode() != ARM::t2WhileLoopStartLR))
    return false;

  SmallVector<MachineInstr *, 4> VCTPs;
  for (MachineBasicBlock *BB : ML->blocks())
    for (MachineInstr &MI : *BB)
      if (isVCTP(&MI))
        VCTPs.push_back(&MI);

  if (VCTPs.empty()) {
    LLVM_DEBUG(dbgs() << " no VCTPs\n");
    return false;
  }

  // Check all VCTPs are the same.
  // NOTE(review): this compares operand 0 of each VCTP, whereas the count
  // register read further down is operand 1 - confirm operand 0 is the
  // operand intended for the "identical" check.
  MachineInstr *FirstVCTP = *VCTPs.begin();
  for (MachineInstr *VCTP : VCTPs) {
    LLVM_DEBUG(dbgs() << " with VCTP " << *VCTP);
    if (VCTP->getOpcode() != FirstVCTP->getOpcode() ||
        VCTP->getOperand(0).getReg() != FirstVCTP->getOperand(0).getReg()) {
      LLVM_DEBUG(dbgs() << " VCTP's are not identical\n");
      return false;
    }
  }

  // Check for the register being used can be setup before the loop. We expect
  // this to be:
  //   $vx = ...
  // loop:
  //   $vp = PHI [ $vx ], [ $vd ]
  //   ..
  //   $vpr = VCTP $vp
  //   ..
  //   $vd = t2SUBri $vp, #n
  //   ..
  Register CountReg = FirstVCTP->getOperand(1).getReg();
  if (!CountReg.isVirtual()) {
    LLVM_DEBUG(dbgs() << " cannot determine VCTP PHI\n");
    return false;
  }
  MachineInstr *Phi = LookThroughCOPY(MRI->getVRegDef(CountReg), MRI);
  if (!Phi || Phi->getOpcode() != TargetOpcode::PHI ||
      Phi->getNumOperands() != 5 ||
      (Phi->getOperand(2).getMBB() != ML->getLoopLatch() &&
       Phi->getOperand(4).getMBB() != ML->getLoopLatch())) {
    LLVM_DEBUG(dbgs() << " cannot determine VCTP Count\n");
    return false;
  }
  // Use the PHI's incoming value from the edge that is not the latch, i.e.
  // the element count as it is on entry to the loop.
  CountReg = Phi->getOperand(2).getMBB() == ML->getLoopLatch()
                 ? Phi->getOperand(3).getReg()
                 : Phi->getOperand(1).getReg();

  // Replace the t2DoLoopStart with the t2DoLoopStartTP, move it to the end of
  // the preheader and add the new CountReg to it. We attempt to place it late
  // in the preheader, but may need to move that earlier based on uses.
  MachineBasicBlock *MBB = LoopStart->getParent();
  MachineBasicBlock::iterator InsertPt = MBB->getFirstTerminator();
  // Give up if any user of the loop start's result would not be dominated by
  // the insertion point, or lives outside the region dominated by the header.
  for (MachineInstr &Use :
       MRI->use_instructions(LoopStart->getOperand(0).getReg()))
    if ((InsertPt != MBB->end() && !DT->dominates(&*InsertPt, &Use)) ||
        !DT->dominates(ML->getHeader(), Use.getParent())) {
      LLVM_DEBUG(dbgs() << " InsertPt could not be a terminator!\n");
      return false;
    }

  unsigned NewOpc = LoopStart->getOpcode() == ARM::t2DoLoopStart
                        ? ARM::t2DoLoopStartTP
                        : ARM::t2WhileLoopStartTP;
  MachineInstrBuilder MI =
      BuildMI(*MBB, InsertPt, LoopStart->getDebugLoc(), TII->get(NewOpc))
          .add(LoopStart->getOperand(0))
          .add(LoopStart->getOperand(1))
          .addReg(CountReg);
  // The while form carries one more operand (operand 2 of the original start),
  // which is appended after the count register.
  if (NewOpc == ARM::t2WhileLoopStartTP)
    MI.add(LoopStart->getOperand(2));
  LLVM_DEBUG(dbgs() << "Replacing " << *LoopStart << " with "
                    << *MI.getInstr());
  MRI->constrainRegClass(CountReg, &ARM::rGPRRegClass);
  LoopStart->eraseFromParent();

  return true;
}

// Returns true if Opcode is any VCMP Opcode.
517*fe6060f1SDimitry Andric static bool IsVCMP(unsigned Opcode) { return VCMPOpcodeToVPT(Opcode) != 0; } 518*fe6060f1SDimitry Andric 519*fe6060f1SDimitry Andric // Returns true if a VCMP with this Opcode can have its operands swapped. 520*fe6060f1SDimitry Andric // There is 2 kind of VCMP that can't have their operands swapped: Float VCMPs, 521*fe6060f1SDimitry Andric // and VCMPr instructions (since the r is always on the right). 522*fe6060f1SDimitry Andric static bool CanHaveSwappedOperands(unsigned Opcode) { 523*fe6060f1SDimitry Andric switch (Opcode) { 524*fe6060f1SDimitry Andric default: 525*fe6060f1SDimitry Andric return true; 526*fe6060f1SDimitry Andric case ARM::MVE_VCMPf32: 527*fe6060f1SDimitry Andric case ARM::MVE_VCMPf16: 528*fe6060f1SDimitry Andric case ARM::MVE_VCMPf32r: 529*fe6060f1SDimitry Andric case ARM::MVE_VCMPf16r: 530*fe6060f1SDimitry Andric case ARM::MVE_VCMPi8r: 531*fe6060f1SDimitry Andric case ARM::MVE_VCMPi16r: 532*fe6060f1SDimitry Andric case ARM::MVE_VCMPi32r: 533*fe6060f1SDimitry Andric case ARM::MVE_VCMPu8r: 534*fe6060f1SDimitry Andric case ARM::MVE_VCMPu16r: 535*fe6060f1SDimitry Andric case ARM::MVE_VCMPu32r: 536*fe6060f1SDimitry Andric case ARM::MVE_VCMPs8r: 537*fe6060f1SDimitry Andric case ARM::MVE_VCMPs16r: 538*fe6060f1SDimitry Andric case ARM::MVE_VCMPs32r: 539*fe6060f1SDimitry Andric return false; 540*fe6060f1SDimitry Andric } 541*fe6060f1SDimitry Andric } 542*fe6060f1SDimitry Andric 543*fe6060f1SDimitry Andric // Returns the CondCode of a VCMP Instruction. 544*fe6060f1SDimitry Andric static ARMCC::CondCodes GetCondCode(MachineInstr &Instr) { 545*fe6060f1SDimitry Andric assert(IsVCMP(Instr.getOpcode()) && "Inst must be a VCMP"); 546*fe6060f1SDimitry Andric return ARMCC::CondCodes(Instr.getOperand(3).getImm()); 547*fe6060f1SDimitry Andric } 548*fe6060f1SDimitry Andric 549*fe6060f1SDimitry Andric // Returns true if Cond is equivalent to a VPNOT instruction on the result of 550*fe6060f1SDimitry Andric // Prev. 
Cond and Prev must be VCMPs. 551*fe6060f1SDimitry Andric static bool IsVPNOTEquivalent(MachineInstr &Cond, MachineInstr &Prev) { 552*fe6060f1SDimitry Andric assert(IsVCMP(Cond.getOpcode()) && IsVCMP(Prev.getOpcode())); 553*fe6060f1SDimitry Andric 554*fe6060f1SDimitry Andric // Opcodes must match. 555*fe6060f1SDimitry Andric if (Cond.getOpcode() != Prev.getOpcode()) 556*fe6060f1SDimitry Andric return false; 557*fe6060f1SDimitry Andric 558*fe6060f1SDimitry Andric MachineOperand &CondOP1 = Cond.getOperand(1), &CondOP2 = Cond.getOperand(2); 559*fe6060f1SDimitry Andric MachineOperand &PrevOP1 = Prev.getOperand(1), &PrevOP2 = Prev.getOperand(2); 560*fe6060f1SDimitry Andric 561*fe6060f1SDimitry Andric // If the VCMP has the opposite condition with the same operands, we can 562*fe6060f1SDimitry Andric // replace it with a VPNOT 563*fe6060f1SDimitry Andric ARMCC::CondCodes ExpectedCode = GetCondCode(Cond); 564*fe6060f1SDimitry Andric ExpectedCode = ARMCC::getOppositeCondition(ExpectedCode); 565*fe6060f1SDimitry Andric if (ExpectedCode == GetCondCode(Prev)) 566*fe6060f1SDimitry Andric if (CondOP1.isIdenticalTo(PrevOP1) && CondOP2.isIdenticalTo(PrevOP2)) 567*fe6060f1SDimitry Andric return true; 568*fe6060f1SDimitry Andric // Check again with operands swapped if possible 569*fe6060f1SDimitry Andric if (!CanHaveSwappedOperands(Cond.getOpcode())) 570*fe6060f1SDimitry Andric return false; 571*fe6060f1SDimitry Andric ExpectedCode = ARMCC::getSwappedCondition(ExpectedCode); 572*fe6060f1SDimitry Andric return ExpectedCode == GetCondCode(Prev) && CondOP1.isIdenticalTo(PrevOP2) && 573*fe6060f1SDimitry Andric CondOP2.isIdenticalTo(PrevOP1); 574*fe6060f1SDimitry Andric } 575*fe6060f1SDimitry Andric 576*fe6060f1SDimitry Andric // Returns true if Instr writes to VCCR. 
577*fe6060f1SDimitry Andric static bool IsWritingToVCCR(MachineInstr &Instr) { 578*fe6060f1SDimitry Andric if (Instr.getNumOperands() == 0) 579*fe6060f1SDimitry Andric return false; 580*fe6060f1SDimitry Andric MachineOperand &Dst = Instr.getOperand(0); 581*fe6060f1SDimitry Andric if (!Dst.isReg()) 582*fe6060f1SDimitry Andric return false; 583*fe6060f1SDimitry Andric Register DstReg = Dst.getReg(); 584*fe6060f1SDimitry Andric if (!DstReg.isVirtual()) 585*fe6060f1SDimitry Andric return false; 586*fe6060f1SDimitry Andric MachineRegisterInfo &RegInfo = Instr.getMF()->getRegInfo(); 587*fe6060f1SDimitry Andric const TargetRegisterClass *RegClass = RegInfo.getRegClassOrNull(DstReg); 588*fe6060f1SDimitry Andric return RegClass && (RegClass->getID() == ARM::VCCRRegClassID); 589*fe6060f1SDimitry Andric } 590*fe6060f1SDimitry Andric 591*fe6060f1SDimitry Andric // Transforms 592*fe6060f1SDimitry Andric // <Instr that uses %A ('User' Operand)> 593*fe6060f1SDimitry Andric // Into 594*fe6060f1SDimitry Andric // %K = VPNOT %Target 595*fe6060f1SDimitry Andric // <Instr that uses %K ('User' Operand)> 596*fe6060f1SDimitry Andric // And returns the newly inserted VPNOT. 597*fe6060f1SDimitry Andric // This optimization is done in the hopes of preventing spills/reloads of VPR by 598*fe6060f1SDimitry Andric // reducing the number of VCCR values with overlapping lifetimes. 
599*fe6060f1SDimitry Andric MachineInstr &MVETPAndVPTOptimisations::ReplaceRegisterUseWithVPNOT( 600*fe6060f1SDimitry Andric MachineBasicBlock &MBB, MachineInstr &Instr, MachineOperand &User, 601*fe6060f1SDimitry Andric Register Target) { 602*fe6060f1SDimitry Andric Register NewResult = MRI->createVirtualRegister(MRI->getRegClass(Target)); 603*fe6060f1SDimitry Andric 604*fe6060f1SDimitry Andric MachineInstrBuilder MIBuilder = 605*fe6060f1SDimitry Andric BuildMI(MBB, &Instr, Instr.getDebugLoc(), TII->get(ARM::MVE_VPNOT)) 606*fe6060f1SDimitry Andric .addDef(NewResult) 607*fe6060f1SDimitry Andric .addReg(Target); 608*fe6060f1SDimitry Andric addUnpredicatedMveVpredNOp(MIBuilder); 609*fe6060f1SDimitry Andric 610*fe6060f1SDimitry Andric // Make the user use NewResult instead, and clear its kill flag. 611*fe6060f1SDimitry Andric User.setReg(NewResult); 612*fe6060f1SDimitry Andric User.setIsKill(false); 613*fe6060f1SDimitry Andric 614*fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << " Inserting VPNOT (for spill prevention): "; 615*fe6060f1SDimitry Andric MIBuilder.getInstr()->dump()); 616*fe6060f1SDimitry Andric 617*fe6060f1SDimitry Andric return *MIBuilder.getInstr(); 618*fe6060f1SDimitry Andric } 619*fe6060f1SDimitry Andric 620*fe6060f1SDimitry Andric // Moves a VPNOT before its first user if an instruction that uses Reg is found 621*fe6060f1SDimitry Andric // in-between the VPNOT and its user. 622*fe6060f1SDimitry Andric // Returns true if there is at least one user of the VPNOT in the block. 
623*fe6060f1SDimitry Andric static bool MoveVPNOTBeforeFirstUser(MachineBasicBlock &MBB, 624*fe6060f1SDimitry Andric MachineBasicBlock::iterator Iter, 625*fe6060f1SDimitry Andric Register Reg) { 626*fe6060f1SDimitry Andric assert(Iter->getOpcode() == ARM::MVE_VPNOT && "Not a VPNOT!"); 627*fe6060f1SDimitry Andric assert(getVPTInstrPredicate(*Iter) == ARMVCC::None && 628*fe6060f1SDimitry Andric "The VPNOT cannot be predicated"); 629*fe6060f1SDimitry Andric 630*fe6060f1SDimitry Andric MachineInstr &VPNOT = *Iter; 631*fe6060f1SDimitry Andric Register VPNOTResult = VPNOT.getOperand(0).getReg(); 632*fe6060f1SDimitry Andric Register VPNOTOperand = VPNOT.getOperand(1).getReg(); 633*fe6060f1SDimitry Andric 634*fe6060f1SDimitry Andric // Whether the VPNOT will need to be moved, and whether we found a user of the 635*fe6060f1SDimitry Andric // VPNOT. 636*fe6060f1SDimitry Andric bool MustMove = false, HasUser = false; 637*fe6060f1SDimitry Andric MachineOperand *VPNOTOperandKiller = nullptr; 638*fe6060f1SDimitry Andric for (; Iter != MBB.end(); ++Iter) { 639*fe6060f1SDimitry Andric if (MachineOperand *MO = 640*fe6060f1SDimitry Andric Iter->findRegisterUseOperand(VPNOTOperand, /*isKill*/ true)) { 641*fe6060f1SDimitry Andric // If we find the operand that kills the VPNOTOperand's result, save it. 
642*fe6060f1SDimitry Andric VPNOTOperandKiller = MO; 643*fe6060f1SDimitry Andric } 644*fe6060f1SDimitry Andric 645*fe6060f1SDimitry Andric if (Iter->findRegisterUseOperandIdx(Reg) != -1) { 646*fe6060f1SDimitry Andric MustMove = true; 647*fe6060f1SDimitry Andric continue; 648*fe6060f1SDimitry Andric } 649*fe6060f1SDimitry Andric 650*fe6060f1SDimitry Andric if (Iter->findRegisterUseOperandIdx(VPNOTResult) == -1) 651*fe6060f1SDimitry Andric continue; 652*fe6060f1SDimitry Andric 653*fe6060f1SDimitry Andric HasUser = true; 654*fe6060f1SDimitry Andric if (!MustMove) 655*fe6060f1SDimitry Andric break; 656*fe6060f1SDimitry Andric 657*fe6060f1SDimitry Andric // Move the VPNOT right before Iter 658*fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "Moving: "; VPNOT.dump(); dbgs() << " Before: "; 659*fe6060f1SDimitry Andric Iter->dump()); 660*fe6060f1SDimitry Andric MBB.splice(Iter, &MBB, VPNOT.getIterator()); 661*fe6060f1SDimitry Andric // If we move the instr, and its operand was killed earlier, remove the kill 662*fe6060f1SDimitry Andric // flag. 663*fe6060f1SDimitry Andric if (VPNOTOperandKiller) 664*fe6060f1SDimitry Andric VPNOTOperandKiller->setIsKill(false); 665*fe6060f1SDimitry Andric 666*fe6060f1SDimitry Andric break; 667*fe6060f1SDimitry Andric } 668*fe6060f1SDimitry Andric return HasUser; 669*fe6060f1SDimitry Andric } 670*fe6060f1SDimitry Andric 671*fe6060f1SDimitry Andric // This optimisation attempts to reduce the number of overlapping lifetimes of 672*fe6060f1SDimitry Andric // VCCR values by replacing uses of old VCCR values with VPNOTs. 
For example, 673*fe6060f1SDimitry Andric // this replaces 674*fe6060f1SDimitry Andric // %A:vccr = (something) 675*fe6060f1SDimitry Andric // %B:vccr = VPNOT %A 676*fe6060f1SDimitry Andric // %Foo = (some op that uses %B) 677*fe6060f1SDimitry Andric // %Bar = (some op that uses %A) 678*fe6060f1SDimitry Andric // With 679*fe6060f1SDimitry Andric // %A:vccr = (something) 680*fe6060f1SDimitry Andric // %B:vccr = VPNOT %A 681*fe6060f1SDimitry Andric // %Foo = (some op that uses %B) 682*fe6060f1SDimitry Andric // %TMP2:vccr = VPNOT %B 683*fe6060f1SDimitry Andric // %Bar = (some op that uses %A) 684*fe6060f1SDimitry Andric bool MVETPAndVPTOptimisations::ReduceOldVCCRValueUses(MachineBasicBlock &MBB) { 685*fe6060f1SDimitry Andric MachineBasicBlock::iterator Iter = MBB.begin(), End = MBB.end(); 686*fe6060f1SDimitry Andric SmallVector<MachineInstr *, 4> DeadInstructions; 687*fe6060f1SDimitry Andric bool Modified = false; 688*fe6060f1SDimitry Andric 689*fe6060f1SDimitry Andric while (Iter != End) { 690*fe6060f1SDimitry Andric Register VCCRValue, OppositeVCCRValue; 691*fe6060f1SDimitry Andric // The first loop looks for 2 unpredicated instructions: 692*fe6060f1SDimitry Andric // %A:vccr = (instr) ; A is stored in VCCRValue 693*fe6060f1SDimitry Andric // %B:vccr = VPNOT %A ; B is stored in OppositeVCCRValue 694*fe6060f1SDimitry Andric for (; Iter != End; ++Iter) { 695*fe6060f1SDimitry Andric // We're only interested in unpredicated instructions that write to VCCR. 696*fe6060f1SDimitry Andric if (!IsWritingToVCCR(*Iter) || 697*fe6060f1SDimitry Andric getVPTInstrPredicate(*Iter) != ARMVCC::None) 698*fe6060f1SDimitry Andric continue; 699*fe6060f1SDimitry Andric Register Dst = Iter->getOperand(0).getReg(); 700*fe6060f1SDimitry Andric 701*fe6060f1SDimitry Andric // If we already have a VCCRValue, and this is a VPNOT on VCCRValue, we've 702*fe6060f1SDimitry Andric // found what we were looking for. 
703*fe6060f1SDimitry Andric if (VCCRValue && Iter->getOpcode() == ARM::MVE_VPNOT && 704*fe6060f1SDimitry Andric Iter->findRegisterUseOperandIdx(VCCRValue) != -1) { 705*fe6060f1SDimitry Andric // Move the VPNOT closer to its first user if needed, and ignore if it 706*fe6060f1SDimitry Andric // has no users. 707*fe6060f1SDimitry Andric if (!MoveVPNOTBeforeFirstUser(MBB, Iter, VCCRValue)) 708*fe6060f1SDimitry Andric continue; 709*fe6060f1SDimitry Andric 710*fe6060f1SDimitry Andric OppositeVCCRValue = Dst; 711*fe6060f1SDimitry Andric ++Iter; 712*fe6060f1SDimitry Andric break; 713*fe6060f1SDimitry Andric } 714*fe6060f1SDimitry Andric 715*fe6060f1SDimitry Andric // Else, just set VCCRValue. 716*fe6060f1SDimitry Andric VCCRValue = Dst; 717*fe6060f1SDimitry Andric } 718*fe6060f1SDimitry Andric 719*fe6060f1SDimitry Andric // If the first inner loop didn't find anything, stop here. 720*fe6060f1SDimitry Andric if (Iter == End) 721*fe6060f1SDimitry Andric break; 722*fe6060f1SDimitry Andric 723*fe6060f1SDimitry Andric assert(VCCRValue && OppositeVCCRValue && 724*fe6060f1SDimitry Andric "VCCRValue and OppositeVCCRValue shouldn't be empty if the loop " 725*fe6060f1SDimitry Andric "stopped before the end of the block!"); 726*fe6060f1SDimitry Andric assert(VCCRValue != OppositeVCCRValue && 727*fe6060f1SDimitry Andric "VCCRValue should not be equal to OppositeVCCRValue!"); 728*fe6060f1SDimitry Andric 729*fe6060f1SDimitry Andric // LastVPNOTResult always contains the same value as OppositeVCCRValue. 730*fe6060f1SDimitry Andric Register LastVPNOTResult = OppositeVCCRValue; 731*fe6060f1SDimitry Andric 732*fe6060f1SDimitry Andric // This second loop tries to optimize the remaining instructions. 
733*fe6060f1SDimitry Andric for (; Iter != End; ++Iter) { 734*fe6060f1SDimitry Andric bool IsInteresting = false; 735*fe6060f1SDimitry Andric 736*fe6060f1SDimitry Andric if (MachineOperand *MO = Iter->findRegisterUseOperand(VCCRValue)) { 737*fe6060f1SDimitry Andric IsInteresting = true; 738*fe6060f1SDimitry Andric 739*fe6060f1SDimitry Andric // - If the instruction is a VPNOT, it can be removed, and we can just 740*fe6060f1SDimitry Andric // replace its uses with LastVPNOTResult. 741*fe6060f1SDimitry Andric // - Else, insert a new VPNOT on LastVPNOTResult to recompute VCCRValue. 742*fe6060f1SDimitry Andric if (Iter->getOpcode() == ARM::MVE_VPNOT) { 743*fe6060f1SDimitry Andric Register Result = Iter->getOperand(0).getReg(); 744*fe6060f1SDimitry Andric 745*fe6060f1SDimitry Andric MRI->replaceRegWith(Result, LastVPNOTResult); 746*fe6060f1SDimitry Andric DeadInstructions.push_back(&*Iter); 747*fe6060f1SDimitry Andric Modified = true; 748*fe6060f1SDimitry Andric 749*fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() 750*fe6060f1SDimitry Andric << "Replacing all uses of '" << printReg(Result) 751*fe6060f1SDimitry Andric << "' with '" << printReg(LastVPNOTResult) << "'\n"); 752*fe6060f1SDimitry Andric } else { 753*fe6060f1SDimitry Andric MachineInstr &VPNOT = 754*fe6060f1SDimitry Andric ReplaceRegisterUseWithVPNOT(MBB, *Iter, *MO, LastVPNOTResult); 755*fe6060f1SDimitry Andric Modified = true; 756*fe6060f1SDimitry Andric 757*fe6060f1SDimitry Andric LastVPNOTResult = VPNOT.getOperand(0).getReg(); 758*fe6060f1SDimitry Andric std::swap(VCCRValue, OppositeVCCRValue); 759*fe6060f1SDimitry Andric 760*fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "Replacing use of '" << printReg(VCCRValue) 761*fe6060f1SDimitry Andric << "' with '" << printReg(LastVPNOTResult) 762*fe6060f1SDimitry Andric << "' in instr: " << *Iter); 763*fe6060f1SDimitry Andric } 764*fe6060f1SDimitry Andric } else { 765*fe6060f1SDimitry Andric // If the instr uses OppositeVCCRValue, make it use LastVPNOTResult 
766*fe6060f1SDimitry Andric // instead as they contain the same value. 767*fe6060f1SDimitry Andric if (MachineOperand *MO = 768*fe6060f1SDimitry Andric Iter->findRegisterUseOperand(OppositeVCCRValue)) { 769*fe6060f1SDimitry Andric IsInteresting = true; 770*fe6060f1SDimitry Andric 771*fe6060f1SDimitry Andric // This is pointless if LastVPNOTResult == OppositeVCCRValue. 772*fe6060f1SDimitry Andric if (LastVPNOTResult != OppositeVCCRValue) { 773*fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "Replacing usage of '" 774*fe6060f1SDimitry Andric << printReg(OppositeVCCRValue) << "' with '" 775*fe6060f1SDimitry Andric << printReg(LastVPNOTResult) << " for instr: "; 776*fe6060f1SDimitry Andric Iter->dump()); 777*fe6060f1SDimitry Andric MO->setReg(LastVPNOTResult); 778*fe6060f1SDimitry Andric Modified = true; 779*fe6060f1SDimitry Andric } 780*fe6060f1SDimitry Andric 781*fe6060f1SDimitry Andric MO->setIsKill(false); 782*fe6060f1SDimitry Andric } 783*fe6060f1SDimitry Andric 784*fe6060f1SDimitry Andric // If this is an unpredicated VPNOT on 785*fe6060f1SDimitry Andric // LastVPNOTResult/OppositeVCCRValue, we can act like we inserted it. 786*fe6060f1SDimitry Andric if (Iter->getOpcode() == ARM::MVE_VPNOT && 787*fe6060f1SDimitry Andric getVPTInstrPredicate(*Iter) == ARMVCC::None) { 788*fe6060f1SDimitry Andric Register VPNOTOperand = Iter->getOperand(1).getReg(); 789*fe6060f1SDimitry Andric if (VPNOTOperand == LastVPNOTResult || 790*fe6060f1SDimitry Andric VPNOTOperand == OppositeVCCRValue) { 791*fe6060f1SDimitry Andric IsInteresting = true; 792*fe6060f1SDimitry Andric 793*fe6060f1SDimitry Andric std::swap(VCCRValue, OppositeVCCRValue); 794*fe6060f1SDimitry Andric LastVPNOTResult = Iter->getOperand(0).getReg(); 795*fe6060f1SDimitry Andric } 796*fe6060f1SDimitry Andric } 797*fe6060f1SDimitry Andric } 798*fe6060f1SDimitry Andric 799*fe6060f1SDimitry Andric // If this instruction was not interesting, and it writes to VCCR, stop. 
800*fe6060f1SDimitry Andric if (!IsInteresting && IsWritingToVCCR(*Iter)) 801*fe6060f1SDimitry Andric break; 802*fe6060f1SDimitry Andric } 803*fe6060f1SDimitry Andric } 804*fe6060f1SDimitry Andric 805*fe6060f1SDimitry Andric for (MachineInstr *DeadInstruction : DeadInstructions) 806*fe6060f1SDimitry Andric DeadInstruction->eraseFromParent(); 807*fe6060f1SDimitry Andric 808*fe6060f1SDimitry Andric return Modified; 809*fe6060f1SDimitry Andric } 810*fe6060f1SDimitry Andric 811*fe6060f1SDimitry Andric // This optimisation replaces VCMPs with VPNOTs when they are equivalent. 812*fe6060f1SDimitry Andric bool MVETPAndVPTOptimisations::ReplaceVCMPsByVPNOTs(MachineBasicBlock &MBB) { 813*fe6060f1SDimitry Andric SmallVector<MachineInstr *, 4> DeadInstructions; 814*fe6060f1SDimitry Andric 815*fe6060f1SDimitry Andric // The last VCMP that we have seen and that couldn't be replaced. 816*fe6060f1SDimitry Andric // This is reset when an instruction that writes to VCCR/VPR is found, or when 817*fe6060f1SDimitry Andric // a VCMP is replaced with a VPNOT. 818*fe6060f1SDimitry Andric // We'll only replace VCMPs with VPNOTs when this is not null, and when the 819*fe6060f1SDimitry Andric // current VCMP is the opposite of PrevVCMP. 820*fe6060f1SDimitry Andric MachineInstr *PrevVCMP = nullptr; 821*fe6060f1SDimitry Andric // If we find an instruction that kills the result of PrevVCMP, we save the 822*fe6060f1SDimitry Andric // operand here to remove the kill flag in case we need to use PrevVCMP's 823*fe6060f1SDimitry Andric // result. 
824*fe6060f1SDimitry Andric MachineOperand *PrevVCMPResultKiller = nullptr; 825*fe6060f1SDimitry Andric 826*fe6060f1SDimitry Andric for (MachineInstr &Instr : MBB.instrs()) { 827*fe6060f1SDimitry Andric if (PrevVCMP) { 828*fe6060f1SDimitry Andric if (MachineOperand *MO = Instr.findRegisterUseOperand( 829*fe6060f1SDimitry Andric PrevVCMP->getOperand(0).getReg(), /*isKill*/ true)) { 830*fe6060f1SDimitry Andric // If we come accross the instr that kills PrevVCMP's result, record it 831*fe6060f1SDimitry Andric // so we can remove the kill flag later if we need to. 832*fe6060f1SDimitry Andric PrevVCMPResultKiller = MO; 833*fe6060f1SDimitry Andric } 834*fe6060f1SDimitry Andric } 835*fe6060f1SDimitry Andric 836*fe6060f1SDimitry Andric // Ignore predicated instructions. 837*fe6060f1SDimitry Andric if (getVPTInstrPredicate(Instr) != ARMVCC::None) 838*fe6060f1SDimitry Andric continue; 839*fe6060f1SDimitry Andric 840*fe6060f1SDimitry Andric // Only look at VCMPs 841*fe6060f1SDimitry Andric if (!IsVCMP(Instr.getOpcode())) { 842*fe6060f1SDimitry Andric // If the instruction writes to VCCR, forget the previous VCMP. 843*fe6060f1SDimitry Andric if (IsWritingToVCCR(Instr)) 844*fe6060f1SDimitry Andric PrevVCMP = nullptr; 845*fe6060f1SDimitry Andric continue; 846*fe6060f1SDimitry Andric } 847*fe6060f1SDimitry Andric 848*fe6060f1SDimitry Andric if (!PrevVCMP || !IsVPNOTEquivalent(Instr, *PrevVCMP)) { 849*fe6060f1SDimitry Andric PrevVCMP = &Instr; 850*fe6060f1SDimitry Andric continue; 851*fe6060f1SDimitry Andric } 852*fe6060f1SDimitry Andric 853*fe6060f1SDimitry Andric // The register containing the result of the VCMP that we're going to 854*fe6060f1SDimitry Andric // replace. 855*fe6060f1SDimitry Andric Register PrevVCMPResultReg = PrevVCMP->getOperand(0).getReg(); 856*fe6060f1SDimitry Andric 857*fe6060f1SDimitry Andric // Build a VPNOT to replace the VCMP, reusing its operands. 
858*fe6060f1SDimitry Andric MachineInstrBuilder MIBuilder = 859*fe6060f1SDimitry Andric BuildMI(MBB, &Instr, Instr.getDebugLoc(), TII->get(ARM::MVE_VPNOT)) 860*fe6060f1SDimitry Andric .add(Instr.getOperand(0)) 861*fe6060f1SDimitry Andric .addReg(PrevVCMPResultReg); 862*fe6060f1SDimitry Andric addUnpredicatedMveVpredNOp(MIBuilder); 863*fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "Inserting VPNOT (to replace VCMP): "; 864*fe6060f1SDimitry Andric MIBuilder.getInstr()->dump(); dbgs() << " Removed VCMP: "; 865*fe6060f1SDimitry Andric Instr.dump()); 866*fe6060f1SDimitry Andric 867*fe6060f1SDimitry Andric // If we found an instruction that uses, and kills PrevVCMP's result, 868*fe6060f1SDimitry Andric // remove the kill flag. 869*fe6060f1SDimitry Andric if (PrevVCMPResultKiller) 870*fe6060f1SDimitry Andric PrevVCMPResultKiller->setIsKill(false); 871*fe6060f1SDimitry Andric 872*fe6060f1SDimitry Andric // Finally, mark the old VCMP for removal and reset 873*fe6060f1SDimitry Andric // PrevVCMP/PrevVCMPResultKiller. 874*fe6060f1SDimitry Andric DeadInstructions.push_back(&Instr); 875*fe6060f1SDimitry Andric PrevVCMP = nullptr; 876*fe6060f1SDimitry Andric PrevVCMPResultKiller = nullptr; 877*fe6060f1SDimitry Andric } 878*fe6060f1SDimitry Andric 879*fe6060f1SDimitry Andric for (MachineInstr *DeadInstruction : DeadInstructions) 880*fe6060f1SDimitry Andric DeadInstruction->eraseFromParent(); 881*fe6060f1SDimitry Andric 882*fe6060f1SDimitry Andric return !DeadInstructions.empty(); 883*fe6060f1SDimitry Andric } 884*fe6060f1SDimitry Andric 885*fe6060f1SDimitry Andric bool MVETPAndVPTOptimisations::ReplaceConstByVPNOTs(MachineBasicBlock &MBB, 886*fe6060f1SDimitry Andric MachineDominatorTree *DT) { 887*fe6060f1SDimitry Andric // Scan through the block, looking for instructions that use constants moves 888*fe6060f1SDimitry Andric // into VPR that are the negative of one another. These are expected to be 889*fe6060f1SDimitry Andric // COPY's to VCCRRegClass, from a t2MOVi or t2MOVi16. 
The last seen constant 890*fe6060f1SDimitry Andric // mask is kept it or and VPNOT's of it are added or reused as we scan through 891*fe6060f1SDimitry Andric // the function. 892*fe6060f1SDimitry Andric unsigned LastVPTImm = 0; 893*fe6060f1SDimitry Andric Register LastVPTReg = 0; 894*fe6060f1SDimitry Andric SmallSet<MachineInstr *, 4> DeadInstructions; 895*fe6060f1SDimitry Andric 896*fe6060f1SDimitry Andric for (MachineInstr &Instr : MBB.instrs()) { 897*fe6060f1SDimitry Andric // Look for predicated MVE instructions. 898*fe6060f1SDimitry Andric int PIdx = llvm::findFirstVPTPredOperandIdx(Instr); 899*fe6060f1SDimitry Andric if (PIdx == -1) 900*fe6060f1SDimitry Andric continue; 901*fe6060f1SDimitry Andric Register VPR = Instr.getOperand(PIdx + 1).getReg(); 902*fe6060f1SDimitry Andric if (!VPR.isVirtual()) 903*fe6060f1SDimitry Andric continue; 904*fe6060f1SDimitry Andric 905*fe6060f1SDimitry Andric // From that we are looking for an instruction like %11:vccr = COPY %9:rgpr. 906*fe6060f1SDimitry Andric MachineInstr *Copy = MRI->getVRegDef(VPR); 907*fe6060f1SDimitry Andric if (!Copy || Copy->getOpcode() != TargetOpcode::COPY || 908*fe6060f1SDimitry Andric !Copy->getOperand(1).getReg().isVirtual() || 909*fe6060f1SDimitry Andric MRI->getRegClass(Copy->getOperand(1).getReg()) == &ARM::VCCRRegClass) { 910*fe6060f1SDimitry Andric LastVPTReg = 0; 911*fe6060f1SDimitry Andric continue; 912*fe6060f1SDimitry Andric } 913*fe6060f1SDimitry Andric Register GPR = Copy->getOperand(1).getReg(); 914*fe6060f1SDimitry Andric 915*fe6060f1SDimitry Andric // Find the Immediate used by the copy. 
916*fe6060f1SDimitry Andric auto getImm = [&](Register GPR) -> unsigned { 917*fe6060f1SDimitry Andric MachineInstr *Def = MRI->getVRegDef(GPR); 918*fe6060f1SDimitry Andric if (Def && (Def->getOpcode() == ARM::t2MOVi || 919*fe6060f1SDimitry Andric Def->getOpcode() == ARM::t2MOVi16)) 920*fe6060f1SDimitry Andric return Def->getOperand(1).getImm(); 921*fe6060f1SDimitry Andric return -1U; 922*fe6060f1SDimitry Andric }; 923*fe6060f1SDimitry Andric unsigned Imm = getImm(GPR); 924*fe6060f1SDimitry Andric if (Imm == -1U) { 925*fe6060f1SDimitry Andric LastVPTReg = 0; 926*fe6060f1SDimitry Andric continue; 927*fe6060f1SDimitry Andric } 928*fe6060f1SDimitry Andric 929*fe6060f1SDimitry Andric unsigned NotImm = ~Imm & 0xffff; 930*fe6060f1SDimitry Andric if (LastVPTReg != 0 && LastVPTReg != VPR && LastVPTImm == Imm) { 931*fe6060f1SDimitry Andric Instr.getOperand(PIdx + 1).setReg(LastVPTReg); 932*fe6060f1SDimitry Andric if (MRI->use_empty(VPR)) { 933*fe6060f1SDimitry Andric DeadInstructions.insert(Copy); 934*fe6060f1SDimitry Andric if (MRI->hasOneUse(GPR)) 935*fe6060f1SDimitry Andric DeadInstructions.insert(MRI->getVRegDef(GPR)); 936*fe6060f1SDimitry Andric } 937*fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "Reusing predicate: in " << Instr); 938*fe6060f1SDimitry Andric } else if (LastVPTReg != 0 && LastVPTImm == NotImm) { 939*fe6060f1SDimitry Andric // We have found the not of a previous constant. Create a VPNot of the 940*fe6060f1SDimitry Andric // earlier predicate reg and use it instead of the copy. 941*fe6060f1SDimitry Andric Register NewVPR = MRI->createVirtualRegister(&ARM::VCCRRegClass); 942*fe6060f1SDimitry Andric auto VPNot = BuildMI(MBB, &Instr, Instr.getDebugLoc(), 943*fe6060f1SDimitry Andric TII->get(ARM::MVE_VPNOT), NewVPR) 944*fe6060f1SDimitry Andric .addReg(LastVPTReg); 945*fe6060f1SDimitry Andric addUnpredicatedMveVpredNOp(VPNot); 946*fe6060f1SDimitry Andric 947*fe6060f1SDimitry Andric // Use the new register and check if the def is now dead. 
948*fe6060f1SDimitry Andric Instr.getOperand(PIdx + 1).setReg(NewVPR); 949*fe6060f1SDimitry Andric if (MRI->use_empty(VPR)) { 950*fe6060f1SDimitry Andric DeadInstructions.insert(Copy); 951*fe6060f1SDimitry Andric if (MRI->hasOneUse(GPR)) 952*fe6060f1SDimitry Andric DeadInstructions.insert(MRI->getVRegDef(GPR)); 953*fe6060f1SDimitry Andric } 954*fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "Adding VPNot: " << *VPNot << " to replace use at " 955*fe6060f1SDimitry Andric << Instr); 956*fe6060f1SDimitry Andric VPR = NewVPR; 957*fe6060f1SDimitry Andric } 958*fe6060f1SDimitry Andric 959*fe6060f1SDimitry Andric LastVPTImm = Imm; 960*fe6060f1SDimitry Andric LastVPTReg = VPR; 961*fe6060f1SDimitry Andric } 962*fe6060f1SDimitry Andric 963*fe6060f1SDimitry Andric for (MachineInstr *DI : DeadInstructions) 964*fe6060f1SDimitry Andric DI->eraseFromParent(); 965*fe6060f1SDimitry Andric 966*fe6060f1SDimitry Andric return !DeadInstructions.empty(); 967*fe6060f1SDimitry Andric } 968*fe6060f1SDimitry Andric 969*fe6060f1SDimitry Andric // Replace VPSEL with a predicated VMOV in blocks with a VCTP. This is a 970*fe6060f1SDimitry Andric // somewhat blunt approximation to allow tail predicated with vpsel 971*fe6060f1SDimitry Andric // instructions. We turn a vselect into a VPSEL in ISEL, but they have slightly 972*fe6060f1SDimitry Andric // different semantics under tail predication. Until that is modelled we just 973*fe6060f1SDimitry Andric // convert to a VMOVT (via a predicated VORR) instead. 
974*fe6060f1SDimitry Andric bool MVETPAndVPTOptimisations::ConvertVPSEL(MachineBasicBlock &MBB) { 975*fe6060f1SDimitry Andric bool HasVCTP = false; 976*fe6060f1SDimitry Andric SmallVector<MachineInstr *, 4> DeadInstructions; 977*fe6060f1SDimitry Andric 978*fe6060f1SDimitry Andric for (MachineInstr &MI : MBB.instrs()) { 979*fe6060f1SDimitry Andric if (isVCTP(&MI)) { 980*fe6060f1SDimitry Andric HasVCTP = true; 981*fe6060f1SDimitry Andric continue; 982*fe6060f1SDimitry Andric } 983*fe6060f1SDimitry Andric 984*fe6060f1SDimitry Andric if (!HasVCTP || MI.getOpcode() != ARM::MVE_VPSEL) 985*fe6060f1SDimitry Andric continue; 986*fe6060f1SDimitry Andric 987*fe6060f1SDimitry Andric MachineInstrBuilder MIBuilder = 988*fe6060f1SDimitry Andric BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(ARM::MVE_VORR)) 989*fe6060f1SDimitry Andric .add(MI.getOperand(0)) 990*fe6060f1SDimitry Andric .add(MI.getOperand(1)) 991*fe6060f1SDimitry Andric .add(MI.getOperand(1)) 992*fe6060f1SDimitry Andric .addImm(ARMVCC::Then) 993*fe6060f1SDimitry Andric .add(MI.getOperand(4)) 994*fe6060f1SDimitry Andric .add(MI.getOperand(2)); 995*fe6060f1SDimitry Andric // Silence unused variable warning in release builds. 
996*fe6060f1SDimitry Andric (void)MIBuilder; 997*fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "Replacing VPSEL: "; MI.dump(); 998*fe6060f1SDimitry Andric dbgs() << " with VMOVT: "; MIBuilder.getInstr()->dump()); 999*fe6060f1SDimitry Andric DeadInstructions.push_back(&MI); 1000*fe6060f1SDimitry Andric } 1001*fe6060f1SDimitry Andric 1002*fe6060f1SDimitry Andric for (MachineInstr *DeadInstruction : DeadInstructions) 1003*fe6060f1SDimitry Andric DeadInstruction->eraseFromParent(); 1004*fe6060f1SDimitry Andric 1005*fe6060f1SDimitry Andric return !DeadInstructions.empty(); 1006*fe6060f1SDimitry Andric } 1007*fe6060f1SDimitry Andric 1008*fe6060f1SDimitry Andric // Add a registry allocation hint for t2DoLoopStart to hint it towards LR, as 1009*fe6060f1SDimitry Andric // the instruction may be removable as a noop. 1010*fe6060f1SDimitry Andric bool MVETPAndVPTOptimisations::HintDoLoopStartReg(MachineBasicBlock &MBB) { 1011*fe6060f1SDimitry Andric bool Changed = false; 1012*fe6060f1SDimitry Andric for (MachineInstr &MI : MBB.instrs()) { 1013*fe6060f1SDimitry Andric if (MI.getOpcode() != ARM::t2DoLoopStart) 1014*fe6060f1SDimitry Andric continue; 1015*fe6060f1SDimitry Andric Register R = MI.getOperand(1).getReg(); 1016*fe6060f1SDimitry Andric MachineFunction *MF = MI.getParent()->getParent(); 1017*fe6060f1SDimitry Andric MF->getRegInfo().setRegAllocationHint(R, ARMRI::RegLR, 0); 1018*fe6060f1SDimitry Andric Changed = true; 1019*fe6060f1SDimitry Andric } 1020*fe6060f1SDimitry Andric return Changed; 1021*fe6060f1SDimitry Andric } 1022*fe6060f1SDimitry Andric 1023*fe6060f1SDimitry Andric bool MVETPAndVPTOptimisations::runOnMachineFunction(MachineFunction &Fn) { 1024*fe6060f1SDimitry Andric const ARMSubtarget &STI = 1025*fe6060f1SDimitry Andric static_cast<const ARMSubtarget &>(Fn.getSubtarget()); 1026*fe6060f1SDimitry Andric 1027*fe6060f1SDimitry Andric if (!STI.isThumb2() || !STI.hasLOB()) 1028*fe6060f1SDimitry Andric return false; 1029*fe6060f1SDimitry Andric 
1030*fe6060f1SDimitry Andric TII = static_cast<const Thumb2InstrInfo *>(STI.getInstrInfo()); 1031*fe6060f1SDimitry Andric MRI = &Fn.getRegInfo(); 1032*fe6060f1SDimitry Andric MachineLoopInfo *MLI = &getAnalysis<MachineLoopInfo>(); 1033*fe6060f1SDimitry Andric MachineDominatorTree *DT = &getAnalysis<MachineDominatorTree>(); 1034*fe6060f1SDimitry Andric 1035*fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "********** ARM MVE VPT Optimisations **********\n" 1036*fe6060f1SDimitry Andric << "********** Function: " << Fn.getName() << '\n'); 1037*fe6060f1SDimitry Andric 1038*fe6060f1SDimitry Andric bool Modified = false; 1039*fe6060f1SDimitry Andric for (MachineLoop *ML : MLI->getBase().getLoopsInPreorder()) { 1040*fe6060f1SDimitry Andric Modified |= LowerWhileLoopStart(ML); 1041*fe6060f1SDimitry Andric Modified |= MergeLoopEnd(ML); 1042*fe6060f1SDimitry Andric Modified |= ConvertTailPredLoop(ML, DT); 1043*fe6060f1SDimitry Andric } 1044*fe6060f1SDimitry Andric 1045*fe6060f1SDimitry Andric for (MachineBasicBlock &MBB : Fn) { 1046*fe6060f1SDimitry Andric Modified |= HintDoLoopStartReg(MBB); 1047*fe6060f1SDimitry Andric Modified |= ReplaceConstByVPNOTs(MBB, DT); 1048*fe6060f1SDimitry Andric Modified |= ReplaceVCMPsByVPNOTs(MBB); 1049*fe6060f1SDimitry Andric Modified |= ReduceOldVCCRValueUses(MBB); 1050*fe6060f1SDimitry Andric Modified |= ConvertVPSEL(MBB); 1051*fe6060f1SDimitry Andric } 1052*fe6060f1SDimitry Andric 1053*fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "**************************************\n"); 1054*fe6060f1SDimitry Andric return Modified; 1055*fe6060f1SDimitry Andric } 1056*fe6060f1SDimitry Andric 1057*fe6060f1SDimitry Andric /// createMVETPAndVPTOptimisationsPass 1058*fe6060f1SDimitry Andric FunctionPass *llvm::createMVETPAndVPTOptimisationsPass() { 1059*fe6060f1SDimitry Andric return new MVETPAndVPTOptimisations(); 1060*fe6060f1SDimitry Andric } 1061