xref: /freebsd/contrib/llvm-project/llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp (revision fe6060f10f634930ff71b7c50291ddc610da2475)
1*fe6060f1SDimitry Andric //===-- MVETPAndVPTOptimisationsPass.cpp ----------------------------------===//
2*fe6060f1SDimitry Andric //
3*fe6060f1SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*fe6060f1SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5*fe6060f1SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*fe6060f1SDimitry Andric //
7*fe6060f1SDimitry Andric //===----------------------------------------------------------------------===//
8*fe6060f1SDimitry Andric //
9*fe6060f1SDimitry Andric /// \file This pass does a few optimisations related to Tail predicated loops
10*fe6060f1SDimitry Andric /// and MVE VPT blocks before register allocation is performed. For VPT blocks
11*fe6060f1SDimitry Andric /// the goal is to maximize the sizes of the blocks that will be created by the
12*fe6060f1SDimitry Andric /// MVE VPT Block Insertion pass (which runs after register allocation). For
13*fe6060f1SDimitry Andric /// tail predicated loops we transform the loop into something that will
14*fe6060f1SDimitry Andric /// hopefully make the backend ARMLowOverheadLoops pass's job easier.
15*fe6060f1SDimitry Andric ///
16*fe6060f1SDimitry Andric //===----------------------------------------------------------------------===//
17*fe6060f1SDimitry Andric 
18*fe6060f1SDimitry Andric #include "ARM.h"
19*fe6060f1SDimitry Andric #include "ARMSubtarget.h"
20*fe6060f1SDimitry Andric #include "MCTargetDesc/ARMBaseInfo.h"
21*fe6060f1SDimitry Andric #include "MVETailPredUtils.h"
22*fe6060f1SDimitry Andric #include "Thumb2InstrInfo.h"
23*fe6060f1SDimitry Andric #include "llvm/ADT/SmallVector.h"
24*fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineBasicBlock.h"
25*fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineDominators.h"
26*fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineFunction.h"
27*fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
28*fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineInstr.h"
29*fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineLoopInfo.h"
30*fe6060f1SDimitry Andric #include "llvm/InitializePasses.h"
31*fe6060f1SDimitry Andric #include "llvm/Support/Debug.h"
32*fe6060f1SDimitry Andric #include <cassert>
33*fe6060f1SDimitry Andric 
34*fe6060f1SDimitry Andric using namespace llvm;
35*fe6060f1SDimitry Andric 
36*fe6060f1SDimitry Andric #define DEBUG_TYPE "arm-mve-vpt-opts"
37*fe6060f1SDimitry Andric 
38*fe6060f1SDimitry Andric static cl::opt<bool>
39*fe6060f1SDimitry Andric MergeEndDec("arm-enable-merge-loopenddec", cl::Hidden,
40*fe6060f1SDimitry Andric     cl::desc("Enable merging Loop End and Dec instructions."),
41*fe6060f1SDimitry Andric     cl::init(true));
42*fe6060f1SDimitry Andric 
43*fe6060f1SDimitry Andric namespace {
44*fe6060f1SDimitry Andric class MVETPAndVPTOptimisations : public MachineFunctionPass {
45*fe6060f1SDimitry Andric public:
46*fe6060f1SDimitry Andric   static char ID;
47*fe6060f1SDimitry Andric   const Thumb2InstrInfo *TII;
48*fe6060f1SDimitry Andric   MachineRegisterInfo *MRI;
49*fe6060f1SDimitry Andric 
50*fe6060f1SDimitry Andric   MVETPAndVPTOptimisations() : MachineFunctionPass(ID) {}
51*fe6060f1SDimitry Andric 
52*fe6060f1SDimitry Andric   bool runOnMachineFunction(MachineFunction &Fn) override;
53*fe6060f1SDimitry Andric 
54*fe6060f1SDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override {
55*fe6060f1SDimitry Andric     AU.addRequired<MachineLoopInfo>();
56*fe6060f1SDimitry Andric     AU.addPreserved<MachineLoopInfo>();
57*fe6060f1SDimitry Andric     AU.addRequired<MachineDominatorTree>();
58*fe6060f1SDimitry Andric     AU.addPreserved<MachineDominatorTree>();
59*fe6060f1SDimitry Andric     MachineFunctionPass::getAnalysisUsage(AU);
60*fe6060f1SDimitry Andric   }
61*fe6060f1SDimitry Andric 
62*fe6060f1SDimitry Andric   StringRef getPassName() const override {
63*fe6060f1SDimitry Andric     return "ARM MVE TailPred and VPT Optimisation Pass";
64*fe6060f1SDimitry Andric   }
65*fe6060f1SDimitry Andric 
66*fe6060f1SDimitry Andric private:
67*fe6060f1SDimitry Andric   bool LowerWhileLoopStart(MachineLoop *ML);
68*fe6060f1SDimitry Andric   bool MergeLoopEnd(MachineLoop *ML);
69*fe6060f1SDimitry Andric   bool ConvertTailPredLoop(MachineLoop *ML, MachineDominatorTree *DT);
70*fe6060f1SDimitry Andric   MachineInstr &ReplaceRegisterUseWithVPNOT(MachineBasicBlock &MBB,
71*fe6060f1SDimitry Andric                                             MachineInstr &Instr,
72*fe6060f1SDimitry Andric                                             MachineOperand &User,
73*fe6060f1SDimitry Andric                                             Register Target);
74*fe6060f1SDimitry Andric   bool ReduceOldVCCRValueUses(MachineBasicBlock &MBB);
75*fe6060f1SDimitry Andric   bool ReplaceVCMPsByVPNOTs(MachineBasicBlock &MBB);
76*fe6060f1SDimitry Andric   bool ReplaceConstByVPNOTs(MachineBasicBlock &MBB, MachineDominatorTree *DT);
77*fe6060f1SDimitry Andric   bool ConvertVPSEL(MachineBasicBlock &MBB);
78*fe6060f1SDimitry Andric   bool HintDoLoopStartReg(MachineBasicBlock &MBB);
79*fe6060f1SDimitry Andric   MachineInstr *CheckForLRUseInPredecessors(MachineBasicBlock *PreHeader,
80*fe6060f1SDimitry Andric                                             MachineInstr *LoopStart);
81*fe6060f1SDimitry Andric };
82*fe6060f1SDimitry Andric 
83*fe6060f1SDimitry Andric char MVETPAndVPTOptimisations::ID = 0;
84*fe6060f1SDimitry Andric 
85*fe6060f1SDimitry Andric } // end anonymous namespace
86*fe6060f1SDimitry Andric 
87*fe6060f1SDimitry Andric INITIALIZE_PASS_BEGIN(MVETPAndVPTOptimisations, DEBUG_TYPE,
88*fe6060f1SDimitry Andric                       "ARM MVE TailPred and VPT Optimisations pass", false,
89*fe6060f1SDimitry Andric                       false)
90*fe6060f1SDimitry Andric INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
91*fe6060f1SDimitry Andric INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
92*fe6060f1SDimitry Andric INITIALIZE_PASS_END(MVETPAndVPTOptimisations, DEBUG_TYPE,
93*fe6060f1SDimitry Andric                     "ARM MVE TailPred and VPT Optimisations pass", false, false)
94*fe6060f1SDimitry Andric 
95*fe6060f1SDimitry Andric static MachineInstr *LookThroughCOPY(MachineInstr *MI,
96*fe6060f1SDimitry Andric                                      MachineRegisterInfo *MRI) {
97*fe6060f1SDimitry Andric   while (MI && MI->getOpcode() == TargetOpcode::COPY &&
98*fe6060f1SDimitry Andric          MI->getOperand(1).getReg().isVirtual())
99*fe6060f1SDimitry Andric     MI = MRI->getVRegDef(MI->getOperand(1).getReg());
100*fe6060f1SDimitry Andric   return MI;
101*fe6060f1SDimitry Andric }
102*fe6060f1SDimitry Andric 
103*fe6060f1SDimitry Andric // Given a loop ML, this attempts to find the t2LoopEnd, t2LoopDec and
104*fe6060f1SDimitry Andric // corresponding PHI that make up a low overhead loop. Only handles 'do' loops
105*fe6060f1SDimitry Andric // at the moment, returning a t2DoLoopStart in LoopStart.
106*fe6060f1SDimitry Andric static bool findLoopComponents(MachineLoop *ML, MachineRegisterInfo *MRI,
107*fe6060f1SDimitry Andric                                MachineInstr *&LoopStart, MachineInstr *&LoopPhi,
108*fe6060f1SDimitry Andric                                MachineInstr *&LoopDec, MachineInstr *&LoopEnd) {
109*fe6060f1SDimitry Andric   MachineBasicBlock *Header = ML->getHeader();
110*fe6060f1SDimitry Andric   MachineBasicBlock *Latch = ML->getLoopLatch();
111*fe6060f1SDimitry Andric   if (!Header || !Latch) {
112*fe6060f1SDimitry Andric     LLVM_DEBUG(dbgs() << "  no Loop Latch or Header\n");
113*fe6060f1SDimitry Andric     return false;
114*fe6060f1SDimitry Andric   }
115*fe6060f1SDimitry Andric 
116*fe6060f1SDimitry Andric   // Find the loop end from the terminators.
117*fe6060f1SDimitry Andric   LoopEnd = nullptr;
118*fe6060f1SDimitry Andric   for (auto &T : Latch->terminators()) {
119*fe6060f1SDimitry Andric     if (T.getOpcode() == ARM::t2LoopEnd && T.getOperand(1).getMBB() == Header) {
120*fe6060f1SDimitry Andric       LoopEnd = &T;
121*fe6060f1SDimitry Andric       break;
122*fe6060f1SDimitry Andric     }
123*fe6060f1SDimitry Andric     if (T.getOpcode() == ARM::t2LoopEndDec &&
124*fe6060f1SDimitry Andric         T.getOperand(2).getMBB() == Header) {
125*fe6060f1SDimitry Andric       LoopEnd = &T;
126*fe6060f1SDimitry Andric       break;
127*fe6060f1SDimitry Andric     }
128*fe6060f1SDimitry Andric   }
129*fe6060f1SDimitry Andric   if (!LoopEnd) {
130*fe6060f1SDimitry Andric     LLVM_DEBUG(dbgs() << "  no LoopEnd\n");
131*fe6060f1SDimitry Andric     return false;
132*fe6060f1SDimitry Andric   }
133*fe6060f1SDimitry Andric   LLVM_DEBUG(dbgs() << "  found loop end: " << *LoopEnd);
134*fe6060f1SDimitry Andric 
135*fe6060f1SDimitry Andric   // Find the dec from the use of the end. There may be copies between
136*fe6060f1SDimitry Andric   // instructions. We expect the loop to loop like:
137*fe6060f1SDimitry Andric   //   $vs = t2DoLoopStart ...
138*fe6060f1SDimitry Andric   // loop:
139*fe6060f1SDimitry Andric   //   $vp = phi [ $vs ], [ $vd ]
140*fe6060f1SDimitry Andric   //   ...
141*fe6060f1SDimitry Andric   //   $vd = t2LoopDec $vp
142*fe6060f1SDimitry Andric   //   ...
143*fe6060f1SDimitry Andric   //   t2LoopEnd $vd, loop
144*fe6060f1SDimitry Andric   if (LoopEnd->getOpcode() == ARM::t2LoopEndDec)
145*fe6060f1SDimitry Andric     LoopDec = LoopEnd;
146*fe6060f1SDimitry Andric   else {
147*fe6060f1SDimitry Andric     LoopDec =
148*fe6060f1SDimitry Andric         LookThroughCOPY(MRI->getVRegDef(LoopEnd->getOperand(0).getReg()), MRI);
149*fe6060f1SDimitry Andric     if (!LoopDec || LoopDec->getOpcode() != ARM::t2LoopDec) {
150*fe6060f1SDimitry Andric       LLVM_DEBUG(dbgs() << "  didn't find LoopDec where we expected!\n");
151*fe6060f1SDimitry Andric       return false;
152*fe6060f1SDimitry Andric     }
153*fe6060f1SDimitry Andric   }
154*fe6060f1SDimitry Andric   LLVM_DEBUG(dbgs() << "  found loop dec: " << *LoopDec);
155*fe6060f1SDimitry Andric 
156*fe6060f1SDimitry Andric   LoopPhi =
157*fe6060f1SDimitry Andric       LookThroughCOPY(MRI->getVRegDef(LoopDec->getOperand(1).getReg()), MRI);
158*fe6060f1SDimitry Andric   if (!LoopPhi || LoopPhi->getOpcode() != TargetOpcode::PHI ||
159*fe6060f1SDimitry Andric       LoopPhi->getNumOperands() != 5 ||
160*fe6060f1SDimitry Andric       (LoopPhi->getOperand(2).getMBB() != Latch &&
161*fe6060f1SDimitry Andric        LoopPhi->getOperand(4).getMBB() != Latch)) {
162*fe6060f1SDimitry Andric     LLVM_DEBUG(dbgs() << "  didn't find PHI where we expected!\n");
163*fe6060f1SDimitry Andric     return false;
164*fe6060f1SDimitry Andric   }
165*fe6060f1SDimitry Andric   LLVM_DEBUG(dbgs() << "  found loop phi: " << *LoopPhi);
166*fe6060f1SDimitry Andric 
167*fe6060f1SDimitry Andric   Register StartReg = LoopPhi->getOperand(2).getMBB() == Latch
168*fe6060f1SDimitry Andric                           ? LoopPhi->getOperand(3).getReg()
169*fe6060f1SDimitry Andric                           : LoopPhi->getOperand(1).getReg();
170*fe6060f1SDimitry Andric   LoopStart = LookThroughCOPY(MRI->getVRegDef(StartReg), MRI);
171*fe6060f1SDimitry Andric   if (!LoopStart || (LoopStart->getOpcode() != ARM::t2DoLoopStart &&
172*fe6060f1SDimitry Andric                      LoopStart->getOpcode() != ARM::t2WhileLoopSetup &&
173*fe6060f1SDimitry Andric                      LoopStart->getOpcode() != ARM::t2WhileLoopStartLR)) {
174*fe6060f1SDimitry Andric     LLVM_DEBUG(dbgs() << "  didn't find Start where we expected!\n");
175*fe6060f1SDimitry Andric     return false;
176*fe6060f1SDimitry Andric   }
177*fe6060f1SDimitry Andric   LLVM_DEBUG(dbgs() << "  found loop start: " << *LoopStart);
178*fe6060f1SDimitry Andric 
179*fe6060f1SDimitry Andric   return true;
180*fe6060f1SDimitry Andric }
181*fe6060f1SDimitry Andric 
182*fe6060f1SDimitry Andric static void RevertWhileLoopSetup(MachineInstr *MI, const TargetInstrInfo *TII) {
183*fe6060f1SDimitry Andric   MachineBasicBlock *MBB = MI->getParent();
184*fe6060f1SDimitry Andric   assert(MI->getOpcode() == ARM::t2WhileLoopSetup &&
185*fe6060f1SDimitry Andric          "Only expected a t2WhileLoopSetup in RevertWhileLoopStart!");
186*fe6060f1SDimitry Andric 
187*fe6060f1SDimitry Andric   // Subs
188*fe6060f1SDimitry Andric   MachineInstrBuilder MIB =
189*fe6060f1SDimitry Andric       BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2SUBri));
190*fe6060f1SDimitry Andric   MIB.add(MI->getOperand(0));
191*fe6060f1SDimitry Andric   MIB.add(MI->getOperand(1));
192*fe6060f1SDimitry Andric   MIB.addImm(0);
193*fe6060f1SDimitry Andric   MIB.addImm(ARMCC::AL);
194*fe6060f1SDimitry Andric   MIB.addReg(ARM::NoRegister);
195*fe6060f1SDimitry Andric   MIB.addReg(ARM::CPSR, RegState::Define);
196*fe6060f1SDimitry Andric 
197*fe6060f1SDimitry Andric   // Attempt to find a t2WhileLoopStart and revert to a t2Bcc.
198*fe6060f1SDimitry Andric   for (MachineInstr &I : MBB->terminators()) {
199*fe6060f1SDimitry Andric     if (I.getOpcode() == ARM::t2WhileLoopStart) {
200*fe6060f1SDimitry Andric       MachineInstrBuilder MIB =
201*fe6060f1SDimitry Andric           BuildMI(*MBB, &I, I.getDebugLoc(), TII->get(ARM::t2Bcc));
202*fe6060f1SDimitry Andric       MIB.add(MI->getOperand(1)); // branch target
203*fe6060f1SDimitry Andric       MIB.addImm(ARMCC::EQ);
204*fe6060f1SDimitry Andric       MIB.addReg(ARM::CPSR);
205*fe6060f1SDimitry Andric       I.eraseFromParent();
206*fe6060f1SDimitry Andric       break;
207*fe6060f1SDimitry Andric     }
208*fe6060f1SDimitry Andric   }
209*fe6060f1SDimitry Andric 
210*fe6060f1SDimitry Andric   MI->eraseFromParent();
211*fe6060f1SDimitry Andric }
212*fe6060f1SDimitry Andric 
213*fe6060f1SDimitry Andric // The Hardware Loop insertion and ISel Lowering produce the pseudos for the
214*fe6060f1SDimitry Andric // start of a while loop:
215*fe6060f1SDimitry Andric //   %a:gprlr = t2WhileLoopSetup %Cnt
216*fe6060f1SDimitry Andric //   t2WhileLoopStart %a, %BB
217*fe6060f1SDimitry Andric // We want to convert those to a single instruction which, like t2LoopEndDec and
218*fe6060f1SDimitry Andric // t2DoLoopStartTP is both a terminator and produces a value:
219*fe6060f1SDimitry Andric //   %a:grplr: t2WhileLoopStartLR %Cnt, %BB
220*fe6060f1SDimitry Andric //
221*fe6060f1SDimitry Andric // Otherwise if we can't, we revert the loop. t2WhileLoopSetup and
222*fe6060f1SDimitry Andric // t2WhileLoopStart are not valid past regalloc.
223*fe6060f1SDimitry Andric bool MVETPAndVPTOptimisations::LowerWhileLoopStart(MachineLoop *ML) {
224*fe6060f1SDimitry Andric   LLVM_DEBUG(dbgs() << "LowerWhileLoopStart on loop "
225*fe6060f1SDimitry Andric                     << ML->getHeader()->getName() << "\n");
226*fe6060f1SDimitry Andric 
227*fe6060f1SDimitry Andric   MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec;
228*fe6060f1SDimitry Andric   if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd))
229*fe6060f1SDimitry Andric     return false;
230*fe6060f1SDimitry Andric 
231*fe6060f1SDimitry Andric   if (LoopStart->getOpcode() != ARM::t2WhileLoopSetup)
232*fe6060f1SDimitry Andric     return false;
233*fe6060f1SDimitry Andric 
234*fe6060f1SDimitry Andric   Register LR = LoopStart->getOperand(0).getReg();
235*fe6060f1SDimitry Andric   auto WLSIt = find_if(MRI->use_nodbg_instructions(LR), [](auto &MI) {
236*fe6060f1SDimitry Andric     return MI.getOpcode() == ARM::t2WhileLoopStart;
237*fe6060f1SDimitry Andric   });
238*fe6060f1SDimitry Andric   if (!MergeEndDec || WLSIt == MRI->use_instr_nodbg_end()) {
239*fe6060f1SDimitry Andric     RevertWhileLoopSetup(LoopStart, TII);
240*fe6060f1SDimitry Andric     RevertLoopDec(LoopStart, TII);
241*fe6060f1SDimitry Andric     RevertLoopEnd(LoopStart, TII);
242*fe6060f1SDimitry Andric     return true;
243*fe6060f1SDimitry Andric   }
244*fe6060f1SDimitry Andric 
245*fe6060f1SDimitry Andric   MachineInstrBuilder MI =
246*fe6060f1SDimitry Andric       BuildMI(*WLSIt->getParent(), *WLSIt, WLSIt->getDebugLoc(),
247*fe6060f1SDimitry Andric               TII->get(ARM::t2WhileLoopStartLR), LR)
248*fe6060f1SDimitry Andric           .add(LoopStart->getOperand(1))
249*fe6060f1SDimitry Andric           .add(WLSIt->getOperand(1));
250*fe6060f1SDimitry Andric   (void)MI;
251*fe6060f1SDimitry Andric   LLVM_DEBUG(dbgs() << "Lowered WhileLoopStart into: " << *MI.getInstr());
252*fe6060f1SDimitry Andric 
253*fe6060f1SDimitry Andric   WLSIt->eraseFromParent();
254*fe6060f1SDimitry Andric   LoopStart->eraseFromParent();
255*fe6060f1SDimitry Andric   return true;
256*fe6060f1SDimitry Andric }
257*fe6060f1SDimitry Andric 
258*fe6060f1SDimitry Andric // Return true if this instruction is invalid in a low overhead loop, usually
259*fe6060f1SDimitry Andric // because it clobbers LR.
260*fe6060f1SDimitry Andric static bool IsInvalidTPInstruction(MachineInstr &MI) {
261*fe6060f1SDimitry Andric   return MI.isCall() || isLoopStart(MI);
262*fe6060f1SDimitry Andric }
263*fe6060f1SDimitry Andric 
264*fe6060f1SDimitry Andric // Starting from PreHeader, search for invalid instructions back until the
265*fe6060f1SDimitry Andric // LoopStart block is reached. If invalid instructions are found, the loop start
266*fe6060f1SDimitry Andric // is reverted from a WhileLoopStart to a DoLoopStart on the same loop. Will
267*fe6060f1SDimitry Andric // return the new DLS LoopStart if updated.
268*fe6060f1SDimitry Andric MachineInstr *MVETPAndVPTOptimisations::CheckForLRUseInPredecessors(
269*fe6060f1SDimitry Andric     MachineBasicBlock *PreHeader, MachineInstr *LoopStart) {
270*fe6060f1SDimitry Andric   SmallVector<MachineBasicBlock *> Worklist;
271*fe6060f1SDimitry Andric   SmallPtrSet<MachineBasicBlock *, 4> Visited;
272*fe6060f1SDimitry Andric   Worklist.push_back(PreHeader);
273*fe6060f1SDimitry Andric   Visited.insert(LoopStart->getParent());
274*fe6060f1SDimitry Andric 
275*fe6060f1SDimitry Andric   while (!Worklist.empty()) {
276*fe6060f1SDimitry Andric     MachineBasicBlock *MBB = Worklist.pop_back_val();
277*fe6060f1SDimitry Andric     if (Visited.count(MBB))
278*fe6060f1SDimitry Andric       continue;
279*fe6060f1SDimitry Andric 
280*fe6060f1SDimitry Andric     for (MachineInstr &MI : *MBB) {
281*fe6060f1SDimitry Andric       if (!IsInvalidTPInstruction(MI))
282*fe6060f1SDimitry Andric         continue;
283*fe6060f1SDimitry Andric 
284*fe6060f1SDimitry Andric       LLVM_DEBUG(dbgs() << "Found LR use in predecessors, reverting: " << MI);
285*fe6060f1SDimitry Andric 
286*fe6060f1SDimitry Andric       // Create a t2DoLoopStart at the end of the preheader.
287*fe6060f1SDimitry Andric       MachineInstrBuilder MIB =
288*fe6060f1SDimitry Andric           BuildMI(*PreHeader, PreHeader->getFirstTerminator(),
289*fe6060f1SDimitry Andric                   LoopStart->getDebugLoc(), TII->get(ARM::t2DoLoopStart));
290*fe6060f1SDimitry Andric       MIB.add(LoopStart->getOperand(0));
291*fe6060f1SDimitry Andric       MIB.add(LoopStart->getOperand(1));
292*fe6060f1SDimitry Andric 
293*fe6060f1SDimitry Andric       // Make sure to remove the kill flags, to prevent them from being invalid.
294*fe6060f1SDimitry Andric       LoopStart->getOperand(1).setIsKill(false);
295*fe6060f1SDimitry Andric 
296*fe6060f1SDimitry Andric       // Revert the t2WhileLoopStartLR to a CMP and Br.
297*fe6060f1SDimitry Andric       RevertWhileLoopStartLR(LoopStart, TII, ARM::t2Bcc, true);
298*fe6060f1SDimitry Andric       return MIB;
299*fe6060f1SDimitry Andric     }
300*fe6060f1SDimitry Andric 
301*fe6060f1SDimitry Andric     Visited.insert(MBB);
302*fe6060f1SDimitry Andric     for (auto *Pred : MBB->predecessors())
303*fe6060f1SDimitry Andric       Worklist.push_back(Pred);
304*fe6060f1SDimitry Andric   }
305*fe6060f1SDimitry Andric   return LoopStart;
306*fe6060f1SDimitry Andric }
307*fe6060f1SDimitry Andric 
308*fe6060f1SDimitry Andric // This function converts loops with t2LoopEnd and t2LoopEnd instructions into
309*fe6060f1SDimitry Andric // a single t2LoopEndDec instruction. To do that it needs to make sure that LR
310*fe6060f1SDimitry Andric // will be valid to be used for the low overhead loop, which means nothing else
311*fe6060f1SDimitry Andric // is using LR (especially calls) and there are no superfluous copies in the
312*fe6060f1SDimitry Andric // loop. The t2LoopEndDec is a branching terminator that produces a value (the
313*fe6060f1SDimitry Andric // decrement) around the loop edge, which means we need to be careful that they
314*fe6060f1SDimitry Andric // will be valid to allocate without any spilling.
315*fe6060f1SDimitry Andric bool MVETPAndVPTOptimisations::MergeLoopEnd(MachineLoop *ML) {
316*fe6060f1SDimitry Andric   if (!MergeEndDec)
317*fe6060f1SDimitry Andric     return false;
318*fe6060f1SDimitry Andric 
319*fe6060f1SDimitry Andric   LLVM_DEBUG(dbgs() << "MergeLoopEnd on loop " << ML->getHeader()->getName()
320*fe6060f1SDimitry Andric                     << "\n");
321*fe6060f1SDimitry Andric 
322*fe6060f1SDimitry Andric   MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec;
323*fe6060f1SDimitry Andric   if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd))
324*fe6060f1SDimitry Andric     return false;
325*fe6060f1SDimitry Andric 
326*fe6060f1SDimitry Andric   // Check if there is an illegal instruction (a call) in the low overhead loop
327*fe6060f1SDimitry Andric   // and if so revert it now before we get any further. While loops also need to
328*fe6060f1SDimitry Andric   // check the preheaders, but can be reverted to a DLS loop if needed.
329*fe6060f1SDimitry Andric   auto *PreHeader = ML->getLoopPreheader();
330*fe6060f1SDimitry Andric   if (LoopStart->getOpcode() == ARM::t2WhileLoopStartLR && PreHeader)
331*fe6060f1SDimitry Andric     LoopStart = CheckForLRUseInPredecessors(PreHeader, LoopStart);
332*fe6060f1SDimitry Andric 
333*fe6060f1SDimitry Andric   for (MachineBasicBlock *MBB : ML->blocks()) {
334*fe6060f1SDimitry Andric     for (MachineInstr &MI : *MBB) {
335*fe6060f1SDimitry Andric       if (IsInvalidTPInstruction(MI)) {
336*fe6060f1SDimitry Andric         LLVM_DEBUG(dbgs() << "Found LR use in loop, reverting: " << MI);
337*fe6060f1SDimitry Andric         if (LoopStart->getOpcode() == ARM::t2DoLoopStart)
338*fe6060f1SDimitry Andric           RevertDoLoopStart(LoopStart, TII);
339*fe6060f1SDimitry Andric         else
340*fe6060f1SDimitry Andric           RevertWhileLoopStartLR(LoopStart, TII);
341*fe6060f1SDimitry Andric         RevertLoopDec(LoopDec, TII);
342*fe6060f1SDimitry Andric         RevertLoopEnd(LoopEnd, TII);
343*fe6060f1SDimitry Andric         return true;
344*fe6060f1SDimitry Andric       }
345*fe6060f1SDimitry Andric     }
346*fe6060f1SDimitry Andric   }
347*fe6060f1SDimitry Andric 
348*fe6060f1SDimitry Andric   // Remove any copies from the loop, to ensure the phi that remains is both
349*fe6060f1SDimitry Andric   // simpler and contains no extra uses. Because t2LoopEndDec is a terminator
350*fe6060f1SDimitry Andric   // that cannot spill, we need to be careful what remains in the loop.
351*fe6060f1SDimitry Andric   Register PhiReg = LoopPhi->getOperand(0).getReg();
352*fe6060f1SDimitry Andric   Register DecReg = LoopDec->getOperand(0).getReg();
353*fe6060f1SDimitry Andric   Register StartReg = LoopStart->getOperand(0).getReg();
354*fe6060f1SDimitry Andric   // Ensure the uses are expected, and collect any copies we want to remove.
355*fe6060f1SDimitry Andric   SmallVector<MachineInstr *, 4> Copies;
356*fe6060f1SDimitry Andric   auto CheckUsers = [&Copies](Register BaseReg,
357*fe6060f1SDimitry Andric                               ArrayRef<MachineInstr *> ExpectedUsers,
358*fe6060f1SDimitry Andric                               MachineRegisterInfo *MRI) {
359*fe6060f1SDimitry Andric     SmallVector<Register, 4> Worklist;
360*fe6060f1SDimitry Andric     Worklist.push_back(BaseReg);
361*fe6060f1SDimitry Andric     while (!Worklist.empty()) {
362*fe6060f1SDimitry Andric       Register Reg = Worklist.pop_back_val();
363*fe6060f1SDimitry Andric       for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
364*fe6060f1SDimitry Andric         if (count(ExpectedUsers, &MI))
365*fe6060f1SDimitry Andric           continue;
366*fe6060f1SDimitry Andric         if (MI.getOpcode() != TargetOpcode::COPY ||
367*fe6060f1SDimitry Andric             !MI.getOperand(0).getReg().isVirtual()) {
368*fe6060f1SDimitry Andric           LLVM_DEBUG(dbgs() << "Extra users of register found: " << MI);
369*fe6060f1SDimitry Andric           return false;
370*fe6060f1SDimitry Andric         }
371*fe6060f1SDimitry Andric         Worklist.push_back(MI.getOperand(0).getReg());
372*fe6060f1SDimitry Andric         Copies.push_back(&MI);
373*fe6060f1SDimitry Andric       }
374*fe6060f1SDimitry Andric     }
375*fe6060f1SDimitry Andric     return true;
376*fe6060f1SDimitry Andric   };
377*fe6060f1SDimitry Andric   if (!CheckUsers(PhiReg, {LoopDec}, MRI) ||
378*fe6060f1SDimitry Andric       !CheckUsers(DecReg, {LoopPhi, LoopEnd}, MRI) ||
379*fe6060f1SDimitry Andric       !CheckUsers(StartReg, {LoopPhi}, MRI)) {
380*fe6060f1SDimitry Andric     // Don't leave a t2WhileLoopStartLR without the LoopDecEnd.
381*fe6060f1SDimitry Andric     if (LoopStart->getOpcode() == ARM::t2WhileLoopStartLR) {
382*fe6060f1SDimitry Andric       RevertWhileLoopStartLR(LoopStart, TII);
383*fe6060f1SDimitry Andric       RevertLoopDec(LoopDec, TII);
384*fe6060f1SDimitry Andric       RevertLoopEnd(LoopEnd, TII);
385*fe6060f1SDimitry Andric       return true;
386*fe6060f1SDimitry Andric     }
387*fe6060f1SDimitry Andric     return false;
388*fe6060f1SDimitry Andric   }
389*fe6060f1SDimitry Andric 
390*fe6060f1SDimitry Andric   MRI->constrainRegClass(StartReg, &ARM::GPRlrRegClass);
391*fe6060f1SDimitry Andric   MRI->constrainRegClass(PhiReg, &ARM::GPRlrRegClass);
392*fe6060f1SDimitry Andric   MRI->constrainRegClass(DecReg, &ARM::GPRlrRegClass);
393*fe6060f1SDimitry Andric 
394*fe6060f1SDimitry Andric   if (LoopPhi->getOperand(2).getMBB() == ML->getLoopLatch()) {
395*fe6060f1SDimitry Andric     LoopPhi->getOperand(3).setReg(StartReg);
396*fe6060f1SDimitry Andric     LoopPhi->getOperand(1).setReg(DecReg);
397*fe6060f1SDimitry Andric   } else {
398*fe6060f1SDimitry Andric     LoopPhi->getOperand(1).setReg(StartReg);
399*fe6060f1SDimitry Andric     LoopPhi->getOperand(3).setReg(DecReg);
400*fe6060f1SDimitry Andric   }
401*fe6060f1SDimitry Andric 
402*fe6060f1SDimitry Andric   // Replace the loop dec and loop end as a single instruction.
403*fe6060f1SDimitry Andric   MachineInstrBuilder MI =
404*fe6060f1SDimitry Andric       BuildMI(*LoopEnd->getParent(), *LoopEnd, LoopEnd->getDebugLoc(),
405*fe6060f1SDimitry Andric               TII->get(ARM::t2LoopEndDec), DecReg)
406*fe6060f1SDimitry Andric           .addReg(PhiReg)
407*fe6060f1SDimitry Andric           .add(LoopEnd->getOperand(1));
408*fe6060f1SDimitry Andric   (void)MI;
409*fe6060f1SDimitry Andric   LLVM_DEBUG(dbgs() << "Merged LoopDec and End into: " << *MI.getInstr());
410*fe6060f1SDimitry Andric 
411*fe6060f1SDimitry Andric   LoopDec->eraseFromParent();
412*fe6060f1SDimitry Andric   LoopEnd->eraseFromParent();
413*fe6060f1SDimitry Andric   for (auto *MI : Copies)
414*fe6060f1SDimitry Andric     MI->eraseFromParent();
415*fe6060f1SDimitry Andric   return true;
416*fe6060f1SDimitry Andric }
417*fe6060f1SDimitry Andric 
418*fe6060f1SDimitry Andric // Convert t2DoLoopStart to t2DoLoopStartTP if the loop contains VCTP
419*fe6060f1SDimitry Andric // instructions. This keeps the VCTP count reg operand on the t2DoLoopStartTP
420*fe6060f1SDimitry Andric // instruction, making the backend ARMLowOverheadLoops passes job of finding the
421*fe6060f1SDimitry Andric // VCTP operand much simpler.
422*fe6060f1SDimitry Andric bool MVETPAndVPTOptimisations::ConvertTailPredLoop(MachineLoop *ML,
423*fe6060f1SDimitry Andric                                               MachineDominatorTree *DT) {
424*fe6060f1SDimitry Andric   LLVM_DEBUG(dbgs() << "ConvertTailPredLoop on loop "
425*fe6060f1SDimitry Andric                     << ML->getHeader()->getName() << "\n");
426*fe6060f1SDimitry Andric 
427*fe6060f1SDimitry Andric   // Find some loop components including the LoopEnd/Dec/Start, and any VCTP's
428*fe6060f1SDimitry Andric   // in the loop.
429*fe6060f1SDimitry Andric   MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec;
430*fe6060f1SDimitry Andric   if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd))
431*fe6060f1SDimitry Andric     return false;
432*fe6060f1SDimitry Andric   if (LoopDec != LoopEnd || (LoopStart->getOpcode() != ARM::t2DoLoopStart &&
433*fe6060f1SDimitry Andric                              LoopStart->getOpcode() != ARM::t2WhileLoopStartLR))
434*fe6060f1SDimitry Andric     return false;
435*fe6060f1SDimitry Andric 
436*fe6060f1SDimitry Andric   SmallVector<MachineInstr *, 4> VCTPs;
437*fe6060f1SDimitry Andric   for (MachineBasicBlock *BB : ML->blocks())
438*fe6060f1SDimitry Andric     for (MachineInstr &MI : *BB)
439*fe6060f1SDimitry Andric       if (isVCTP(&MI))
440*fe6060f1SDimitry Andric         VCTPs.push_back(&MI);
441*fe6060f1SDimitry Andric 
442*fe6060f1SDimitry Andric   if (VCTPs.empty()) {
443*fe6060f1SDimitry Andric     LLVM_DEBUG(dbgs() << "  no VCTPs\n");
444*fe6060f1SDimitry Andric     return false;
445*fe6060f1SDimitry Andric   }
446*fe6060f1SDimitry Andric 
447*fe6060f1SDimitry Andric   // Check all VCTPs are the same.
448*fe6060f1SDimitry Andric   MachineInstr *FirstVCTP = *VCTPs.begin();
449*fe6060f1SDimitry Andric   for (MachineInstr *VCTP : VCTPs) {
450*fe6060f1SDimitry Andric     LLVM_DEBUG(dbgs() << "  with VCTP " << *VCTP);
451*fe6060f1SDimitry Andric     if (VCTP->getOpcode() != FirstVCTP->getOpcode() ||
452*fe6060f1SDimitry Andric         VCTP->getOperand(0).getReg() != FirstVCTP->getOperand(0).getReg()) {
453*fe6060f1SDimitry Andric       LLVM_DEBUG(dbgs() << "  VCTP's are not identical\n");
454*fe6060f1SDimitry Andric       return false;
455*fe6060f1SDimitry Andric     }
456*fe6060f1SDimitry Andric   }
457*fe6060f1SDimitry Andric 
458*fe6060f1SDimitry Andric   // Check for the register being used can be setup before the loop. We expect
459*fe6060f1SDimitry Andric   // this to be:
460*fe6060f1SDimitry Andric   //   $vx = ...
461*fe6060f1SDimitry Andric   // loop:
462*fe6060f1SDimitry Andric   //   $vp = PHI [ $vx ], [ $vd ]
463*fe6060f1SDimitry Andric   //   ..
464*fe6060f1SDimitry Andric   //   $vpr = VCTP $vp
465*fe6060f1SDimitry Andric   //   ..
466*fe6060f1SDimitry Andric   //   $vd = t2SUBri $vp, #n
467*fe6060f1SDimitry Andric   //   ..
468*fe6060f1SDimitry Andric   Register CountReg = FirstVCTP->getOperand(1).getReg();
469*fe6060f1SDimitry Andric   if (!CountReg.isVirtual()) {
470*fe6060f1SDimitry Andric     LLVM_DEBUG(dbgs() << "  cannot determine VCTP PHI\n");
471*fe6060f1SDimitry Andric     return false;
472*fe6060f1SDimitry Andric   }
473*fe6060f1SDimitry Andric   MachineInstr *Phi = LookThroughCOPY(MRI->getVRegDef(CountReg), MRI);
474*fe6060f1SDimitry Andric   if (!Phi || Phi->getOpcode() != TargetOpcode::PHI ||
475*fe6060f1SDimitry Andric       Phi->getNumOperands() != 5 ||
476*fe6060f1SDimitry Andric       (Phi->getOperand(2).getMBB() != ML->getLoopLatch() &&
477*fe6060f1SDimitry Andric        Phi->getOperand(4).getMBB() != ML->getLoopLatch())) {
478*fe6060f1SDimitry Andric     LLVM_DEBUG(dbgs() << "  cannot determine VCTP Count\n");
479*fe6060f1SDimitry Andric     return false;
480*fe6060f1SDimitry Andric   }
481*fe6060f1SDimitry Andric   CountReg = Phi->getOperand(2).getMBB() == ML->getLoopLatch()
482*fe6060f1SDimitry Andric                  ? Phi->getOperand(3).getReg()
483*fe6060f1SDimitry Andric                  : Phi->getOperand(1).getReg();
484*fe6060f1SDimitry Andric 
485*fe6060f1SDimitry Andric   // Replace the t2DoLoopStart with the t2DoLoopStartTP, move it to the end of
486*fe6060f1SDimitry Andric   // the preheader and add the new CountReg to it. We attempt to place it late
487*fe6060f1SDimitry Andric   // in the preheader, but may need to move that earlier based on uses.
488*fe6060f1SDimitry Andric   MachineBasicBlock *MBB = LoopStart->getParent();
489*fe6060f1SDimitry Andric   MachineBasicBlock::iterator InsertPt = MBB->getFirstTerminator();
490*fe6060f1SDimitry Andric   for (MachineInstr &Use :
491*fe6060f1SDimitry Andric        MRI->use_instructions(LoopStart->getOperand(0).getReg()))
492*fe6060f1SDimitry Andric     if ((InsertPt != MBB->end() && !DT->dominates(&*InsertPt, &Use)) ||
493*fe6060f1SDimitry Andric         !DT->dominates(ML->getHeader(), Use.getParent())) {
494*fe6060f1SDimitry Andric       LLVM_DEBUG(dbgs() << "  InsertPt could not be a terminator!\n");
495*fe6060f1SDimitry Andric       return false;
496*fe6060f1SDimitry Andric     }
497*fe6060f1SDimitry Andric 
498*fe6060f1SDimitry Andric   unsigned NewOpc = LoopStart->getOpcode() == ARM::t2DoLoopStart
499*fe6060f1SDimitry Andric                         ? ARM::t2DoLoopStartTP
500*fe6060f1SDimitry Andric                         : ARM::t2WhileLoopStartTP;
501*fe6060f1SDimitry Andric   MachineInstrBuilder MI =
502*fe6060f1SDimitry Andric       BuildMI(*MBB, InsertPt, LoopStart->getDebugLoc(), TII->get(NewOpc))
503*fe6060f1SDimitry Andric           .add(LoopStart->getOperand(0))
504*fe6060f1SDimitry Andric           .add(LoopStart->getOperand(1))
505*fe6060f1SDimitry Andric           .addReg(CountReg);
506*fe6060f1SDimitry Andric   if (NewOpc == ARM::t2WhileLoopStartTP)
507*fe6060f1SDimitry Andric     MI.add(LoopStart->getOperand(2));
508*fe6060f1SDimitry Andric   LLVM_DEBUG(dbgs() << "Replacing " << *LoopStart << "  with "
509*fe6060f1SDimitry Andric                     << *MI.getInstr());
510*fe6060f1SDimitry Andric   MRI->constrainRegClass(CountReg, &ARM::rGPRRegClass);
511*fe6060f1SDimitry Andric   LoopStart->eraseFromParent();
512*fe6060f1SDimitry Andric 
513*fe6060f1SDimitry Andric   return true;
514*fe6060f1SDimitry Andric }
515*fe6060f1SDimitry Andric 
516*fe6060f1SDimitry Andric // Returns true if Opcode is any VCMP Opcode.
517*fe6060f1SDimitry Andric static bool IsVCMP(unsigned Opcode) { return VCMPOpcodeToVPT(Opcode) != 0; }
518*fe6060f1SDimitry Andric 
519*fe6060f1SDimitry Andric // Returns true if a VCMP with this Opcode can have its operands swapped.
520*fe6060f1SDimitry Andric // There is 2 kind of VCMP that can't have their operands swapped: Float VCMPs,
521*fe6060f1SDimitry Andric // and VCMPr instructions (since the r is always on the right).
522*fe6060f1SDimitry Andric static bool CanHaveSwappedOperands(unsigned Opcode) {
523*fe6060f1SDimitry Andric   switch (Opcode) {
524*fe6060f1SDimitry Andric   default:
525*fe6060f1SDimitry Andric     return true;
526*fe6060f1SDimitry Andric   case ARM::MVE_VCMPf32:
527*fe6060f1SDimitry Andric   case ARM::MVE_VCMPf16:
528*fe6060f1SDimitry Andric   case ARM::MVE_VCMPf32r:
529*fe6060f1SDimitry Andric   case ARM::MVE_VCMPf16r:
530*fe6060f1SDimitry Andric   case ARM::MVE_VCMPi8r:
531*fe6060f1SDimitry Andric   case ARM::MVE_VCMPi16r:
532*fe6060f1SDimitry Andric   case ARM::MVE_VCMPi32r:
533*fe6060f1SDimitry Andric   case ARM::MVE_VCMPu8r:
534*fe6060f1SDimitry Andric   case ARM::MVE_VCMPu16r:
535*fe6060f1SDimitry Andric   case ARM::MVE_VCMPu32r:
536*fe6060f1SDimitry Andric   case ARM::MVE_VCMPs8r:
537*fe6060f1SDimitry Andric   case ARM::MVE_VCMPs16r:
538*fe6060f1SDimitry Andric   case ARM::MVE_VCMPs32r:
539*fe6060f1SDimitry Andric     return false;
540*fe6060f1SDimitry Andric   }
541*fe6060f1SDimitry Andric }
542*fe6060f1SDimitry Andric 
543*fe6060f1SDimitry Andric // Returns the CondCode of a VCMP Instruction.
544*fe6060f1SDimitry Andric static ARMCC::CondCodes GetCondCode(MachineInstr &Instr) {
545*fe6060f1SDimitry Andric   assert(IsVCMP(Instr.getOpcode()) && "Inst must be a VCMP");
546*fe6060f1SDimitry Andric   return ARMCC::CondCodes(Instr.getOperand(3).getImm());
547*fe6060f1SDimitry Andric }
548*fe6060f1SDimitry Andric 
549*fe6060f1SDimitry Andric // Returns true if Cond is equivalent to a VPNOT instruction on the result of
550*fe6060f1SDimitry Andric // Prev. Cond and Prev must be VCMPs.
551*fe6060f1SDimitry Andric static bool IsVPNOTEquivalent(MachineInstr &Cond, MachineInstr &Prev) {
552*fe6060f1SDimitry Andric   assert(IsVCMP(Cond.getOpcode()) && IsVCMP(Prev.getOpcode()));
553*fe6060f1SDimitry Andric 
554*fe6060f1SDimitry Andric   // Opcodes must match.
555*fe6060f1SDimitry Andric   if (Cond.getOpcode() != Prev.getOpcode())
556*fe6060f1SDimitry Andric     return false;
557*fe6060f1SDimitry Andric 
558*fe6060f1SDimitry Andric   MachineOperand &CondOP1 = Cond.getOperand(1), &CondOP2 = Cond.getOperand(2);
559*fe6060f1SDimitry Andric   MachineOperand &PrevOP1 = Prev.getOperand(1), &PrevOP2 = Prev.getOperand(2);
560*fe6060f1SDimitry Andric 
561*fe6060f1SDimitry Andric   // If the VCMP has the opposite condition with the same operands, we can
562*fe6060f1SDimitry Andric   // replace it with a VPNOT
563*fe6060f1SDimitry Andric   ARMCC::CondCodes ExpectedCode = GetCondCode(Cond);
564*fe6060f1SDimitry Andric   ExpectedCode = ARMCC::getOppositeCondition(ExpectedCode);
565*fe6060f1SDimitry Andric   if (ExpectedCode == GetCondCode(Prev))
566*fe6060f1SDimitry Andric     if (CondOP1.isIdenticalTo(PrevOP1) && CondOP2.isIdenticalTo(PrevOP2))
567*fe6060f1SDimitry Andric       return true;
568*fe6060f1SDimitry Andric   // Check again with operands swapped if possible
569*fe6060f1SDimitry Andric   if (!CanHaveSwappedOperands(Cond.getOpcode()))
570*fe6060f1SDimitry Andric     return false;
571*fe6060f1SDimitry Andric   ExpectedCode = ARMCC::getSwappedCondition(ExpectedCode);
572*fe6060f1SDimitry Andric   return ExpectedCode == GetCondCode(Prev) && CondOP1.isIdenticalTo(PrevOP2) &&
573*fe6060f1SDimitry Andric          CondOP2.isIdenticalTo(PrevOP1);
574*fe6060f1SDimitry Andric }
575*fe6060f1SDimitry Andric 
576*fe6060f1SDimitry Andric // Returns true if Instr writes to VCCR.
577*fe6060f1SDimitry Andric static bool IsWritingToVCCR(MachineInstr &Instr) {
578*fe6060f1SDimitry Andric   if (Instr.getNumOperands() == 0)
579*fe6060f1SDimitry Andric     return false;
580*fe6060f1SDimitry Andric   MachineOperand &Dst = Instr.getOperand(0);
581*fe6060f1SDimitry Andric   if (!Dst.isReg())
582*fe6060f1SDimitry Andric     return false;
583*fe6060f1SDimitry Andric   Register DstReg = Dst.getReg();
584*fe6060f1SDimitry Andric   if (!DstReg.isVirtual())
585*fe6060f1SDimitry Andric     return false;
586*fe6060f1SDimitry Andric   MachineRegisterInfo &RegInfo = Instr.getMF()->getRegInfo();
587*fe6060f1SDimitry Andric   const TargetRegisterClass *RegClass = RegInfo.getRegClassOrNull(DstReg);
588*fe6060f1SDimitry Andric   return RegClass && (RegClass->getID() == ARM::VCCRRegClassID);
589*fe6060f1SDimitry Andric }
590*fe6060f1SDimitry Andric 
591*fe6060f1SDimitry Andric // Transforms
592*fe6060f1SDimitry Andric //    <Instr that uses %A ('User' Operand)>
593*fe6060f1SDimitry Andric // Into
594*fe6060f1SDimitry Andric //    %K = VPNOT %Target
595*fe6060f1SDimitry Andric //    <Instr that uses %K ('User' Operand)>
596*fe6060f1SDimitry Andric // And returns the newly inserted VPNOT.
597*fe6060f1SDimitry Andric // This optimization is done in the hopes of preventing spills/reloads of VPR by
598*fe6060f1SDimitry Andric // reducing the number of VCCR values with overlapping lifetimes.
599*fe6060f1SDimitry Andric MachineInstr &MVETPAndVPTOptimisations::ReplaceRegisterUseWithVPNOT(
600*fe6060f1SDimitry Andric     MachineBasicBlock &MBB, MachineInstr &Instr, MachineOperand &User,
601*fe6060f1SDimitry Andric     Register Target) {
602*fe6060f1SDimitry Andric   Register NewResult = MRI->createVirtualRegister(MRI->getRegClass(Target));
603*fe6060f1SDimitry Andric 
604*fe6060f1SDimitry Andric   MachineInstrBuilder MIBuilder =
605*fe6060f1SDimitry Andric       BuildMI(MBB, &Instr, Instr.getDebugLoc(), TII->get(ARM::MVE_VPNOT))
606*fe6060f1SDimitry Andric           .addDef(NewResult)
607*fe6060f1SDimitry Andric           .addReg(Target);
608*fe6060f1SDimitry Andric   addUnpredicatedMveVpredNOp(MIBuilder);
609*fe6060f1SDimitry Andric 
610*fe6060f1SDimitry Andric   // Make the user use NewResult instead, and clear its kill flag.
611*fe6060f1SDimitry Andric   User.setReg(NewResult);
612*fe6060f1SDimitry Andric   User.setIsKill(false);
613*fe6060f1SDimitry Andric 
614*fe6060f1SDimitry Andric   LLVM_DEBUG(dbgs() << "  Inserting VPNOT (for spill prevention): ";
615*fe6060f1SDimitry Andric              MIBuilder.getInstr()->dump());
616*fe6060f1SDimitry Andric 
617*fe6060f1SDimitry Andric   return *MIBuilder.getInstr();
618*fe6060f1SDimitry Andric }
619*fe6060f1SDimitry Andric 
620*fe6060f1SDimitry Andric // Moves a VPNOT before its first user if an instruction that uses Reg is found
621*fe6060f1SDimitry Andric // in-between the VPNOT and its user.
622*fe6060f1SDimitry Andric // Returns true if there is at least one user of the VPNOT in the block.
623*fe6060f1SDimitry Andric static bool MoveVPNOTBeforeFirstUser(MachineBasicBlock &MBB,
624*fe6060f1SDimitry Andric                                      MachineBasicBlock::iterator Iter,
625*fe6060f1SDimitry Andric                                      Register Reg) {
626*fe6060f1SDimitry Andric   assert(Iter->getOpcode() == ARM::MVE_VPNOT && "Not a VPNOT!");
627*fe6060f1SDimitry Andric   assert(getVPTInstrPredicate(*Iter) == ARMVCC::None &&
628*fe6060f1SDimitry Andric          "The VPNOT cannot be predicated");
629*fe6060f1SDimitry Andric 
630*fe6060f1SDimitry Andric   MachineInstr &VPNOT = *Iter;
631*fe6060f1SDimitry Andric   Register VPNOTResult = VPNOT.getOperand(0).getReg();
632*fe6060f1SDimitry Andric   Register VPNOTOperand = VPNOT.getOperand(1).getReg();
633*fe6060f1SDimitry Andric 
634*fe6060f1SDimitry Andric   // Whether the VPNOT will need to be moved, and whether we found a user of the
635*fe6060f1SDimitry Andric   // VPNOT.
636*fe6060f1SDimitry Andric   bool MustMove = false, HasUser = false;
637*fe6060f1SDimitry Andric   MachineOperand *VPNOTOperandKiller = nullptr;
638*fe6060f1SDimitry Andric   for (; Iter != MBB.end(); ++Iter) {
639*fe6060f1SDimitry Andric     if (MachineOperand *MO =
640*fe6060f1SDimitry Andric             Iter->findRegisterUseOperand(VPNOTOperand, /*isKill*/ true)) {
641*fe6060f1SDimitry Andric       // If we find the operand that kills the VPNOTOperand's result, save it.
642*fe6060f1SDimitry Andric       VPNOTOperandKiller = MO;
643*fe6060f1SDimitry Andric     }
644*fe6060f1SDimitry Andric 
645*fe6060f1SDimitry Andric     if (Iter->findRegisterUseOperandIdx(Reg) != -1) {
646*fe6060f1SDimitry Andric       MustMove = true;
647*fe6060f1SDimitry Andric       continue;
648*fe6060f1SDimitry Andric     }
649*fe6060f1SDimitry Andric 
650*fe6060f1SDimitry Andric     if (Iter->findRegisterUseOperandIdx(VPNOTResult) == -1)
651*fe6060f1SDimitry Andric       continue;
652*fe6060f1SDimitry Andric 
653*fe6060f1SDimitry Andric     HasUser = true;
654*fe6060f1SDimitry Andric     if (!MustMove)
655*fe6060f1SDimitry Andric       break;
656*fe6060f1SDimitry Andric 
657*fe6060f1SDimitry Andric     // Move the VPNOT right before Iter
658*fe6060f1SDimitry Andric     LLVM_DEBUG(dbgs() << "Moving: "; VPNOT.dump(); dbgs() << "  Before: ";
659*fe6060f1SDimitry Andric                Iter->dump());
660*fe6060f1SDimitry Andric     MBB.splice(Iter, &MBB, VPNOT.getIterator());
661*fe6060f1SDimitry Andric     // If we move the instr, and its operand was killed earlier, remove the kill
662*fe6060f1SDimitry Andric     // flag.
663*fe6060f1SDimitry Andric     if (VPNOTOperandKiller)
664*fe6060f1SDimitry Andric       VPNOTOperandKiller->setIsKill(false);
665*fe6060f1SDimitry Andric 
666*fe6060f1SDimitry Andric     break;
667*fe6060f1SDimitry Andric   }
668*fe6060f1SDimitry Andric   return HasUser;
669*fe6060f1SDimitry Andric }
670*fe6060f1SDimitry Andric 
671*fe6060f1SDimitry Andric // This optimisation attempts to reduce the number of overlapping lifetimes of
672*fe6060f1SDimitry Andric // VCCR values by replacing uses of old VCCR values with VPNOTs. For example,
673*fe6060f1SDimitry Andric // this replaces
674*fe6060f1SDimitry Andric //    %A:vccr = (something)
675*fe6060f1SDimitry Andric //    %B:vccr = VPNOT %A
676*fe6060f1SDimitry Andric //    %Foo = (some op that uses %B)
677*fe6060f1SDimitry Andric //    %Bar = (some op that uses %A)
678*fe6060f1SDimitry Andric // With
679*fe6060f1SDimitry Andric //    %A:vccr = (something)
680*fe6060f1SDimitry Andric //    %B:vccr = VPNOT %A
681*fe6060f1SDimitry Andric //    %Foo = (some op that uses %B)
682*fe6060f1SDimitry Andric //    %TMP2:vccr = VPNOT %B
683*fe6060f1SDimitry Andric //    %Bar = (some op that uses %A)
684*fe6060f1SDimitry Andric bool MVETPAndVPTOptimisations::ReduceOldVCCRValueUses(MachineBasicBlock &MBB) {
685*fe6060f1SDimitry Andric   MachineBasicBlock::iterator Iter = MBB.begin(), End = MBB.end();
686*fe6060f1SDimitry Andric   SmallVector<MachineInstr *, 4> DeadInstructions;
687*fe6060f1SDimitry Andric   bool Modified = false;
688*fe6060f1SDimitry Andric 
689*fe6060f1SDimitry Andric   while (Iter != End) {
690*fe6060f1SDimitry Andric     Register VCCRValue, OppositeVCCRValue;
691*fe6060f1SDimitry Andric     // The first loop looks for 2 unpredicated instructions:
692*fe6060f1SDimitry Andric     //    %A:vccr = (instr)     ; A is stored in VCCRValue
693*fe6060f1SDimitry Andric     //    %B:vccr = VPNOT %A    ; B is stored in OppositeVCCRValue
694*fe6060f1SDimitry Andric     for (; Iter != End; ++Iter) {
695*fe6060f1SDimitry Andric       // We're only interested in unpredicated instructions that write to VCCR.
696*fe6060f1SDimitry Andric       if (!IsWritingToVCCR(*Iter) ||
697*fe6060f1SDimitry Andric           getVPTInstrPredicate(*Iter) != ARMVCC::None)
698*fe6060f1SDimitry Andric         continue;
699*fe6060f1SDimitry Andric       Register Dst = Iter->getOperand(0).getReg();
700*fe6060f1SDimitry Andric 
701*fe6060f1SDimitry Andric       // If we already have a VCCRValue, and this is a VPNOT on VCCRValue, we've
702*fe6060f1SDimitry Andric       // found what we were looking for.
703*fe6060f1SDimitry Andric       if (VCCRValue && Iter->getOpcode() == ARM::MVE_VPNOT &&
704*fe6060f1SDimitry Andric           Iter->findRegisterUseOperandIdx(VCCRValue) != -1) {
705*fe6060f1SDimitry Andric         // Move the VPNOT closer to its first user if needed, and ignore if it
706*fe6060f1SDimitry Andric         // has no users.
707*fe6060f1SDimitry Andric         if (!MoveVPNOTBeforeFirstUser(MBB, Iter, VCCRValue))
708*fe6060f1SDimitry Andric           continue;
709*fe6060f1SDimitry Andric 
710*fe6060f1SDimitry Andric         OppositeVCCRValue = Dst;
711*fe6060f1SDimitry Andric         ++Iter;
712*fe6060f1SDimitry Andric         break;
713*fe6060f1SDimitry Andric       }
714*fe6060f1SDimitry Andric 
715*fe6060f1SDimitry Andric       // Else, just set VCCRValue.
716*fe6060f1SDimitry Andric       VCCRValue = Dst;
717*fe6060f1SDimitry Andric     }
718*fe6060f1SDimitry Andric 
719*fe6060f1SDimitry Andric     // If the first inner loop didn't find anything, stop here.
720*fe6060f1SDimitry Andric     if (Iter == End)
721*fe6060f1SDimitry Andric       break;
722*fe6060f1SDimitry Andric 
723*fe6060f1SDimitry Andric     assert(VCCRValue && OppositeVCCRValue &&
724*fe6060f1SDimitry Andric            "VCCRValue and OppositeVCCRValue shouldn't be empty if the loop "
725*fe6060f1SDimitry Andric            "stopped before the end of the block!");
726*fe6060f1SDimitry Andric     assert(VCCRValue != OppositeVCCRValue &&
727*fe6060f1SDimitry Andric            "VCCRValue should not be equal to OppositeVCCRValue!");
728*fe6060f1SDimitry Andric 
729*fe6060f1SDimitry Andric     // LastVPNOTResult always contains the same value as OppositeVCCRValue.
730*fe6060f1SDimitry Andric     Register LastVPNOTResult = OppositeVCCRValue;
731*fe6060f1SDimitry Andric 
732*fe6060f1SDimitry Andric     // This second loop tries to optimize the remaining instructions.
733*fe6060f1SDimitry Andric     for (; Iter != End; ++Iter) {
734*fe6060f1SDimitry Andric       bool IsInteresting = false;
735*fe6060f1SDimitry Andric 
736*fe6060f1SDimitry Andric       if (MachineOperand *MO = Iter->findRegisterUseOperand(VCCRValue)) {
737*fe6060f1SDimitry Andric         IsInteresting = true;
738*fe6060f1SDimitry Andric 
739*fe6060f1SDimitry Andric         // - If the instruction is a VPNOT, it can be removed, and we can just
740*fe6060f1SDimitry Andric         //   replace its uses with LastVPNOTResult.
741*fe6060f1SDimitry Andric         // - Else, insert a new VPNOT on LastVPNOTResult to recompute VCCRValue.
742*fe6060f1SDimitry Andric         if (Iter->getOpcode() == ARM::MVE_VPNOT) {
743*fe6060f1SDimitry Andric           Register Result = Iter->getOperand(0).getReg();
744*fe6060f1SDimitry Andric 
745*fe6060f1SDimitry Andric           MRI->replaceRegWith(Result, LastVPNOTResult);
746*fe6060f1SDimitry Andric           DeadInstructions.push_back(&*Iter);
747*fe6060f1SDimitry Andric           Modified = true;
748*fe6060f1SDimitry Andric 
749*fe6060f1SDimitry Andric           LLVM_DEBUG(dbgs()
750*fe6060f1SDimitry Andric                      << "Replacing all uses of '" << printReg(Result)
751*fe6060f1SDimitry Andric                      << "' with '" << printReg(LastVPNOTResult) << "'\n");
752*fe6060f1SDimitry Andric         } else {
753*fe6060f1SDimitry Andric           MachineInstr &VPNOT =
754*fe6060f1SDimitry Andric               ReplaceRegisterUseWithVPNOT(MBB, *Iter, *MO, LastVPNOTResult);
755*fe6060f1SDimitry Andric           Modified = true;
756*fe6060f1SDimitry Andric 
757*fe6060f1SDimitry Andric           LastVPNOTResult = VPNOT.getOperand(0).getReg();
758*fe6060f1SDimitry Andric           std::swap(VCCRValue, OppositeVCCRValue);
759*fe6060f1SDimitry Andric 
760*fe6060f1SDimitry Andric           LLVM_DEBUG(dbgs() << "Replacing use of '" << printReg(VCCRValue)
761*fe6060f1SDimitry Andric                             << "' with '" << printReg(LastVPNOTResult)
762*fe6060f1SDimitry Andric                             << "' in instr: " << *Iter);
763*fe6060f1SDimitry Andric         }
764*fe6060f1SDimitry Andric       } else {
765*fe6060f1SDimitry Andric         // If the instr uses OppositeVCCRValue, make it use LastVPNOTResult
766*fe6060f1SDimitry Andric         // instead as they contain the same value.
767*fe6060f1SDimitry Andric         if (MachineOperand *MO =
768*fe6060f1SDimitry Andric                 Iter->findRegisterUseOperand(OppositeVCCRValue)) {
769*fe6060f1SDimitry Andric           IsInteresting = true;
770*fe6060f1SDimitry Andric 
771*fe6060f1SDimitry Andric           // This is pointless if LastVPNOTResult == OppositeVCCRValue.
772*fe6060f1SDimitry Andric           if (LastVPNOTResult != OppositeVCCRValue) {
773*fe6060f1SDimitry Andric             LLVM_DEBUG(dbgs() << "Replacing usage of '"
774*fe6060f1SDimitry Andric                               << printReg(OppositeVCCRValue) << "' with '"
775*fe6060f1SDimitry Andric                               << printReg(LastVPNOTResult) << " for instr: ";
776*fe6060f1SDimitry Andric                        Iter->dump());
777*fe6060f1SDimitry Andric             MO->setReg(LastVPNOTResult);
778*fe6060f1SDimitry Andric             Modified = true;
779*fe6060f1SDimitry Andric           }
780*fe6060f1SDimitry Andric 
781*fe6060f1SDimitry Andric           MO->setIsKill(false);
782*fe6060f1SDimitry Andric         }
783*fe6060f1SDimitry Andric 
784*fe6060f1SDimitry Andric         // If this is an unpredicated VPNOT on
785*fe6060f1SDimitry Andric         // LastVPNOTResult/OppositeVCCRValue, we can act like we inserted it.
786*fe6060f1SDimitry Andric         if (Iter->getOpcode() == ARM::MVE_VPNOT &&
787*fe6060f1SDimitry Andric             getVPTInstrPredicate(*Iter) == ARMVCC::None) {
788*fe6060f1SDimitry Andric           Register VPNOTOperand = Iter->getOperand(1).getReg();
789*fe6060f1SDimitry Andric           if (VPNOTOperand == LastVPNOTResult ||
790*fe6060f1SDimitry Andric               VPNOTOperand == OppositeVCCRValue) {
791*fe6060f1SDimitry Andric             IsInteresting = true;
792*fe6060f1SDimitry Andric 
793*fe6060f1SDimitry Andric             std::swap(VCCRValue, OppositeVCCRValue);
794*fe6060f1SDimitry Andric             LastVPNOTResult = Iter->getOperand(0).getReg();
795*fe6060f1SDimitry Andric           }
796*fe6060f1SDimitry Andric         }
797*fe6060f1SDimitry Andric       }
798*fe6060f1SDimitry Andric 
799*fe6060f1SDimitry Andric       // If this instruction was not interesting, and it writes to VCCR, stop.
800*fe6060f1SDimitry Andric       if (!IsInteresting && IsWritingToVCCR(*Iter))
801*fe6060f1SDimitry Andric         break;
802*fe6060f1SDimitry Andric     }
803*fe6060f1SDimitry Andric   }
804*fe6060f1SDimitry Andric 
805*fe6060f1SDimitry Andric   for (MachineInstr *DeadInstruction : DeadInstructions)
806*fe6060f1SDimitry Andric     DeadInstruction->eraseFromParent();
807*fe6060f1SDimitry Andric 
808*fe6060f1SDimitry Andric   return Modified;
809*fe6060f1SDimitry Andric }
810*fe6060f1SDimitry Andric 
811*fe6060f1SDimitry Andric // This optimisation replaces VCMPs with VPNOTs when they are equivalent.
812*fe6060f1SDimitry Andric bool MVETPAndVPTOptimisations::ReplaceVCMPsByVPNOTs(MachineBasicBlock &MBB) {
813*fe6060f1SDimitry Andric   SmallVector<MachineInstr *, 4> DeadInstructions;
814*fe6060f1SDimitry Andric 
815*fe6060f1SDimitry Andric   // The last VCMP that we have seen and that couldn't be replaced.
816*fe6060f1SDimitry Andric   // This is reset when an instruction that writes to VCCR/VPR is found, or when
817*fe6060f1SDimitry Andric   // a VCMP is replaced with a VPNOT.
818*fe6060f1SDimitry Andric   // We'll only replace VCMPs with VPNOTs when this is not null, and when the
819*fe6060f1SDimitry Andric   // current VCMP is the opposite of PrevVCMP.
820*fe6060f1SDimitry Andric   MachineInstr *PrevVCMP = nullptr;
821*fe6060f1SDimitry Andric   // If we find an instruction that kills the result of PrevVCMP, we save the
822*fe6060f1SDimitry Andric   // operand here to remove the kill flag in case we need to use PrevVCMP's
823*fe6060f1SDimitry Andric   // result.
824*fe6060f1SDimitry Andric   MachineOperand *PrevVCMPResultKiller = nullptr;
825*fe6060f1SDimitry Andric 
826*fe6060f1SDimitry Andric   for (MachineInstr &Instr : MBB.instrs()) {
827*fe6060f1SDimitry Andric     if (PrevVCMP) {
828*fe6060f1SDimitry Andric       if (MachineOperand *MO = Instr.findRegisterUseOperand(
829*fe6060f1SDimitry Andric               PrevVCMP->getOperand(0).getReg(), /*isKill*/ true)) {
830*fe6060f1SDimitry Andric         // If we come accross the instr that kills PrevVCMP's result, record it
831*fe6060f1SDimitry Andric         // so we can remove the kill flag later if we need to.
832*fe6060f1SDimitry Andric         PrevVCMPResultKiller = MO;
833*fe6060f1SDimitry Andric       }
834*fe6060f1SDimitry Andric     }
835*fe6060f1SDimitry Andric 
836*fe6060f1SDimitry Andric     // Ignore predicated instructions.
837*fe6060f1SDimitry Andric     if (getVPTInstrPredicate(Instr) != ARMVCC::None)
838*fe6060f1SDimitry Andric       continue;
839*fe6060f1SDimitry Andric 
840*fe6060f1SDimitry Andric     // Only look at VCMPs
841*fe6060f1SDimitry Andric     if (!IsVCMP(Instr.getOpcode())) {
842*fe6060f1SDimitry Andric       // If the instruction writes to VCCR, forget the previous VCMP.
843*fe6060f1SDimitry Andric       if (IsWritingToVCCR(Instr))
844*fe6060f1SDimitry Andric         PrevVCMP = nullptr;
845*fe6060f1SDimitry Andric       continue;
846*fe6060f1SDimitry Andric     }
847*fe6060f1SDimitry Andric 
848*fe6060f1SDimitry Andric     if (!PrevVCMP || !IsVPNOTEquivalent(Instr, *PrevVCMP)) {
849*fe6060f1SDimitry Andric       PrevVCMP = &Instr;
850*fe6060f1SDimitry Andric       continue;
851*fe6060f1SDimitry Andric     }
852*fe6060f1SDimitry Andric 
853*fe6060f1SDimitry Andric     // The register containing the result of the VCMP that we're going to
854*fe6060f1SDimitry Andric     // replace.
855*fe6060f1SDimitry Andric     Register PrevVCMPResultReg = PrevVCMP->getOperand(0).getReg();
856*fe6060f1SDimitry Andric 
857*fe6060f1SDimitry Andric     // Build a VPNOT to replace the VCMP, reusing its operands.
858*fe6060f1SDimitry Andric     MachineInstrBuilder MIBuilder =
859*fe6060f1SDimitry Andric         BuildMI(MBB, &Instr, Instr.getDebugLoc(), TII->get(ARM::MVE_VPNOT))
860*fe6060f1SDimitry Andric             .add(Instr.getOperand(0))
861*fe6060f1SDimitry Andric             .addReg(PrevVCMPResultReg);
862*fe6060f1SDimitry Andric     addUnpredicatedMveVpredNOp(MIBuilder);
863*fe6060f1SDimitry Andric     LLVM_DEBUG(dbgs() << "Inserting VPNOT (to replace VCMP): ";
864*fe6060f1SDimitry Andric                MIBuilder.getInstr()->dump(); dbgs() << "  Removed VCMP: ";
865*fe6060f1SDimitry Andric                Instr.dump());
866*fe6060f1SDimitry Andric 
867*fe6060f1SDimitry Andric     // If we found an instruction that uses, and kills PrevVCMP's result,
868*fe6060f1SDimitry Andric     // remove the kill flag.
869*fe6060f1SDimitry Andric     if (PrevVCMPResultKiller)
870*fe6060f1SDimitry Andric       PrevVCMPResultKiller->setIsKill(false);
871*fe6060f1SDimitry Andric 
872*fe6060f1SDimitry Andric     // Finally, mark the old VCMP for removal and reset
873*fe6060f1SDimitry Andric     // PrevVCMP/PrevVCMPResultKiller.
874*fe6060f1SDimitry Andric     DeadInstructions.push_back(&Instr);
875*fe6060f1SDimitry Andric     PrevVCMP = nullptr;
876*fe6060f1SDimitry Andric     PrevVCMPResultKiller = nullptr;
877*fe6060f1SDimitry Andric   }
878*fe6060f1SDimitry Andric 
879*fe6060f1SDimitry Andric   for (MachineInstr *DeadInstruction : DeadInstructions)
880*fe6060f1SDimitry Andric     DeadInstruction->eraseFromParent();
881*fe6060f1SDimitry Andric 
882*fe6060f1SDimitry Andric   return !DeadInstructions.empty();
883*fe6060f1SDimitry Andric }
884*fe6060f1SDimitry Andric 
885*fe6060f1SDimitry Andric bool MVETPAndVPTOptimisations::ReplaceConstByVPNOTs(MachineBasicBlock &MBB,
886*fe6060f1SDimitry Andric                                                MachineDominatorTree *DT) {
887*fe6060f1SDimitry Andric   // Scan through the block, looking for instructions that use constants moves
888*fe6060f1SDimitry Andric   // into VPR that are the negative of one another. These are expected to be
889*fe6060f1SDimitry Andric   // COPY's to VCCRRegClass, from a t2MOVi or t2MOVi16. The last seen constant
890*fe6060f1SDimitry Andric   // mask is kept it or and VPNOT's of it are added or reused as we scan through
891*fe6060f1SDimitry Andric   // the function.
892*fe6060f1SDimitry Andric   unsigned LastVPTImm = 0;
893*fe6060f1SDimitry Andric   Register LastVPTReg = 0;
894*fe6060f1SDimitry Andric   SmallSet<MachineInstr *, 4> DeadInstructions;
895*fe6060f1SDimitry Andric 
896*fe6060f1SDimitry Andric   for (MachineInstr &Instr : MBB.instrs()) {
897*fe6060f1SDimitry Andric     // Look for predicated MVE instructions.
898*fe6060f1SDimitry Andric     int PIdx = llvm::findFirstVPTPredOperandIdx(Instr);
899*fe6060f1SDimitry Andric     if (PIdx == -1)
900*fe6060f1SDimitry Andric       continue;
901*fe6060f1SDimitry Andric     Register VPR = Instr.getOperand(PIdx + 1).getReg();
902*fe6060f1SDimitry Andric     if (!VPR.isVirtual())
903*fe6060f1SDimitry Andric       continue;
904*fe6060f1SDimitry Andric 
905*fe6060f1SDimitry Andric     // From that we are looking for an instruction like %11:vccr = COPY %9:rgpr.
906*fe6060f1SDimitry Andric     MachineInstr *Copy = MRI->getVRegDef(VPR);
907*fe6060f1SDimitry Andric     if (!Copy || Copy->getOpcode() != TargetOpcode::COPY ||
908*fe6060f1SDimitry Andric         !Copy->getOperand(1).getReg().isVirtual() ||
909*fe6060f1SDimitry Andric         MRI->getRegClass(Copy->getOperand(1).getReg()) == &ARM::VCCRRegClass) {
910*fe6060f1SDimitry Andric       LastVPTReg = 0;
911*fe6060f1SDimitry Andric       continue;
912*fe6060f1SDimitry Andric     }
913*fe6060f1SDimitry Andric     Register GPR = Copy->getOperand(1).getReg();
914*fe6060f1SDimitry Andric 
915*fe6060f1SDimitry Andric     // Find the Immediate used by the copy.
916*fe6060f1SDimitry Andric     auto getImm = [&](Register GPR) -> unsigned {
917*fe6060f1SDimitry Andric       MachineInstr *Def = MRI->getVRegDef(GPR);
918*fe6060f1SDimitry Andric       if (Def && (Def->getOpcode() == ARM::t2MOVi ||
919*fe6060f1SDimitry Andric                   Def->getOpcode() == ARM::t2MOVi16))
920*fe6060f1SDimitry Andric         return Def->getOperand(1).getImm();
921*fe6060f1SDimitry Andric       return -1U;
922*fe6060f1SDimitry Andric     };
923*fe6060f1SDimitry Andric     unsigned Imm = getImm(GPR);
924*fe6060f1SDimitry Andric     if (Imm == -1U) {
925*fe6060f1SDimitry Andric       LastVPTReg = 0;
926*fe6060f1SDimitry Andric       continue;
927*fe6060f1SDimitry Andric     }
928*fe6060f1SDimitry Andric 
929*fe6060f1SDimitry Andric     unsigned NotImm = ~Imm & 0xffff;
930*fe6060f1SDimitry Andric     if (LastVPTReg != 0 && LastVPTReg != VPR && LastVPTImm == Imm) {
931*fe6060f1SDimitry Andric       Instr.getOperand(PIdx + 1).setReg(LastVPTReg);
932*fe6060f1SDimitry Andric       if (MRI->use_empty(VPR)) {
933*fe6060f1SDimitry Andric         DeadInstructions.insert(Copy);
934*fe6060f1SDimitry Andric         if (MRI->hasOneUse(GPR))
935*fe6060f1SDimitry Andric           DeadInstructions.insert(MRI->getVRegDef(GPR));
936*fe6060f1SDimitry Andric       }
937*fe6060f1SDimitry Andric       LLVM_DEBUG(dbgs() << "Reusing predicate: in  " << Instr);
938*fe6060f1SDimitry Andric     } else if (LastVPTReg != 0 && LastVPTImm == NotImm) {
939*fe6060f1SDimitry Andric       // We have found the not of a previous constant. Create a VPNot of the
940*fe6060f1SDimitry Andric       // earlier predicate reg and use it instead of the copy.
941*fe6060f1SDimitry Andric       Register NewVPR = MRI->createVirtualRegister(&ARM::VCCRRegClass);
942*fe6060f1SDimitry Andric       auto VPNot = BuildMI(MBB, &Instr, Instr.getDebugLoc(),
943*fe6060f1SDimitry Andric                            TII->get(ARM::MVE_VPNOT), NewVPR)
944*fe6060f1SDimitry Andric                        .addReg(LastVPTReg);
945*fe6060f1SDimitry Andric       addUnpredicatedMveVpredNOp(VPNot);
946*fe6060f1SDimitry Andric 
947*fe6060f1SDimitry Andric       // Use the new register and check if the def is now dead.
948*fe6060f1SDimitry Andric       Instr.getOperand(PIdx + 1).setReg(NewVPR);
949*fe6060f1SDimitry Andric       if (MRI->use_empty(VPR)) {
950*fe6060f1SDimitry Andric         DeadInstructions.insert(Copy);
951*fe6060f1SDimitry Andric         if (MRI->hasOneUse(GPR))
952*fe6060f1SDimitry Andric           DeadInstructions.insert(MRI->getVRegDef(GPR));
953*fe6060f1SDimitry Andric       }
954*fe6060f1SDimitry Andric       LLVM_DEBUG(dbgs() << "Adding VPNot: " << *VPNot << "  to replace use at "
955*fe6060f1SDimitry Andric                         << Instr);
956*fe6060f1SDimitry Andric       VPR = NewVPR;
957*fe6060f1SDimitry Andric     }
958*fe6060f1SDimitry Andric 
959*fe6060f1SDimitry Andric     LastVPTImm = Imm;
960*fe6060f1SDimitry Andric     LastVPTReg = VPR;
961*fe6060f1SDimitry Andric   }
962*fe6060f1SDimitry Andric 
963*fe6060f1SDimitry Andric   for (MachineInstr *DI : DeadInstructions)
964*fe6060f1SDimitry Andric     DI->eraseFromParent();
965*fe6060f1SDimitry Andric 
966*fe6060f1SDimitry Andric   return !DeadInstructions.empty();
967*fe6060f1SDimitry Andric }
968*fe6060f1SDimitry Andric 
969*fe6060f1SDimitry Andric // Replace VPSEL with a predicated VMOV in blocks with a VCTP. This is a
970*fe6060f1SDimitry Andric // somewhat blunt approximation to allow tail predicated with vpsel
971*fe6060f1SDimitry Andric // instructions. We turn a vselect into a VPSEL in ISEL, but they have slightly
972*fe6060f1SDimitry Andric // different semantics under tail predication. Until that is modelled we just
973*fe6060f1SDimitry Andric // convert to a VMOVT (via a predicated VORR) instead.
974*fe6060f1SDimitry Andric bool MVETPAndVPTOptimisations::ConvertVPSEL(MachineBasicBlock &MBB) {
975*fe6060f1SDimitry Andric   bool HasVCTP = false;
976*fe6060f1SDimitry Andric   SmallVector<MachineInstr *, 4> DeadInstructions;
977*fe6060f1SDimitry Andric 
978*fe6060f1SDimitry Andric   for (MachineInstr &MI : MBB.instrs()) {
979*fe6060f1SDimitry Andric     if (isVCTP(&MI)) {
980*fe6060f1SDimitry Andric       HasVCTP = true;
981*fe6060f1SDimitry Andric       continue;
982*fe6060f1SDimitry Andric     }
983*fe6060f1SDimitry Andric 
984*fe6060f1SDimitry Andric     if (!HasVCTP || MI.getOpcode() != ARM::MVE_VPSEL)
985*fe6060f1SDimitry Andric       continue;
986*fe6060f1SDimitry Andric 
987*fe6060f1SDimitry Andric     MachineInstrBuilder MIBuilder =
988*fe6060f1SDimitry Andric         BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(ARM::MVE_VORR))
989*fe6060f1SDimitry Andric             .add(MI.getOperand(0))
990*fe6060f1SDimitry Andric             .add(MI.getOperand(1))
991*fe6060f1SDimitry Andric             .add(MI.getOperand(1))
992*fe6060f1SDimitry Andric             .addImm(ARMVCC::Then)
993*fe6060f1SDimitry Andric             .add(MI.getOperand(4))
994*fe6060f1SDimitry Andric             .add(MI.getOperand(2));
995*fe6060f1SDimitry Andric     // Silence unused variable warning in release builds.
996*fe6060f1SDimitry Andric     (void)MIBuilder;
997*fe6060f1SDimitry Andric     LLVM_DEBUG(dbgs() << "Replacing VPSEL: "; MI.dump();
998*fe6060f1SDimitry Andric                dbgs() << "     with VMOVT: "; MIBuilder.getInstr()->dump());
999*fe6060f1SDimitry Andric     DeadInstructions.push_back(&MI);
1000*fe6060f1SDimitry Andric   }
1001*fe6060f1SDimitry Andric 
1002*fe6060f1SDimitry Andric   for (MachineInstr *DeadInstruction : DeadInstructions)
1003*fe6060f1SDimitry Andric     DeadInstruction->eraseFromParent();
1004*fe6060f1SDimitry Andric 
1005*fe6060f1SDimitry Andric   return !DeadInstructions.empty();
1006*fe6060f1SDimitry Andric }
1007*fe6060f1SDimitry Andric 
1008*fe6060f1SDimitry Andric // Add a registry allocation hint for t2DoLoopStart to hint it towards LR, as
1009*fe6060f1SDimitry Andric // the instruction may be removable as a noop.
1010*fe6060f1SDimitry Andric bool MVETPAndVPTOptimisations::HintDoLoopStartReg(MachineBasicBlock &MBB) {
1011*fe6060f1SDimitry Andric   bool Changed = false;
1012*fe6060f1SDimitry Andric   for (MachineInstr &MI : MBB.instrs()) {
1013*fe6060f1SDimitry Andric     if (MI.getOpcode() != ARM::t2DoLoopStart)
1014*fe6060f1SDimitry Andric       continue;
1015*fe6060f1SDimitry Andric     Register R = MI.getOperand(1).getReg();
1016*fe6060f1SDimitry Andric     MachineFunction *MF = MI.getParent()->getParent();
1017*fe6060f1SDimitry Andric     MF->getRegInfo().setRegAllocationHint(R, ARMRI::RegLR, 0);
1018*fe6060f1SDimitry Andric     Changed = true;
1019*fe6060f1SDimitry Andric   }
1020*fe6060f1SDimitry Andric   return Changed;
1021*fe6060f1SDimitry Andric }
1022*fe6060f1SDimitry Andric 
1023*fe6060f1SDimitry Andric bool MVETPAndVPTOptimisations::runOnMachineFunction(MachineFunction &Fn) {
1024*fe6060f1SDimitry Andric   const ARMSubtarget &STI =
1025*fe6060f1SDimitry Andric       static_cast<const ARMSubtarget &>(Fn.getSubtarget());
1026*fe6060f1SDimitry Andric 
1027*fe6060f1SDimitry Andric   if (!STI.isThumb2() || !STI.hasLOB())
1028*fe6060f1SDimitry Andric     return false;
1029*fe6060f1SDimitry Andric 
1030*fe6060f1SDimitry Andric   TII = static_cast<const Thumb2InstrInfo *>(STI.getInstrInfo());
1031*fe6060f1SDimitry Andric   MRI = &Fn.getRegInfo();
1032*fe6060f1SDimitry Andric   MachineLoopInfo *MLI = &getAnalysis<MachineLoopInfo>();
1033*fe6060f1SDimitry Andric   MachineDominatorTree *DT = &getAnalysis<MachineDominatorTree>();
1034*fe6060f1SDimitry Andric 
1035*fe6060f1SDimitry Andric   LLVM_DEBUG(dbgs() << "********** ARM MVE VPT Optimisations **********\n"
1036*fe6060f1SDimitry Andric                     << "********** Function: " << Fn.getName() << '\n');
1037*fe6060f1SDimitry Andric 
1038*fe6060f1SDimitry Andric   bool Modified = false;
1039*fe6060f1SDimitry Andric   for (MachineLoop *ML : MLI->getBase().getLoopsInPreorder()) {
1040*fe6060f1SDimitry Andric     Modified |= LowerWhileLoopStart(ML);
1041*fe6060f1SDimitry Andric     Modified |= MergeLoopEnd(ML);
1042*fe6060f1SDimitry Andric     Modified |= ConvertTailPredLoop(ML, DT);
1043*fe6060f1SDimitry Andric   }
1044*fe6060f1SDimitry Andric 
1045*fe6060f1SDimitry Andric   for (MachineBasicBlock &MBB : Fn) {
1046*fe6060f1SDimitry Andric     Modified |= HintDoLoopStartReg(MBB);
1047*fe6060f1SDimitry Andric     Modified |= ReplaceConstByVPNOTs(MBB, DT);
1048*fe6060f1SDimitry Andric     Modified |= ReplaceVCMPsByVPNOTs(MBB);
1049*fe6060f1SDimitry Andric     Modified |= ReduceOldVCCRValueUses(MBB);
1050*fe6060f1SDimitry Andric     Modified |= ConvertVPSEL(MBB);
1051*fe6060f1SDimitry Andric   }
1052*fe6060f1SDimitry Andric 
1053*fe6060f1SDimitry Andric   LLVM_DEBUG(dbgs() << "**************************************\n");
1054*fe6060f1SDimitry Andric   return Modified;
1055*fe6060f1SDimitry Andric }
1056*fe6060f1SDimitry Andric 
1057*fe6060f1SDimitry Andric /// createMVETPAndVPTOptimisationsPass
1058*fe6060f1SDimitry Andric FunctionPass *llvm::createMVETPAndVPTOptimisationsPass() {
1059*fe6060f1SDimitry Andric   return new MVETPAndVPTOptimisations();
1060*fe6060f1SDimitry Andric }
1061