xref: /freebsd/contrib/llvm-project/llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp (revision 035dd78d30ba28a3dc15c05ec85ad10127165677)
1 //===-- MVETPAndVPTOptimisationsPass.cpp ----------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file This pass does a few optimisations related to Tail predicated loops
10 /// and MVE VPT blocks before register allocation is performed. For VPT blocks
11 /// the goal is to maximize the sizes of the blocks that will be created by the
12 /// MVE VPT Block Insertion pass (which runs after register allocation). For
13 /// tail predicated loops we transform the loop into something that will
14 /// hopefully make the backend ARMLowOverheadLoops pass's job easier.
15 ///
16 //===----------------------------------------------------------------------===//
17 
18 #include "ARM.h"
19 #include "ARMSubtarget.h"
20 #include "MCTargetDesc/ARMBaseInfo.h"
21 #include "MVETailPredUtils.h"
22 #include "Thumb2InstrInfo.h"
23 #include "llvm/ADT/SmallVector.h"
24 #include "llvm/CodeGen/MachineBasicBlock.h"
25 #include "llvm/CodeGen/MachineDominators.h"
26 #include "llvm/CodeGen/MachineFunction.h"
27 #include "llvm/CodeGen/MachineFunctionPass.h"
28 #include "llvm/CodeGen/MachineInstr.h"
29 #include "llvm/CodeGen/MachineLoopInfo.h"
30 #include "llvm/InitializePasses.h"
31 #include "llvm/Support/Debug.h"
32 #include <cassert>
33 
34 using namespace llvm;
35 
36 #define DEBUG_TYPE "arm-mve-vpt-opts"
37 
38 static cl::opt<bool>
39 MergeEndDec("arm-enable-merge-loopenddec", cl::Hidden,
40     cl::desc("Enable merging Loop End and Dec instructions."),
41     cl::init(true));
42 
43 static cl::opt<bool>
44 SetLRPredicate("arm-set-lr-predicate", cl::Hidden,
45     cl::desc("Enable setting lr as a predicate in tail predication regions."),
46     cl::init(true));
47 
48 namespace {
49 class MVETPAndVPTOptimisations : public MachineFunctionPass {
50 public:
51   static char ID;
52   const Thumb2InstrInfo *TII;
53   MachineRegisterInfo *MRI;
54 
55   MVETPAndVPTOptimisations() : MachineFunctionPass(ID) {}
56 
57   bool runOnMachineFunction(MachineFunction &Fn) override;
58 
59   void getAnalysisUsage(AnalysisUsage &AU) const override {
60     AU.addRequired<MachineLoopInfo>();
61     AU.addPreserved<MachineLoopInfo>();
62     AU.addRequired<MachineDominatorTree>();
63     AU.addPreserved<MachineDominatorTree>();
64     MachineFunctionPass::getAnalysisUsage(AU);
65   }
66 
67   StringRef getPassName() const override {
68     return "ARM MVE TailPred and VPT Optimisation Pass";
69   }
70 
71 private:
72   bool LowerWhileLoopStart(MachineLoop *ML);
73   bool MergeLoopEnd(MachineLoop *ML);
74   bool ConvertTailPredLoop(MachineLoop *ML, MachineDominatorTree *DT);
75   MachineInstr &ReplaceRegisterUseWithVPNOT(MachineBasicBlock &MBB,
76                                             MachineInstr &Instr,
77                                             MachineOperand &User,
78                                             Register Target);
79   bool ReduceOldVCCRValueUses(MachineBasicBlock &MBB);
80   bool ReplaceVCMPsByVPNOTs(MachineBasicBlock &MBB);
81   bool ReplaceConstByVPNOTs(MachineBasicBlock &MBB, MachineDominatorTree *DT);
82   bool ConvertVPSEL(MachineBasicBlock &MBB);
83   bool HintDoLoopStartReg(MachineBasicBlock &MBB);
84   MachineInstr *CheckForLRUseInPredecessors(MachineBasicBlock *PreHeader,
85                                             MachineInstr *LoopStart);
86 };
87 
88 char MVETPAndVPTOptimisations::ID = 0;
89 
90 } // end anonymous namespace
91 
92 INITIALIZE_PASS_BEGIN(MVETPAndVPTOptimisations, DEBUG_TYPE,
93                       "ARM MVE TailPred and VPT Optimisations pass", false,
94                       false)
95 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
96 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
97 INITIALIZE_PASS_END(MVETPAndVPTOptimisations, DEBUG_TYPE,
98                     "ARM MVE TailPred and VPT Optimisations pass", false, false)
99 
100 static MachineInstr *LookThroughCOPY(MachineInstr *MI,
101                                      MachineRegisterInfo *MRI) {
102   while (MI && MI->getOpcode() == TargetOpcode::COPY &&
103          MI->getOperand(1).getReg().isVirtual())
104     MI = MRI->getVRegDef(MI->getOperand(1).getReg());
105   return MI;
106 }
107 
108 // Given a loop ML, this attempts to find the t2LoopEnd, t2LoopDec and
109 // corresponding PHI that make up a low overhead loop. Only handles 'do' loops
110 // at the moment, returning a t2DoLoopStart in LoopStart.
111 static bool findLoopComponents(MachineLoop *ML, MachineRegisterInfo *MRI,
112                                MachineInstr *&LoopStart, MachineInstr *&LoopPhi,
113                                MachineInstr *&LoopDec, MachineInstr *&LoopEnd) {
114   MachineBasicBlock *Header = ML->getHeader();
115   MachineBasicBlock *Latch = ML->getLoopLatch();
116   if (!Header || !Latch) {
117     LLVM_DEBUG(dbgs() << "  no Loop Latch or Header\n");
118     return false;
119   }
120 
121   // Find the loop end from the terminators.
122   LoopEnd = nullptr;
123   for (auto &T : Latch->terminators()) {
124     if (T.getOpcode() == ARM::t2LoopEnd && T.getOperand(1).getMBB() == Header) {
125       LoopEnd = &T;
126       break;
127     }
128     if (T.getOpcode() == ARM::t2LoopEndDec &&
129         T.getOperand(2).getMBB() == Header) {
130       LoopEnd = &T;
131       break;
132     }
133   }
134   if (!LoopEnd) {
135     LLVM_DEBUG(dbgs() << "  no LoopEnd\n");
136     return false;
137   }
138   LLVM_DEBUG(dbgs() << "  found loop end: " << *LoopEnd);
139 
140   // Find the dec from the use of the end. There may be copies between
141   // instructions. We expect the loop to loop like:
142   //   $vs = t2DoLoopStart ...
143   // loop:
144   //   $vp = phi [ $vs ], [ $vd ]
145   //   ...
146   //   $vd = t2LoopDec $vp
147   //   ...
148   //   t2LoopEnd $vd, loop
149   if (LoopEnd->getOpcode() == ARM::t2LoopEndDec)
150     LoopDec = LoopEnd;
151   else {
152     LoopDec =
153         LookThroughCOPY(MRI->getVRegDef(LoopEnd->getOperand(0).getReg()), MRI);
154     if (!LoopDec || LoopDec->getOpcode() != ARM::t2LoopDec) {
155       LLVM_DEBUG(dbgs() << "  didn't find LoopDec where we expected!\n");
156       return false;
157     }
158   }
159   LLVM_DEBUG(dbgs() << "  found loop dec: " << *LoopDec);
160 
161   LoopPhi =
162       LookThroughCOPY(MRI->getVRegDef(LoopDec->getOperand(1).getReg()), MRI);
163   if (!LoopPhi || LoopPhi->getOpcode() != TargetOpcode::PHI ||
164       LoopPhi->getNumOperands() != 5 ||
165       (LoopPhi->getOperand(2).getMBB() != Latch &&
166        LoopPhi->getOperand(4).getMBB() != Latch)) {
167     LLVM_DEBUG(dbgs() << "  didn't find PHI where we expected!\n");
168     return false;
169   }
170   LLVM_DEBUG(dbgs() << "  found loop phi: " << *LoopPhi);
171 
172   Register StartReg = LoopPhi->getOperand(2).getMBB() == Latch
173                           ? LoopPhi->getOperand(3).getReg()
174                           : LoopPhi->getOperand(1).getReg();
175   LoopStart = LookThroughCOPY(MRI->getVRegDef(StartReg), MRI);
176   if (!LoopStart || (LoopStart->getOpcode() != ARM::t2DoLoopStart &&
177                      LoopStart->getOpcode() != ARM::t2WhileLoopSetup &&
178                      LoopStart->getOpcode() != ARM::t2WhileLoopStartLR)) {
179     LLVM_DEBUG(dbgs() << "  didn't find Start where we expected!\n");
180     return false;
181   }
182   LLVM_DEBUG(dbgs() << "  found loop start: " << *LoopStart);
183 
184   return true;
185 }
186 
187 static void RevertWhileLoopSetup(MachineInstr *MI, const TargetInstrInfo *TII) {
188   MachineBasicBlock *MBB = MI->getParent();
189   assert(MI->getOpcode() == ARM::t2WhileLoopSetup &&
190          "Only expected a t2WhileLoopSetup in RevertWhileLoopStart!");
191 
192   // Subs
193   MachineInstrBuilder MIB =
194       BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2SUBri));
195   MIB.add(MI->getOperand(0));
196   MIB.add(MI->getOperand(1));
197   MIB.addImm(0);
198   MIB.addImm(ARMCC::AL);
199   MIB.addReg(ARM::NoRegister);
200   MIB.addReg(ARM::CPSR, RegState::Define);
201 
202   // Attempt to find a t2WhileLoopStart and revert to a t2Bcc.
203   for (MachineInstr &I : MBB->terminators()) {
204     if (I.getOpcode() == ARM::t2WhileLoopStart) {
205       MachineInstrBuilder MIB =
206           BuildMI(*MBB, &I, I.getDebugLoc(), TII->get(ARM::t2Bcc));
207       MIB.add(MI->getOperand(1)); // branch target
208       MIB.addImm(ARMCC::EQ);
209       MIB.addReg(ARM::CPSR);
210       I.eraseFromParent();
211       break;
212     }
213   }
214 
215   MI->eraseFromParent();
216 }
217 
218 // The Hardware Loop insertion and ISel Lowering produce the pseudos for the
219 // start of a while loop:
220 //   %a:gprlr = t2WhileLoopSetup %Cnt
221 //   t2WhileLoopStart %a, %BB
222 // We want to convert those to a single instruction which, like t2LoopEndDec and
223 // t2DoLoopStartTP is both a terminator and produces a value:
224 //   %a:grplr: t2WhileLoopStartLR %Cnt, %BB
225 //
226 // Otherwise if we can't, we revert the loop. t2WhileLoopSetup and
227 // t2WhileLoopStart are not valid past regalloc.
228 bool MVETPAndVPTOptimisations::LowerWhileLoopStart(MachineLoop *ML) {
229   LLVM_DEBUG(dbgs() << "LowerWhileLoopStart on loop "
230                     << ML->getHeader()->getName() << "\n");
231 
232   MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec;
233   if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd))
234     return false;
235 
236   if (LoopStart->getOpcode() != ARM::t2WhileLoopSetup)
237     return false;
238 
239   Register LR = LoopStart->getOperand(0).getReg();
240   auto WLSIt = find_if(MRI->use_nodbg_instructions(LR), [](auto &MI) {
241     return MI.getOpcode() == ARM::t2WhileLoopStart;
242   });
243   if (!MergeEndDec || WLSIt == MRI->use_instr_nodbg_end()) {
244     RevertWhileLoopSetup(LoopStart, TII);
245     RevertLoopDec(LoopStart, TII);
246     RevertLoopEnd(LoopStart, TII);
247     return true;
248   }
249 
250   MachineInstrBuilder MI =
251       BuildMI(*WLSIt->getParent(), *WLSIt, WLSIt->getDebugLoc(),
252               TII->get(ARM::t2WhileLoopStartLR), LR)
253           .add(LoopStart->getOperand(1))
254           .add(WLSIt->getOperand(1));
255   (void)MI;
256   LLVM_DEBUG(dbgs() << "Lowered WhileLoopStart into: " << *MI.getInstr());
257 
258   WLSIt->eraseFromParent();
259   LoopStart->eraseFromParent();
260   return true;
261 }
262 
263 // Return true if this instruction is invalid in a low overhead loop, usually
264 // because it clobbers LR.
265 static bool IsInvalidTPInstruction(MachineInstr &MI) {
266   return MI.isCall() || isLoopStart(MI);
267 }
268 
269 // Starting from PreHeader, search for invalid instructions back until the
270 // LoopStart block is reached. If invalid instructions are found, the loop start
271 // is reverted from a WhileLoopStart to a DoLoopStart on the same loop. Will
272 // return the new DLS LoopStart if updated.
273 MachineInstr *MVETPAndVPTOptimisations::CheckForLRUseInPredecessors(
274     MachineBasicBlock *PreHeader, MachineInstr *LoopStart) {
275   SmallVector<MachineBasicBlock *> Worklist;
276   SmallPtrSet<MachineBasicBlock *, 4> Visited;
277   Worklist.push_back(PreHeader);
278   Visited.insert(LoopStart->getParent());
279 
280   while (!Worklist.empty()) {
281     MachineBasicBlock *MBB = Worklist.pop_back_val();
282     if (Visited.count(MBB))
283       continue;
284 
285     for (MachineInstr &MI : *MBB) {
286       if (!IsInvalidTPInstruction(MI))
287         continue;
288 
289       LLVM_DEBUG(dbgs() << "Found LR use in predecessors, reverting: " << MI);
290 
291       // Create a t2DoLoopStart at the end of the preheader.
292       MachineInstrBuilder MIB =
293           BuildMI(*PreHeader, PreHeader->getFirstTerminator(),
294                   LoopStart->getDebugLoc(), TII->get(ARM::t2DoLoopStart));
295       MIB.add(LoopStart->getOperand(0));
296       MIB.add(LoopStart->getOperand(1));
297 
298       // Make sure to remove the kill flags, to prevent them from being invalid.
299       LoopStart->getOperand(1).setIsKill(false);
300 
301       // Revert the t2WhileLoopStartLR to a CMP and Br.
302       RevertWhileLoopStartLR(LoopStart, TII, ARM::t2Bcc, true);
303       return MIB;
304     }
305 
306     Visited.insert(MBB);
307     for (auto *Pred : MBB->predecessors())
308       Worklist.push_back(Pred);
309   }
310   return LoopStart;
311 }
312 
313 // This function converts loops with t2LoopEnd and t2LoopEnd instructions into
314 // a single t2LoopEndDec instruction. To do that it needs to make sure that LR
315 // will be valid to be used for the low overhead loop, which means nothing else
316 // is using LR (especially calls) and there are no superfluous copies in the
317 // loop. The t2LoopEndDec is a branching terminator that produces a value (the
318 // decrement) around the loop edge, which means we need to be careful that they
319 // will be valid to allocate without any spilling.
320 bool MVETPAndVPTOptimisations::MergeLoopEnd(MachineLoop *ML) {
321   if (!MergeEndDec)
322     return false;
323 
324   LLVM_DEBUG(dbgs() << "MergeLoopEnd on loop " << ML->getHeader()->getName()
325                     << "\n");
326 
327   MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec;
328   if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd))
329     return false;
330 
331   // Check if there is an illegal instruction (a call) in the low overhead loop
332   // and if so revert it now before we get any further. While loops also need to
333   // check the preheaders, but can be reverted to a DLS loop if needed.
334   auto *PreHeader = ML->getLoopPreheader();
335   if (LoopStart->getOpcode() == ARM::t2WhileLoopStartLR && PreHeader)
336     LoopStart = CheckForLRUseInPredecessors(PreHeader, LoopStart);
337 
338   for (MachineBasicBlock *MBB : ML->blocks()) {
339     for (MachineInstr &MI : *MBB) {
340       if (IsInvalidTPInstruction(MI)) {
341         LLVM_DEBUG(dbgs() << "Found LR use in loop, reverting: " << MI);
342         if (LoopStart->getOpcode() == ARM::t2DoLoopStart)
343           RevertDoLoopStart(LoopStart, TII);
344         else
345           RevertWhileLoopStartLR(LoopStart, TII);
346         RevertLoopDec(LoopDec, TII);
347         RevertLoopEnd(LoopEnd, TII);
348         return true;
349       }
350     }
351   }
352 
353   // Remove any copies from the loop, to ensure the phi that remains is both
354   // simpler and contains no extra uses. Because t2LoopEndDec is a terminator
355   // that cannot spill, we need to be careful what remains in the loop.
356   Register PhiReg = LoopPhi->getOperand(0).getReg();
357   Register DecReg = LoopDec->getOperand(0).getReg();
358   Register StartReg = LoopStart->getOperand(0).getReg();
359   // Ensure the uses are expected, and collect any copies we want to remove.
360   SmallVector<MachineInstr *, 4> Copies;
361   auto CheckUsers = [&Copies](Register BaseReg,
362                               ArrayRef<MachineInstr *> ExpectedUsers,
363                               MachineRegisterInfo *MRI) {
364     SmallVector<Register, 4> Worklist;
365     Worklist.push_back(BaseReg);
366     while (!Worklist.empty()) {
367       Register Reg = Worklist.pop_back_val();
368       for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
369         if (llvm::is_contained(ExpectedUsers, &MI))
370           continue;
371         if (MI.getOpcode() != TargetOpcode::COPY ||
372             !MI.getOperand(0).getReg().isVirtual()) {
373           LLVM_DEBUG(dbgs() << "Extra users of register found: " << MI);
374           return false;
375         }
376         Worklist.push_back(MI.getOperand(0).getReg());
377         Copies.push_back(&MI);
378       }
379     }
380     return true;
381   };
382   if (!CheckUsers(PhiReg, {LoopDec}, MRI) ||
383       !CheckUsers(DecReg, {LoopPhi, LoopEnd}, MRI) ||
384       !CheckUsers(StartReg, {LoopPhi}, MRI)) {
385     // Don't leave a t2WhileLoopStartLR without the LoopDecEnd.
386     if (LoopStart->getOpcode() == ARM::t2WhileLoopStartLR) {
387       RevertWhileLoopStartLR(LoopStart, TII);
388       RevertLoopDec(LoopDec, TII);
389       RevertLoopEnd(LoopEnd, TII);
390       return true;
391     }
392     return false;
393   }
394 
395   MRI->constrainRegClass(StartReg, &ARM::GPRlrRegClass);
396   MRI->constrainRegClass(PhiReg, &ARM::GPRlrRegClass);
397   MRI->constrainRegClass(DecReg, &ARM::GPRlrRegClass);
398 
399   if (LoopPhi->getOperand(2).getMBB() == ML->getLoopLatch()) {
400     LoopPhi->getOperand(3).setReg(StartReg);
401     LoopPhi->getOperand(1).setReg(DecReg);
402   } else {
403     LoopPhi->getOperand(1).setReg(StartReg);
404     LoopPhi->getOperand(3).setReg(DecReg);
405   }
406 
407   SmallVector<MachineOperand, 4> Cond;              // For analyzeBranch.
408   MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For analyzeBranch.
409   if (!TII->analyzeBranch(*LoopEnd->getParent(), TBB, FBB, Cond) && !FBB) {
410     // If the LoopEnd falls through, need to insert a t2B to the fall-through
411     // block so that the non-analyzable t2LoopEndDec doesn't fall through.
412     MachineFunction::iterator MBBI = ++LoopEnd->getParent()->getIterator();
413     BuildMI(LoopEnd->getParent(), DebugLoc(), TII->get(ARM::t2B))
414         .addMBB(&*MBBI)
415         .add(predOps(ARMCC::AL));
416   }
417 
418   // Replace the loop dec and loop end as a single instruction.
419   MachineInstrBuilder MI =
420       BuildMI(*LoopEnd->getParent(), *LoopEnd, LoopEnd->getDebugLoc(),
421               TII->get(ARM::t2LoopEndDec), DecReg)
422           .addReg(PhiReg)
423           .add(LoopEnd->getOperand(1));
424   (void)MI;
425   LLVM_DEBUG(dbgs() << "Merged LoopDec and End into: " << *MI.getInstr());
426 
427   LoopDec->eraseFromParent();
428   LoopEnd->eraseFromParent();
429   for (auto *MI : Copies)
430     MI->eraseFromParent();
431   return true;
432 }
433 
434 // Convert t2DoLoopStart to t2DoLoopStartTP if the loop contains VCTP
435 // instructions. This keeps the VCTP count reg operand on the t2DoLoopStartTP
436 // instruction, making the backend ARMLowOverheadLoops passes job of finding the
437 // VCTP operand much simpler.
438 bool MVETPAndVPTOptimisations::ConvertTailPredLoop(MachineLoop *ML,
439                                               MachineDominatorTree *DT) {
440   LLVM_DEBUG(dbgs() << "ConvertTailPredLoop on loop "
441                     << ML->getHeader()->getName() << "\n");
442 
443   // Find some loop components including the LoopEnd/Dec/Start, and any VCTP's
444   // in the loop.
445   MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec;
446   if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd))
447     return false;
448   if (LoopDec != LoopEnd || (LoopStart->getOpcode() != ARM::t2DoLoopStart &&
449                              LoopStart->getOpcode() != ARM::t2WhileLoopStartLR))
450     return false;
451 
452   SmallVector<MachineInstr *, 4> VCTPs;
453   SmallVector<MachineInstr *, 4> MVEInstrs;
454   for (MachineBasicBlock *BB : ML->blocks()) {
455     for (MachineInstr &MI : *BB)
456       if (isVCTP(&MI))
457         VCTPs.push_back(&MI);
458       else if (findFirstVPTPredOperandIdx(MI) != -1)
459         MVEInstrs.push_back(&MI);
460   }
461 
462   if (VCTPs.empty()) {
463     LLVM_DEBUG(dbgs() << "  no VCTPs\n");
464     return false;
465   }
466 
467   // Check all VCTPs are the same.
468   MachineInstr *FirstVCTP = *VCTPs.begin();
469   for (MachineInstr *VCTP : VCTPs) {
470     LLVM_DEBUG(dbgs() << "  with VCTP " << *VCTP);
471     if (VCTP->getOpcode() != FirstVCTP->getOpcode() ||
472         VCTP->getOperand(0).getReg() != FirstVCTP->getOperand(0).getReg()) {
473       LLVM_DEBUG(dbgs() << "  VCTP's are not identical\n");
474       return false;
475     }
476   }
477 
478   // Check for the register being used can be setup before the loop. We expect
479   // this to be:
480   //   $vx = ...
481   // loop:
482   //   $vp = PHI [ $vx ], [ $vd ]
483   //   ..
484   //   $vpr = VCTP $vp
485   //   ..
486   //   $vd = t2SUBri $vp, #n
487   //   ..
488   Register CountReg = FirstVCTP->getOperand(1).getReg();
489   if (!CountReg.isVirtual()) {
490     LLVM_DEBUG(dbgs() << "  cannot determine VCTP PHI\n");
491     return false;
492   }
493   MachineInstr *Phi = LookThroughCOPY(MRI->getVRegDef(CountReg), MRI);
494   if (!Phi || Phi->getOpcode() != TargetOpcode::PHI ||
495       Phi->getNumOperands() != 5 ||
496       (Phi->getOperand(2).getMBB() != ML->getLoopLatch() &&
497        Phi->getOperand(4).getMBB() != ML->getLoopLatch())) {
498     LLVM_DEBUG(dbgs() << "  cannot determine VCTP Count\n");
499     return false;
500   }
501   CountReg = Phi->getOperand(2).getMBB() == ML->getLoopLatch()
502                  ? Phi->getOperand(3).getReg()
503                  : Phi->getOperand(1).getReg();
504 
505   // Replace the t2DoLoopStart with the t2DoLoopStartTP, move it to the end of
506   // the preheader and add the new CountReg to it. We attempt to place it late
507   // in the preheader, but may need to move that earlier based on uses.
508   MachineBasicBlock *MBB = LoopStart->getParent();
509   MachineBasicBlock::iterator InsertPt = MBB->getFirstTerminator();
510   for (MachineInstr &Use :
511        MRI->use_instructions(LoopStart->getOperand(0).getReg()))
512     if ((InsertPt != MBB->end() && !DT->dominates(&*InsertPt, &Use)) ||
513         !DT->dominates(ML->getHeader(), Use.getParent())) {
514       LLVM_DEBUG(dbgs() << "  InsertPt could not be a terminator!\n");
515       return false;
516     }
517 
518   unsigned NewOpc = LoopStart->getOpcode() == ARM::t2DoLoopStart
519                         ? ARM::t2DoLoopStartTP
520                         : ARM::t2WhileLoopStartTP;
521   MachineInstrBuilder MI =
522       BuildMI(*MBB, InsertPt, LoopStart->getDebugLoc(), TII->get(NewOpc))
523           .add(LoopStart->getOperand(0))
524           .add(LoopStart->getOperand(1))
525           .addReg(CountReg);
526   if (NewOpc == ARM::t2WhileLoopStartTP)
527     MI.add(LoopStart->getOperand(2));
528   LLVM_DEBUG(dbgs() << "Replacing " << *LoopStart << "  with "
529                     << *MI.getInstr());
530   MRI->constrainRegClass(CountReg, &ARM::rGPRRegClass);
531   LoopStart->eraseFromParent();
532 
533   if (SetLRPredicate) {
534     // Each instruction in the loop needs to be using LR as the predicate from
535     // the Phi as the predicate.
536     Register LR = LoopPhi->getOperand(0).getReg();
537     for (MachineInstr *MI : MVEInstrs) {
538       int Idx = findFirstVPTPredOperandIdx(*MI);
539       MI->getOperand(Idx + 2).setReg(LR);
540     }
541   }
542 
543   return true;
544 }
545 
546 // Returns true if Opcode is any VCMP Opcode.
547 static bool IsVCMP(unsigned Opcode) { return VCMPOpcodeToVPT(Opcode) != 0; }
548 
549 // Returns true if a VCMP with this Opcode can have its operands swapped.
550 // There is 2 kind of VCMP that can't have their operands swapped: Float VCMPs,
551 // and VCMPr instructions (since the r is always on the right).
552 static bool CanHaveSwappedOperands(unsigned Opcode) {
553   switch (Opcode) {
554   default:
555     return true;
556   case ARM::MVE_VCMPf32:
557   case ARM::MVE_VCMPf16:
558   case ARM::MVE_VCMPf32r:
559   case ARM::MVE_VCMPf16r:
560   case ARM::MVE_VCMPi8r:
561   case ARM::MVE_VCMPi16r:
562   case ARM::MVE_VCMPi32r:
563   case ARM::MVE_VCMPu8r:
564   case ARM::MVE_VCMPu16r:
565   case ARM::MVE_VCMPu32r:
566   case ARM::MVE_VCMPs8r:
567   case ARM::MVE_VCMPs16r:
568   case ARM::MVE_VCMPs32r:
569     return false;
570   }
571 }
572 
573 // Returns the CondCode of a VCMP Instruction.
574 static ARMCC::CondCodes GetCondCode(MachineInstr &Instr) {
575   assert(IsVCMP(Instr.getOpcode()) && "Inst must be a VCMP");
576   return ARMCC::CondCodes(Instr.getOperand(3).getImm());
577 }
578 
579 // Returns true if Cond is equivalent to a VPNOT instruction on the result of
580 // Prev. Cond and Prev must be VCMPs.
581 static bool IsVPNOTEquivalent(MachineInstr &Cond, MachineInstr &Prev) {
582   assert(IsVCMP(Cond.getOpcode()) && IsVCMP(Prev.getOpcode()));
583 
584   // Opcodes must match.
585   if (Cond.getOpcode() != Prev.getOpcode())
586     return false;
587 
588   MachineOperand &CondOP1 = Cond.getOperand(1), &CondOP2 = Cond.getOperand(2);
589   MachineOperand &PrevOP1 = Prev.getOperand(1), &PrevOP2 = Prev.getOperand(2);
590 
591   // If the VCMP has the opposite condition with the same operands, we can
592   // replace it with a VPNOT
593   ARMCC::CondCodes ExpectedCode = GetCondCode(Cond);
594   ExpectedCode = ARMCC::getOppositeCondition(ExpectedCode);
595   if (ExpectedCode == GetCondCode(Prev))
596     if (CondOP1.isIdenticalTo(PrevOP1) && CondOP2.isIdenticalTo(PrevOP2))
597       return true;
598   // Check again with operands swapped if possible
599   if (!CanHaveSwappedOperands(Cond.getOpcode()))
600     return false;
601   ExpectedCode = ARMCC::getSwappedCondition(ExpectedCode);
602   return ExpectedCode == GetCondCode(Prev) && CondOP1.isIdenticalTo(PrevOP2) &&
603          CondOP2.isIdenticalTo(PrevOP1);
604 }
605 
606 // Returns true if Instr writes to VCCR.
607 static bool IsWritingToVCCR(MachineInstr &Instr) {
608   if (Instr.getNumOperands() == 0)
609     return false;
610   MachineOperand &Dst = Instr.getOperand(0);
611   if (!Dst.isReg())
612     return false;
613   Register DstReg = Dst.getReg();
614   if (!DstReg.isVirtual())
615     return false;
616   MachineRegisterInfo &RegInfo = Instr.getMF()->getRegInfo();
617   const TargetRegisterClass *RegClass = RegInfo.getRegClassOrNull(DstReg);
618   return RegClass && (RegClass->getID() == ARM::VCCRRegClassID);
619 }
620 
621 // Transforms
622 //    <Instr that uses %A ('User' Operand)>
623 // Into
624 //    %K = VPNOT %Target
625 //    <Instr that uses %K ('User' Operand)>
626 // And returns the newly inserted VPNOT.
627 // This optimization is done in the hopes of preventing spills/reloads of VPR by
628 // reducing the number of VCCR values with overlapping lifetimes.
629 MachineInstr &MVETPAndVPTOptimisations::ReplaceRegisterUseWithVPNOT(
630     MachineBasicBlock &MBB, MachineInstr &Instr, MachineOperand &User,
631     Register Target) {
632   Register NewResult = MRI->createVirtualRegister(MRI->getRegClass(Target));
633 
634   MachineInstrBuilder MIBuilder =
635       BuildMI(MBB, &Instr, Instr.getDebugLoc(), TII->get(ARM::MVE_VPNOT))
636           .addDef(NewResult)
637           .addReg(Target);
638   addUnpredicatedMveVpredNOp(MIBuilder);
639 
640   // Make the user use NewResult instead, and clear its kill flag.
641   User.setReg(NewResult);
642   User.setIsKill(false);
643 
644   LLVM_DEBUG(dbgs() << "  Inserting VPNOT (for spill prevention): ";
645              MIBuilder.getInstr()->dump());
646 
647   return *MIBuilder.getInstr();
648 }
649 
650 // Moves a VPNOT before its first user if an instruction that uses Reg is found
651 // in-between the VPNOT and its user.
652 // Returns true if there is at least one user of the VPNOT in the block.
653 static bool MoveVPNOTBeforeFirstUser(MachineBasicBlock &MBB,
654                                      MachineBasicBlock::iterator Iter,
655                                      Register Reg) {
656   assert(Iter->getOpcode() == ARM::MVE_VPNOT && "Not a VPNOT!");
657   assert(getVPTInstrPredicate(*Iter) == ARMVCC::None &&
658          "The VPNOT cannot be predicated");
659 
660   MachineInstr &VPNOT = *Iter;
661   Register VPNOTResult = VPNOT.getOperand(0).getReg();
662   Register VPNOTOperand = VPNOT.getOperand(1).getReg();
663 
664   // Whether the VPNOT will need to be moved, and whether we found a user of the
665   // VPNOT.
666   bool MustMove = false, HasUser = false;
667   MachineOperand *VPNOTOperandKiller = nullptr;
668   for (; Iter != MBB.end(); ++Iter) {
669     if (MachineOperand *MO =
670             Iter->findRegisterUseOperand(VPNOTOperand, /*isKill*/ true)) {
671       // If we find the operand that kills the VPNOTOperand's result, save it.
672       VPNOTOperandKiller = MO;
673     }
674 
675     if (Iter->findRegisterUseOperandIdx(Reg) != -1) {
676       MustMove = true;
677       continue;
678     }
679 
680     if (Iter->findRegisterUseOperandIdx(VPNOTResult) == -1)
681       continue;
682 
683     HasUser = true;
684     if (!MustMove)
685       break;
686 
687     // Move the VPNOT right before Iter
688     LLVM_DEBUG(dbgs() << "Moving: "; VPNOT.dump(); dbgs() << "  Before: ";
689                Iter->dump());
690     MBB.splice(Iter, &MBB, VPNOT.getIterator());
691     // If we move the instr, and its operand was killed earlier, remove the kill
692     // flag.
693     if (VPNOTOperandKiller)
694       VPNOTOperandKiller->setIsKill(false);
695 
696     break;
697   }
698   return HasUser;
699 }
700 
701 // This optimisation attempts to reduce the number of overlapping lifetimes of
702 // VCCR values by replacing uses of old VCCR values with VPNOTs. For example,
703 // this replaces
704 //    %A:vccr = (something)
705 //    %B:vccr = VPNOT %A
706 //    %Foo = (some op that uses %B)
707 //    %Bar = (some op that uses %A)
708 // With
709 //    %A:vccr = (something)
710 //    %B:vccr = VPNOT %A
711 //    %Foo = (some op that uses %B)
712 //    %TMP2:vccr = VPNOT %B
713 //    %Bar = (some op that uses %A)
714 bool MVETPAndVPTOptimisations::ReduceOldVCCRValueUses(MachineBasicBlock &MBB) {
715   MachineBasicBlock::iterator Iter = MBB.begin(), End = MBB.end();
716   SmallVector<MachineInstr *, 4> DeadInstructions;
717   bool Modified = false;
718 
719   while (Iter != End) {
720     Register VCCRValue, OppositeVCCRValue;
721     // The first loop looks for 2 unpredicated instructions:
722     //    %A:vccr = (instr)     ; A is stored in VCCRValue
723     //    %B:vccr = VPNOT %A    ; B is stored in OppositeVCCRValue
724     for (; Iter != End; ++Iter) {
725       // We're only interested in unpredicated instructions that write to VCCR.
726       if (!IsWritingToVCCR(*Iter) ||
727           getVPTInstrPredicate(*Iter) != ARMVCC::None)
728         continue;
729       Register Dst = Iter->getOperand(0).getReg();
730 
731       // If we already have a VCCRValue, and this is a VPNOT on VCCRValue, we've
732       // found what we were looking for.
733       if (VCCRValue && Iter->getOpcode() == ARM::MVE_VPNOT &&
734           Iter->findRegisterUseOperandIdx(VCCRValue) != -1) {
735         // Move the VPNOT closer to its first user if needed, and ignore if it
736         // has no users.
737         if (!MoveVPNOTBeforeFirstUser(MBB, Iter, VCCRValue))
738           continue;
739 
740         OppositeVCCRValue = Dst;
741         ++Iter;
742         break;
743       }
744 
745       // Else, just set VCCRValue.
746       VCCRValue = Dst;
747     }
748 
749     // If the first inner loop didn't find anything, stop here.
750     if (Iter == End)
751       break;
752 
753     assert(VCCRValue && OppositeVCCRValue &&
754            "VCCRValue and OppositeVCCRValue shouldn't be empty if the loop "
755            "stopped before the end of the block!");
756     assert(VCCRValue != OppositeVCCRValue &&
757            "VCCRValue should not be equal to OppositeVCCRValue!");
758 
759     // LastVPNOTResult always contains the same value as OppositeVCCRValue.
760     Register LastVPNOTResult = OppositeVCCRValue;
761 
762     // This second loop tries to optimize the remaining instructions.
763     for (; Iter != End; ++Iter) {
764       bool IsInteresting = false;
765 
766       if (MachineOperand *MO = Iter->findRegisterUseOperand(VCCRValue)) {
767         IsInteresting = true;
768 
769         // - If the instruction is a VPNOT, it can be removed, and we can just
770         //   replace its uses with LastVPNOTResult.
771         // - Else, insert a new VPNOT on LastVPNOTResult to recompute VCCRValue.
772         if (Iter->getOpcode() == ARM::MVE_VPNOT) {
773           Register Result = Iter->getOperand(0).getReg();
774 
775           MRI->replaceRegWith(Result, LastVPNOTResult);
776           DeadInstructions.push_back(&*Iter);
777           Modified = true;
778 
779           LLVM_DEBUG(dbgs()
780                      << "Replacing all uses of '" << printReg(Result)
781                      << "' with '" << printReg(LastVPNOTResult) << "'\n");
782         } else {
783           MachineInstr &VPNOT =
784               ReplaceRegisterUseWithVPNOT(MBB, *Iter, *MO, LastVPNOTResult);
785           Modified = true;
786 
787           LastVPNOTResult = VPNOT.getOperand(0).getReg();
788           std::swap(VCCRValue, OppositeVCCRValue);
789 
790           LLVM_DEBUG(dbgs() << "Replacing use of '" << printReg(VCCRValue)
791                             << "' with '" << printReg(LastVPNOTResult)
792                             << "' in instr: " << *Iter);
793         }
794       } else {
795         // If the instr uses OppositeVCCRValue, make it use LastVPNOTResult
796         // instead as they contain the same value.
797         if (MachineOperand *MO =
798                 Iter->findRegisterUseOperand(OppositeVCCRValue)) {
799           IsInteresting = true;
800 
801           // This is pointless if LastVPNOTResult == OppositeVCCRValue.
802           if (LastVPNOTResult != OppositeVCCRValue) {
803             LLVM_DEBUG(dbgs() << "Replacing usage of '"
804                               << printReg(OppositeVCCRValue) << "' with '"
805                               << printReg(LastVPNOTResult) << " for instr: ";
806                        Iter->dump());
807             MO->setReg(LastVPNOTResult);
808             Modified = true;
809           }
810 
811           MO->setIsKill(false);
812         }
813 
814         // If this is an unpredicated VPNOT on
815         // LastVPNOTResult/OppositeVCCRValue, we can act like we inserted it.
816         if (Iter->getOpcode() == ARM::MVE_VPNOT &&
817             getVPTInstrPredicate(*Iter) == ARMVCC::None) {
818           Register VPNOTOperand = Iter->getOperand(1).getReg();
819           if (VPNOTOperand == LastVPNOTResult ||
820               VPNOTOperand == OppositeVCCRValue) {
821             IsInteresting = true;
822 
823             std::swap(VCCRValue, OppositeVCCRValue);
824             LastVPNOTResult = Iter->getOperand(0).getReg();
825           }
826         }
827       }
828 
829       // If this instruction was not interesting, and it writes to VCCR, stop.
830       if (!IsInteresting && IsWritingToVCCR(*Iter))
831         break;
832     }
833   }
834 
835   for (MachineInstr *DeadInstruction : DeadInstructions)
836     DeadInstruction->eraseFromParent();
837 
838   return Modified;
839 }
840 
841 // This optimisation replaces VCMPs with VPNOTs when they are equivalent.
842 bool MVETPAndVPTOptimisations::ReplaceVCMPsByVPNOTs(MachineBasicBlock &MBB) {
843   SmallVector<MachineInstr *, 4> DeadInstructions;
844 
845   // The last VCMP that we have seen and that couldn't be replaced.
846   // This is reset when an instruction that writes to VCCR/VPR is found, or when
847   // a VCMP is replaced with a VPNOT.
848   // We'll only replace VCMPs with VPNOTs when this is not null, and when the
849   // current VCMP is the opposite of PrevVCMP.
850   MachineInstr *PrevVCMP = nullptr;
851   // If we find an instruction that kills the result of PrevVCMP, we save the
852   // operand here to remove the kill flag in case we need to use PrevVCMP's
853   // result.
854   MachineOperand *PrevVCMPResultKiller = nullptr;
855 
856   for (MachineInstr &Instr : MBB.instrs()) {
857     if (PrevVCMP) {
858       if (MachineOperand *MO = Instr.findRegisterUseOperand(
859               PrevVCMP->getOperand(0).getReg(), /*isKill*/ true)) {
860         // If we come accross the instr that kills PrevVCMP's result, record it
861         // so we can remove the kill flag later if we need to.
862         PrevVCMPResultKiller = MO;
863       }
864     }
865 
866     // Ignore predicated instructions.
867     if (getVPTInstrPredicate(Instr) != ARMVCC::None)
868       continue;
869 
870     // Only look at VCMPs
871     if (!IsVCMP(Instr.getOpcode())) {
872       // If the instruction writes to VCCR, forget the previous VCMP.
873       if (IsWritingToVCCR(Instr))
874         PrevVCMP = nullptr;
875       continue;
876     }
877 
878     if (!PrevVCMP || !IsVPNOTEquivalent(Instr, *PrevVCMP)) {
879       PrevVCMP = &Instr;
880       continue;
881     }
882 
883     // The register containing the result of the VCMP that we're going to
884     // replace.
885     Register PrevVCMPResultReg = PrevVCMP->getOperand(0).getReg();
886 
887     // Build a VPNOT to replace the VCMP, reusing its operands.
888     MachineInstrBuilder MIBuilder =
889         BuildMI(MBB, &Instr, Instr.getDebugLoc(), TII->get(ARM::MVE_VPNOT))
890             .add(Instr.getOperand(0))
891             .addReg(PrevVCMPResultReg);
892     addUnpredicatedMveVpredNOp(MIBuilder);
893     LLVM_DEBUG(dbgs() << "Inserting VPNOT (to replace VCMP): ";
894                MIBuilder.getInstr()->dump(); dbgs() << "  Removed VCMP: ";
895                Instr.dump());
896 
897     // If we found an instruction that uses, and kills PrevVCMP's result,
898     // remove the kill flag.
899     if (PrevVCMPResultKiller)
900       PrevVCMPResultKiller->setIsKill(false);
901 
902     // Finally, mark the old VCMP for removal and reset
903     // PrevVCMP/PrevVCMPResultKiller.
904     DeadInstructions.push_back(&Instr);
905     PrevVCMP = nullptr;
906     PrevVCMPResultKiller = nullptr;
907   }
908 
909   for (MachineInstr *DeadInstruction : DeadInstructions)
910     DeadInstruction->eraseFromParent();
911 
912   return !DeadInstructions.empty();
913 }
914 
915 bool MVETPAndVPTOptimisations::ReplaceConstByVPNOTs(MachineBasicBlock &MBB,
916                                                MachineDominatorTree *DT) {
917   // Scan through the block, looking for instructions that use constants moves
918   // into VPR that are the negative of one another. These are expected to be
919   // COPY's to VCCRRegClass, from a t2MOVi or t2MOVi16. The last seen constant
920   // mask is kept it or and VPNOT's of it are added or reused as we scan through
921   // the function.
922   unsigned LastVPTImm = 0;
923   Register LastVPTReg = 0;
924   SmallSet<MachineInstr *, 4> DeadInstructions;
925 
926   for (MachineInstr &Instr : MBB.instrs()) {
927     // Look for predicated MVE instructions.
928     int PIdx = llvm::findFirstVPTPredOperandIdx(Instr);
929     if (PIdx == -1)
930       continue;
931     Register VPR = Instr.getOperand(PIdx + 1).getReg();
932     if (!VPR.isVirtual())
933       continue;
934 
935     // From that we are looking for an instruction like %11:vccr = COPY %9:rgpr.
936     MachineInstr *Copy = MRI->getVRegDef(VPR);
937     if (!Copy || Copy->getOpcode() != TargetOpcode::COPY ||
938         !Copy->getOperand(1).getReg().isVirtual() ||
939         MRI->getRegClass(Copy->getOperand(1).getReg()) == &ARM::VCCRRegClass) {
940       LastVPTReg = 0;
941       continue;
942     }
943     Register GPR = Copy->getOperand(1).getReg();
944 
945     // Find the Immediate used by the copy.
946     auto getImm = [&](Register GPR) -> unsigned {
947       MachineInstr *Def = MRI->getVRegDef(GPR);
948       if (Def && (Def->getOpcode() == ARM::t2MOVi ||
949                   Def->getOpcode() == ARM::t2MOVi16))
950         return Def->getOperand(1).getImm();
951       return -1U;
952     };
953     unsigned Imm = getImm(GPR);
954     if (Imm == -1U) {
955       LastVPTReg = 0;
956       continue;
957     }
958 
959     unsigned NotImm = ~Imm & 0xffff;
960     if (LastVPTReg != 0 && LastVPTReg != VPR && LastVPTImm == Imm) {
961       Instr.getOperand(PIdx + 1).setReg(LastVPTReg);
962       if (MRI->use_empty(VPR)) {
963         DeadInstructions.insert(Copy);
964         if (MRI->hasOneUse(GPR))
965           DeadInstructions.insert(MRI->getVRegDef(GPR));
966       }
967       LLVM_DEBUG(dbgs() << "Reusing predicate: in  " << Instr);
968     } else if (LastVPTReg != 0 && LastVPTImm == NotImm) {
969       // We have found the not of a previous constant. Create a VPNot of the
970       // earlier predicate reg and use it instead of the copy.
971       Register NewVPR = MRI->createVirtualRegister(&ARM::VCCRRegClass);
972       auto VPNot = BuildMI(MBB, &Instr, Instr.getDebugLoc(),
973                            TII->get(ARM::MVE_VPNOT), NewVPR)
974                        .addReg(LastVPTReg);
975       addUnpredicatedMveVpredNOp(VPNot);
976 
977       // Use the new register and check if the def is now dead.
978       Instr.getOperand(PIdx + 1).setReg(NewVPR);
979       if (MRI->use_empty(VPR)) {
980         DeadInstructions.insert(Copy);
981         if (MRI->hasOneUse(GPR))
982           DeadInstructions.insert(MRI->getVRegDef(GPR));
983       }
984       LLVM_DEBUG(dbgs() << "Adding VPNot: " << *VPNot << "  to replace use at "
985                         << Instr);
986       VPR = NewVPR;
987     }
988 
989     LastVPTImm = Imm;
990     LastVPTReg = VPR;
991   }
992 
993   for (MachineInstr *DI : DeadInstructions)
994     DI->eraseFromParent();
995 
996   return !DeadInstructions.empty();
997 }
998 
999 // Replace VPSEL with a predicated VMOV in blocks with a VCTP. This is a
1000 // somewhat blunt approximation to allow tail predicated with vpsel
1001 // instructions. We turn a vselect into a VPSEL in ISEL, but they have slightly
1002 // different semantics under tail predication. Until that is modelled we just
1003 // convert to a VMOVT (via a predicated VORR) instead.
1004 bool MVETPAndVPTOptimisations::ConvertVPSEL(MachineBasicBlock &MBB) {
1005   bool HasVCTP = false;
1006   SmallVector<MachineInstr *, 4> DeadInstructions;
1007 
1008   for (MachineInstr &MI : MBB.instrs()) {
1009     if (isVCTP(&MI)) {
1010       HasVCTP = true;
1011       continue;
1012     }
1013 
1014     if (!HasVCTP || MI.getOpcode() != ARM::MVE_VPSEL)
1015       continue;
1016 
1017     MachineInstrBuilder MIBuilder =
1018         BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(ARM::MVE_VORR))
1019             .add(MI.getOperand(0))
1020             .add(MI.getOperand(1))
1021             .add(MI.getOperand(1))
1022             .addImm(ARMVCC::Then)
1023             .add(MI.getOperand(4))
1024             .add(MI.getOperand(5))
1025             .add(MI.getOperand(2));
1026     // Silence unused variable warning in release builds.
1027     (void)MIBuilder;
1028     LLVM_DEBUG(dbgs() << "Replacing VPSEL: "; MI.dump();
1029                dbgs() << "     with VMOVT: "; MIBuilder.getInstr()->dump());
1030     DeadInstructions.push_back(&MI);
1031   }
1032 
1033   for (MachineInstr *DeadInstruction : DeadInstructions)
1034     DeadInstruction->eraseFromParent();
1035 
1036   return !DeadInstructions.empty();
1037 }
1038 
1039 // Add a registry allocation hint for t2DoLoopStart to hint it towards LR, as
1040 // the instruction may be removable as a noop.
1041 bool MVETPAndVPTOptimisations::HintDoLoopStartReg(MachineBasicBlock &MBB) {
1042   bool Changed = false;
1043   for (MachineInstr &MI : MBB.instrs()) {
1044     if (MI.getOpcode() != ARM::t2DoLoopStart)
1045       continue;
1046     Register R = MI.getOperand(1).getReg();
1047     MachineFunction *MF = MI.getParent()->getParent();
1048     MF->getRegInfo().setRegAllocationHint(R, ARMRI::RegLR, 0);
1049     Changed = true;
1050   }
1051   return Changed;
1052 }
1053 
1054 bool MVETPAndVPTOptimisations::runOnMachineFunction(MachineFunction &Fn) {
1055   const ARMSubtarget &STI = Fn.getSubtarget<ARMSubtarget>();
1056 
1057   if (!STI.isThumb2() || !STI.hasLOB())
1058     return false;
1059 
1060   TII = static_cast<const Thumb2InstrInfo *>(STI.getInstrInfo());
1061   MRI = &Fn.getRegInfo();
1062   MachineLoopInfo *MLI = &getAnalysis<MachineLoopInfo>();
1063   MachineDominatorTree *DT = &getAnalysis<MachineDominatorTree>();
1064 
1065   LLVM_DEBUG(dbgs() << "********** ARM MVE VPT Optimisations **********\n"
1066                     << "********** Function: " << Fn.getName() << '\n');
1067 
1068   bool Modified = false;
1069   for (MachineLoop *ML : MLI->getBase().getLoopsInPreorder()) {
1070     Modified |= LowerWhileLoopStart(ML);
1071     Modified |= MergeLoopEnd(ML);
1072     Modified |= ConvertTailPredLoop(ML, DT);
1073   }
1074 
1075   for (MachineBasicBlock &MBB : Fn) {
1076     Modified |= HintDoLoopStartReg(MBB);
1077     Modified |= ReplaceConstByVPNOTs(MBB, DT);
1078     Modified |= ReplaceVCMPsByVPNOTs(MBB);
1079     Modified |= ReduceOldVCCRValueUses(MBB);
1080     Modified |= ConvertVPSEL(MBB);
1081   }
1082 
1083   LLVM_DEBUG(dbgs() << "**************************************\n");
1084   return Modified;
1085 }
1086 
1087 /// createMVETPAndVPTOptimisationsPass
1088 FunctionPass *llvm::createMVETPAndVPTOptimisationsPass() {
1089   return new MVETPAndVPTOptimisations();
1090 }
1091