1 //===-- MVETPAndVPTOptimisationsPass.cpp ----------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file This pass does a few optimisations related to Tail predicated loops 10 /// and MVE VPT blocks before register allocation is performed. For VPT blocks 11 /// the goal is to maximize the sizes of the blocks that will be created by the 12 /// MVE VPT Block Insertion pass (which runs after register allocation). For 13 /// tail predicated loops we transform the loop into something that will 14 /// hopefully make the backend ARMLowOverheadLoops pass's job easier. 15 /// 16 //===----------------------------------------------------------------------===// 17 18 #include "ARM.h" 19 #include "ARMSubtarget.h" 20 #include "MCTargetDesc/ARMBaseInfo.h" 21 #include "MVETailPredUtils.h" 22 #include "Thumb2InstrInfo.h" 23 #include "llvm/ADT/SmallVector.h" 24 #include "llvm/CodeGen/MachineBasicBlock.h" 25 #include "llvm/CodeGen/MachineDominators.h" 26 #include "llvm/CodeGen/MachineFunction.h" 27 #include "llvm/CodeGen/MachineFunctionPass.h" 28 #include "llvm/CodeGen/MachineInstr.h" 29 #include "llvm/CodeGen/MachineLoopInfo.h" 30 #include "llvm/InitializePasses.h" 31 #include "llvm/Support/Debug.h" 32 #include <cassert> 33 34 using namespace llvm; 35 36 #define DEBUG_TYPE "arm-mve-vpt-opts" 37 38 static cl::opt<bool> 39 MergeEndDec("arm-enable-merge-loopenddec", cl::Hidden, 40 cl::desc("Enable merging Loop End and Dec instructions."), 41 cl::init(true)); 42 43 static cl::opt<bool> 44 SetLRPredicate("arm-set-lr-predicate", cl::Hidden, 45 cl::desc("Enable setting lr as a predicate in tail predication regions."), 46 cl::init(true)); 47 48 namespace { 49 class MVETPAndVPTOptimisations : public 
MachineFunctionPass { 50 public: 51 static char ID; 52 const Thumb2InstrInfo *TII; 53 MachineRegisterInfo *MRI; 54 55 MVETPAndVPTOptimisations() : MachineFunctionPass(ID) {} 56 57 bool runOnMachineFunction(MachineFunction &Fn) override; 58 59 void getAnalysisUsage(AnalysisUsage &AU) const override { 60 AU.addRequired<MachineLoopInfoWrapperPass>(); 61 AU.addPreserved<MachineLoopInfoWrapperPass>(); 62 AU.addRequired<MachineDominatorTreeWrapperPass>(); 63 AU.addPreserved<MachineDominatorTreeWrapperPass>(); 64 MachineFunctionPass::getAnalysisUsage(AU); 65 } 66 67 StringRef getPassName() const override { 68 return "ARM MVE TailPred and VPT Optimisation Pass"; 69 } 70 71 private: 72 bool LowerWhileLoopStart(MachineLoop *ML); 73 bool MergeLoopEnd(MachineLoop *ML); 74 bool ConvertTailPredLoop(MachineLoop *ML, MachineDominatorTree *DT); 75 MachineInstr &ReplaceRegisterUseWithVPNOT(MachineBasicBlock &MBB, 76 MachineInstr &Instr, 77 MachineOperand &User, 78 Register Target); 79 bool ReduceOldVCCRValueUses(MachineBasicBlock &MBB); 80 bool ReplaceVCMPsByVPNOTs(MachineBasicBlock &MBB); 81 bool ReplaceConstByVPNOTs(MachineBasicBlock &MBB, MachineDominatorTree *DT); 82 bool ConvertVPSEL(MachineBasicBlock &MBB); 83 bool HintDoLoopStartReg(MachineBasicBlock &MBB); 84 MachineInstr *CheckForLRUseInPredecessors(MachineBasicBlock *PreHeader, 85 MachineInstr *LoopStart); 86 }; 87 88 char MVETPAndVPTOptimisations::ID = 0; 89 90 } // end anonymous namespace 91 92 INITIALIZE_PASS_BEGIN(MVETPAndVPTOptimisations, DEBUG_TYPE, 93 "ARM MVE TailPred and VPT Optimisations pass", false, 94 false) 95 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass) 96 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass) 97 INITIALIZE_PASS_END(MVETPAndVPTOptimisations, DEBUG_TYPE, 98 "ARM MVE TailPred and VPT Optimisations pass", false, false) 99 100 static MachineInstr *LookThroughCOPY(MachineInstr *MI, 101 MachineRegisterInfo *MRI) { 102 while (MI && MI->getOpcode() == TargetOpcode::COPY && 103 
MI->getOperand(1).getReg().isVirtual()) 104 MI = MRI->getVRegDef(MI->getOperand(1).getReg()); 105 return MI; 106 } 107 108 // Given a loop ML, this attempts to find the t2LoopEnd, t2LoopDec and 109 // corresponding PHI that make up a low overhead loop. Only handles 'do' loops 110 // at the moment, returning a t2DoLoopStart in LoopStart. 111 static bool findLoopComponents(MachineLoop *ML, MachineRegisterInfo *MRI, 112 MachineInstr *&LoopStart, MachineInstr *&LoopPhi, 113 MachineInstr *&LoopDec, MachineInstr *&LoopEnd) { 114 MachineBasicBlock *Header = ML->getHeader(); 115 MachineBasicBlock *Latch = ML->getLoopLatch(); 116 if (!Header || !Latch) { 117 LLVM_DEBUG(dbgs() << " no Loop Latch or Header\n"); 118 return false; 119 } 120 121 // Find the loop end from the terminators. 122 LoopEnd = nullptr; 123 for (auto &T : Latch->terminators()) { 124 if (T.getOpcode() == ARM::t2LoopEnd && T.getOperand(1).getMBB() == Header) { 125 LoopEnd = &T; 126 break; 127 } 128 if (T.getOpcode() == ARM::t2LoopEndDec && 129 T.getOperand(2).getMBB() == Header) { 130 LoopEnd = &T; 131 break; 132 } 133 } 134 if (!LoopEnd) { 135 LLVM_DEBUG(dbgs() << " no LoopEnd\n"); 136 return false; 137 } 138 LLVM_DEBUG(dbgs() << " found loop end: " << *LoopEnd); 139 140 // Find the dec from the use of the end. There may be copies between 141 // instructions. We expect the loop to loop like: 142 // $vs = t2DoLoopStart ... 143 // loop: 144 // $vp = phi [ $vs ], [ $vd ] 145 // ... 146 // $vd = t2LoopDec $vp 147 // ... 
148 // t2LoopEnd $vd, loop 149 if (LoopEnd->getOpcode() == ARM::t2LoopEndDec) 150 LoopDec = LoopEnd; 151 else { 152 LoopDec = 153 LookThroughCOPY(MRI->getVRegDef(LoopEnd->getOperand(0).getReg()), MRI); 154 if (!LoopDec || LoopDec->getOpcode() != ARM::t2LoopDec) { 155 LLVM_DEBUG(dbgs() << " didn't find LoopDec where we expected!\n"); 156 return false; 157 } 158 } 159 LLVM_DEBUG(dbgs() << " found loop dec: " << *LoopDec); 160 161 LoopPhi = 162 LookThroughCOPY(MRI->getVRegDef(LoopDec->getOperand(1).getReg()), MRI); 163 if (!LoopPhi || LoopPhi->getOpcode() != TargetOpcode::PHI || 164 LoopPhi->getNumOperands() != 5 || 165 (LoopPhi->getOperand(2).getMBB() != Latch && 166 LoopPhi->getOperand(4).getMBB() != Latch)) { 167 LLVM_DEBUG(dbgs() << " didn't find PHI where we expected!\n"); 168 return false; 169 } 170 LLVM_DEBUG(dbgs() << " found loop phi: " << *LoopPhi); 171 172 Register StartReg = LoopPhi->getOperand(2).getMBB() == Latch 173 ? LoopPhi->getOperand(3).getReg() 174 : LoopPhi->getOperand(1).getReg(); 175 LoopStart = LookThroughCOPY(MRI->getVRegDef(StartReg), MRI); 176 if (!LoopStart || (LoopStart->getOpcode() != ARM::t2DoLoopStart && 177 LoopStart->getOpcode() != ARM::t2WhileLoopSetup && 178 LoopStart->getOpcode() != ARM::t2WhileLoopStartLR)) { 179 LLVM_DEBUG(dbgs() << " didn't find Start where we expected!\n"); 180 return false; 181 } 182 LLVM_DEBUG(dbgs() << " found loop start: " << *LoopStart); 183 184 return true; 185 } 186 187 static void RevertWhileLoopSetup(MachineInstr *MI, const TargetInstrInfo *TII) { 188 MachineBasicBlock *MBB = MI->getParent(); 189 assert(MI->getOpcode() == ARM::t2WhileLoopSetup && 190 "Only expected a t2WhileLoopSetup in RevertWhileLoopStart!"); 191 192 // Subs 193 MachineInstrBuilder MIB = 194 BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2SUBri)); 195 MIB.add(MI->getOperand(0)); 196 MIB.add(MI->getOperand(1)); 197 MIB.addImm(0); 198 MIB.addImm(ARMCC::AL); 199 MIB.addReg(ARM::NoRegister); 200 MIB.addReg(ARM::CPSR, 
RegState::Define); 201 202 // Attempt to find a t2WhileLoopStart and revert to a t2Bcc. 203 for (MachineInstr &I : MBB->terminators()) { 204 if (I.getOpcode() == ARM::t2WhileLoopStart) { 205 MachineInstrBuilder MIB = 206 BuildMI(*MBB, &I, I.getDebugLoc(), TII->get(ARM::t2Bcc)); 207 MIB.add(MI->getOperand(1)); // branch target 208 MIB.addImm(ARMCC::EQ); 209 MIB.addReg(ARM::CPSR); 210 I.eraseFromParent(); 211 break; 212 } 213 } 214 215 MI->eraseFromParent(); 216 } 217 218 // The Hardware Loop insertion and ISel Lowering produce the pseudos for the 219 // start of a while loop: 220 // %a:gprlr = t2WhileLoopSetup %Cnt 221 // t2WhileLoopStart %a, %BB 222 // We want to convert those to a single instruction which, like t2LoopEndDec and 223 // t2DoLoopStartTP is both a terminator and produces a value: 224 // %a:grplr: t2WhileLoopStartLR %Cnt, %BB 225 // 226 // Otherwise if we can't, we revert the loop. t2WhileLoopSetup and 227 // t2WhileLoopStart are not valid past regalloc. 228 bool MVETPAndVPTOptimisations::LowerWhileLoopStart(MachineLoop *ML) { 229 LLVM_DEBUG(dbgs() << "LowerWhileLoopStart on loop " 230 << ML->getHeader()->getName() << "\n"); 231 232 MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec; 233 if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd)) 234 return false; 235 236 if (LoopStart->getOpcode() != ARM::t2WhileLoopSetup) 237 return false; 238 239 Register LR = LoopStart->getOperand(0).getReg(); 240 auto WLSIt = find_if(MRI->use_nodbg_instructions(LR), [](auto &MI) { 241 return MI.getOpcode() == ARM::t2WhileLoopStart; 242 }); 243 if (!MergeEndDec || WLSIt == MRI->use_instr_nodbg_end()) { 244 RevertWhileLoopSetup(LoopStart, TII); 245 RevertLoopDec(LoopStart, TII); 246 RevertLoopEnd(LoopStart, TII); 247 return true; 248 } 249 250 MachineInstrBuilder MI = 251 BuildMI(*WLSIt->getParent(), *WLSIt, WLSIt->getDebugLoc(), 252 TII->get(ARM::t2WhileLoopStartLR), LR) 253 .add(LoopStart->getOperand(1)) 254 .add(WLSIt->getOperand(1)); 255 (void)MI; 
256 LLVM_DEBUG(dbgs() << "Lowered WhileLoopStart into: " << *MI.getInstr()); 257 258 WLSIt->eraseFromParent(); 259 LoopStart->eraseFromParent(); 260 return true; 261 } 262 263 // Return true if this instruction is invalid in a low overhead loop, usually 264 // because it clobbers LR. 265 static bool IsInvalidTPInstruction(MachineInstr &MI) { 266 return MI.isCall() || isLoopStart(MI); 267 } 268 269 // Starting from PreHeader, search for invalid instructions back until the 270 // LoopStart block is reached. If invalid instructions are found, the loop start 271 // is reverted from a WhileLoopStart to a DoLoopStart on the same loop. Will 272 // return the new DLS LoopStart if updated. 273 MachineInstr *MVETPAndVPTOptimisations::CheckForLRUseInPredecessors( 274 MachineBasicBlock *PreHeader, MachineInstr *LoopStart) { 275 SmallVector<MachineBasicBlock *> Worklist; 276 SmallPtrSet<MachineBasicBlock *, 4> Visited; 277 Worklist.push_back(PreHeader); 278 Visited.insert(LoopStart->getParent()); 279 280 while (!Worklist.empty()) { 281 MachineBasicBlock *MBB = Worklist.pop_back_val(); 282 if (Visited.count(MBB)) 283 continue; 284 285 for (MachineInstr &MI : *MBB) { 286 if (!IsInvalidTPInstruction(MI)) 287 continue; 288 289 LLVM_DEBUG(dbgs() << "Found LR use in predecessors, reverting: " << MI); 290 291 // Create a t2DoLoopStart at the end of the preheader. 292 MachineInstrBuilder MIB = 293 BuildMI(*PreHeader, PreHeader->getFirstTerminator(), 294 LoopStart->getDebugLoc(), TII->get(ARM::t2DoLoopStart)); 295 MIB.add(LoopStart->getOperand(0)); 296 MIB.add(LoopStart->getOperand(1)); 297 298 // Make sure to remove the kill flags, to prevent them from being invalid. 299 LoopStart->getOperand(1).setIsKill(false); 300 301 // Revert the t2WhileLoopStartLR to a CMP and Br. 
302 RevertWhileLoopStartLR(LoopStart, TII, ARM::t2Bcc, true); 303 return MIB; 304 } 305 306 Visited.insert(MBB); 307 for (auto *Pred : MBB->predecessors()) 308 Worklist.push_back(Pred); 309 } 310 return LoopStart; 311 } 312 313 // This function converts loops with t2LoopEnd and t2LoopEnd instructions into 314 // a single t2LoopEndDec instruction. To do that it needs to make sure that LR 315 // will be valid to be used for the low overhead loop, which means nothing else 316 // is using LR (especially calls) and there are no superfluous copies in the 317 // loop. The t2LoopEndDec is a branching terminator that produces a value (the 318 // decrement) around the loop edge, which means we need to be careful that they 319 // will be valid to allocate without any spilling. 320 bool MVETPAndVPTOptimisations::MergeLoopEnd(MachineLoop *ML) { 321 if (!MergeEndDec) 322 return false; 323 324 LLVM_DEBUG(dbgs() << "MergeLoopEnd on loop " << ML->getHeader()->getName() 325 << "\n"); 326 327 MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec; 328 if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd)) 329 return false; 330 331 // Check if there is an illegal instruction (a call) in the low overhead loop 332 // and if so revert it now before we get any further. While loops also need to 333 // check the preheaders, but can be reverted to a DLS loop if needed. 
334 auto *PreHeader = ML->getLoopPreheader(); 335 if (LoopStart->getOpcode() == ARM::t2WhileLoopStartLR && PreHeader) 336 LoopStart = CheckForLRUseInPredecessors(PreHeader, LoopStart); 337 338 for (MachineBasicBlock *MBB : ML->blocks()) { 339 for (MachineInstr &MI : *MBB) { 340 if (IsInvalidTPInstruction(MI)) { 341 LLVM_DEBUG(dbgs() << "Found LR use in loop, reverting: " << MI); 342 if (LoopStart->getOpcode() == ARM::t2DoLoopStart) 343 RevertDoLoopStart(LoopStart, TII); 344 else 345 RevertWhileLoopStartLR(LoopStart, TII); 346 RevertLoopDec(LoopDec, TII); 347 RevertLoopEnd(LoopEnd, TII); 348 return true; 349 } 350 } 351 } 352 353 // Remove any copies from the loop, to ensure the phi that remains is both 354 // simpler and contains no extra uses. Because t2LoopEndDec is a terminator 355 // that cannot spill, we need to be careful what remains in the loop. 356 Register PhiReg = LoopPhi->getOperand(0).getReg(); 357 Register DecReg = LoopDec->getOperand(0).getReg(); 358 Register StartReg = LoopStart->getOperand(0).getReg(); 359 // Ensure the uses are expected, and collect any copies we want to remove. 
360 SmallVector<MachineInstr *, 4> Copies; 361 auto CheckUsers = [&Copies](Register BaseReg, 362 ArrayRef<MachineInstr *> ExpectedUsers, 363 MachineRegisterInfo *MRI) { 364 SmallVector<Register, 4> Worklist; 365 Worklist.push_back(BaseReg); 366 while (!Worklist.empty()) { 367 Register Reg = Worklist.pop_back_val(); 368 for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) { 369 if (llvm::is_contained(ExpectedUsers, &MI)) 370 continue; 371 if (MI.getOpcode() != TargetOpcode::COPY || 372 !MI.getOperand(0).getReg().isVirtual()) { 373 LLVM_DEBUG(dbgs() << "Extra users of register found: " << MI); 374 return false; 375 } 376 Worklist.push_back(MI.getOperand(0).getReg()); 377 Copies.push_back(&MI); 378 } 379 } 380 return true; 381 }; 382 if (!CheckUsers(PhiReg, {LoopDec}, MRI) || 383 !CheckUsers(DecReg, {LoopPhi, LoopEnd}, MRI) || 384 !CheckUsers(StartReg, {LoopPhi}, MRI)) { 385 // Don't leave a t2WhileLoopStartLR without the LoopDecEnd. 386 if (LoopStart->getOpcode() == ARM::t2WhileLoopStartLR) { 387 RevertWhileLoopStartLR(LoopStart, TII); 388 RevertLoopDec(LoopDec, TII); 389 RevertLoopEnd(LoopEnd, TII); 390 return true; 391 } 392 return false; 393 } 394 395 MRI->constrainRegClass(StartReg, &ARM::GPRlrRegClass); 396 MRI->constrainRegClass(PhiReg, &ARM::GPRlrRegClass); 397 MRI->constrainRegClass(DecReg, &ARM::GPRlrRegClass); 398 399 if (LoopPhi->getOperand(2).getMBB() == ML->getLoopLatch()) { 400 LoopPhi->getOperand(3).setReg(StartReg); 401 LoopPhi->getOperand(1).setReg(DecReg); 402 } else { 403 LoopPhi->getOperand(1).setReg(StartReg); 404 LoopPhi->getOperand(3).setReg(DecReg); 405 } 406 407 SmallVector<MachineOperand, 4> Cond; // For analyzeBranch. 408 MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For analyzeBranch. 409 if (!TII->analyzeBranch(*LoopEnd->getParent(), TBB, FBB, Cond) && !FBB) { 410 // If the LoopEnd falls through, need to insert a t2B to the fall-through 411 // block so that the non-analyzable t2LoopEndDec doesn't fall through. 
412 MachineFunction::iterator MBBI = ++LoopEnd->getParent()->getIterator(); 413 BuildMI(LoopEnd->getParent(), DebugLoc(), TII->get(ARM::t2B)) 414 .addMBB(&*MBBI) 415 .add(predOps(ARMCC::AL)); 416 } 417 418 // Replace the loop dec and loop end as a single instruction. 419 MachineInstrBuilder MI = 420 BuildMI(*LoopEnd->getParent(), *LoopEnd, LoopEnd->getDebugLoc(), 421 TII->get(ARM::t2LoopEndDec), DecReg) 422 .addReg(PhiReg) 423 .add(LoopEnd->getOperand(1)); 424 (void)MI; 425 LLVM_DEBUG(dbgs() << "Merged LoopDec and End into: " << *MI.getInstr()); 426 427 LoopDec->eraseFromParent(); 428 LoopEnd->eraseFromParent(); 429 for (auto *MI : Copies) 430 MI->eraseFromParent(); 431 return true; 432 } 433 434 // Convert t2DoLoopStart to t2DoLoopStartTP if the loop contains VCTP 435 // instructions. This keeps the VCTP count reg operand on the t2DoLoopStartTP 436 // instruction, making the backend ARMLowOverheadLoops passes job of finding the 437 // VCTP operand much simpler. 438 bool MVETPAndVPTOptimisations::ConvertTailPredLoop(MachineLoop *ML, 439 MachineDominatorTree *DT) { 440 LLVM_DEBUG(dbgs() << "ConvertTailPredLoop on loop " 441 << ML->getHeader()->getName() << "\n"); 442 443 // Find some loop components including the LoopEnd/Dec/Start, and any VCTP's 444 // in the loop. 
445 MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec; 446 if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd)) 447 return false; 448 if (LoopDec != LoopEnd || (LoopStart->getOpcode() != ARM::t2DoLoopStart && 449 LoopStart->getOpcode() != ARM::t2WhileLoopStartLR)) 450 return false; 451 452 SmallVector<MachineInstr *, 4> VCTPs; 453 SmallVector<MachineInstr *, 4> MVEInstrs; 454 for (MachineBasicBlock *BB : ML->blocks()) { 455 for (MachineInstr &MI : *BB) 456 if (isVCTP(&MI)) 457 VCTPs.push_back(&MI); 458 else if (findFirstVPTPredOperandIdx(MI) != -1) 459 MVEInstrs.push_back(&MI); 460 } 461 462 if (VCTPs.empty()) { 463 LLVM_DEBUG(dbgs() << " no VCTPs\n"); 464 return false; 465 } 466 467 // Check all VCTPs are the same. 468 MachineInstr *FirstVCTP = *VCTPs.begin(); 469 for (MachineInstr *VCTP : VCTPs) { 470 LLVM_DEBUG(dbgs() << " with VCTP " << *VCTP); 471 if (VCTP->getOpcode() != FirstVCTP->getOpcode() || 472 VCTP->getOperand(0).getReg() != FirstVCTP->getOperand(0).getReg()) { 473 LLVM_DEBUG(dbgs() << " VCTP's are not identical\n"); 474 return false; 475 } 476 } 477 478 // Check for the register being used can be setup before the loop. We expect 479 // this to be: 480 // $vx = ... 481 // loop: 482 // $vp = PHI [ $vx ], [ $vd ] 483 // .. 484 // $vpr = VCTP $vp 485 // .. 486 // $vd = t2SUBri $vp, #n 487 // .. 488 Register CountReg = FirstVCTP->getOperand(1).getReg(); 489 if (!CountReg.isVirtual()) { 490 LLVM_DEBUG(dbgs() << " cannot determine VCTP PHI\n"); 491 return false; 492 } 493 MachineInstr *Phi = LookThroughCOPY(MRI->getVRegDef(CountReg), MRI); 494 if (!Phi || Phi->getOpcode() != TargetOpcode::PHI || 495 Phi->getNumOperands() != 5 || 496 (Phi->getOperand(2).getMBB() != ML->getLoopLatch() && 497 Phi->getOperand(4).getMBB() != ML->getLoopLatch())) { 498 LLVM_DEBUG(dbgs() << " cannot determine VCTP Count\n"); 499 return false; 500 } 501 CountReg = Phi->getOperand(2).getMBB() == ML->getLoopLatch() 502 ? 
Phi->getOperand(3).getReg() 503 : Phi->getOperand(1).getReg(); 504 505 // Replace the t2DoLoopStart with the t2DoLoopStartTP, move it to the end of 506 // the preheader and add the new CountReg to it. We attempt to place it late 507 // in the preheader, but may need to move that earlier based on uses. 508 MachineBasicBlock *MBB = LoopStart->getParent(); 509 MachineBasicBlock::iterator InsertPt = MBB->getFirstTerminator(); 510 for (MachineInstr &Use : 511 MRI->use_instructions(LoopStart->getOperand(0).getReg())) 512 if ((InsertPt != MBB->end() && !DT->dominates(&*InsertPt, &Use)) || 513 !DT->dominates(ML->getHeader(), Use.getParent())) { 514 LLVM_DEBUG(dbgs() << " InsertPt could not be a terminator!\n"); 515 return false; 516 } 517 518 unsigned NewOpc = LoopStart->getOpcode() == ARM::t2DoLoopStart 519 ? ARM::t2DoLoopStartTP 520 : ARM::t2WhileLoopStartTP; 521 MachineInstrBuilder MI = 522 BuildMI(*MBB, InsertPt, LoopStart->getDebugLoc(), TII->get(NewOpc)) 523 .add(LoopStart->getOperand(0)) 524 .add(LoopStart->getOperand(1)) 525 .addReg(CountReg); 526 if (NewOpc == ARM::t2WhileLoopStartTP) 527 MI.add(LoopStart->getOperand(2)); 528 LLVM_DEBUG(dbgs() << "Replacing " << *LoopStart << " with " 529 << *MI.getInstr()); 530 MRI->constrainRegClass(CountReg, &ARM::rGPRRegClass); 531 LoopStart->eraseFromParent(); 532 533 if (SetLRPredicate) { 534 // Each instruction in the loop needs to be using LR as the predicate from 535 // the Phi as the predicate. 536 Register LR = LoopPhi->getOperand(0).getReg(); 537 for (MachineInstr *MI : MVEInstrs) { 538 int Idx = findFirstVPTPredOperandIdx(*MI); 539 MI->getOperand(Idx + 2).setReg(LR); 540 } 541 } 542 543 return true; 544 } 545 546 // Returns true if Opcode is any VCMP Opcode. 547 static bool IsVCMP(unsigned Opcode) { return VCMPOpcodeToVPT(Opcode) != 0; } 548 549 // Returns true if a VCMP with this Opcode can have its operands swapped. 
// There are two kinds of VCMP that can't have their operands swapped: Float
// VCMPs, and VCMPr instructions (since the r is always on the right).
static bool CanHaveSwappedOperands(unsigned Opcode) {
  switch (Opcode) {
  default:
    return true;
  case ARM::MVE_VCMPf32:
  case ARM::MVE_VCMPf16:
  case ARM::MVE_VCMPf32r:
  case ARM::MVE_VCMPf16r:
  case ARM::MVE_VCMPi8r:
  case ARM::MVE_VCMPi16r:
  case ARM::MVE_VCMPi32r:
  case ARM::MVE_VCMPu8r:
  case ARM::MVE_VCMPu16r:
  case ARM::MVE_VCMPu32r:
  case ARM::MVE_VCMPs8r:
  case ARM::MVE_VCMPs16r:
  case ARM::MVE_VCMPs32r:
    return false;
  }
}

// Returns the CondCode of a VCMP Instruction.
static ARMCC::CondCodes GetCondCode(MachineInstr &Instr) {
  assert(IsVCMP(Instr.getOpcode()) && "Inst must be a VCMP");
  // Operand 3 of a VCMP holds the condition code immediate.
  return ARMCC::CondCodes(Instr.getOperand(3).getImm());
}

// Returns true if Cond is equivalent to a VPNOT instruction on the result of
// Prev. Cond and Prev must be VCMPs.
static bool IsVPNOTEquivalent(MachineInstr &Cond, MachineInstr &Prev) {
  assert(IsVCMP(Cond.getOpcode()) && IsVCMP(Prev.getOpcode()));

  // Opcodes must match.
  if (Cond.getOpcode() != Prev.getOpcode())
    return false;

  // Operands 1 and 2 of a VCMP are the two compared values.
  MachineOperand &CondOP1 = Cond.getOperand(1), &CondOP2 = Cond.getOperand(2);
  MachineOperand &PrevOP1 = Prev.getOperand(1), &PrevOP2 = Prev.getOperand(2);

  // If the VCMP has the opposite condition with the same operands, we can
  // replace it with a VPNOT
  ARMCC::CondCodes ExpectedCode = GetCondCode(Cond);
  ExpectedCode = ARMCC::getOppositeCondition(ExpectedCode);
  if (ExpectedCode == GetCondCode(Prev))
    if (CondOP1.isIdenticalTo(PrevOP1) && CondOP2.isIdenticalTo(PrevOP2))
      return true;
  // Check again with operands swapped if possible. Note that ExpectedCode is
  // already the opposite of Cond's code here, so this matches a Prev with the
  // swapped form of that opposite condition and reversed operand order.
  if (!CanHaveSwappedOperands(Cond.getOpcode()))
    return false;
  ExpectedCode = ARMCC::getSwappedCondition(ExpectedCode);
  return ExpectedCode == GetCondCode(Prev) && CondOP1.isIdenticalTo(PrevOP2) &&
         CondOP2.isIdenticalTo(PrevOP1);
}

// Returns true if Instr writes to VCCR (i.e. its first operand is a def of a
// virtual register in the VCCR register class).
static bool IsWritingToVCCR(MachineInstr &Instr) {
  if (Instr.getNumOperands() == 0)
    return false;
  MachineOperand &Dst = Instr.getOperand(0);
  if (!Dst.isReg())
    return false;
  Register DstReg = Dst.getReg();
  if (!DstReg.isVirtual())
    return false;
  MachineRegisterInfo &RegInfo = Instr.getMF()->getRegInfo();
  const TargetRegisterClass *RegClass = RegInfo.getRegClassOrNull(DstReg);
  return RegClass && (RegClass->getID() == ARM::VCCRRegClassID);
}

// Transforms
//    <Instr that uses %A ('User' Operand)>
// Into
//    %K = VPNOT %Target
//    <Instr that uses %K ('User' Operand)>
// And returns the newly inserted VPNOT.
// This optimization is done in the hopes of preventing spills/reloads of VPR by
// reducing the number of VCCR values with overlapping lifetimes.
MachineInstr &MVETPAndVPTOptimisations::ReplaceRegisterUseWithVPNOT(
    MachineBasicBlock &MBB, MachineInstr &Instr, MachineOperand &User,
    Register Target) {
  Register NewResult = MRI->createVirtualRegister(MRI->getRegClass(Target));

  // Insert the VPNOT immediately before the instruction that uses it.
  MachineInstrBuilder MIBuilder =
      BuildMI(MBB, &Instr, Instr.getDebugLoc(), TII->get(ARM::MVE_VPNOT))
          .addDef(NewResult)
          .addReg(Target);
  addUnpredicatedMveVpredNOp(MIBuilder);

  // Make the user use NewResult instead, and clear its kill flag.
  User.setReg(NewResult);
  User.setIsKill(false);

  LLVM_DEBUG(dbgs() << "  Inserting VPNOT (for spill prevention): ";
             MIBuilder.getInstr()->dump());

  return *MIBuilder.getInstr();
}

// Moves a VPNOT before its first user if an instruction that uses Reg is found
// in-between the VPNOT and its user.
// Returns true if there is at least one user of the VPNOT in the block.
static bool MoveVPNOTBeforeFirstUser(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator Iter,
                                     Register Reg) {
  assert(Iter->getOpcode() == ARM::MVE_VPNOT && "Not a VPNOT!");
  assert(getVPTInstrPredicate(*Iter) == ARMVCC::None &&
         "The VPNOT cannot be predicated");

  MachineInstr &VPNOT = *Iter;
  Register VPNOTResult = VPNOT.getOperand(0).getReg();
  Register VPNOTOperand = VPNOT.getOperand(1).getReg();

  // Whether the VPNOT will need to be moved, and whether we found a user of the
  // VPNOT.
  bool MustMove = false, HasUser = false;
  MachineOperand *VPNOTOperandKiller = nullptr;
  for (; Iter != MBB.end(); ++Iter) {
    if (MachineOperand *MO =
            Iter->findRegisterUseOperand(VPNOTOperand, /*TRI=*/nullptr,
                                         /*isKill*/ true)) {
      // If we find the operand that kills the VPNOTOperand's result, save it.
      VPNOTOperandKiller = MO;
    }

    // A use of Reg between the VPNOT and its first user means the VPNOT must
    // be moved down past that use.
    if (Iter->findRegisterUseOperandIdx(Reg, /*TRI=*/nullptr) != -1) {
      MustMove = true;
      continue;
    }

    if (Iter->findRegisterUseOperandIdx(VPNOTResult, /*TRI=*/nullptr) == -1)
      continue;

    HasUser = true;
    if (!MustMove)
      break;

    // Move the VPNOT right before Iter
    LLVM_DEBUG(dbgs() << "Moving: "; VPNOT.dump(); dbgs() << "  Before: ";
               Iter->dump());
    MBB.splice(Iter, &MBB, VPNOT.getIterator());
    // If we move the instr, and its operand was killed earlier, remove the kill
    // flag.
    if (VPNOTOperandKiller)
      VPNOTOperandKiller->setIsKill(false);

    break;
  }
  return HasUser;
}

// This optimisation attempts to reduce the number of overlapping lifetimes of
// VCCR values by replacing uses of old VCCR values with VPNOTs. For example,
// this replaces
//    %A:vccr = (something)
//    %B:vccr = VPNOT %A
//    %Foo = (some op that uses %B)
//    %Bar = (some op that uses %A)
// With
//    %A:vccr = (something)
//    %B:vccr = VPNOT %A
//    %Foo = (some op that uses %B)
//    %TMP2:vccr = VPNOT %B
//    %Bar = (some op that uses %A)
bool MVETPAndVPTOptimisations::ReduceOldVCCRValueUses(MachineBasicBlock &MBB) {
  MachineBasicBlock::iterator Iter = MBB.begin(), End = MBB.end();
  SmallVector<MachineInstr *, 4> DeadInstructions;
  bool Modified = false;

  while (Iter != End) {
    Register VCCRValue, OppositeVCCRValue;
    // The first loop looks for 2 unpredicated instructions:
    //   %A:vccr = (instr)     ; A is stored in VCCRValue
    //   %B:vccr = VPNOT %A    ; B is stored in OppositeVCCRValue
    for (; Iter != End; ++Iter) {
      // We're only interested in unpredicated instructions that write to VCCR.
      if (!IsWritingToVCCR(*Iter) ||
          getVPTInstrPredicate(*Iter) != ARMVCC::None)
        continue;
      Register Dst = Iter->getOperand(0).getReg();

      // If we already have a VCCRValue, and this is a VPNOT on VCCRValue, we've
      // found what we were looking for.
      if (VCCRValue && Iter->getOpcode() == ARM::MVE_VPNOT &&
          Iter->findRegisterUseOperandIdx(VCCRValue, /*TRI=*/nullptr) != -1) {
        // Move the VPNOT closer to its first user if needed, and ignore if it
        // has no users.
        if (!MoveVPNOTBeforeFirstUser(MBB, Iter, VCCRValue))
          continue;

        OppositeVCCRValue = Dst;
        ++Iter;
        break;
      }

      // Else, just set VCCRValue.
      VCCRValue = Dst;
    }

    // If the first inner loop didn't find anything, stop here.
    if (Iter == End)
      break;

    assert(VCCRValue && OppositeVCCRValue &&
           "VCCRValue and OppositeVCCRValue shouldn't be empty if the loop "
           "stopped before the end of the block!");
    assert(VCCRValue != OppositeVCCRValue &&
           "VCCRValue should not be equal to OppositeVCCRValue!");

    // LastVPNOTResult always contains the same value as OppositeVCCRValue.
    Register LastVPNOTResult = OppositeVCCRValue;

    // This second loop tries to optimize the remaining instructions.
    for (; Iter != End; ++Iter) {
      bool IsInteresting = false;

      if (MachineOperand *MO =
              Iter->findRegisterUseOperand(VCCRValue, /*TRI=*/nullptr)) {
        IsInteresting = true;

        // - If the instruction is a VPNOT, it can be removed, and we can just
        //   replace its uses with LastVPNOTResult.
        // - Else, insert a new VPNOT on LastVPNOTResult to recompute VCCRValue.
        if (Iter->getOpcode() == ARM::MVE_VPNOT) {
          Register Result = Iter->getOperand(0).getReg();

          MRI->replaceRegWith(Result, LastVPNOTResult);
          DeadInstructions.push_back(&*Iter);
          Modified = true;

          LLVM_DEBUG(dbgs()
                     << "Replacing all uses of '" << printReg(Result)
                     << "' with '" << printReg(LastVPNOTResult) << "'\n");
        } else {
          MachineInstr &VPNOT =
              ReplaceRegisterUseWithVPNOT(MBB, *Iter, *MO, LastVPNOTResult);
          Modified = true;

          // The newly inserted VPNOT recomputed VCCRValue, so the roles of the
          // two tracked values swap from here on.
          LastVPNOTResult = VPNOT.getOperand(0).getReg();
          std::swap(VCCRValue, OppositeVCCRValue);

          LLVM_DEBUG(dbgs() << "Replacing use of '" << printReg(VCCRValue)
                            << "' with '" << printReg(LastVPNOTResult)
                            << "' in instr: " << *Iter);
        }
      } else {
        // If the instr uses OppositeVCCRValue, make it use LastVPNOTResult
        // instead as they contain the same value.
        if (MachineOperand *MO = Iter->findRegisterUseOperand(
                OppositeVCCRValue, /*TRI=*/nullptr)) {
          IsInteresting = true;

          // This is pointless if LastVPNOTResult == OppositeVCCRValue.
          if (LastVPNOTResult != OppositeVCCRValue) {
            LLVM_DEBUG(dbgs() << "Replacing usage of '"
                              << printReg(OppositeVCCRValue) << "' with '"
                              << printReg(LastVPNOTResult) << " for instr: ";
                       Iter->dump());
            MO->setReg(LastVPNOTResult);
            Modified = true;
          }

          MO->setIsKill(false);
        }

        // If this is an unpredicated VPNOT on
        // LastVPNOTResult/OppositeVCCRValue, we can act like we inserted it.
        if (Iter->getOpcode() == ARM::MVE_VPNOT &&
            getVPTInstrPredicate(*Iter) == ARMVCC::None) {
          Register VPNOTOperand = Iter->getOperand(1).getReg();
          if (VPNOTOperand == LastVPNOTResult ||
              VPNOTOperand == OppositeVCCRValue) {
            IsInteresting = true;

            std::swap(VCCRValue, OppositeVCCRValue);
            LastVPNOTResult = Iter->getOperand(0).getReg();
          }
        }
      }

      // If this instruction was not interesting, and it writes to VCCR, stop.
      if (!IsInteresting && IsWritingToVCCR(*Iter))
        break;
    }
  }

  for (MachineInstr *DeadInstruction : DeadInstructions)
    DeadInstruction->eraseFromParent();

  return Modified;
}

// This optimisation replaces VCMPs with VPNOTs when they are equivalent.
bool MVETPAndVPTOptimisations::ReplaceVCMPsByVPNOTs(MachineBasicBlock &MBB) {
  SmallVector<MachineInstr *, 4> DeadInstructions;

  // The last VCMP that we have seen and that couldn't be replaced.
  // This is reset when an instruction that writes to VCCR/VPR is found, or when
  // a VCMP is replaced with a VPNOT.
  // We'll only replace VCMPs with VPNOTs when this is not null, and when the
  // current VCMP is the opposite of PrevVCMP.
  MachineInstr *PrevVCMP = nullptr;
  // If we find an instruction that kills the result of PrevVCMP, we save the
  // operand here to remove the kill flag in case we need to use PrevVCMP's
  // result.
  MachineOperand *PrevVCMPResultKiller = nullptr;

  for (MachineInstr &Instr : MBB.instrs()) {
    if (PrevVCMP) {
      if (MachineOperand *MO =
              Instr.findRegisterUseOperand(PrevVCMP->getOperand(0).getReg(),
                                           /*TRI=*/nullptr, /*isKill*/ true)) {
        // If we come across the instr that kills PrevVCMP's result, record it
        // so we can remove the kill flag later if we need to.
        PrevVCMPResultKiller = MO;
      }
    }

    // Ignore predicated instructions.
    if (getVPTInstrPredicate(Instr) != ARMVCC::None)
      continue;

    // Only look at VCMPs
    if (!IsVCMP(Instr.getOpcode())) {
      // If the instruction writes to VCCR, forget the previous VCMP.
      if (IsWritingToVCCR(Instr))
        PrevVCMP = nullptr;
      continue;
    }

    if (!PrevVCMP || !IsVPNOTEquivalent(Instr, *PrevVCMP)) {
      PrevVCMP = &Instr;
      continue;
    }

    // The register containing the result of the VCMP that we're going to
    // replace.
    Register PrevVCMPResultReg = PrevVCMP->getOperand(0).getReg();

    // Build a VPNOT to replace the VCMP, reusing its operands.
    MachineInstrBuilder MIBuilder =
        BuildMI(MBB, &Instr, Instr.getDebugLoc(), TII->get(ARM::MVE_VPNOT))
            .add(Instr.getOperand(0))
            .addReg(PrevVCMPResultReg);
    addUnpredicatedMveVpredNOp(MIBuilder);
    LLVM_DEBUG(dbgs() << "Inserting VPNOT (to replace VCMP): ";
               MIBuilder.getInstr()->dump(); dbgs() << "  Removed VCMP: ";
               Instr.dump());

    // If we found an instruction that uses, and kills PrevVCMP's result,
    // remove the kill flag.
    if (PrevVCMPResultKiller)
      PrevVCMPResultKiller->setIsKill(false);

    // Finally, mark the old VCMP for removal and reset
    // PrevVCMP/PrevVCMPResultKiller.
    DeadInstructions.push_back(&Instr);
    PrevVCMP = nullptr;
    PrevVCMPResultKiller = nullptr;
  }

  for (MachineInstr *DeadInstruction : DeadInstructions)
    DeadInstruction->eraseFromParent();

  return !DeadInstructions.empty();
}

bool MVETPAndVPTOptimisations::ReplaceConstByVPNOTs(MachineBasicBlock &MBB,
                                                    MachineDominatorTree *DT) {
  // Scan through the block, looking for instructions that use constants moves
  // into VPR that are the negative of one another. These are expected to be
  // COPY's to VCCRRegClass, from a t2MOVi or t2MOVi16. The last seen constant
  // mask is kept it or and VPNOT's of it are added or reused as we scan through
  // the function.
  unsigned LastVPTImm = 0;
  Register LastVPTReg = 0;
  SmallSet<MachineInstr *, 4> DeadInstructions;

  for (MachineInstr &Instr : MBB.instrs()) {
    // Look for predicated MVE instructions.
    int PIdx = llvm::findFirstVPTPredOperandIdx(Instr);
    if (PIdx == -1)
      continue;
    Register VPR = Instr.getOperand(PIdx + 1).getReg();
    if (!VPR.isVirtual())
      continue;

    // From that we are looking for an instruction like %11:vccr = COPY %9:rgpr.
    MachineInstr *Copy = MRI->getVRegDef(VPR);
    if (!Copy || Copy->getOpcode() != TargetOpcode::COPY ||
        !Copy->getOperand(1).getReg().isVirtual() ||
        MRI->getRegClass(Copy->getOperand(1).getReg()) == &ARM::VCCRRegClass) {
      LastVPTReg = 0;
      continue;
    }
    Register GPR = Copy->getOperand(1).getReg();

    // Find the Immediate used by the copy.
    auto getImm = [&](Register GPR) -> unsigned {
      MachineInstr *Def = MRI->getVRegDef(GPR);
      if (Def && (Def->getOpcode() == ARM::t2MOVi ||
                  Def->getOpcode() == ARM::t2MOVi16))
        return Def->getOperand(1).getImm();
      return -1U;
    };
    unsigned Imm = getImm(GPR);
    if (Imm == -1U) {
      LastVPTReg = 0;
      continue;
    }

    unsigned NotImm = ~Imm & 0xffff;
    if (LastVPTReg != 0 && LastVPTReg != VPR && LastVPTImm == Imm) {
      // Same mask as the last one seen - reuse the earlier predicate register.
      MRI->clearKillFlags(LastVPTReg);
      Instr.getOperand(PIdx + 1).setReg(LastVPTReg);
      if (MRI->use_empty(VPR)) {
        DeadInstructions.insert(Copy);
        if (MRI->hasOneUse(GPR))
          DeadInstructions.insert(MRI->getVRegDef(GPR));
      }
      LLVM_DEBUG(dbgs() << "Reusing predicate: in  " << Instr);
      VPR = LastVPTReg;
    } else if (LastVPTReg != 0 && LastVPTImm == NotImm) {
      // We have found the not of a previous constant. Create a VPNot of the
      // earlier predicate reg and use it instead of the copy.
      Register NewVPR = MRI->createVirtualRegister(&ARM::VCCRRegClass);
      auto VPNot = BuildMI(MBB, &Instr, Instr.getDebugLoc(),
                           TII->get(ARM::MVE_VPNOT), NewVPR)
                       .addReg(LastVPTReg);
      addUnpredicatedMveVpredNOp(VPNot);

      // Use the new register and check if the def is now dead.
983 Instr.getOperand(PIdx + 1).setReg(NewVPR); 984 if (MRI->use_empty(VPR)) { 985 DeadInstructions.insert(Copy); 986 if (MRI->hasOneUse(GPR)) 987 DeadInstructions.insert(MRI->getVRegDef(GPR)); 988 } 989 LLVM_DEBUG(dbgs() << "Adding VPNot: " << *VPNot << " to replace use at " 990 << Instr); 991 VPR = NewVPR; 992 } 993 994 LastVPTImm = Imm; 995 LastVPTReg = VPR; 996 } 997 998 for (MachineInstr *DI : DeadInstructions) 999 DI->eraseFromParent(); 1000 1001 return !DeadInstructions.empty(); 1002 } 1003 1004 // Replace VPSEL with a predicated VMOV in blocks with a VCTP. This is a 1005 // somewhat blunt approximation to allow tail predicated with vpsel 1006 // instructions. We turn a vselect into a VPSEL in ISEL, but they have slightly 1007 // different semantics under tail predication. Until that is modelled we just 1008 // convert to a VMOVT (via a predicated VORR) instead. 1009 bool MVETPAndVPTOptimisations::ConvertVPSEL(MachineBasicBlock &MBB) { 1010 bool HasVCTP = false; 1011 SmallVector<MachineInstr *, 4> DeadInstructions; 1012 1013 for (MachineInstr &MI : MBB.instrs()) { 1014 if (isVCTP(&MI)) { 1015 HasVCTP = true; 1016 continue; 1017 } 1018 1019 if (!HasVCTP || MI.getOpcode() != ARM::MVE_VPSEL) 1020 continue; 1021 1022 MachineInstrBuilder MIBuilder = 1023 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(ARM::MVE_VORR)) 1024 .add(MI.getOperand(0)) 1025 .add(MI.getOperand(1)) 1026 .add(MI.getOperand(1)) 1027 .addImm(ARMVCC::Then) 1028 .add(MI.getOperand(4)) 1029 .add(MI.getOperand(5)) 1030 .add(MI.getOperand(2)); 1031 // Silence unused variable warning in release builds. 
1032 (void)MIBuilder; 1033 LLVM_DEBUG(dbgs() << "Replacing VPSEL: "; MI.dump(); 1034 dbgs() << " with VMOVT: "; MIBuilder.getInstr()->dump()); 1035 DeadInstructions.push_back(&MI); 1036 } 1037 1038 for (MachineInstr *DeadInstruction : DeadInstructions) 1039 DeadInstruction->eraseFromParent(); 1040 1041 return !DeadInstructions.empty(); 1042 } 1043 1044 // Add a registry allocation hint for t2DoLoopStart to hint it towards LR, as 1045 // the instruction may be removable as a noop. 1046 bool MVETPAndVPTOptimisations::HintDoLoopStartReg(MachineBasicBlock &MBB) { 1047 bool Changed = false; 1048 for (MachineInstr &MI : MBB.instrs()) { 1049 if (MI.getOpcode() != ARM::t2DoLoopStart) 1050 continue; 1051 Register R = MI.getOperand(1).getReg(); 1052 MachineFunction *MF = MI.getParent()->getParent(); 1053 MF->getRegInfo().setRegAllocationHint(R, ARMRI::RegLR, 0); 1054 Changed = true; 1055 } 1056 return Changed; 1057 } 1058 1059 bool MVETPAndVPTOptimisations::runOnMachineFunction(MachineFunction &Fn) { 1060 const ARMSubtarget &STI = Fn.getSubtarget<ARMSubtarget>(); 1061 1062 if (!STI.isThumb2() || !STI.hasLOB()) 1063 return false; 1064 1065 TII = static_cast<const Thumb2InstrInfo *>(STI.getInstrInfo()); 1066 MRI = &Fn.getRegInfo(); 1067 MachineLoopInfo *MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI(); 1068 MachineDominatorTree *DT = 1069 &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree(); 1070 1071 LLVM_DEBUG(dbgs() << "********** ARM MVE VPT Optimisations **********\n" 1072 << "********** Function: " << Fn.getName() << '\n'); 1073 1074 bool Modified = false; 1075 for (MachineLoop *ML : MLI->getLoopsInPreorder()) { 1076 Modified |= LowerWhileLoopStart(ML); 1077 Modified |= MergeLoopEnd(ML); 1078 Modified |= ConvertTailPredLoop(ML, DT); 1079 } 1080 1081 for (MachineBasicBlock &MBB : Fn) { 1082 Modified |= HintDoLoopStartReg(MBB); 1083 Modified |= ReplaceConstByVPNOTs(MBB, DT); 1084 Modified |= ReplaceVCMPsByVPNOTs(MBB); 1085 Modified |= 
ReduceOldVCCRValueUses(MBB); 1086 Modified |= ConvertVPSEL(MBB); 1087 } 1088 1089 LLVM_DEBUG(dbgs() << "**************************************\n"); 1090 return Modified; 1091 } 1092 1093 /// createMVETPAndVPTOptimisationsPass 1094 FunctionPass *llvm::createMVETPAndVPTOptimisationsPass() { 1095 return new MVETPAndVPTOptimisations(); 1096 } 1097