xref: /freebsd/contrib/llvm-project/llvm/lib/Target/ARM/A15SDOptimizer.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
10b57cec5SDimitry Andric //=== A15SDOptimizerPass.cpp - Optimize DPR and SPR register accesses on A15==//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // The Cortex-A15 processor employs a tracking scheme in its register renaming
100b57cec5SDimitry Andric // in order to process each instruction's micro-ops speculatively and
110b57cec5SDimitry Andric // out-of-order with appropriate forwarding. The ARM architecture allows VFP
120b57cec5SDimitry Andric // instructions to read and write 32-bit S-registers.  Each S-register
130b57cec5SDimitry Andric // corresponds to one half (upper or lower) of an overlaid 64-bit D-register.
140b57cec5SDimitry Andric //
150b57cec5SDimitry Andric // There are several instruction patterns which can be used to provide this
160b57cec5SDimitry Andric // capability which can provide higher performance than other, potentially more
170b57cec5SDimitry Andric // direct patterns, specifically around when one micro-op reads a D-register
180b57cec5SDimitry Andric // operand that has recently been written as one or more S-register results.
190b57cec5SDimitry Andric //
// This file defines a pre-regalloc pass which looks for SPR producers whose
// results are going to be used by DPR (or QPR) consumers and creates the more
// optimized access pattern.
230b57cec5SDimitry Andric //
240b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
250b57cec5SDimitry Andric 
260b57cec5SDimitry Andric #include "ARM.h"
270b57cec5SDimitry Andric #include "ARMBaseInstrInfo.h"
280b57cec5SDimitry Andric #include "ARMBaseRegisterInfo.h"
290b57cec5SDimitry Andric #include "ARMSubtarget.h"
300b57cec5SDimitry Andric #include "llvm/ADT/Statistic.h"
310b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunction.h"
320b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
330b57cec5SDimitry Andric #include "llvm/CodeGen/MachineInstr.h"
340b57cec5SDimitry Andric #include "llvm/CodeGen/MachineInstrBuilder.h"
350b57cec5SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h"
360b57cec5SDimitry Andric #include "llvm/CodeGen/TargetRegisterInfo.h"
370b57cec5SDimitry Andric #include "llvm/CodeGen/TargetSubtargetInfo.h"
380b57cec5SDimitry Andric #include "llvm/Support/Debug.h"
390b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
400b57cec5SDimitry Andric #include <map>
410b57cec5SDimitry Andric #include <set>
420b57cec5SDimitry Andric 
430b57cec5SDimitry Andric using namespace llvm;
440b57cec5SDimitry Andric 
450b57cec5SDimitry Andric #define DEBUG_TYPE "a15-sd-optimizer"
460b57cec5SDimitry Andric 
470b57cec5SDimitry Andric namespace {
480b57cec5SDimitry Andric   struct A15SDOptimizer : public MachineFunctionPass {
490b57cec5SDimitry Andric     static char ID;
A15SDOptimizer__anona34028960111::A15SDOptimizer500b57cec5SDimitry Andric     A15SDOptimizer() : MachineFunctionPass(ID) {}
510b57cec5SDimitry Andric 
520b57cec5SDimitry Andric     bool runOnMachineFunction(MachineFunction &Fn) override;
530b57cec5SDimitry Andric 
getPassName__anona34028960111::A15SDOptimizer540b57cec5SDimitry Andric     StringRef getPassName() const override { return "ARM A15 S->D optimizer"; }
550b57cec5SDimitry Andric 
560b57cec5SDimitry Andric   private:
570b57cec5SDimitry Andric     const ARMBaseInstrInfo *TII;
580b57cec5SDimitry Andric     const TargetRegisterInfo *TRI;
590b57cec5SDimitry Andric     MachineRegisterInfo *MRI;
600b57cec5SDimitry Andric 
610b57cec5SDimitry Andric     bool runOnInstruction(MachineInstr *MI);
620b57cec5SDimitry Andric 
630b57cec5SDimitry Andric     //
640b57cec5SDimitry Andric     // Instruction builder helpers
650b57cec5SDimitry Andric     //
660b57cec5SDimitry Andric     unsigned createDupLane(MachineBasicBlock &MBB,
670b57cec5SDimitry Andric                            MachineBasicBlock::iterator InsertBefore,
680b57cec5SDimitry Andric                            const DebugLoc &DL, unsigned Reg, unsigned Lane,
690b57cec5SDimitry Andric                            bool QPR = false);
700b57cec5SDimitry Andric 
710b57cec5SDimitry Andric     unsigned createExtractSubreg(MachineBasicBlock &MBB,
720b57cec5SDimitry Andric                                  MachineBasicBlock::iterator InsertBefore,
730b57cec5SDimitry Andric                                  const DebugLoc &DL, unsigned DReg,
740b57cec5SDimitry Andric                                  unsigned Lane, const TargetRegisterClass *TRC);
750b57cec5SDimitry Andric 
760b57cec5SDimitry Andric     unsigned createVExt(MachineBasicBlock &MBB,
770b57cec5SDimitry Andric                         MachineBasicBlock::iterator InsertBefore,
780b57cec5SDimitry Andric                         const DebugLoc &DL, unsigned Ssub0, unsigned Ssub1);
790b57cec5SDimitry Andric 
800b57cec5SDimitry Andric     unsigned createRegSequence(MachineBasicBlock &MBB,
810b57cec5SDimitry Andric                                MachineBasicBlock::iterator InsertBefore,
820b57cec5SDimitry Andric                                const DebugLoc &DL, unsigned Reg1,
830b57cec5SDimitry Andric                                unsigned Reg2);
840b57cec5SDimitry Andric 
850b57cec5SDimitry Andric     unsigned createInsertSubreg(MachineBasicBlock &MBB,
860b57cec5SDimitry Andric                                 MachineBasicBlock::iterator InsertBefore,
870b57cec5SDimitry Andric                                 const DebugLoc &DL, unsigned DReg,
880b57cec5SDimitry Andric                                 unsigned Lane, unsigned ToInsert);
890b57cec5SDimitry Andric 
900b57cec5SDimitry Andric     unsigned createImplicitDef(MachineBasicBlock &MBB,
910b57cec5SDimitry Andric                                MachineBasicBlock::iterator InsertBefore,
920b57cec5SDimitry Andric                                const DebugLoc &DL);
930b57cec5SDimitry Andric 
940b57cec5SDimitry Andric     //
950b57cec5SDimitry Andric     // Various property checkers
960b57cec5SDimitry Andric     //
970b57cec5SDimitry Andric     bool usesRegClass(MachineOperand &MO, const TargetRegisterClass *TRC);
980b57cec5SDimitry Andric     bool hasPartialWrite(MachineInstr *MI);
990b57cec5SDimitry Andric     SmallVector<unsigned, 8> getReadDPRs(MachineInstr *MI);
1000b57cec5SDimitry Andric     unsigned getDPRLaneFromSPR(unsigned SReg);
1010b57cec5SDimitry Andric 
1020b57cec5SDimitry Andric     //
1030b57cec5SDimitry Andric     // Methods used for getting the definitions of partial registers
1040b57cec5SDimitry Andric     //
1050b57cec5SDimitry Andric 
1060b57cec5SDimitry Andric     MachineInstr *elideCopies(MachineInstr *MI);
1070b57cec5SDimitry Andric     void elideCopiesAndPHIs(MachineInstr *MI,
1080b57cec5SDimitry Andric                             SmallVectorImpl<MachineInstr*> &Outs);
1090b57cec5SDimitry Andric 
1100b57cec5SDimitry Andric     //
1110b57cec5SDimitry Andric     // Pattern optimization methods
1120b57cec5SDimitry Andric     //
1130b57cec5SDimitry Andric     unsigned optimizeAllLanesPattern(MachineInstr *MI, unsigned Reg);
1140b57cec5SDimitry Andric     unsigned optimizeSDPattern(MachineInstr *MI);
1150b57cec5SDimitry Andric     unsigned getPrefSPRLane(unsigned SReg);
1160b57cec5SDimitry Andric 
1170b57cec5SDimitry Andric     //
1180b57cec5SDimitry Andric     // Sanitizing method - used to make sure if don't leave dead code around.
1190b57cec5SDimitry Andric     //
1200b57cec5SDimitry Andric     void eraseInstrWithNoUses(MachineInstr *MI);
1210b57cec5SDimitry Andric 
1220b57cec5SDimitry Andric     //
1230b57cec5SDimitry Andric     // A map used to track the changes done by this pass.
1240b57cec5SDimitry Andric     //
1250b57cec5SDimitry Andric     std::map<MachineInstr*, unsigned> Replacements;
1260b57cec5SDimitry Andric     std::set<MachineInstr *> DeadInstr;
1270b57cec5SDimitry Andric   };
1280b57cec5SDimitry Andric   char A15SDOptimizer::ID = 0;
1290b57cec5SDimitry Andric } // end anonymous namespace
1300b57cec5SDimitry Andric 
1310b57cec5SDimitry Andric // Returns true if this is a use of a SPR register.
usesRegClass(MachineOperand & MO,const TargetRegisterClass * TRC)1320b57cec5SDimitry Andric bool A15SDOptimizer::usesRegClass(MachineOperand &MO,
1330b57cec5SDimitry Andric                                   const TargetRegisterClass *TRC) {
1340b57cec5SDimitry Andric   if (!MO.isReg())
1350b57cec5SDimitry Andric     return false;
1368bcb0991SDimitry Andric   Register Reg = MO.getReg();
1370b57cec5SDimitry Andric 
138bdd1243dSDimitry Andric   if (Reg.isVirtual())
1390b57cec5SDimitry Andric     return MRI->getRegClass(Reg)->hasSuperClassEq(TRC);
1400b57cec5SDimitry Andric   else
1410b57cec5SDimitry Andric     return TRC->contains(Reg);
1420b57cec5SDimitry Andric }
1430b57cec5SDimitry Andric 
getDPRLaneFromSPR(unsigned SReg)1440b57cec5SDimitry Andric unsigned A15SDOptimizer::getDPRLaneFromSPR(unsigned SReg) {
1450b57cec5SDimitry Andric   unsigned DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1,
1460b57cec5SDimitry Andric                                            &ARM::DPRRegClass);
1470b57cec5SDimitry Andric   if (DReg != ARM::NoRegister) return ARM::ssub_1;
1480b57cec5SDimitry Andric   return ARM::ssub_0;
1490b57cec5SDimitry Andric }
1500b57cec5SDimitry Andric 
1510b57cec5SDimitry Andric // Get the subreg type that is most likely to be coalesced
1520b57cec5SDimitry Andric // for an SPR register that will be used in VDUP32d pseudo.
getPrefSPRLane(unsigned SReg)1530b57cec5SDimitry Andric unsigned A15SDOptimizer::getPrefSPRLane(unsigned SReg) {
1548bcb0991SDimitry Andric   if (!Register::isVirtualRegister(SReg))
1550b57cec5SDimitry Andric     return getDPRLaneFromSPR(SReg);
1560b57cec5SDimitry Andric 
1570b57cec5SDimitry Andric   MachineInstr *MI = MRI->getVRegDef(SReg);
1580b57cec5SDimitry Andric   if (!MI) return ARM::ssub_0;
159*0fca6ea1SDimitry Andric   MachineOperand *MO = MI->findRegisterDefOperand(SReg, /*TRI=*/nullptr);
1600b57cec5SDimitry Andric   if (!MO) return ARM::ssub_0;
161480093f4SDimitry Andric   assert(MO->isReg() && "Non-register operand found!");
1620b57cec5SDimitry Andric 
1630b57cec5SDimitry Andric   if (MI->isCopy() && usesRegClass(MI->getOperand(1),
1640b57cec5SDimitry Andric                                     &ARM::SPRRegClass)) {
1650b57cec5SDimitry Andric     SReg = MI->getOperand(1).getReg();
1660b57cec5SDimitry Andric   }
1670b57cec5SDimitry Andric 
1688bcb0991SDimitry Andric   if (Register::isVirtualRegister(SReg)) {
1690b57cec5SDimitry Andric     if (MO->getSubReg() == ARM::ssub_1) return ARM::ssub_1;
1700b57cec5SDimitry Andric     return ARM::ssub_0;
1710b57cec5SDimitry Andric   }
1720b57cec5SDimitry Andric   return getDPRLaneFromSPR(SReg);
1730b57cec5SDimitry Andric }
1740b57cec5SDimitry Andric 
1750b57cec5SDimitry Andric // MI is known to be dead. Figure out what instructions
1760b57cec5SDimitry Andric // are also made dead by this and mark them for removal.
eraseInstrWithNoUses(MachineInstr * MI)1770b57cec5SDimitry Andric void A15SDOptimizer::eraseInstrWithNoUses(MachineInstr *MI) {
1780b57cec5SDimitry Andric   SmallVector<MachineInstr *, 8> Front;
1790b57cec5SDimitry Andric   DeadInstr.insert(MI);
1800b57cec5SDimitry Andric 
1810b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "Deleting base instruction " << *MI << "\n");
1820b57cec5SDimitry Andric   Front.push_back(MI);
1830b57cec5SDimitry Andric 
1840b57cec5SDimitry Andric   while (Front.size() != 0) {
185349cc55cSDimitry Andric     MI = Front.pop_back_val();
1860b57cec5SDimitry Andric 
1870b57cec5SDimitry Andric     // MI is already known to be dead. We need to see
1880b57cec5SDimitry Andric     // if other instructions can also be removed.
1890b57cec5SDimitry Andric     for (MachineOperand &MO : MI->operands()) {
1900b57cec5SDimitry Andric       if ((!MO.isReg()) || (!MO.isUse()))
1910b57cec5SDimitry Andric         continue;
1928bcb0991SDimitry Andric       Register Reg = MO.getReg();
193bdd1243dSDimitry Andric       if (!Reg.isVirtual())
1940b57cec5SDimitry Andric         continue;
195*0fca6ea1SDimitry Andric       MachineOperand *Op = MI->findRegisterDefOperand(Reg, /*TRI=*/nullptr);
1960b57cec5SDimitry Andric 
1970b57cec5SDimitry Andric       if (!Op)
1980b57cec5SDimitry Andric         continue;
1990b57cec5SDimitry Andric 
2000b57cec5SDimitry Andric       MachineInstr *Def = Op->getParent();
2010b57cec5SDimitry Andric 
2020b57cec5SDimitry Andric       // We don't need to do anything if we have already marked
2030b57cec5SDimitry Andric       // this instruction as being dead.
2040b57cec5SDimitry Andric       if (DeadInstr.find(Def) != DeadInstr.end())
2050b57cec5SDimitry Andric         continue;
2060b57cec5SDimitry Andric 
2070b57cec5SDimitry Andric       // Check if all the uses of this instruction are marked as
2080b57cec5SDimitry Andric       // dead. If so, we can also mark this instruction as being
2090b57cec5SDimitry Andric       // dead.
2100b57cec5SDimitry Andric       bool IsDead = true;
2110b57cec5SDimitry Andric       for (MachineOperand &MODef : Def->operands()) {
2120b57cec5SDimitry Andric         if ((!MODef.isReg()) || (!MODef.isDef()))
2130b57cec5SDimitry Andric           continue;
2148bcb0991SDimitry Andric         Register DefReg = MODef.getReg();
215bdd1243dSDimitry Andric         if (!DefReg.isVirtual()) {
2160b57cec5SDimitry Andric           IsDead = false;
2170b57cec5SDimitry Andric           break;
2180b57cec5SDimitry Andric         }
2190b57cec5SDimitry Andric         for (MachineInstr &Use : MRI->use_instructions(Reg)) {
2200b57cec5SDimitry Andric           // We don't care about self references.
2210b57cec5SDimitry Andric           if (&Use == Def)
2220b57cec5SDimitry Andric             continue;
2230b57cec5SDimitry Andric           if (DeadInstr.find(&Use) == DeadInstr.end()) {
2240b57cec5SDimitry Andric             IsDead = false;
2250b57cec5SDimitry Andric             break;
2260b57cec5SDimitry Andric           }
2270b57cec5SDimitry Andric         }
2280b57cec5SDimitry Andric       }
2290b57cec5SDimitry Andric 
2300b57cec5SDimitry Andric       if (!IsDead) continue;
2310b57cec5SDimitry Andric 
2320b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "Deleting instruction " << *Def << "\n");
2330b57cec5SDimitry Andric       DeadInstr.insert(Def);
2340b57cec5SDimitry Andric     }
2350b57cec5SDimitry Andric   }
2360b57cec5SDimitry Andric }
2370b57cec5SDimitry Andric 
2380b57cec5SDimitry Andric // Creates the more optimized patterns and generally does all the code
2390b57cec5SDimitry Andric // transformations in this pass.
optimizeSDPattern(MachineInstr * MI)2400b57cec5SDimitry Andric unsigned A15SDOptimizer::optimizeSDPattern(MachineInstr *MI) {
2410b57cec5SDimitry Andric   if (MI->isCopy()) {
2420b57cec5SDimitry Andric     return optimizeAllLanesPattern(MI, MI->getOperand(1).getReg());
2430b57cec5SDimitry Andric   }
2440b57cec5SDimitry Andric 
2450b57cec5SDimitry Andric   if (MI->isInsertSubreg()) {
2468bcb0991SDimitry Andric     Register DPRReg = MI->getOperand(1).getReg();
2478bcb0991SDimitry Andric     Register SPRReg = MI->getOperand(2).getReg();
2480b57cec5SDimitry Andric 
249bdd1243dSDimitry Andric     if (DPRReg.isVirtual() && SPRReg.isVirtual()) {
2500b57cec5SDimitry Andric       MachineInstr *DPRMI = MRI->getVRegDef(MI->getOperand(1).getReg());
2510b57cec5SDimitry Andric       MachineInstr *SPRMI = MRI->getVRegDef(MI->getOperand(2).getReg());
2520b57cec5SDimitry Andric 
2530b57cec5SDimitry Andric       if (DPRMI && SPRMI) {
2540b57cec5SDimitry Andric         // See if the first operand of this insert_subreg is IMPLICIT_DEF
2550b57cec5SDimitry Andric         MachineInstr *ECDef = elideCopies(DPRMI);
2560b57cec5SDimitry Andric         if (ECDef && ECDef->isImplicitDef()) {
2570b57cec5SDimitry Andric           // Another corner case - if we're inserting something that is purely
2580b57cec5SDimitry Andric           // a subreg copy of a DPR, just use that DPR.
2590b57cec5SDimitry Andric 
2600b57cec5SDimitry Andric           MachineInstr *EC = elideCopies(SPRMI);
2610b57cec5SDimitry Andric           // Is it a subreg copy of ssub_0?
2620b57cec5SDimitry Andric           if (EC && EC->isCopy() &&
2630b57cec5SDimitry Andric               EC->getOperand(1).getSubReg() == ARM::ssub_0) {
2640b57cec5SDimitry Andric             LLVM_DEBUG(dbgs() << "Found a subreg copy: " << *SPRMI);
2650b57cec5SDimitry Andric 
2660b57cec5SDimitry Andric             // Find the thing we're subreg copying out of - is it of the same
2670b57cec5SDimitry Andric             // regclass as DPRMI? (i.e. a DPR or QPR).
2688bcb0991SDimitry Andric             Register FullReg = SPRMI->getOperand(1).getReg();
2690b57cec5SDimitry Andric             const TargetRegisterClass *TRC =
2700b57cec5SDimitry Andric               MRI->getRegClass(MI->getOperand(1).getReg());
2710b57cec5SDimitry Andric             if (TRC->hasSuperClassEq(MRI->getRegClass(FullReg))) {
2720b57cec5SDimitry Andric               LLVM_DEBUG(dbgs() << "Subreg copy is compatible - returning ");
2730b57cec5SDimitry Andric               LLVM_DEBUG(dbgs() << printReg(FullReg) << "\n");
2740b57cec5SDimitry Andric               eraseInstrWithNoUses(MI);
2750b57cec5SDimitry Andric               return FullReg;
2760b57cec5SDimitry Andric             }
2770b57cec5SDimitry Andric           }
2780b57cec5SDimitry Andric 
2790b57cec5SDimitry Andric           return optimizeAllLanesPattern(MI, MI->getOperand(2).getReg());
2800b57cec5SDimitry Andric         }
2810b57cec5SDimitry Andric       }
2820b57cec5SDimitry Andric     }
2830b57cec5SDimitry Andric     return optimizeAllLanesPattern(MI, MI->getOperand(0).getReg());
2840b57cec5SDimitry Andric   }
2850b57cec5SDimitry Andric 
2860b57cec5SDimitry Andric   if (MI->isRegSequence() && usesRegClass(MI->getOperand(1),
2870b57cec5SDimitry Andric                                           &ARM::SPRRegClass)) {
2880b57cec5SDimitry Andric     // See if all bar one of the operands are IMPLICIT_DEF and insert the
2890b57cec5SDimitry Andric     // optimizer pattern accordingly.
2900b57cec5SDimitry Andric     unsigned NumImplicit = 0, NumTotal = 0;
2910b57cec5SDimitry Andric     unsigned NonImplicitReg = ~0U;
2920b57cec5SDimitry Andric 
293bdd1243dSDimitry Andric     for (MachineOperand &MO : llvm::drop_begin(MI->explicit_operands())) {
294bdd1243dSDimitry Andric       if (!MO.isReg())
2950b57cec5SDimitry Andric         continue;
2960b57cec5SDimitry Andric       ++NumTotal;
297bdd1243dSDimitry Andric       Register OpReg = MO.getReg();
2980b57cec5SDimitry Andric 
299bdd1243dSDimitry Andric       if (!OpReg.isVirtual())
3000b57cec5SDimitry Andric         break;
3010b57cec5SDimitry Andric 
3020b57cec5SDimitry Andric       MachineInstr *Def = MRI->getVRegDef(OpReg);
3030b57cec5SDimitry Andric       if (!Def)
3040b57cec5SDimitry Andric         break;
3050b57cec5SDimitry Andric       if (Def->isImplicitDef())
3060b57cec5SDimitry Andric         ++NumImplicit;
3070b57cec5SDimitry Andric       else
308bdd1243dSDimitry Andric         NonImplicitReg = MO.getReg();
3090b57cec5SDimitry Andric     }
3100b57cec5SDimitry Andric 
3110b57cec5SDimitry Andric     if (NumImplicit == NumTotal - 1)
3120b57cec5SDimitry Andric       return optimizeAllLanesPattern(MI, NonImplicitReg);
3130b57cec5SDimitry Andric     else
3140b57cec5SDimitry Andric       return optimizeAllLanesPattern(MI, MI->getOperand(0).getReg());
3150b57cec5SDimitry Andric   }
3160b57cec5SDimitry Andric 
3170b57cec5SDimitry Andric   llvm_unreachable("Unhandled update pattern!");
3180b57cec5SDimitry Andric }
3190b57cec5SDimitry Andric 
3200b57cec5SDimitry Andric // Return true if this MachineInstr inserts a scalar (SPR) value into
3210b57cec5SDimitry Andric // a D or Q register.
hasPartialWrite(MachineInstr * MI)3220b57cec5SDimitry Andric bool A15SDOptimizer::hasPartialWrite(MachineInstr *MI) {
3230b57cec5SDimitry Andric   // The only way we can do a partial register update is through a COPY,
3240b57cec5SDimitry Andric   // INSERT_SUBREG or REG_SEQUENCE.
3250b57cec5SDimitry Andric   if (MI->isCopy() && usesRegClass(MI->getOperand(1), &ARM::SPRRegClass))
3260b57cec5SDimitry Andric     return true;
3270b57cec5SDimitry Andric 
3280b57cec5SDimitry Andric   if (MI->isInsertSubreg() && usesRegClass(MI->getOperand(2),
3290b57cec5SDimitry Andric                                            &ARM::SPRRegClass))
3300b57cec5SDimitry Andric     return true;
3310b57cec5SDimitry Andric 
3320b57cec5SDimitry Andric   if (MI->isRegSequence() && usesRegClass(MI->getOperand(1), &ARM::SPRRegClass))
3330b57cec5SDimitry Andric     return true;
3340b57cec5SDimitry Andric 
3350b57cec5SDimitry Andric   return false;
3360b57cec5SDimitry Andric }
3370b57cec5SDimitry Andric 
3380b57cec5SDimitry Andric // Looks through full copies to get the instruction that defines the input
3390b57cec5SDimitry Andric // operand for MI.
elideCopies(MachineInstr * MI)3400b57cec5SDimitry Andric MachineInstr *A15SDOptimizer::elideCopies(MachineInstr *MI) {
3410b57cec5SDimitry Andric   if (!MI->isFullCopy())
3420b57cec5SDimitry Andric     return MI;
343bdd1243dSDimitry Andric   if (!MI->getOperand(1).getReg().isVirtual())
3440b57cec5SDimitry Andric     return nullptr;
3450b57cec5SDimitry Andric   MachineInstr *Def = MRI->getVRegDef(MI->getOperand(1).getReg());
3460b57cec5SDimitry Andric   if (!Def)
3470b57cec5SDimitry Andric     return nullptr;
3480b57cec5SDimitry Andric   return elideCopies(Def);
3490b57cec5SDimitry Andric }
3500b57cec5SDimitry Andric 
3510b57cec5SDimitry Andric // Look through full copies and PHIs to get the set of non-copy MachineInstrs
3520b57cec5SDimitry Andric // that can produce MI.
elideCopiesAndPHIs(MachineInstr * MI,SmallVectorImpl<MachineInstr * > & Outs)3530b57cec5SDimitry Andric void A15SDOptimizer::elideCopiesAndPHIs(MachineInstr *MI,
3540b57cec5SDimitry Andric                                         SmallVectorImpl<MachineInstr*> &Outs) {
3550b57cec5SDimitry Andric    // Looking through PHIs may create loops so we need to track what
3560b57cec5SDimitry Andric    // instructions we have visited before.
3570b57cec5SDimitry Andric    std::set<MachineInstr *> Reached;
3580b57cec5SDimitry Andric    SmallVector<MachineInstr *, 8> Front;
3590b57cec5SDimitry Andric    Front.push_back(MI);
3600b57cec5SDimitry Andric    while (Front.size() != 0) {
361e8d8bef9SDimitry Andric      MI = Front.pop_back_val();
3620b57cec5SDimitry Andric 
3630b57cec5SDimitry Andric      // If we have already explored this MachineInstr, ignore it.
36481ad6265SDimitry Andric      if (!Reached.insert(MI).second)
3650b57cec5SDimitry Andric        continue;
3660b57cec5SDimitry Andric      if (MI->isPHI()) {
3670b57cec5SDimitry Andric        for (unsigned I = 1, E = MI->getNumOperands(); I != E; I += 2) {
3688bcb0991SDimitry Andric          Register Reg = MI->getOperand(I).getReg();
369bdd1243dSDimitry Andric          if (!Reg.isVirtual()) {
3700b57cec5SDimitry Andric            continue;
3710b57cec5SDimitry Andric          }
3720b57cec5SDimitry Andric          MachineInstr *NewMI = MRI->getVRegDef(Reg);
3730b57cec5SDimitry Andric          if (!NewMI)
3740b57cec5SDimitry Andric            continue;
3750b57cec5SDimitry Andric          Front.push_back(NewMI);
3760b57cec5SDimitry Andric        }
3770b57cec5SDimitry Andric      } else if (MI->isFullCopy()) {
378bdd1243dSDimitry Andric        if (!MI->getOperand(1).getReg().isVirtual())
3790b57cec5SDimitry Andric          continue;
3800b57cec5SDimitry Andric        MachineInstr *NewMI = MRI->getVRegDef(MI->getOperand(1).getReg());
3810b57cec5SDimitry Andric        if (!NewMI)
3820b57cec5SDimitry Andric          continue;
3830b57cec5SDimitry Andric        Front.push_back(NewMI);
3840b57cec5SDimitry Andric      } else {
3850b57cec5SDimitry Andric        LLVM_DEBUG(dbgs() << "Found partial copy" << *MI << "\n");
3860b57cec5SDimitry Andric        Outs.push_back(MI);
3870b57cec5SDimitry Andric      }
3880b57cec5SDimitry Andric    }
3890b57cec5SDimitry Andric }
3900b57cec5SDimitry Andric 
3910b57cec5SDimitry Andric // Return the DPR virtual registers that are read by this machine instruction
3920b57cec5SDimitry Andric // (if any).
getReadDPRs(MachineInstr * MI)3930b57cec5SDimitry Andric SmallVector<unsigned, 8> A15SDOptimizer::getReadDPRs(MachineInstr *MI) {
3940b57cec5SDimitry Andric   if (MI->isCopyLike() || MI->isInsertSubreg() || MI->isRegSequence() ||
3950b57cec5SDimitry Andric       MI->isKill())
3960b57cec5SDimitry Andric     return SmallVector<unsigned, 8>();
3970b57cec5SDimitry Andric 
3980b57cec5SDimitry Andric   SmallVector<unsigned, 8> Defs;
3990b57cec5SDimitry Andric   for (MachineOperand &MO : MI->operands()) {
4000b57cec5SDimitry Andric     if (!MO.isReg() || !MO.isUse())
4010b57cec5SDimitry Andric       continue;
4020b57cec5SDimitry Andric     if (!usesRegClass(MO, &ARM::DPRRegClass) &&
4030b57cec5SDimitry Andric         !usesRegClass(MO, &ARM::QPRRegClass) &&
4040b57cec5SDimitry Andric         !usesRegClass(MO, &ARM::DPairRegClass)) // Treat DPair as QPR
4050b57cec5SDimitry Andric       continue;
4060b57cec5SDimitry Andric 
4070b57cec5SDimitry Andric     Defs.push_back(MO.getReg());
4080b57cec5SDimitry Andric   }
4090b57cec5SDimitry Andric   return Defs;
4100b57cec5SDimitry Andric }
4110b57cec5SDimitry Andric 
4120b57cec5SDimitry Andric // Creates a DPR register from an SPR one by using a VDUP.
createDupLane(MachineBasicBlock & MBB,MachineBasicBlock::iterator InsertBefore,const DebugLoc & DL,unsigned Reg,unsigned Lane,bool QPR)4130b57cec5SDimitry Andric unsigned A15SDOptimizer::createDupLane(MachineBasicBlock &MBB,
4140b57cec5SDimitry Andric                                        MachineBasicBlock::iterator InsertBefore,
4150b57cec5SDimitry Andric                                        const DebugLoc &DL, unsigned Reg,
4160b57cec5SDimitry Andric                                        unsigned Lane, bool QPR) {
4178bcb0991SDimitry Andric   Register Out =
4188bcb0991SDimitry Andric       MRI->createVirtualRegister(QPR ? &ARM::QPRRegClass : &ARM::DPRRegClass);
4190b57cec5SDimitry Andric   BuildMI(MBB, InsertBefore, DL,
4200b57cec5SDimitry Andric           TII->get(QPR ? ARM::VDUPLN32q : ARM::VDUPLN32d), Out)
4210b57cec5SDimitry Andric       .addReg(Reg)
4220b57cec5SDimitry Andric       .addImm(Lane)
4230b57cec5SDimitry Andric       .add(predOps(ARMCC::AL));
4240b57cec5SDimitry Andric 
4250b57cec5SDimitry Andric   return Out;
4260b57cec5SDimitry Andric }
4270b57cec5SDimitry Andric 
4280b57cec5SDimitry Andric // Creates a SPR register from a DPR by copying the value in lane 0.
createExtractSubreg(MachineBasicBlock & MBB,MachineBasicBlock::iterator InsertBefore,const DebugLoc & DL,unsigned DReg,unsigned Lane,const TargetRegisterClass * TRC)4290b57cec5SDimitry Andric unsigned A15SDOptimizer::createExtractSubreg(
4300b57cec5SDimitry Andric     MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
4310b57cec5SDimitry Andric     const DebugLoc &DL, unsigned DReg, unsigned Lane,
4320b57cec5SDimitry Andric     const TargetRegisterClass *TRC) {
4338bcb0991SDimitry Andric   Register Out = MRI->createVirtualRegister(TRC);
4340b57cec5SDimitry Andric   BuildMI(MBB,
4350b57cec5SDimitry Andric           InsertBefore,
4360b57cec5SDimitry Andric           DL,
4370b57cec5SDimitry Andric           TII->get(TargetOpcode::COPY), Out)
4380b57cec5SDimitry Andric     .addReg(DReg, 0, Lane);
4390b57cec5SDimitry Andric 
4400b57cec5SDimitry Andric   return Out;
4410b57cec5SDimitry Andric }
4420b57cec5SDimitry Andric 
4430b57cec5SDimitry Andric // Takes two SPR registers and creates a DPR by using a REG_SEQUENCE.
createRegSequence(MachineBasicBlock & MBB,MachineBasicBlock::iterator InsertBefore,const DebugLoc & DL,unsigned Reg1,unsigned Reg2)4440b57cec5SDimitry Andric unsigned A15SDOptimizer::createRegSequence(
4450b57cec5SDimitry Andric     MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
4460b57cec5SDimitry Andric     const DebugLoc &DL, unsigned Reg1, unsigned Reg2) {
4478bcb0991SDimitry Andric   Register Out = MRI->createVirtualRegister(&ARM::QPRRegClass);
4480b57cec5SDimitry Andric   BuildMI(MBB,
4490b57cec5SDimitry Andric           InsertBefore,
4500b57cec5SDimitry Andric           DL,
4510b57cec5SDimitry Andric           TII->get(TargetOpcode::REG_SEQUENCE), Out)
4520b57cec5SDimitry Andric     .addReg(Reg1)
4530b57cec5SDimitry Andric     .addImm(ARM::dsub_0)
4540b57cec5SDimitry Andric     .addReg(Reg2)
4550b57cec5SDimitry Andric     .addImm(ARM::dsub_1);
4560b57cec5SDimitry Andric   return Out;
4570b57cec5SDimitry Andric }
4580b57cec5SDimitry Andric 
4590b57cec5SDimitry Andric // Takes two DPR registers that have previously been VDUPed (Ssub0 and Ssub1)
4600b57cec5SDimitry Andric // and merges them into one DPR register.
createVExt(MachineBasicBlock & MBB,MachineBasicBlock::iterator InsertBefore,const DebugLoc & DL,unsigned Ssub0,unsigned Ssub1)4610b57cec5SDimitry Andric unsigned A15SDOptimizer::createVExt(MachineBasicBlock &MBB,
4620b57cec5SDimitry Andric                                     MachineBasicBlock::iterator InsertBefore,
4630b57cec5SDimitry Andric                                     const DebugLoc &DL, unsigned Ssub0,
4640b57cec5SDimitry Andric                                     unsigned Ssub1) {
4658bcb0991SDimitry Andric   Register Out = MRI->createVirtualRegister(&ARM::DPRRegClass);
4660b57cec5SDimitry Andric   BuildMI(MBB, InsertBefore, DL, TII->get(ARM::VEXTd32), Out)
4670b57cec5SDimitry Andric       .addReg(Ssub0)
4680b57cec5SDimitry Andric       .addReg(Ssub1)
4690b57cec5SDimitry Andric       .addImm(1)
4700b57cec5SDimitry Andric       .add(predOps(ARMCC::AL));
4710b57cec5SDimitry Andric   return Out;
4720b57cec5SDimitry Andric }
4730b57cec5SDimitry Andric 
createInsertSubreg(MachineBasicBlock & MBB,MachineBasicBlock::iterator InsertBefore,const DebugLoc & DL,unsigned DReg,unsigned Lane,unsigned ToInsert)4740b57cec5SDimitry Andric unsigned A15SDOptimizer::createInsertSubreg(
4750b57cec5SDimitry Andric     MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
4760b57cec5SDimitry Andric     const DebugLoc &DL, unsigned DReg, unsigned Lane, unsigned ToInsert) {
4778bcb0991SDimitry Andric   Register Out = MRI->createVirtualRegister(&ARM::DPR_VFP2RegClass);
4780b57cec5SDimitry Andric   BuildMI(MBB,
4790b57cec5SDimitry Andric           InsertBefore,
4800b57cec5SDimitry Andric           DL,
4810b57cec5SDimitry Andric           TII->get(TargetOpcode::INSERT_SUBREG), Out)
4820b57cec5SDimitry Andric     .addReg(DReg)
4830b57cec5SDimitry Andric     .addReg(ToInsert)
4840b57cec5SDimitry Andric     .addImm(Lane);
4850b57cec5SDimitry Andric 
4860b57cec5SDimitry Andric   return Out;
4870b57cec5SDimitry Andric }
4880b57cec5SDimitry Andric 
4890b57cec5SDimitry Andric unsigned
createImplicitDef(MachineBasicBlock & MBB,MachineBasicBlock::iterator InsertBefore,const DebugLoc & DL)4900b57cec5SDimitry Andric A15SDOptimizer::createImplicitDef(MachineBasicBlock &MBB,
4910b57cec5SDimitry Andric                                   MachineBasicBlock::iterator InsertBefore,
4920b57cec5SDimitry Andric                                   const DebugLoc &DL) {
4938bcb0991SDimitry Andric   Register Out = MRI->createVirtualRegister(&ARM::DPRRegClass);
4940b57cec5SDimitry Andric   BuildMI(MBB,
4950b57cec5SDimitry Andric           InsertBefore,
4960b57cec5SDimitry Andric           DL,
4970b57cec5SDimitry Andric           TII->get(TargetOpcode::IMPLICIT_DEF), Out);
4980b57cec5SDimitry Andric   return Out;
4990b57cec5SDimitry Andric }
5000b57cec5SDimitry Andric 
5010b57cec5SDimitry Andric // This function inserts instructions in order to optimize interactions between
5020b57cec5SDimitry Andric // SPR registers and DPR/QPR registers. It does so by performing VDUPs on all
5030b57cec5SDimitry Andric // lanes, and the using VEXT instructions to recompose the result.
5040b57cec5SDimitry Andric unsigned
optimizeAllLanesPattern(MachineInstr * MI,unsigned Reg)5050b57cec5SDimitry Andric A15SDOptimizer::optimizeAllLanesPattern(MachineInstr *MI, unsigned Reg) {
5060b57cec5SDimitry Andric   MachineBasicBlock::iterator InsertPt(MI);
5070b57cec5SDimitry Andric   DebugLoc DL = MI->getDebugLoc();
5080b57cec5SDimitry Andric   MachineBasicBlock &MBB = *MI->getParent();
5090b57cec5SDimitry Andric   InsertPt++;
5100b57cec5SDimitry Andric   unsigned Out;
5110b57cec5SDimitry Andric 
5120b57cec5SDimitry Andric   // DPair has the same length as QPR and also has two DPRs as subreg.
5130b57cec5SDimitry Andric   // Treat DPair as QPR.
5140b57cec5SDimitry Andric   if (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::QPRRegClass) ||
5150b57cec5SDimitry Andric       MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::DPairRegClass)) {
5160b57cec5SDimitry Andric     unsigned DSub0 = createExtractSubreg(MBB, InsertPt, DL, Reg,
5170b57cec5SDimitry Andric                                          ARM::dsub_0, &ARM::DPRRegClass);
5180b57cec5SDimitry Andric     unsigned DSub1 = createExtractSubreg(MBB, InsertPt, DL, Reg,
5190b57cec5SDimitry Andric                                          ARM::dsub_1, &ARM::DPRRegClass);
5200b57cec5SDimitry Andric 
5210b57cec5SDimitry Andric     unsigned Out1 = createDupLane(MBB, InsertPt, DL, DSub0, 0);
5220b57cec5SDimitry Andric     unsigned Out2 = createDupLane(MBB, InsertPt, DL, DSub0, 1);
5230b57cec5SDimitry Andric     Out = createVExt(MBB, InsertPt, DL, Out1, Out2);
5240b57cec5SDimitry Andric 
5250b57cec5SDimitry Andric     unsigned Out3 = createDupLane(MBB, InsertPt, DL, DSub1, 0);
5260b57cec5SDimitry Andric     unsigned Out4 = createDupLane(MBB, InsertPt, DL, DSub1, 1);
5270b57cec5SDimitry Andric     Out2 = createVExt(MBB, InsertPt, DL, Out3, Out4);
5280b57cec5SDimitry Andric 
5290b57cec5SDimitry Andric     Out = createRegSequence(MBB, InsertPt, DL, Out, Out2);
5300b57cec5SDimitry Andric 
5310b57cec5SDimitry Andric   } else if (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::DPRRegClass)) {
5320b57cec5SDimitry Andric     unsigned Out1 = createDupLane(MBB, InsertPt, DL, Reg, 0);
5330b57cec5SDimitry Andric     unsigned Out2 = createDupLane(MBB, InsertPt, DL, Reg, 1);
5340b57cec5SDimitry Andric     Out = createVExt(MBB, InsertPt, DL, Out1, Out2);
5350b57cec5SDimitry Andric 
5360b57cec5SDimitry Andric   } else {
5370b57cec5SDimitry Andric     assert(MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::SPRRegClass) &&
5380b57cec5SDimitry Andric            "Found unexpected regclass!");
5390b57cec5SDimitry Andric 
5400b57cec5SDimitry Andric     unsigned PrefLane = getPrefSPRLane(Reg);
5410b57cec5SDimitry Andric     unsigned Lane;
5420b57cec5SDimitry Andric     switch (PrefLane) {
5430b57cec5SDimitry Andric       case ARM::ssub_0: Lane = 0; break;
5440b57cec5SDimitry Andric       case ARM::ssub_1: Lane = 1; break;
5450b57cec5SDimitry Andric       default: llvm_unreachable("Unknown preferred lane!");
5460b57cec5SDimitry Andric     }
5470b57cec5SDimitry Andric 
5480b57cec5SDimitry Andric     // Treat DPair as QPR
5490b57cec5SDimitry Andric     bool UsesQPR = usesRegClass(MI->getOperand(0), &ARM::QPRRegClass) ||
5500b57cec5SDimitry Andric                    usesRegClass(MI->getOperand(0), &ARM::DPairRegClass);
5510b57cec5SDimitry Andric 
5520b57cec5SDimitry Andric     Out = createImplicitDef(MBB, InsertPt, DL);
5530b57cec5SDimitry Andric     Out = createInsertSubreg(MBB, InsertPt, DL, Out, PrefLane, Reg);
5540b57cec5SDimitry Andric     Out = createDupLane(MBB, InsertPt, DL, Out, Lane, UsesQPR);
5550b57cec5SDimitry Andric     eraseInstrWithNoUses(MI);
5560b57cec5SDimitry Andric   }
5570b57cec5SDimitry Andric   return Out;
5580b57cec5SDimitry Andric }
5590b57cec5SDimitry Andric 
runOnInstruction(MachineInstr * MI)5600b57cec5SDimitry Andric bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) {
5610b57cec5SDimitry Andric   // We look for instructions that write S registers that are then read as
5620b57cec5SDimitry Andric   // D/Q registers. These can only be caused by COPY, INSERT_SUBREG and
5630b57cec5SDimitry Andric   // REG_SEQUENCE pseudos that insert an SPR value into a DPR register or
5640b57cec5SDimitry Andric   // merge two SPR values to form a DPR register.  In order avoid false
5650b57cec5SDimitry Andric   // positives we make sure that there is an SPR producer so we look past
5660b57cec5SDimitry Andric   // COPY and PHI nodes to find it.
5670b57cec5SDimitry Andric   //
5680b57cec5SDimitry Andric   // The best code pattern for when an SPR producer is going to be used by a
5690b57cec5SDimitry Andric   // DPR or QPR consumer depends on whether the other lanes of the
5700b57cec5SDimitry Andric   // corresponding DPR/QPR are currently defined.
5710b57cec5SDimitry Andric   //
5720b57cec5SDimitry Andric   // We can handle these efficiently, depending on the type of
5730b57cec5SDimitry Andric   // pseudo-instruction that is producing the pattern
5740b57cec5SDimitry Andric   //
5750b57cec5SDimitry Andric   //   * COPY:          * VDUP all lanes and merge the results together
5760b57cec5SDimitry Andric   //                      using VEXTs.
5770b57cec5SDimitry Andric   //
5780b57cec5SDimitry Andric   //   * INSERT_SUBREG: * If the SPR value was originally in another DPR/QPR
5790b57cec5SDimitry Andric   //                      lane, and the other lane(s) of the DPR/QPR register
5800b57cec5SDimitry Andric   //                      that we are inserting in are undefined, use the
5810b57cec5SDimitry Andric   //                      original DPR/QPR value.
5820b57cec5SDimitry Andric   //                    * Otherwise, fall back on the same stategy as COPY.
5830b57cec5SDimitry Andric   //
5840b57cec5SDimitry Andric   //   * REG_SEQUENCE:  * If all except one of the input operands are
5850b57cec5SDimitry Andric   //                      IMPLICIT_DEFs, insert the VDUP pattern for just the
5860b57cec5SDimitry Andric   //                      defined input operand
5870b57cec5SDimitry Andric   //                    * Otherwise, fall back on the same stategy as COPY.
5880b57cec5SDimitry Andric   //
5890b57cec5SDimitry Andric 
5900b57cec5SDimitry Andric   // First, get all the reads of D-registers done by this instruction.
5910b57cec5SDimitry Andric   SmallVector<unsigned, 8> Defs = getReadDPRs(MI);
5920b57cec5SDimitry Andric   bool Modified = false;
5930b57cec5SDimitry Andric 
5940eae32dcSDimitry Andric   for (unsigned I : Defs) {
5950b57cec5SDimitry Andric     // Follow the def-use chain for this DPR through COPYs, and also through
5960b57cec5SDimitry Andric     // PHIs (which are essentially multi-way COPYs). It is because of PHIs that
5970b57cec5SDimitry Andric     // we can end up with multiple defs of this DPR.
5980b57cec5SDimitry Andric 
5990b57cec5SDimitry Andric     SmallVector<MachineInstr *, 8> DefSrcs;
6000eae32dcSDimitry Andric     if (!Register::isVirtualRegister(I))
6010b57cec5SDimitry Andric       continue;
6020eae32dcSDimitry Andric     MachineInstr *Def = MRI->getVRegDef(I);
6030b57cec5SDimitry Andric     if (!Def)
6040b57cec5SDimitry Andric       continue;
6050b57cec5SDimitry Andric 
6060b57cec5SDimitry Andric     elideCopiesAndPHIs(Def, DefSrcs);
6070b57cec5SDimitry Andric 
6080b57cec5SDimitry Andric     for (MachineInstr *MI : DefSrcs) {
6090b57cec5SDimitry Andric       // If we've already analyzed and replaced this operand, don't do
6100b57cec5SDimitry Andric       // anything.
6110b57cec5SDimitry Andric       if (Replacements.find(MI) != Replacements.end())
6120b57cec5SDimitry Andric         continue;
6130b57cec5SDimitry Andric 
6140b57cec5SDimitry Andric       // Now, work out if the instruction causes a SPR->DPR dependency.
6150b57cec5SDimitry Andric       if (!hasPartialWrite(MI))
6160b57cec5SDimitry Andric         continue;
6170b57cec5SDimitry Andric 
6180b57cec5SDimitry Andric       // Collect all the uses of this MI's DPR def for updating later.
6190b57cec5SDimitry Andric       SmallVector<MachineOperand*, 8> Uses;
6208bcb0991SDimitry Andric       Register DPRDefReg = MI->getOperand(0).getReg();
621349cc55cSDimitry Andric       for (MachineOperand &MO : MRI->use_operands(DPRDefReg))
622349cc55cSDimitry Andric         Uses.push_back(&MO);
6230b57cec5SDimitry Andric 
6240b57cec5SDimitry Andric       // We can optimize this.
6250b57cec5SDimitry Andric       unsigned NewReg = optimizeSDPattern(MI);
6260b57cec5SDimitry Andric 
6270b57cec5SDimitry Andric       if (NewReg != 0) {
6280b57cec5SDimitry Andric         Modified = true;
6290eae32dcSDimitry Andric         for (MachineOperand *Use : Uses) {
6300b57cec5SDimitry Andric           // Make sure to constrain the register class of the new register to
6310b57cec5SDimitry Andric           // match what we're replacing. Otherwise we can optimize a DPR_VFP2
6320b57cec5SDimitry Andric           // reference into a plain DPR, and that will end poorly. NewReg is
6330b57cec5SDimitry Andric           // always virtual here, so there will always be a matching subclass
6340b57cec5SDimitry Andric           // to find.
6350eae32dcSDimitry Andric           MRI->constrainRegClass(NewReg, MRI->getRegClass(Use->getReg()));
6360b57cec5SDimitry Andric 
6370eae32dcSDimitry Andric           LLVM_DEBUG(dbgs() << "Replacing operand " << *Use << " with "
6380b57cec5SDimitry Andric                             << printReg(NewReg) << "\n");
6390eae32dcSDimitry Andric           Use->substVirtReg(NewReg, 0, *TRI);
6400b57cec5SDimitry Andric         }
6410b57cec5SDimitry Andric       }
6420b57cec5SDimitry Andric       Replacements[MI] = NewReg;
6430b57cec5SDimitry Andric     }
6440b57cec5SDimitry Andric   }
6450b57cec5SDimitry Andric   return Modified;
6460b57cec5SDimitry Andric }
6470b57cec5SDimitry Andric 
runOnMachineFunction(MachineFunction & Fn)6480b57cec5SDimitry Andric bool A15SDOptimizer::runOnMachineFunction(MachineFunction &Fn) {
6490b57cec5SDimitry Andric   if (skipFunction(Fn.getFunction()))
6500b57cec5SDimitry Andric     return false;
6510b57cec5SDimitry Andric 
6520b57cec5SDimitry Andric   const ARMSubtarget &STI = Fn.getSubtarget<ARMSubtarget>();
6530b57cec5SDimitry Andric   // Since the A15SDOptimizer pass can insert VDUP instructions, it can only be
6540b57cec5SDimitry Andric   // enabled when NEON is available.
6550b57cec5SDimitry Andric   if (!(STI.useSplatVFPToNeon() && STI.hasNEON()))
6560b57cec5SDimitry Andric     return false;
6570b57cec5SDimitry Andric 
6580b57cec5SDimitry Andric   TII = STI.getInstrInfo();
6590b57cec5SDimitry Andric   TRI = STI.getRegisterInfo();
6600b57cec5SDimitry Andric   MRI = &Fn.getRegInfo();
6610b57cec5SDimitry Andric   bool Modified = false;
6620b57cec5SDimitry Andric 
6630b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "Running on function " << Fn.getName() << "\n");
6640b57cec5SDimitry Andric 
6650b57cec5SDimitry Andric   DeadInstr.clear();
6660b57cec5SDimitry Andric   Replacements.clear();
6670b57cec5SDimitry Andric 
6680b57cec5SDimitry Andric   for (MachineBasicBlock &MBB : Fn) {
6690b57cec5SDimitry Andric     for (MachineInstr &MI : MBB) {
6700b57cec5SDimitry Andric       Modified |= runOnInstruction(&MI);
6710b57cec5SDimitry Andric     }
6720b57cec5SDimitry Andric   }
6730b57cec5SDimitry Andric 
6740b57cec5SDimitry Andric   for (MachineInstr *MI : DeadInstr) {
6750b57cec5SDimitry Andric     MI->eraseFromParent();
6760b57cec5SDimitry Andric   }
6770b57cec5SDimitry Andric 
6780b57cec5SDimitry Andric   return Modified;
6790b57cec5SDimitry Andric }
6800b57cec5SDimitry Andric 
createA15SDOptimizerPass()6810b57cec5SDimitry Andric FunctionPass *llvm::createA15SDOptimizerPass() {
6820b57cec5SDimitry Andric   return new A15SDOptimizer();
6830b57cec5SDimitry Andric }
684