10b57cec5SDimitry Andric //=== A15SDOptimizerPass.cpp - Optimize DPR and SPR register accesses on A15==//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // The Cortex-A15 processor employs a tracking scheme in its register renaming
100b57cec5SDimitry Andric // in order to process each instruction's micro-ops speculatively and
110b57cec5SDimitry Andric // out-of-order with appropriate forwarding. The ARM architecture allows VFP
120b57cec5SDimitry Andric // instructions to read and write 32-bit S-registers. Each S-register
130b57cec5SDimitry Andric // corresponds to one half (upper or lower) of an overlaid 64-bit D-register.
140b57cec5SDimitry Andric //
150b57cec5SDimitry Andric // There are several instruction patterns which can be used to provide this
160b57cec5SDimitry Andric // capability which can provide higher performance than other, potentially more
170b57cec5SDimitry Andric // direct patterns, specifically around when one micro-op reads a D-register
180b57cec5SDimitry Andric // operand that has recently been written as one or more S-register results.
190b57cec5SDimitry Andric //
// This file defines a pre-regalloc pass which looks for SPR producers which
// are going to be used by DPR (or QPR) consumers and creates the more
// optimized access pattern.
230b57cec5SDimitry Andric //
240b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
250b57cec5SDimitry Andric
260b57cec5SDimitry Andric #include "ARM.h"
270b57cec5SDimitry Andric #include "ARMBaseInstrInfo.h"
280b57cec5SDimitry Andric #include "ARMBaseRegisterInfo.h"
290b57cec5SDimitry Andric #include "ARMSubtarget.h"
300b57cec5SDimitry Andric #include "llvm/ADT/Statistic.h"
310b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunction.h"
320b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
330b57cec5SDimitry Andric #include "llvm/CodeGen/MachineInstr.h"
340b57cec5SDimitry Andric #include "llvm/CodeGen/MachineInstrBuilder.h"
350b57cec5SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h"
360b57cec5SDimitry Andric #include "llvm/CodeGen/TargetRegisterInfo.h"
370b57cec5SDimitry Andric #include "llvm/CodeGen/TargetSubtargetInfo.h"
380b57cec5SDimitry Andric #include "llvm/Support/Debug.h"
390b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
400b57cec5SDimitry Andric #include <map>
410b57cec5SDimitry Andric #include <set>
420b57cec5SDimitry Andric
430b57cec5SDimitry Andric using namespace llvm;
440b57cec5SDimitry Andric
450b57cec5SDimitry Andric #define DEBUG_TYPE "a15-sd-optimizer"
460b57cec5SDimitry Andric
470b57cec5SDimitry Andric namespace {
480b57cec5SDimitry Andric struct A15SDOptimizer : public MachineFunctionPass {
490b57cec5SDimitry Andric static char ID;
A15SDOptimizer__anona34028960111::A15SDOptimizer500b57cec5SDimitry Andric A15SDOptimizer() : MachineFunctionPass(ID) {}
510b57cec5SDimitry Andric
520b57cec5SDimitry Andric bool runOnMachineFunction(MachineFunction &Fn) override;
530b57cec5SDimitry Andric
getPassName__anona34028960111::A15SDOptimizer540b57cec5SDimitry Andric StringRef getPassName() const override { return "ARM A15 S->D optimizer"; }
550b57cec5SDimitry Andric
560b57cec5SDimitry Andric private:
570b57cec5SDimitry Andric const ARMBaseInstrInfo *TII;
580b57cec5SDimitry Andric const TargetRegisterInfo *TRI;
590b57cec5SDimitry Andric MachineRegisterInfo *MRI;
600b57cec5SDimitry Andric
610b57cec5SDimitry Andric bool runOnInstruction(MachineInstr *MI);
620b57cec5SDimitry Andric
630b57cec5SDimitry Andric //
640b57cec5SDimitry Andric // Instruction builder helpers
650b57cec5SDimitry Andric //
660b57cec5SDimitry Andric unsigned createDupLane(MachineBasicBlock &MBB,
670b57cec5SDimitry Andric MachineBasicBlock::iterator InsertBefore,
680b57cec5SDimitry Andric const DebugLoc &DL, unsigned Reg, unsigned Lane,
690b57cec5SDimitry Andric bool QPR = false);
700b57cec5SDimitry Andric
710b57cec5SDimitry Andric unsigned createExtractSubreg(MachineBasicBlock &MBB,
720b57cec5SDimitry Andric MachineBasicBlock::iterator InsertBefore,
730b57cec5SDimitry Andric const DebugLoc &DL, unsigned DReg,
740b57cec5SDimitry Andric unsigned Lane, const TargetRegisterClass *TRC);
750b57cec5SDimitry Andric
760b57cec5SDimitry Andric unsigned createVExt(MachineBasicBlock &MBB,
770b57cec5SDimitry Andric MachineBasicBlock::iterator InsertBefore,
780b57cec5SDimitry Andric const DebugLoc &DL, unsigned Ssub0, unsigned Ssub1);
790b57cec5SDimitry Andric
800b57cec5SDimitry Andric unsigned createRegSequence(MachineBasicBlock &MBB,
810b57cec5SDimitry Andric MachineBasicBlock::iterator InsertBefore,
820b57cec5SDimitry Andric const DebugLoc &DL, unsigned Reg1,
830b57cec5SDimitry Andric unsigned Reg2);
840b57cec5SDimitry Andric
850b57cec5SDimitry Andric unsigned createInsertSubreg(MachineBasicBlock &MBB,
860b57cec5SDimitry Andric MachineBasicBlock::iterator InsertBefore,
870b57cec5SDimitry Andric const DebugLoc &DL, unsigned DReg,
880b57cec5SDimitry Andric unsigned Lane, unsigned ToInsert);
890b57cec5SDimitry Andric
900b57cec5SDimitry Andric unsigned createImplicitDef(MachineBasicBlock &MBB,
910b57cec5SDimitry Andric MachineBasicBlock::iterator InsertBefore,
920b57cec5SDimitry Andric const DebugLoc &DL);
930b57cec5SDimitry Andric
940b57cec5SDimitry Andric //
950b57cec5SDimitry Andric // Various property checkers
960b57cec5SDimitry Andric //
970b57cec5SDimitry Andric bool usesRegClass(MachineOperand &MO, const TargetRegisterClass *TRC);
980b57cec5SDimitry Andric bool hasPartialWrite(MachineInstr *MI);
990b57cec5SDimitry Andric SmallVector<unsigned, 8> getReadDPRs(MachineInstr *MI);
1000b57cec5SDimitry Andric unsigned getDPRLaneFromSPR(unsigned SReg);
1010b57cec5SDimitry Andric
1020b57cec5SDimitry Andric //
1030b57cec5SDimitry Andric // Methods used for getting the definitions of partial registers
1040b57cec5SDimitry Andric //
1050b57cec5SDimitry Andric
1060b57cec5SDimitry Andric MachineInstr *elideCopies(MachineInstr *MI);
1070b57cec5SDimitry Andric void elideCopiesAndPHIs(MachineInstr *MI,
1080b57cec5SDimitry Andric SmallVectorImpl<MachineInstr*> &Outs);
1090b57cec5SDimitry Andric
1100b57cec5SDimitry Andric //
1110b57cec5SDimitry Andric // Pattern optimization methods
1120b57cec5SDimitry Andric //
1130b57cec5SDimitry Andric unsigned optimizeAllLanesPattern(MachineInstr *MI, unsigned Reg);
1140b57cec5SDimitry Andric unsigned optimizeSDPattern(MachineInstr *MI);
1150b57cec5SDimitry Andric unsigned getPrefSPRLane(unsigned SReg);
1160b57cec5SDimitry Andric
1170b57cec5SDimitry Andric //
1180b57cec5SDimitry Andric // Sanitizing method - used to make sure if don't leave dead code around.
1190b57cec5SDimitry Andric //
1200b57cec5SDimitry Andric void eraseInstrWithNoUses(MachineInstr *MI);
1210b57cec5SDimitry Andric
1220b57cec5SDimitry Andric //
1230b57cec5SDimitry Andric // A map used to track the changes done by this pass.
1240b57cec5SDimitry Andric //
1250b57cec5SDimitry Andric std::map<MachineInstr*, unsigned> Replacements;
1260b57cec5SDimitry Andric std::set<MachineInstr *> DeadInstr;
1270b57cec5SDimitry Andric };
1280b57cec5SDimitry Andric char A15SDOptimizer::ID = 0;
1290b57cec5SDimitry Andric } // end anonymous namespace
1300b57cec5SDimitry Andric
1310b57cec5SDimitry Andric // Returns true if this is a use of a SPR register.
usesRegClass(MachineOperand & MO,const TargetRegisterClass * TRC)1320b57cec5SDimitry Andric bool A15SDOptimizer::usesRegClass(MachineOperand &MO,
1330b57cec5SDimitry Andric const TargetRegisterClass *TRC) {
1340b57cec5SDimitry Andric if (!MO.isReg())
1350b57cec5SDimitry Andric return false;
1368bcb0991SDimitry Andric Register Reg = MO.getReg();
1370b57cec5SDimitry Andric
138bdd1243dSDimitry Andric if (Reg.isVirtual())
1390b57cec5SDimitry Andric return MRI->getRegClass(Reg)->hasSuperClassEq(TRC);
1400b57cec5SDimitry Andric else
1410b57cec5SDimitry Andric return TRC->contains(Reg);
1420b57cec5SDimitry Andric }
1430b57cec5SDimitry Andric
getDPRLaneFromSPR(unsigned SReg)1440b57cec5SDimitry Andric unsigned A15SDOptimizer::getDPRLaneFromSPR(unsigned SReg) {
1450b57cec5SDimitry Andric unsigned DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1,
1460b57cec5SDimitry Andric &ARM::DPRRegClass);
1470b57cec5SDimitry Andric if (DReg != ARM::NoRegister) return ARM::ssub_1;
1480b57cec5SDimitry Andric return ARM::ssub_0;
1490b57cec5SDimitry Andric }
1500b57cec5SDimitry Andric
1510b57cec5SDimitry Andric // Get the subreg type that is most likely to be coalesced
1520b57cec5SDimitry Andric // for an SPR register that will be used in VDUP32d pseudo.
getPrefSPRLane(unsigned SReg)1530b57cec5SDimitry Andric unsigned A15SDOptimizer::getPrefSPRLane(unsigned SReg) {
1548bcb0991SDimitry Andric if (!Register::isVirtualRegister(SReg))
1550b57cec5SDimitry Andric return getDPRLaneFromSPR(SReg);
1560b57cec5SDimitry Andric
1570b57cec5SDimitry Andric MachineInstr *MI = MRI->getVRegDef(SReg);
1580b57cec5SDimitry Andric if (!MI) return ARM::ssub_0;
159*0fca6ea1SDimitry Andric MachineOperand *MO = MI->findRegisterDefOperand(SReg, /*TRI=*/nullptr);
1600b57cec5SDimitry Andric if (!MO) return ARM::ssub_0;
161480093f4SDimitry Andric assert(MO->isReg() && "Non-register operand found!");
1620b57cec5SDimitry Andric
1630b57cec5SDimitry Andric if (MI->isCopy() && usesRegClass(MI->getOperand(1),
1640b57cec5SDimitry Andric &ARM::SPRRegClass)) {
1650b57cec5SDimitry Andric SReg = MI->getOperand(1).getReg();
1660b57cec5SDimitry Andric }
1670b57cec5SDimitry Andric
1688bcb0991SDimitry Andric if (Register::isVirtualRegister(SReg)) {
1690b57cec5SDimitry Andric if (MO->getSubReg() == ARM::ssub_1) return ARM::ssub_1;
1700b57cec5SDimitry Andric return ARM::ssub_0;
1710b57cec5SDimitry Andric }
1720b57cec5SDimitry Andric return getDPRLaneFromSPR(SReg);
1730b57cec5SDimitry Andric }
1740b57cec5SDimitry Andric
1750b57cec5SDimitry Andric // MI is known to be dead. Figure out what instructions
1760b57cec5SDimitry Andric // are also made dead by this and mark them for removal.
eraseInstrWithNoUses(MachineInstr * MI)1770b57cec5SDimitry Andric void A15SDOptimizer::eraseInstrWithNoUses(MachineInstr *MI) {
1780b57cec5SDimitry Andric SmallVector<MachineInstr *, 8> Front;
1790b57cec5SDimitry Andric DeadInstr.insert(MI);
1800b57cec5SDimitry Andric
1810b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Deleting base instruction " << *MI << "\n");
1820b57cec5SDimitry Andric Front.push_back(MI);
1830b57cec5SDimitry Andric
1840b57cec5SDimitry Andric while (Front.size() != 0) {
185349cc55cSDimitry Andric MI = Front.pop_back_val();
1860b57cec5SDimitry Andric
1870b57cec5SDimitry Andric // MI is already known to be dead. We need to see
1880b57cec5SDimitry Andric // if other instructions can also be removed.
1890b57cec5SDimitry Andric for (MachineOperand &MO : MI->operands()) {
1900b57cec5SDimitry Andric if ((!MO.isReg()) || (!MO.isUse()))
1910b57cec5SDimitry Andric continue;
1928bcb0991SDimitry Andric Register Reg = MO.getReg();
193bdd1243dSDimitry Andric if (!Reg.isVirtual())
1940b57cec5SDimitry Andric continue;
195*0fca6ea1SDimitry Andric MachineOperand *Op = MI->findRegisterDefOperand(Reg, /*TRI=*/nullptr);
1960b57cec5SDimitry Andric
1970b57cec5SDimitry Andric if (!Op)
1980b57cec5SDimitry Andric continue;
1990b57cec5SDimitry Andric
2000b57cec5SDimitry Andric MachineInstr *Def = Op->getParent();
2010b57cec5SDimitry Andric
2020b57cec5SDimitry Andric // We don't need to do anything if we have already marked
2030b57cec5SDimitry Andric // this instruction as being dead.
2040b57cec5SDimitry Andric if (DeadInstr.find(Def) != DeadInstr.end())
2050b57cec5SDimitry Andric continue;
2060b57cec5SDimitry Andric
2070b57cec5SDimitry Andric // Check if all the uses of this instruction are marked as
2080b57cec5SDimitry Andric // dead. If so, we can also mark this instruction as being
2090b57cec5SDimitry Andric // dead.
2100b57cec5SDimitry Andric bool IsDead = true;
2110b57cec5SDimitry Andric for (MachineOperand &MODef : Def->operands()) {
2120b57cec5SDimitry Andric if ((!MODef.isReg()) || (!MODef.isDef()))
2130b57cec5SDimitry Andric continue;
2148bcb0991SDimitry Andric Register DefReg = MODef.getReg();
215bdd1243dSDimitry Andric if (!DefReg.isVirtual()) {
2160b57cec5SDimitry Andric IsDead = false;
2170b57cec5SDimitry Andric break;
2180b57cec5SDimitry Andric }
2190b57cec5SDimitry Andric for (MachineInstr &Use : MRI->use_instructions(Reg)) {
2200b57cec5SDimitry Andric // We don't care about self references.
2210b57cec5SDimitry Andric if (&Use == Def)
2220b57cec5SDimitry Andric continue;
2230b57cec5SDimitry Andric if (DeadInstr.find(&Use) == DeadInstr.end()) {
2240b57cec5SDimitry Andric IsDead = false;
2250b57cec5SDimitry Andric break;
2260b57cec5SDimitry Andric }
2270b57cec5SDimitry Andric }
2280b57cec5SDimitry Andric }
2290b57cec5SDimitry Andric
2300b57cec5SDimitry Andric if (!IsDead) continue;
2310b57cec5SDimitry Andric
2320b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Deleting instruction " << *Def << "\n");
2330b57cec5SDimitry Andric DeadInstr.insert(Def);
2340b57cec5SDimitry Andric }
2350b57cec5SDimitry Andric }
2360b57cec5SDimitry Andric }
2370b57cec5SDimitry Andric
2380b57cec5SDimitry Andric // Creates the more optimized patterns and generally does all the code
2390b57cec5SDimitry Andric // transformations in this pass.
optimizeSDPattern(MachineInstr * MI)2400b57cec5SDimitry Andric unsigned A15SDOptimizer::optimizeSDPattern(MachineInstr *MI) {
2410b57cec5SDimitry Andric if (MI->isCopy()) {
2420b57cec5SDimitry Andric return optimizeAllLanesPattern(MI, MI->getOperand(1).getReg());
2430b57cec5SDimitry Andric }
2440b57cec5SDimitry Andric
2450b57cec5SDimitry Andric if (MI->isInsertSubreg()) {
2468bcb0991SDimitry Andric Register DPRReg = MI->getOperand(1).getReg();
2478bcb0991SDimitry Andric Register SPRReg = MI->getOperand(2).getReg();
2480b57cec5SDimitry Andric
249bdd1243dSDimitry Andric if (DPRReg.isVirtual() && SPRReg.isVirtual()) {
2500b57cec5SDimitry Andric MachineInstr *DPRMI = MRI->getVRegDef(MI->getOperand(1).getReg());
2510b57cec5SDimitry Andric MachineInstr *SPRMI = MRI->getVRegDef(MI->getOperand(2).getReg());
2520b57cec5SDimitry Andric
2530b57cec5SDimitry Andric if (DPRMI && SPRMI) {
2540b57cec5SDimitry Andric // See if the first operand of this insert_subreg is IMPLICIT_DEF
2550b57cec5SDimitry Andric MachineInstr *ECDef = elideCopies(DPRMI);
2560b57cec5SDimitry Andric if (ECDef && ECDef->isImplicitDef()) {
2570b57cec5SDimitry Andric // Another corner case - if we're inserting something that is purely
2580b57cec5SDimitry Andric // a subreg copy of a DPR, just use that DPR.
2590b57cec5SDimitry Andric
2600b57cec5SDimitry Andric MachineInstr *EC = elideCopies(SPRMI);
2610b57cec5SDimitry Andric // Is it a subreg copy of ssub_0?
2620b57cec5SDimitry Andric if (EC && EC->isCopy() &&
2630b57cec5SDimitry Andric EC->getOperand(1).getSubReg() == ARM::ssub_0) {
2640b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Found a subreg copy: " << *SPRMI);
2650b57cec5SDimitry Andric
2660b57cec5SDimitry Andric // Find the thing we're subreg copying out of - is it of the same
2670b57cec5SDimitry Andric // regclass as DPRMI? (i.e. a DPR or QPR).
2688bcb0991SDimitry Andric Register FullReg = SPRMI->getOperand(1).getReg();
2690b57cec5SDimitry Andric const TargetRegisterClass *TRC =
2700b57cec5SDimitry Andric MRI->getRegClass(MI->getOperand(1).getReg());
2710b57cec5SDimitry Andric if (TRC->hasSuperClassEq(MRI->getRegClass(FullReg))) {
2720b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Subreg copy is compatible - returning ");
2730b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << printReg(FullReg) << "\n");
2740b57cec5SDimitry Andric eraseInstrWithNoUses(MI);
2750b57cec5SDimitry Andric return FullReg;
2760b57cec5SDimitry Andric }
2770b57cec5SDimitry Andric }
2780b57cec5SDimitry Andric
2790b57cec5SDimitry Andric return optimizeAllLanesPattern(MI, MI->getOperand(2).getReg());
2800b57cec5SDimitry Andric }
2810b57cec5SDimitry Andric }
2820b57cec5SDimitry Andric }
2830b57cec5SDimitry Andric return optimizeAllLanesPattern(MI, MI->getOperand(0).getReg());
2840b57cec5SDimitry Andric }
2850b57cec5SDimitry Andric
2860b57cec5SDimitry Andric if (MI->isRegSequence() && usesRegClass(MI->getOperand(1),
2870b57cec5SDimitry Andric &ARM::SPRRegClass)) {
2880b57cec5SDimitry Andric // See if all bar one of the operands are IMPLICIT_DEF and insert the
2890b57cec5SDimitry Andric // optimizer pattern accordingly.
2900b57cec5SDimitry Andric unsigned NumImplicit = 0, NumTotal = 0;
2910b57cec5SDimitry Andric unsigned NonImplicitReg = ~0U;
2920b57cec5SDimitry Andric
293bdd1243dSDimitry Andric for (MachineOperand &MO : llvm::drop_begin(MI->explicit_operands())) {
294bdd1243dSDimitry Andric if (!MO.isReg())
2950b57cec5SDimitry Andric continue;
2960b57cec5SDimitry Andric ++NumTotal;
297bdd1243dSDimitry Andric Register OpReg = MO.getReg();
2980b57cec5SDimitry Andric
299bdd1243dSDimitry Andric if (!OpReg.isVirtual())
3000b57cec5SDimitry Andric break;
3010b57cec5SDimitry Andric
3020b57cec5SDimitry Andric MachineInstr *Def = MRI->getVRegDef(OpReg);
3030b57cec5SDimitry Andric if (!Def)
3040b57cec5SDimitry Andric break;
3050b57cec5SDimitry Andric if (Def->isImplicitDef())
3060b57cec5SDimitry Andric ++NumImplicit;
3070b57cec5SDimitry Andric else
308bdd1243dSDimitry Andric NonImplicitReg = MO.getReg();
3090b57cec5SDimitry Andric }
3100b57cec5SDimitry Andric
3110b57cec5SDimitry Andric if (NumImplicit == NumTotal - 1)
3120b57cec5SDimitry Andric return optimizeAllLanesPattern(MI, NonImplicitReg);
3130b57cec5SDimitry Andric else
3140b57cec5SDimitry Andric return optimizeAllLanesPattern(MI, MI->getOperand(0).getReg());
3150b57cec5SDimitry Andric }
3160b57cec5SDimitry Andric
3170b57cec5SDimitry Andric llvm_unreachable("Unhandled update pattern!");
3180b57cec5SDimitry Andric }
3190b57cec5SDimitry Andric
3200b57cec5SDimitry Andric // Return true if this MachineInstr inserts a scalar (SPR) value into
3210b57cec5SDimitry Andric // a D or Q register.
hasPartialWrite(MachineInstr * MI)3220b57cec5SDimitry Andric bool A15SDOptimizer::hasPartialWrite(MachineInstr *MI) {
3230b57cec5SDimitry Andric // The only way we can do a partial register update is through a COPY,
3240b57cec5SDimitry Andric // INSERT_SUBREG or REG_SEQUENCE.
3250b57cec5SDimitry Andric if (MI->isCopy() && usesRegClass(MI->getOperand(1), &ARM::SPRRegClass))
3260b57cec5SDimitry Andric return true;
3270b57cec5SDimitry Andric
3280b57cec5SDimitry Andric if (MI->isInsertSubreg() && usesRegClass(MI->getOperand(2),
3290b57cec5SDimitry Andric &ARM::SPRRegClass))
3300b57cec5SDimitry Andric return true;
3310b57cec5SDimitry Andric
3320b57cec5SDimitry Andric if (MI->isRegSequence() && usesRegClass(MI->getOperand(1), &ARM::SPRRegClass))
3330b57cec5SDimitry Andric return true;
3340b57cec5SDimitry Andric
3350b57cec5SDimitry Andric return false;
3360b57cec5SDimitry Andric }
3370b57cec5SDimitry Andric
3380b57cec5SDimitry Andric // Looks through full copies to get the instruction that defines the input
3390b57cec5SDimitry Andric // operand for MI.
elideCopies(MachineInstr * MI)3400b57cec5SDimitry Andric MachineInstr *A15SDOptimizer::elideCopies(MachineInstr *MI) {
3410b57cec5SDimitry Andric if (!MI->isFullCopy())
3420b57cec5SDimitry Andric return MI;
343bdd1243dSDimitry Andric if (!MI->getOperand(1).getReg().isVirtual())
3440b57cec5SDimitry Andric return nullptr;
3450b57cec5SDimitry Andric MachineInstr *Def = MRI->getVRegDef(MI->getOperand(1).getReg());
3460b57cec5SDimitry Andric if (!Def)
3470b57cec5SDimitry Andric return nullptr;
3480b57cec5SDimitry Andric return elideCopies(Def);
3490b57cec5SDimitry Andric }
3500b57cec5SDimitry Andric
3510b57cec5SDimitry Andric // Look through full copies and PHIs to get the set of non-copy MachineInstrs
3520b57cec5SDimitry Andric // that can produce MI.
elideCopiesAndPHIs(MachineInstr * MI,SmallVectorImpl<MachineInstr * > & Outs)3530b57cec5SDimitry Andric void A15SDOptimizer::elideCopiesAndPHIs(MachineInstr *MI,
3540b57cec5SDimitry Andric SmallVectorImpl<MachineInstr*> &Outs) {
3550b57cec5SDimitry Andric // Looking through PHIs may create loops so we need to track what
3560b57cec5SDimitry Andric // instructions we have visited before.
3570b57cec5SDimitry Andric std::set<MachineInstr *> Reached;
3580b57cec5SDimitry Andric SmallVector<MachineInstr *, 8> Front;
3590b57cec5SDimitry Andric Front.push_back(MI);
3600b57cec5SDimitry Andric while (Front.size() != 0) {
361e8d8bef9SDimitry Andric MI = Front.pop_back_val();
3620b57cec5SDimitry Andric
3630b57cec5SDimitry Andric // If we have already explored this MachineInstr, ignore it.
36481ad6265SDimitry Andric if (!Reached.insert(MI).second)
3650b57cec5SDimitry Andric continue;
3660b57cec5SDimitry Andric if (MI->isPHI()) {
3670b57cec5SDimitry Andric for (unsigned I = 1, E = MI->getNumOperands(); I != E; I += 2) {
3688bcb0991SDimitry Andric Register Reg = MI->getOperand(I).getReg();
369bdd1243dSDimitry Andric if (!Reg.isVirtual()) {
3700b57cec5SDimitry Andric continue;
3710b57cec5SDimitry Andric }
3720b57cec5SDimitry Andric MachineInstr *NewMI = MRI->getVRegDef(Reg);
3730b57cec5SDimitry Andric if (!NewMI)
3740b57cec5SDimitry Andric continue;
3750b57cec5SDimitry Andric Front.push_back(NewMI);
3760b57cec5SDimitry Andric }
3770b57cec5SDimitry Andric } else if (MI->isFullCopy()) {
378bdd1243dSDimitry Andric if (!MI->getOperand(1).getReg().isVirtual())
3790b57cec5SDimitry Andric continue;
3800b57cec5SDimitry Andric MachineInstr *NewMI = MRI->getVRegDef(MI->getOperand(1).getReg());
3810b57cec5SDimitry Andric if (!NewMI)
3820b57cec5SDimitry Andric continue;
3830b57cec5SDimitry Andric Front.push_back(NewMI);
3840b57cec5SDimitry Andric } else {
3850b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Found partial copy" << *MI << "\n");
3860b57cec5SDimitry Andric Outs.push_back(MI);
3870b57cec5SDimitry Andric }
3880b57cec5SDimitry Andric }
3890b57cec5SDimitry Andric }
3900b57cec5SDimitry Andric
3910b57cec5SDimitry Andric // Return the DPR virtual registers that are read by this machine instruction
3920b57cec5SDimitry Andric // (if any).
getReadDPRs(MachineInstr * MI)3930b57cec5SDimitry Andric SmallVector<unsigned, 8> A15SDOptimizer::getReadDPRs(MachineInstr *MI) {
3940b57cec5SDimitry Andric if (MI->isCopyLike() || MI->isInsertSubreg() || MI->isRegSequence() ||
3950b57cec5SDimitry Andric MI->isKill())
3960b57cec5SDimitry Andric return SmallVector<unsigned, 8>();
3970b57cec5SDimitry Andric
3980b57cec5SDimitry Andric SmallVector<unsigned, 8> Defs;
3990b57cec5SDimitry Andric for (MachineOperand &MO : MI->operands()) {
4000b57cec5SDimitry Andric if (!MO.isReg() || !MO.isUse())
4010b57cec5SDimitry Andric continue;
4020b57cec5SDimitry Andric if (!usesRegClass(MO, &ARM::DPRRegClass) &&
4030b57cec5SDimitry Andric !usesRegClass(MO, &ARM::QPRRegClass) &&
4040b57cec5SDimitry Andric !usesRegClass(MO, &ARM::DPairRegClass)) // Treat DPair as QPR
4050b57cec5SDimitry Andric continue;
4060b57cec5SDimitry Andric
4070b57cec5SDimitry Andric Defs.push_back(MO.getReg());
4080b57cec5SDimitry Andric }
4090b57cec5SDimitry Andric return Defs;
4100b57cec5SDimitry Andric }
4110b57cec5SDimitry Andric
4120b57cec5SDimitry Andric // Creates a DPR register from an SPR one by using a VDUP.
createDupLane(MachineBasicBlock & MBB,MachineBasicBlock::iterator InsertBefore,const DebugLoc & DL,unsigned Reg,unsigned Lane,bool QPR)4130b57cec5SDimitry Andric unsigned A15SDOptimizer::createDupLane(MachineBasicBlock &MBB,
4140b57cec5SDimitry Andric MachineBasicBlock::iterator InsertBefore,
4150b57cec5SDimitry Andric const DebugLoc &DL, unsigned Reg,
4160b57cec5SDimitry Andric unsigned Lane, bool QPR) {
4178bcb0991SDimitry Andric Register Out =
4188bcb0991SDimitry Andric MRI->createVirtualRegister(QPR ? &ARM::QPRRegClass : &ARM::DPRRegClass);
4190b57cec5SDimitry Andric BuildMI(MBB, InsertBefore, DL,
4200b57cec5SDimitry Andric TII->get(QPR ? ARM::VDUPLN32q : ARM::VDUPLN32d), Out)
4210b57cec5SDimitry Andric .addReg(Reg)
4220b57cec5SDimitry Andric .addImm(Lane)
4230b57cec5SDimitry Andric .add(predOps(ARMCC::AL));
4240b57cec5SDimitry Andric
4250b57cec5SDimitry Andric return Out;
4260b57cec5SDimitry Andric }
4270b57cec5SDimitry Andric
4280b57cec5SDimitry Andric // Creates a SPR register from a DPR by copying the value in lane 0.
createExtractSubreg(MachineBasicBlock & MBB,MachineBasicBlock::iterator InsertBefore,const DebugLoc & DL,unsigned DReg,unsigned Lane,const TargetRegisterClass * TRC)4290b57cec5SDimitry Andric unsigned A15SDOptimizer::createExtractSubreg(
4300b57cec5SDimitry Andric MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
4310b57cec5SDimitry Andric const DebugLoc &DL, unsigned DReg, unsigned Lane,
4320b57cec5SDimitry Andric const TargetRegisterClass *TRC) {
4338bcb0991SDimitry Andric Register Out = MRI->createVirtualRegister(TRC);
4340b57cec5SDimitry Andric BuildMI(MBB,
4350b57cec5SDimitry Andric InsertBefore,
4360b57cec5SDimitry Andric DL,
4370b57cec5SDimitry Andric TII->get(TargetOpcode::COPY), Out)
4380b57cec5SDimitry Andric .addReg(DReg, 0, Lane);
4390b57cec5SDimitry Andric
4400b57cec5SDimitry Andric return Out;
4410b57cec5SDimitry Andric }
4420b57cec5SDimitry Andric
4430b57cec5SDimitry Andric // Takes two SPR registers and creates a DPR by using a REG_SEQUENCE.
createRegSequence(MachineBasicBlock & MBB,MachineBasicBlock::iterator InsertBefore,const DebugLoc & DL,unsigned Reg1,unsigned Reg2)4440b57cec5SDimitry Andric unsigned A15SDOptimizer::createRegSequence(
4450b57cec5SDimitry Andric MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
4460b57cec5SDimitry Andric const DebugLoc &DL, unsigned Reg1, unsigned Reg2) {
4478bcb0991SDimitry Andric Register Out = MRI->createVirtualRegister(&ARM::QPRRegClass);
4480b57cec5SDimitry Andric BuildMI(MBB,
4490b57cec5SDimitry Andric InsertBefore,
4500b57cec5SDimitry Andric DL,
4510b57cec5SDimitry Andric TII->get(TargetOpcode::REG_SEQUENCE), Out)
4520b57cec5SDimitry Andric .addReg(Reg1)
4530b57cec5SDimitry Andric .addImm(ARM::dsub_0)
4540b57cec5SDimitry Andric .addReg(Reg2)
4550b57cec5SDimitry Andric .addImm(ARM::dsub_1);
4560b57cec5SDimitry Andric return Out;
4570b57cec5SDimitry Andric }
4580b57cec5SDimitry Andric
4590b57cec5SDimitry Andric // Takes two DPR registers that have previously been VDUPed (Ssub0 and Ssub1)
4600b57cec5SDimitry Andric // and merges them into one DPR register.
createVExt(MachineBasicBlock & MBB,MachineBasicBlock::iterator InsertBefore,const DebugLoc & DL,unsigned Ssub0,unsigned Ssub1)4610b57cec5SDimitry Andric unsigned A15SDOptimizer::createVExt(MachineBasicBlock &MBB,
4620b57cec5SDimitry Andric MachineBasicBlock::iterator InsertBefore,
4630b57cec5SDimitry Andric const DebugLoc &DL, unsigned Ssub0,
4640b57cec5SDimitry Andric unsigned Ssub1) {
4658bcb0991SDimitry Andric Register Out = MRI->createVirtualRegister(&ARM::DPRRegClass);
4660b57cec5SDimitry Andric BuildMI(MBB, InsertBefore, DL, TII->get(ARM::VEXTd32), Out)
4670b57cec5SDimitry Andric .addReg(Ssub0)
4680b57cec5SDimitry Andric .addReg(Ssub1)
4690b57cec5SDimitry Andric .addImm(1)
4700b57cec5SDimitry Andric .add(predOps(ARMCC::AL));
4710b57cec5SDimitry Andric return Out;
4720b57cec5SDimitry Andric }
4730b57cec5SDimitry Andric
createInsertSubreg(MachineBasicBlock & MBB,MachineBasicBlock::iterator InsertBefore,const DebugLoc & DL,unsigned DReg,unsigned Lane,unsigned ToInsert)4740b57cec5SDimitry Andric unsigned A15SDOptimizer::createInsertSubreg(
4750b57cec5SDimitry Andric MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
4760b57cec5SDimitry Andric const DebugLoc &DL, unsigned DReg, unsigned Lane, unsigned ToInsert) {
4778bcb0991SDimitry Andric Register Out = MRI->createVirtualRegister(&ARM::DPR_VFP2RegClass);
4780b57cec5SDimitry Andric BuildMI(MBB,
4790b57cec5SDimitry Andric InsertBefore,
4800b57cec5SDimitry Andric DL,
4810b57cec5SDimitry Andric TII->get(TargetOpcode::INSERT_SUBREG), Out)
4820b57cec5SDimitry Andric .addReg(DReg)
4830b57cec5SDimitry Andric .addReg(ToInsert)
4840b57cec5SDimitry Andric .addImm(Lane);
4850b57cec5SDimitry Andric
4860b57cec5SDimitry Andric return Out;
4870b57cec5SDimitry Andric }
4880b57cec5SDimitry Andric
4890b57cec5SDimitry Andric unsigned
createImplicitDef(MachineBasicBlock & MBB,MachineBasicBlock::iterator InsertBefore,const DebugLoc & DL)4900b57cec5SDimitry Andric A15SDOptimizer::createImplicitDef(MachineBasicBlock &MBB,
4910b57cec5SDimitry Andric MachineBasicBlock::iterator InsertBefore,
4920b57cec5SDimitry Andric const DebugLoc &DL) {
4938bcb0991SDimitry Andric Register Out = MRI->createVirtualRegister(&ARM::DPRRegClass);
4940b57cec5SDimitry Andric BuildMI(MBB,
4950b57cec5SDimitry Andric InsertBefore,
4960b57cec5SDimitry Andric DL,
4970b57cec5SDimitry Andric TII->get(TargetOpcode::IMPLICIT_DEF), Out);
4980b57cec5SDimitry Andric return Out;
4990b57cec5SDimitry Andric }
5000b57cec5SDimitry Andric
5010b57cec5SDimitry Andric // This function inserts instructions in order to optimize interactions between
5020b57cec5SDimitry Andric // SPR registers and DPR/QPR registers. It does so by performing VDUPs on all
5030b57cec5SDimitry Andric // lanes, and the using VEXT instructions to recompose the result.
5040b57cec5SDimitry Andric unsigned
optimizeAllLanesPattern(MachineInstr * MI,unsigned Reg)5050b57cec5SDimitry Andric A15SDOptimizer::optimizeAllLanesPattern(MachineInstr *MI, unsigned Reg) {
5060b57cec5SDimitry Andric MachineBasicBlock::iterator InsertPt(MI);
5070b57cec5SDimitry Andric DebugLoc DL = MI->getDebugLoc();
5080b57cec5SDimitry Andric MachineBasicBlock &MBB = *MI->getParent();
5090b57cec5SDimitry Andric InsertPt++;
5100b57cec5SDimitry Andric unsigned Out;
5110b57cec5SDimitry Andric
5120b57cec5SDimitry Andric // DPair has the same length as QPR and also has two DPRs as subreg.
5130b57cec5SDimitry Andric // Treat DPair as QPR.
5140b57cec5SDimitry Andric if (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::QPRRegClass) ||
5150b57cec5SDimitry Andric MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::DPairRegClass)) {
5160b57cec5SDimitry Andric unsigned DSub0 = createExtractSubreg(MBB, InsertPt, DL, Reg,
5170b57cec5SDimitry Andric ARM::dsub_0, &ARM::DPRRegClass);
5180b57cec5SDimitry Andric unsigned DSub1 = createExtractSubreg(MBB, InsertPt, DL, Reg,
5190b57cec5SDimitry Andric ARM::dsub_1, &ARM::DPRRegClass);
5200b57cec5SDimitry Andric
5210b57cec5SDimitry Andric unsigned Out1 = createDupLane(MBB, InsertPt, DL, DSub0, 0);
5220b57cec5SDimitry Andric unsigned Out2 = createDupLane(MBB, InsertPt, DL, DSub0, 1);
5230b57cec5SDimitry Andric Out = createVExt(MBB, InsertPt, DL, Out1, Out2);
5240b57cec5SDimitry Andric
5250b57cec5SDimitry Andric unsigned Out3 = createDupLane(MBB, InsertPt, DL, DSub1, 0);
5260b57cec5SDimitry Andric unsigned Out4 = createDupLane(MBB, InsertPt, DL, DSub1, 1);
5270b57cec5SDimitry Andric Out2 = createVExt(MBB, InsertPt, DL, Out3, Out4);
5280b57cec5SDimitry Andric
5290b57cec5SDimitry Andric Out = createRegSequence(MBB, InsertPt, DL, Out, Out2);
5300b57cec5SDimitry Andric
5310b57cec5SDimitry Andric } else if (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::DPRRegClass)) {
5320b57cec5SDimitry Andric unsigned Out1 = createDupLane(MBB, InsertPt, DL, Reg, 0);
5330b57cec5SDimitry Andric unsigned Out2 = createDupLane(MBB, InsertPt, DL, Reg, 1);
5340b57cec5SDimitry Andric Out = createVExt(MBB, InsertPt, DL, Out1, Out2);
5350b57cec5SDimitry Andric
5360b57cec5SDimitry Andric } else {
5370b57cec5SDimitry Andric assert(MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::SPRRegClass) &&
5380b57cec5SDimitry Andric "Found unexpected regclass!");
5390b57cec5SDimitry Andric
5400b57cec5SDimitry Andric unsigned PrefLane = getPrefSPRLane(Reg);
5410b57cec5SDimitry Andric unsigned Lane;
5420b57cec5SDimitry Andric switch (PrefLane) {
5430b57cec5SDimitry Andric case ARM::ssub_0: Lane = 0; break;
5440b57cec5SDimitry Andric case ARM::ssub_1: Lane = 1; break;
5450b57cec5SDimitry Andric default: llvm_unreachable("Unknown preferred lane!");
5460b57cec5SDimitry Andric }
5470b57cec5SDimitry Andric
5480b57cec5SDimitry Andric // Treat DPair as QPR
5490b57cec5SDimitry Andric bool UsesQPR = usesRegClass(MI->getOperand(0), &ARM::QPRRegClass) ||
5500b57cec5SDimitry Andric usesRegClass(MI->getOperand(0), &ARM::DPairRegClass);
5510b57cec5SDimitry Andric
5520b57cec5SDimitry Andric Out = createImplicitDef(MBB, InsertPt, DL);
5530b57cec5SDimitry Andric Out = createInsertSubreg(MBB, InsertPt, DL, Out, PrefLane, Reg);
5540b57cec5SDimitry Andric Out = createDupLane(MBB, InsertPt, DL, Out, Lane, UsesQPR);
5550b57cec5SDimitry Andric eraseInstrWithNoUses(MI);
5560b57cec5SDimitry Andric }
5570b57cec5SDimitry Andric return Out;
5580b57cec5SDimitry Andric }
5590b57cec5SDimitry Andric
runOnInstruction(MachineInstr * MI)5600b57cec5SDimitry Andric bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) {
5610b57cec5SDimitry Andric // We look for instructions that write S registers that are then read as
5620b57cec5SDimitry Andric // D/Q registers. These can only be caused by COPY, INSERT_SUBREG and
5630b57cec5SDimitry Andric // REG_SEQUENCE pseudos that insert an SPR value into a DPR register or
5640b57cec5SDimitry Andric // merge two SPR values to form a DPR register. In order avoid false
5650b57cec5SDimitry Andric // positives we make sure that there is an SPR producer so we look past
5660b57cec5SDimitry Andric // COPY and PHI nodes to find it.
5670b57cec5SDimitry Andric //
5680b57cec5SDimitry Andric // The best code pattern for when an SPR producer is going to be used by a
5690b57cec5SDimitry Andric // DPR or QPR consumer depends on whether the other lanes of the
5700b57cec5SDimitry Andric // corresponding DPR/QPR are currently defined.
5710b57cec5SDimitry Andric //
5720b57cec5SDimitry Andric // We can handle these efficiently, depending on the type of
5730b57cec5SDimitry Andric // pseudo-instruction that is producing the pattern
5740b57cec5SDimitry Andric //
5750b57cec5SDimitry Andric // * COPY: * VDUP all lanes and merge the results together
5760b57cec5SDimitry Andric // using VEXTs.
5770b57cec5SDimitry Andric //
5780b57cec5SDimitry Andric // * INSERT_SUBREG: * If the SPR value was originally in another DPR/QPR
5790b57cec5SDimitry Andric // lane, and the other lane(s) of the DPR/QPR register
5800b57cec5SDimitry Andric // that we are inserting in are undefined, use the
5810b57cec5SDimitry Andric // original DPR/QPR value.
5820b57cec5SDimitry Andric // * Otherwise, fall back on the same stategy as COPY.
5830b57cec5SDimitry Andric //
5840b57cec5SDimitry Andric // * REG_SEQUENCE: * If all except one of the input operands are
5850b57cec5SDimitry Andric // IMPLICIT_DEFs, insert the VDUP pattern for just the
5860b57cec5SDimitry Andric // defined input operand
5870b57cec5SDimitry Andric // * Otherwise, fall back on the same stategy as COPY.
5880b57cec5SDimitry Andric //
5890b57cec5SDimitry Andric
5900b57cec5SDimitry Andric // First, get all the reads of D-registers done by this instruction.
5910b57cec5SDimitry Andric SmallVector<unsigned, 8> Defs = getReadDPRs(MI);
5920b57cec5SDimitry Andric bool Modified = false;
5930b57cec5SDimitry Andric
5940eae32dcSDimitry Andric for (unsigned I : Defs) {
5950b57cec5SDimitry Andric // Follow the def-use chain for this DPR through COPYs, and also through
5960b57cec5SDimitry Andric // PHIs (which are essentially multi-way COPYs). It is because of PHIs that
5970b57cec5SDimitry Andric // we can end up with multiple defs of this DPR.
5980b57cec5SDimitry Andric
5990b57cec5SDimitry Andric SmallVector<MachineInstr *, 8> DefSrcs;
6000eae32dcSDimitry Andric if (!Register::isVirtualRegister(I))
6010b57cec5SDimitry Andric continue;
6020eae32dcSDimitry Andric MachineInstr *Def = MRI->getVRegDef(I);
6030b57cec5SDimitry Andric if (!Def)
6040b57cec5SDimitry Andric continue;
6050b57cec5SDimitry Andric
6060b57cec5SDimitry Andric elideCopiesAndPHIs(Def, DefSrcs);
6070b57cec5SDimitry Andric
6080b57cec5SDimitry Andric for (MachineInstr *MI : DefSrcs) {
6090b57cec5SDimitry Andric // If we've already analyzed and replaced this operand, don't do
6100b57cec5SDimitry Andric // anything.
6110b57cec5SDimitry Andric if (Replacements.find(MI) != Replacements.end())
6120b57cec5SDimitry Andric continue;
6130b57cec5SDimitry Andric
6140b57cec5SDimitry Andric // Now, work out if the instruction causes a SPR->DPR dependency.
6150b57cec5SDimitry Andric if (!hasPartialWrite(MI))
6160b57cec5SDimitry Andric continue;
6170b57cec5SDimitry Andric
6180b57cec5SDimitry Andric // Collect all the uses of this MI's DPR def for updating later.
6190b57cec5SDimitry Andric SmallVector<MachineOperand*, 8> Uses;
6208bcb0991SDimitry Andric Register DPRDefReg = MI->getOperand(0).getReg();
621349cc55cSDimitry Andric for (MachineOperand &MO : MRI->use_operands(DPRDefReg))
622349cc55cSDimitry Andric Uses.push_back(&MO);
6230b57cec5SDimitry Andric
6240b57cec5SDimitry Andric // We can optimize this.
6250b57cec5SDimitry Andric unsigned NewReg = optimizeSDPattern(MI);
6260b57cec5SDimitry Andric
6270b57cec5SDimitry Andric if (NewReg != 0) {
6280b57cec5SDimitry Andric Modified = true;
6290eae32dcSDimitry Andric for (MachineOperand *Use : Uses) {
6300b57cec5SDimitry Andric // Make sure to constrain the register class of the new register to
6310b57cec5SDimitry Andric // match what we're replacing. Otherwise we can optimize a DPR_VFP2
6320b57cec5SDimitry Andric // reference into a plain DPR, and that will end poorly. NewReg is
6330b57cec5SDimitry Andric // always virtual here, so there will always be a matching subclass
6340b57cec5SDimitry Andric // to find.
6350eae32dcSDimitry Andric MRI->constrainRegClass(NewReg, MRI->getRegClass(Use->getReg()));
6360b57cec5SDimitry Andric
6370eae32dcSDimitry Andric LLVM_DEBUG(dbgs() << "Replacing operand " << *Use << " with "
6380b57cec5SDimitry Andric << printReg(NewReg) << "\n");
6390eae32dcSDimitry Andric Use->substVirtReg(NewReg, 0, *TRI);
6400b57cec5SDimitry Andric }
6410b57cec5SDimitry Andric }
6420b57cec5SDimitry Andric Replacements[MI] = NewReg;
6430b57cec5SDimitry Andric }
6440b57cec5SDimitry Andric }
6450b57cec5SDimitry Andric return Modified;
6460b57cec5SDimitry Andric }
6470b57cec5SDimitry Andric
runOnMachineFunction(MachineFunction & Fn)6480b57cec5SDimitry Andric bool A15SDOptimizer::runOnMachineFunction(MachineFunction &Fn) {
6490b57cec5SDimitry Andric if (skipFunction(Fn.getFunction()))
6500b57cec5SDimitry Andric return false;
6510b57cec5SDimitry Andric
6520b57cec5SDimitry Andric const ARMSubtarget &STI = Fn.getSubtarget<ARMSubtarget>();
6530b57cec5SDimitry Andric // Since the A15SDOptimizer pass can insert VDUP instructions, it can only be
6540b57cec5SDimitry Andric // enabled when NEON is available.
6550b57cec5SDimitry Andric if (!(STI.useSplatVFPToNeon() && STI.hasNEON()))
6560b57cec5SDimitry Andric return false;
6570b57cec5SDimitry Andric
6580b57cec5SDimitry Andric TII = STI.getInstrInfo();
6590b57cec5SDimitry Andric TRI = STI.getRegisterInfo();
6600b57cec5SDimitry Andric MRI = &Fn.getRegInfo();
6610b57cec5SDimitry Andric bool Modified = false;
6620b57cec5SDimitry Andric
6630b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Running on function " << Fn.getName() << "\n");
6640b57cec5SDimitry Andric
6650b57cec5SDimitry Andric DeadInstr.clear();
6660b57cec5SDimitry Andric Replacements.clear();
6670b57cec5SDimitry Andric
6680b57cec5SDimitry Andric for (MachineBasicBlock &MBB : Fn) {
6690b57cec5SDimitry Andric for (MachineInstr &MI : MBB) {
6700b57cec5SDimitry Andric Modified |= runOnInstruction(&MI);
6710b57cec5SDimitry Andric }
6720b57cec5SDimitry Andric }
6730b57cec5SDimitry Andric
6740b57cec5SDimitry Andric for (MachineInstr *MI : DeadInstr) {
6750b57cec5SDimitry Andric MI->eraseFromParent();
6760b57cec5SDimitry Andric }
6770b57cec5SDimitry Andric
6780b57cec5SDimitry Andric return Modified;
6790b57cec5SDimitry Andric }
6800b57cec5SDimitry Andric
createA15SDOptimizerPass()6810b57cec5SDimitry Andric FunctionPass *llvm::createA15SDOptimizerPass() {
6820b57cec5SDimitry Andric return new A15SDOptimizer();
6830b57cec5SDimitry Andric }
684