1 //===--- AArch64StorePairSuppress.cpp --- Suppress store pair formation ---===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass identifies floating point stores that should not be combined into 10 // store pairs. Later we may do the same for floating point loads. 11 // ===---------------------------------------------------------------------===// 12 13 #include "AArch64InstrInfo.h" 14 #include "llvm/CodeGen/MachineFunction.h" 15 #include "llvm/CodeGen/MachineFunctionPass.h" 16 #include "llvm/CodeGen/MachineInstr.h" 17 #include "llvm/CodeGen/MachineTraceMetrics.h" 18 #include "llvm/CodeGen/TargetInstrInfo.h" 19 #include "llvm/CodeGen/TargetSchedule.h" 20 #include "llvm/Support/Debug.h" 21 #include "llvm/Support/raw_ostream.h" 22 23 using namespace llvm; 24 25 #define DEBUG_TYPE "aarch64-stp-suppress" 26 27 #define STPSUPPRESS_PASS_NAME "AArch64 Store Pair Suppression" 28 29 namespace { 30 class AArch64StorePairSuppress : public MachineFunctionPass { 31 const AArch64InstrInfo *TII; 32 const TargetRegisterInfo *TRI; 33 const MachineRegisterInfo *MRI; 34 TargetSchedModel SchedModel; 35 MachineTraceMetrics *Traces; 36 MachineTraceMetrics::Ensemble *MinInstr; 37 38 public: 39 static char ID; 40 AArch64StorePairSuppress() : MachineFunctionPass(ID) { 41 initializeAArch64StorePairSuppressPass(*PassRegistry::getPassRegistry()); 42 } 43 44 StringRef getPassName() const override { return STPSUPPRESS_PASS_NAME; } 45 46 bool runOnMachineFunction(MachineFunction &F) override; 47 48 private: 49 bool shouldAddSTPToBlock(const MachineBasicBlock *BB); 50 51 bool isNarrowFPStore(const MachineInstr &MI); 52 53 void getAnalysisUsage(AnalysisUsage &AU) const override { 54 AU.setPreservesCFG(); 55 AU.addRequired<MachineTraceMetrics>(); 56 AU.addPreserved<MachineTraceMetrics>(); 57 MachineFunctionPass::getAnalysisUsage(AU); 58 } 59 }; 60 char AArch64StorePairSuppress::ID = 0; 61 } // anonymous 62 63 INITIALIZE_PASS(AArch64StorePairSuppress, "aarch64-stp-suppress", 64 STPSUPPRESS_PASS_NAME, false, false) 65 66 FunctionPass *llvm::createAArch64StorePairSuppressPass() { 67 return new AArch64StorePairSuppress(); 68 } 69 70 /// Return true if an STP can be added to this block without increasing the 71 /// critical resource height. STP is good to form in Ld/St limited blocks and 72 /// bad to form in float-point limited blocks. This is true independent of the 73 /// critical path. If the critical path is longer than the resource height, the 74 /// extra vector ops can limit physreg renaming. Otherwise, it could simply 75 /// oversaturate the vector units. 76 bool AArch64StorePairSuppress::shouldAddSTPToBlock(const MachineBasicBlock *BB) { 77 if (!MinInstr) 78 MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount); 79 80 MachineTraceMetrics::Trace BBTrace = MinInstr->getTrace(BB); 81 unsigned ResLength = BBTrace.getResourceLength(); 82 83 // Get the machine model's scheduling class for STPQi. 84 // Bypass TargetSchedule's SchedClass resolution since we only have an opcode. 85 unsigned SCIdx = TII->get(AArch64::STPDi).getSchedClass(); 86 const MCSchedClassDesc *SCDesc = 87 SchedModel.getMCSchedModel()->getSchedClassDesc(SCIdx); 88 89 // If a subtarget does not define resources for STPQi, bail here. 90 if (SCDesc->isValid() && !SCDesc->isVariant()) { 91 unsigned ResLenWithSTP = BBTrace.getResourceLength(std::nullopt, SCDesc); 92 if (ResLenWithSTP > ResLength) { 93 LLVM_DEBUG(dbgs() << " Suppress STP in BB: " << BB->getNumber() 94 << " resources " << ResLength << " -> " << ResLenWithSTP 95 << "\n"); 96 return false; 97 } 98 } 99 return true; 100 } 101 102 /// Return true if this is a floating-point store smaller than the V reg. On 103 /// cyclone, these require a vector shuffle before storing a pair. 104 /// Ideally we would call getMatchingPairOpcode() and have the machine model 105 /// tell us if it's profitable with no cpu knowledge here. 106 /// 107 /// FIXME: We plan to develop a decent Target abstraction for simple loads and 108 /// stores. Until then use a nasty switch similar to AArch64LoadStoreOptimizer. 109 bool AArch64StorePairSuppress::isNarrowFPStore(const MachineInstr &MI) { 110 switch (MI.getOpcode()) { 111 default: 112 return false; 113 case AArch64::STRSui: 114 case AArch64::STRDui: 115 case AArch64::STURSi: 116 case AArch64::STURDi: 117 return true; 118 } 119 } 120 121 bool AArch64StorePairSuppress::runOnMachineFunction(MachineFunction &MF) { 122 if (skipFunction(MF.getFunction()) || MF.getFunction().hasOptSize()) 123 return false; 124 125 const TargetSubtargetInfo &ST = MF.getSubtarget(); 126 TII = static_cast<const AArch64InstrInfo *>(ST.getInstrInfo()); 127 TRI = ST.getRegisterInfo(); 128 MRI = &MF.getRegInfo(); 129 SchedModel.init(&ST); 130 Traces = &getAnalysis<MachineTraceMetrics>(); 131 MinInstr = nullptr; 132 133 LLVM_DEBUG(dbgs() << "*** " << getPassName() << ": " << MF.getName() << '\n'); 134 135 if (!SchedModel.hasInstrSchedModel()) { 136 LLVM_DEBUG(dbgs() << " Skipping pass: no machine model present.\n"); 137 return false; 138 } 139 140 // Check for a sequence of stores to the same base address. We don't need to 141 // precisely determine whether a store pair can be formed. But we do want to 142 // filter out most situations where we can't form store pairs to avoid 143 // computing trace metrics in those cases. 144 for (auto &MBB : MF) { 145 bool SuppressSTP = false; 146 unsigned PrevBaseReg = 0; 147 for (auto &MI : MBB) { 148 if (!isNarrowFPStore(MI)) 149 continue; 150 const MachineOperand *BaseOp; 151 int64_t Offset; 152 bool OffsetIsScalable; 153 if (TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, 154 TRI) && 155 BaseOp->isReg()) { 156 Register BaseReg = BaseOp->getReg(); 157 if (PrevBaseReg == BaseReg) { 158 // If this block can take STPs, skip ahead to the next block. 159 if (!SuppressSTP && shouldAddSTPToBlock(MI.getParent())) 160 break; 161 // Otherwise, continue unpairing the stores in this block. 162 LLVM_DEBUG(dbgs() << "Unpairing store " << MI << "\n"); 163 SuppressSTP = true; 164 TII->suppressLdStPair(MI); 165 } 166 PrevBaseReg = BaseReg; 167 } else 168 PrevBaseReg = 0; 169 } 170 } 171 // This pass just sets some internal MachineMemOperand flags. It can't really 172 // invalidate anything. 173 return false; 174 } 175