1 //===--- AArch64StorePairSuppress.cpp --- Suppress store pair formation ---===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass identifies floating point stores that should not be combined into 10 // store pairs. Later we may do the same for floating point loads. 11 // ===---------------------------------------------------------------------===// 12 13 #include "AArch64InstrInfo.h" 14 #include "AArch64Subtarget.h" 15 #include "llvm/CodeGen/MachineFunction.h" 16 #include "llvm/CodeGen/MachineFunctionPass.h" 17 #include "llvm/CodeGen/MachineInstr.h" 18 #include "llvm/CodeGen/MachineTraceMetrics.h" 19 #include "llvm/CodeGen/TargetInstrInfo.h" 20 #include "llvm/CodeGen/TargetSchedule.h" 21 #include "llvm/Support/Debug.h" 22 #include "llvm/Support/raw_ostream.h" 23 24 using namespace llvm; 25 26 #define DEBUG_TYPE "aarch64-stp-suppress" 27 28 #define STPSUPPRESS_PASS_NAME "AArch64 Store Pair Suppression" 29 30 namespace { 31 class AArch64StorePairSuppress : public MachineFunctionPass { 32 const AArch64InstrInfo *TII; 33 const TargetRegisterInfo *TRI; 34 const MachineRegisterInfo *MRI; 35 TargetSchedModel SchedModel; 36 MachineTraceMetrics *Traces; 37 MachineTraceMetrics::Ensemble *MinInstr; 38 39 public: 40 static char ID; 41 AArch64StorePairSuppress() : MachineFunctionPass(ID) { 42 initializeAArch64StorePairSuppressPass(*PassRegistry::getPassRegistry()); 43 } 44 45 StringRef getPassName() const override { return STPSUPPRESS_PASS_NAME; } 46 47 bool runOnMachineFunction(MachineFunction &F) override; 48 49 private: 50 bool shouldAddSTPToBlock(const MachineBasicBlock *BB); 51 52 bool isNarrowFPStore(const MachineInstr &MI); 53 54 void getAnalysisUsage(AnalysisUsage &AU) const override { 55 AU.setPreservesCFG(); 56 AU.addRequired<MachineTraceMetrics>(); 57 AU.addPreserved<MachineTraceMetrics>(); 58 MachineFunctionPass::getAnalysisUsage(AU); 59 } 60 }; 61 char AArch64StorePairSuppress::ID = 0; 62 } // anonymous 63 64 INITIALIZE_PASS(AArch64StorePairSuppress, "aarch64-stp-suppress", 65 STPSUPPRESS_PASS_NAME, false, false) 66 67 FunctionPass *llvm::createAArch64StorePairSuppressPass() { 68 return new AArch64StorePairSuppress(); 69 } 70 71 /// Return true if an STP can be added to this block without increasing the 72 /// critical resource height. STP is good to form in Ld/St limited blocks and 73 /// bad to form in float-point limited blocks. This is true independent of the 74 /// critical path. If the critical path is longer than the resource height, the 75 /// extra vector ops can limit physreg renaming. Otherwise, it could simply 76 /// oversaturate the vector units. 77 bool AArch64StorePairSuppress::shouldAddSTPToBlock(const MachineBasicBlock *BB) { 78 if (!MinInstr) 79 MinInstr = Traces->getEnsemble(MachineTraceStrategy::TS_MinInstrCount); 80 81 MachineTraceMetrics::Trace BBTrace = MinInstr->getTrace(BB); 82 unsigned ResLength = BBTrace.getResourceLength(); 83 84 // Get the machine model's scheduling class for STPDi and STRDui. 85 // Bypass TargetSchedule's SchedClass resolution since we only have an opcode. 86 unsigned SCIdx = TII->get(AArch64::STPDi).getSchedClass(); 87 const MCSchedClassDesc *PairSCDesc = 88 SchedModel.getMCSchedModel()->getSchedClassDesc(SCIdx); 89 90 unsigned SCIdx2 = TII->get(AArch64::STRDui).getSchedClass(); 91 const MCSchedClassDesc *SingleSCDesc = 92 SchedModel.getMCSchedModel()->getSchedClassDesc(SCIdx2); 93 94 // If a subtarget does not define resources for STPDi, bail here. 95 if (PairSCDesc->isValid() && !PairSCDesc->isVariant() && 96 SingleSCDesc->isValid() && !SingleSCDesc->isVariant()) { 97 // Compute the new critical resource length after replacing 2 separate 98 // STRDui with one STPDi. 99 unsigned ResLenWithSTP = BBTrace.getResourceLength( 100 std::nullopt, PairSCDesc, {SingleSCDesc, SingleSCDesc}); 101 if (ResLenWithSTP > ResLength) { 102 LLVM_DEBUG(dbgs() << " Suppress STP in BB: " << BB->getNumber() 103 << " resources " << ResLength << " -> " << ResLenWithSTP 104 << "\n"); 105 return false; 106 } 107 } 108 return true; 109 } 110 111 /// Return true if this is a floating-point store smaller than the V reg. On 112 /// cyclone, these require a vector shuffle before storing a pair. 113 /// Ideally we would call getMatchingPairOpcode() and have the machine model 114 /// tell us if it's profitable with no cpu knowledge here. 115 /// 116 /// FIXME: We plan to develop a decent Target abstraction for simple loads and 117 /// stores. Until then use a nasty switch similar to AArch64LoadStoreOptimizer. 118 bool AArch64StorePairSuppress::isNarrowFPStore(const MachineInstr &MI) { 119 switch (MI.getOpcode()) { 120 default: 121 return false; 122 case AArch64::STRSui: 123 case AArch64::STRDui: 124 case AArch64::STURSi: 125 case AArch64::STURDi: 126 return true; 127 } 128 } 129 130 bool AArch64StorePairSuppress::runOnMachineFunction(MachineFunction &MF) { 131 if (skipFunction(MF.getFunction()) || MF.getFunction().hasOptSize()) 132 return false; 133 134 const AArch64Subtarget &ST = MF.getSubtarget<AArch64Subtarget>(); 135 if (!ST.enableStorePairSuppress()) 136 return false; 137 138 TII = static_cast<const AArch64InstrInfo *>(ST.getInstrInfo()); 139 TRI = ST.getRegisterInfo(); 140 MRI = &MF.getRegInfo(); 141 SchedModel.init(&ST); 142 Traces = &getAnalysis<MachineTraceMetrics>(); 143 MinInstr = nullptr; 144 145 LLVM_DEBUG(dbgs() << "*** " << getPassName() << ": " << MF.getName() << '\n'); 146 147 if (!SchedModel.hasInstrSchedModel()) { 148 LLVM_DEBUG(dbgs() << " Skipping pass: no machine model present.\n"); 149 return false; 150 } 151 152 // Check for a sequence of stores to the same base address. We don't need to 153 // precisely determine whether a store pair can be formed. But we do want to 154 // filter out most situations where we can't form store pairs to avoid 155 // computing trace metrics in those cases. 156 for (auto &MBB : MF) { 157 bool SuppressSTP = false; 158 unsigned PrevBaseReg = 0; 159 for (auto &MI : MBB) { 160 if (!isNarrowFPStore(MI)) 161 continue; 162 const MachineOperand *BaseOp; 163 int64_t Offset; 164 bool OffsetIsScalable; 165 if (TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, 166 TRI) && 167 BaseOp->isReg()) { 168 Register BaseReg = BaseOp->getReg(); 169 if (PrevBaseReg == BaseReg) { 170 // If this block can take STPs, skip ahead to the next block. 171 if (!SuppressSTP && shouldAddSTPToBlock(MI.getParent())) 172 break; 173 // Otherwise, continue unpairing the stores in this block. 174 LLVM_DEBUG(dbgs() << "Unpairing store " << MI << "\n"); 175 SuppressSTP = true; 176 TII->suppressLdStPair(MI); 177 } 178 PrevBaseReg = BaseReg; 179 } else 180 PrevBaseReg = 0; 181 } 182 } 183 // This pass just sets some internal MachineMemOperand flags. It can't really 184 // invalidate anything. 185 return false; 186 } 187