1 //===--- AArch64StorePairSuppress.cpp --- Suppress store pair formation ---===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass identifies floating point stores that should not be combined into 10 // store pairs. Later we may do the same for floating point loads. 11 // ===---------------------------------------------------------------------===// 12 13 #include "AArch64InstrInfo.h" 14 #include "AArch64Subtarget.h" 15 #include "llvm/CodeGen/MachineFunction.h" 16 #include "llvm/CodeGen/MachineFunctionPass.h" 17 #include "llvm/CodeGen/MachineInstr.h" 18 #include "llvm/CodeGen/MachineTraceMetrics.h" 19 #include "llvm/CodeGen/TargetInstrInfo.h" 20 #include "llvm/CodeGen/TargetSchedule.h" 21 #include "llvm/Support/Debug.h" 22 #include "llvm/Support/raw_ostream.h" 23 24 using namespace llvm; 25 26 #define DEBUG_TYPE "aarch64-stp-suppress" 27 28 #define STPSUPPRESS_PASS_NAME "AArch64 Store Pair Suppression" 29 30 namespace { 31 class AArch64StorePairSuppress : public MachineFunctionPass { 32 const AArch64InstrInfo *TII; 33 const TargetRegisterInfo *TRI; 34 const MachineRegisterInfo *MRI; 35 TargetSchedModel SchedModel; 36 MachineTraceMetrics *Traces; 37 MachineTraceMetrics::Ensemble *MinInstr; 38 39 public: 40 static char ID; 41 AArch64StorePairSuppress() : MachineFunctionPass(ID) { 42 initializeAArch64StorePairSuppressPass(*PassRegistry::getPassRegistry()); 43 } 44 45 StringRef getPassName() const override { return STPSUPPRESS_PASS_NAME; } 46 47 bool runOnMachineFunction(MachineFunction &F) override; 48 49 private: 50 bool shouldAddSTPToBlock(const MachineBasicBlock *BB); 51 52 bool isNarrowFPStore(const MachineInstr &MI); 53 54 void getAnalysisUsage(AnalysisUsage &AU) const override { 55 AU.setPreservesCFG(); 56 AU.addRequired<MachineTraceMetrics>(); 57 AU.addPreserved<MachineTraceMetrics>(); 58 MachineFunctionPass::getAnalysisUsage(AU); 59 } 60 }; 61 char AArch64StorePairSuppress::ID = 0; 62 } // anonymous 63 64 INITIALIZE_PASS(AArch64StorePairSuppress, "aarch64-stp-suppress", 65 STPSUPPRESS_PASS_NAME, false, false) 66 67 FunctionPass *llvm::createAArch64StorePairSuppressPass() { 68 return new AArch64StorePairSuppress(); 69 } 70 71 /// Return true if an STP can be added to this block without increasing the 72 /// critical resource height. STP is good to form in Ld/St limited blocks and 73 /// bad to form in float-point limited blocks. This is true independent of the 74 /// critical path. If the critical path is longer than the resource height, the 75 /// extra vector ops can limit physreg renaming. Otherwise, it could simply 76 /// oversaturate the vector units. 77 bool AArch64StorePairSuppress::shouldAddSTPToBlock(const MachineBasicBlock *BB) { 78 if (!MinInstr) 79 MinInstr = Traces->getEnsemble(MachineTraceStrategy::TS_MinInstrCount); 80 81 MachineTraceMetrics::Trace BBTrace = MinInstr->getTrace(BB); 82 unsigned ResLength = BBTrace.getResourceLength(); 83 84 // Get the machine model's scheduling class for STPQi. 85 // Bypass TargetSchedule's SchedClass resolution since we only have an opcode. 86 unsigned SCIdx = TII->get(AArch64::STPDi).getSchedClass(); 87 const MCSchedClassDesc *SCDesc = 88 SchedModel.getMCSchedModel()->getSchedClassDesc(SCIdx); 89 90 // If a subtarget does not define resources for STPQi, bail here. 91 if (SCDesc->isValid() && !SCDesc->isVariant()) { 92 unsigned ResLenWithSTP = BBTrace.getResourceLength(std::nullopt, SCDesc); 93 if (ResLenWithSTP > ResLength) { 94 LLVM_DEBUG(dbgs() << " Suppress STP in BB: " << BB->getNumber() 95 << " resources " << ResLength << " -> " << ResLenWithSTP 96 << "\n"); 97 return false; 98 } 99 } 100 return true; 101 } 102 103 /// Return true if this is a floating-point store smaller than the V reg. On 104 /// cyclone, these require a vector shuffle before storing a pair. 105 /// Ideally we would call getMatchingPairOpcode() and have the machine model 106 /// tell us if it's profitable with no cpu knowledge here. 107 /// 108 /// FIXME: We plan to develop a decent Target abstraction for simple loads and 109 /// stores. Until then use a nasty switch similar to AArch64LoadStoreOptimizer. 110 bool AArch64StorePairSuppress::isNarrowFPStore(const MachineInstr &MI) { 111 switch (MI.getOpcode()) { 112 default: 113 return false; 114 case AArch64::STRSui: 115 case AArch64::STRDui: 116 case AArch64::STURSi: 117 case AArch64::STURDi: 118 return true; 119 } 120 } 121 122 bool AArch64StorePairSuppress::runOnMachineFunction(MachineFunction &MF) { 123 if (skipFunction(MF.getFunction()) || MF.getFunction().hasOptSize()) 124 return false; 125 126 const AArch64Subtarget &ST = MF.getSubtarget<AArch64Subtarget>(); 127 if (!ST.enableStorePairSuppress()) 128 return false; 129 130 TII = static_cast<const AArch64InstrInfo *>(ST.getInstrInfo()); 131 TRI = ST.getRegisterInfo(); 132 MRI = &MF.getRegInfo(); 133 SchedModel.init(&ST); 134 Traces = &getAnalysis<MachineTraceMetrics>(); 135 MinInstr = nullptr; 136 137 LLVM_DEBUG(dbgs() << "*** " << getPassName() << ": " << MF.getName() << '\n'); 138 139 if (!SchedModel.hasInstrSchedModel()) { 140 LLVM_DEBUG(dbgs() << " Skipping pass: no machine model present.\n"); 141 return false; 142 } 143 144 // Check for a sequence of stores to the same base address. We don't need to 145 // precisely determine whether a store pair can be formed. But we do want to 146 // filter out most situations where we can't form store pairs to avoid 147 // computing trace metrics in those cases. 148 for (auto &MBB : MF) { 149 bool SuppressSTP = false; 150 unsigned PrevBaseReg = 0; 151 for (auto &MI : MBB) { 152 if (!isNarrowFPStore(MI)) 153 continue; 154 const MachineOperand *BaseOp; 155 int64_t Offset; 156 bool OffsetIsScalable; 157 if (TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, 158 TRI) && 159 BaseOp->isReg()) { 160 Register BaseReg = BaseOp->getReg(); 161 if (PrevBaseReg == BaseReg) { 162 // If this block can take STPs, skip ahead to the next block. 163 if (!SuppressSTP && shouldAddSTPToBlock(MI.getParent())) 164 break; 165 // Otherwise, continue unpairing the stores in this block. 166 LLVM_DEBUG(dbgs() << "Unpairing store " << MI << "\n"); 167 SuppressSTP = true; 168 TII->suppressLdStPair(MI); 169 } 170 PrevBaseReg = BaseReg; 171 } else 172 PrevBaseReg = 0; 173 } 174 } 175 // This pass just sets some internal MachineMemOperand flags. It can't really 176 // invalidate anything. 177 return false; 178 } 179