1 //===-- SIPostRABundler.cpp -----------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// This pass creates bundles of memory instructions to protect adjacent loads 11 /// and stores from being rescheduled apart from each other post-RA. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #include "SIPostRABundler.h" 16 #include "AMDGPU.h" 17 #include "GCNSubtarget.h" 18 #include "llvm/ADT/SmallSet.h" 19 #include "llvm/CodeGen/MachineFunctionPass.h" 20 21 using namespace llvm; 22 23 #define DEBUG_TYPE "si-post-ra-bundler" 24 25 namespace { 26 27 class SIPostRABundlerLegacy : public MachineFunctionPass { 28 public: 29 static char ID; 30 31 public: 32 SIPostRABundlerLegacy() : MachineFunctionPass(ID) { 33 initializeSIPostRABundlerLegacyPass(*PassRegistry::getPassRegistry()); 34 } 35 36 bool runOnMachineFunction(MachineFunction &MF) override; 37 38 StringRef getPassName() const override { 39 return "SI post-RA bundler"; 40 } 41 42 void getAnalysisUsage(AnalysisUsage &AU) const override { 43 AU.setPreservesAll(); 44 MachineFunctionPass::getAnalysisUsage(AU); 45 } 46 }; 47 48 class SIPostRABundler { 49 public: 50 bool run(MachineFunction &MF); 51 52 private: 53 const SIRegisterInfo *TRI; 54 55 SmallSet<Register, 16> Defs; 56 57 void collectUsedRegUnits(const MachineInstr &MI, 58 BitVector &UsedRegUnits) const; 59 60 bool isBundleCandidate(const MachineInstr &MI) const; 61 bool isDependentLoad(const MachineInstr &MI) const; 62 bool canBundle(const MachineInstr &MI, const MachineInstr &NextMI) const; 63 }; 64 65 constexpr uint64_t MemFlags = SIInstrFlags::MTBUF | SIInstrFlags::MUBUF | 66 SIInstrFlags::SMRD | SIInstrFlags::DS | 67 SIInstrFlags::FLAT | SIInstrFlags::MIMG | 68 SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE; 69 70 } // End anonymous namespace. 71 72 INITIALIZE_PASS(SIPostRABundlerLegacy, DEBUG_TYPE, "SI post-RA bundler", false, 73 false) 74 75 char SIPostRABundlerLegacy::ID = 0; 76 77 char &llvm::SIPostRABundlerLegacyID = SIPostRABundlerLegacy::ID; 78 79 FunctionPass *llvm::createSIPostRABundlerPass() { 80 return new SIPostRABundlerLegacy(); 81 } 82 83 bool SIPostRABundler::isDependentLoad(const MachineInstr &MI) const { 84 if (!MI.mayLoad()) 85 return false; 86 87 for (const MachineOperand &Op : MI.explicit_operands()) { 88 if (!Op.isReg()) 89 continue; 90 Register Reg = Op.getReg(); 91 for (Register Def : Defs) 92 if (TRI->regsOverlap(Reg, Def)) 93 return true; 94 } 95 96 return false; 97 } 98 99 void SIPostRABundler::collectUsedRegUnits(const MachineInstr &MI, 100 BitVector &UsedRegUnits) const { 101 if (MI.isDebugInstr()) 102 return; 103 104 for (const MachineOperand &Op : MI.operands()) { 105 if (!Op.isReg() || !Op.readsReg()) 106 continue; 107 108 Register Reg = Op.getReg(); 109 assert(!Op.getSubReg() && 110 "subregister indexes should not be present after RA"); 111 112 for (MCRegUnit Unit : TRI->regunits(Reg)) 113 UsedRegUnits.set(Unit); 114 } 115 } 116 117 bool SIPostRABundler::isBundleCandidate(const MachineInstr &MI) const { 118 const uint64_t IMemFlags = MI.getDesc().TSFlags & MemFlags; 119 return IMemFlags != 0 && MI.mayLoadOrStore() && !MI.isBundled(); 120 } 121 122 bool SIPostRABundler::canBundle(const MachineInstr &MI, 123 const MachineInstr &NextMI) const { 124 const uint64_t IMemFlags = MI.getDesc().TSFlags & MemFlags; 125 126 return (IMemFlags != 0 && MI.mayLoadOrStore() && !NextMI.isBundled() && 127 NextMI.mayLoad() == MI.mayLoad() && NextMI.mayStore() == MI.mayStore() && 128 ((NextMI.getDesc().TSFlags & MemFlags) == IMemFlags) && 129 !isDependentLoad(NextMI)); 130 } 131 132 bool SIPostRABundlerLegacy::runOnMachineFunction(MachineFunction &MF) { 133 if (skipFunction(MF.getFunction())) 134 return false; 135 return SIPostRABundler().run(MF); 136 } 137 138 PreservedAnalyses SIPostRABundlerPass::run(MachineFunction &MF, 139 MachineFunctionAnalysisManager &) { 140 SIPostRABundler().run(MF); 141 return PreservedAnalyses::all(); 142 } 143 144 bool SIPostRABundler::run(MachineFunction &MF) { 145 146 TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo(); 147 BitVector BundleUsedRegUnits(TRI->getNumRegUnits()); 148 BitVector KillUsedRegUnits(TRI->getNumRegUnits()); 149 150 bool Changed = false; 151 for (MachineBasicBlock &MBB : MF) { 152 bool HasIGLPInstrs = llvm::any_of(MBB.instrs(), [](MachineInstr &MI) { 153 unsigned Opc = MI.getOpcode(); 154 return Opc == AMDGPU::SCHED_GROUP_BARRIER || Opc == AMDGPU::IGLP_OPT; 155 }); 156 157 // Don't cluster with IGLP instructions. 158 if (HasIGLPInstrs) 159 continue; 160 161 MachineBasicBlock::instr_iterator Next; 162 MachineBasicBlock::instr_iterator B = MBB.instr_begin(); 163 MachineBasicBlock::instr_iterator E = MBB.instr_end(); 164 165 for (auto I = B; I != E; I = Next) { 166 Next = std::next(I); 167 if (!isBundleCandidate(*I)) 168 continue; 169 170 assert(Defs.empty()); 171 172 if (I->getNumExplicitDefs() != 0) 173 Defs.insert(I->defs().begin()->getReg()); 174 175 MachineBasicBlock::instr_iterator BundleStart = I; 176 MachineBasicBlock::instr_iterator BundleEnd = I; 177 unsigned ClauseLength = 1; 178 for (I = Next; I != E; I = Next) { 179 Next = std::next(I); 180 181 assert(BundleEnd != I); 182 if (canBundle(*BundleEnd, *I)) { 183 BundleEnd = I; 184 if (I->getNumExplicitDefs() != 0) 185 Defs.insert(I->defs().begin()->getReg()); 186 ++ClauseLength; 187 } else if (!I->isMetaInstruction()) { 188 // Allow meta instructions in between bundle candidates, but do not 189 // start or end a bundle on one. 190 // 191 // TODO: It may be better to move meta instructions like dbg_value 192 // after the bundle. We're relying on the memory legalizer to unbundle 193 // these. 194 break; 195 } 196 } 197 198 Next = std::next(BundleEnd); 199 if (ClauseLength > 1) { 200 Changed = true; 201 202 // Before register allocation, kills are inserted after potential soft 203 // clauses to hint register allocation. Look for kills that look like 204 // this, and erase them. 205 if (Next != E && Next->isKill()) { 206 207 // TODO: Should maybe back-propagate kill flags to the bundle. 208 for (const MachineInstr &BundleMI : make_range(BundleStart, Next)) 209 collectUsedRegUnits(BundleMI, BundleUsedRegUnits); 210 211 BundleUsedRegUnits.flip(); 212 213 while (Next != E && Next->isKill()) { 214 MachineInstr &Kill = *Next; 215 collectUsedRegUnits(Kill, KillUsedRegUnits); 216 217 KillUsedRegUnits &= BundleUsedRegUnits; 218 219 // Erase the kill if it's a subset of the used registers. 220 // 221 // TODO: Should we just remove all kills? Is there any real reason to 222 // keep them after RA? 223 if (KillUsedRegUnits.none()) { 224 ++Next; 225 Kill.eraseFromParent(); 226 } else 227 break; 228 229 KillUsedRegUnits.reset(); 230 } 231 232 BundleUsedRegUnits.reset(); 233 } 234 235 finalizeBundle(MBB, BundleStart, Next); 236 } 237 238 Defs.clear(); 239 } 240 } 241 242 return Changed; 243 } 244