//===-- SIPostRABundler.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass creates bundles of memory instructions to protect adjacent loads
/// and stores from being rescheduled apart from each other post-RA.
///
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/MachineFunctionPass.h"

using namespace llvm;

#define DEBUG_TYPE "si-post-ra-bundler"

namespace {

/// Post-RA machine function pass that walks each basic block and wraps runs
/// of adjacent, same-kind memory instructions into bundles so later passes
/// cannot schedule them apart (forming "soft clauses").
class SIPostRABundler : public MachineFunctionPass {
public:
  static char ID;

public:
  SIPostRABundler() : MachineFunctionPass(ID) {
    initializeSIPostRABundlerPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override {
    return "SI post-RA bundler";
  }

  // This pass only rewrites instruction grouping (bundles) inside blocks; it
  // invalidates no analyses.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesAll();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

private:
  // Target register info for the current function; set at the top of
  // runOnMachineFunction before any helper uses it.
  const SIRegisterInfo *TRI;

  // Registers defined by instructions already accepted into the clause being
  // grown. Used by isDependentLoad to stop a clause at a load that reads a
  // value produced inside the same clause. Cleared after each clause.
  SmallSet<Register, 16> Defs;

  /// Set the bit for every register unit read by \p MI in \p UsedRegUnits.
  /// Debug instructions are ignored.
  void collectUsedRegUnits(const MachineInstr &MI,
                           BitVector &UsedRegUnits) const;

  /// True if \p MI is an un-bundled memory load/store that may start a clause.
  bool isBundleCandidate(const MachineInstr &MI) const;
  /// True if \p MI is a load with an explicit register operand overlapping a
  /// register in Defs (i.e. it depends on an earlier clause member).
  bool isDependentLoad(const MachineInstr &MI) const;
  /// True if \p NextMI may be appended to a clause currently ending at \p MI.
  bool canBundle(const MachineInstr &MI, const MachineInstr &NextMI) const;
};

// Union of the TSFlags bits marking every AMDGPU memory instruction class
// this pass is willing to clause together (buffer, scalar, LDS/GDS, flat,
// and image ops).
constexpr uint64_t MemFlags = SIInstrFlags::MTBUF | SIInstrFlags::MUBUF |
                              SIInstrFlags::SMRD | SIInstrFlags::DS |
                              SIInstrFlags::FLAT | SIInstrFlags::MIMG;

} // End anonymous namespace.
INITIALIZE_PASS(SIPostRABundler, DEBUG_TYPE, "SI post-RA bundler", false, false)

char SIPostRABundler::ID = 0;

char &llvm::SIPostRABundlerID = SIPostRABundler::ID;

FunctionPass *llvm::createSIPostRABundlerPass() {
  return new SIPostRABundler();
}

// Returns true if MI is a load whose explicit operands overlap any register
// recorded in Defs, i.e. it consumes a value defined earlier in the clause
// currently being formed. Such a load must not join the clause.
bool SIPostRABundler::isDependentLoad(const MachineInstr &MI) const {
  if (!MI.mayLoad())
    return false;

  for (const MachineOperand &Op : MI.explicit_operands()) {
    if (!Op.isReg())
      continue;
    Register Reg = Op.getReg();
    // regsOverlap catches partial overlaps (sub/super registers), not just
    // exact matches.
    for (Register Def : Defs)
      if (TRI->regsOverlap(Reg, Def))
        return true;
  }

  return false;
}

// Marks in UsedRegUnits every register unit read by MI. Uses register units
// (not whole registers) so aliasing between overlapping registers is handled
// uniformly. Debug instructions contribute nothing.
void SIPostRABundler::collectUsedRegUnits(const MachineInstr &MI,
                                          BitVector &UsedRegUnits) const {
  if (MI.isDebugInstr())
    return;

  for (const MachineOperand &Op : MI.operands()) {
    if (!Op.isReg() || !Op.readsReg())
      continue;

    Register Reg = Op.getReg();
    assert(!Op.getSubReg() &&
           "subregister indexes should not be present after RA");

    for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
      UsedRegUnits.set(*Units);
  }
}

// A clause may start at MI only if it is one of the tracked memory-op classes,
// actually a load/store, and not already inside a bundle.
bool SIPostRABundler::isBundleCandidate(const MachineInstr &MI) const {
  const uint64_t IMemFlags = MI.getDesc().TSFlags & MemFlags;
  return IMemFlags != 0 && MI.mayLoadOrStore() && !MI.isBundled();
}

// NextMI may extend a clause ending at MI only when both are the same kind of
// memory operation: identical MemFlags class bits, matching load-ness and
// store-ness, NextMI not already bundled, and NextMI not a load that depends
// on a def produced inside this clause.
bool SIPostRABundler::canBundle(const MachineInstr &MI,
                                const MachineInstr &NextMI) const {
  const uint64_t IMemFlags = MI.getDesc().TSFlags & MemFlags;

  return (IMemFlags != 0 && MI.mayLoadOrStore() && !NextMI.isBundled() &&
          NextMI.mayLoad() == MI.mayLoad() && NextMI.mayStore() == MI.mayStore() &&
          ((NextMI.getDesc().TSFlags & MemFlags) == IMemFlags) &&
          !isDependentLoad(NextMI));
}

// Walks every block, grows maximal clauses of compatible memory instructions,
// erases redundant KILLs that trail a formed clause, and finalizes each
// multi-instruction clause into a bundle. Returns true if anything changed.
bool SIPostRABundler::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
  // Scratch bitvectors sized to the target's register-unit universe; reused
  // (and reset) across clauses to avoid reallocation.
  BitVector BundleUsedRegUnits(TRI->getNumRegUnits());
  BitVector KillUsedRegUnits(TRI->getNumRegUnits());

  bool Changed = false;
  for (MachineBasicBlock &MBB : MF) {
    bool HasIGLPInstrs = llvm::any_of(MBB.instrs(), [](MachineInstr &MI) {
      unsigned Opc = MI.getOpcode();
      return Opc == AMDGPU::SCHED_GROUP_BARRIER || Opc == AMDGPU::IGLP_OPT;
    });

    // Don't cluster with IGLP instructions.
    if (HasIGLPInstrs)
      continue;

    // instr_iterator walks individual instructions (including inside bundles),
    // which is required since we splice bundles together below.
    MachineBasicBlock::instr_iterator Next;
    MachineBasicBlock::instr_iterator B = MBB.instr_begin();
    MachineBasicBlock::instr_iterator E = MBB.instr_end();

    for (auto I = B; I != E; I = Next) {
      Next = std::next(I);
      if (!isBundleCandidate(*I))
        continue;

      // Defs must have been cleared after the previous clause.
      assert(Defs.empty());

      // NOTE(review): only the first explicit def's register is recorded;
      // this assumes these memory instructions define at most one register —
      // confirm against the instruction definitions.
      if (I->getNumExplicitDefs() != 0)
        Defs.insert(I->defs().begin()->getReg());

      MachineBasicBlock::instr_iterator BundleStart = I;
      MachineBasicBlock::instr_iterator BundleEnd = I;
      unsigned ClauseLength = 1;
      // Greedily extend the clause while successive instructions remain
      // compatible with the current clause tail.
      for (I = Next; I != E; I = Next) {
        Next = std::next(I);

        assert(BundleEnd != I);
        if (canBundle(*BundleEnd, *I)) {
          BundleEnd = I;
          if (I->getNumExplicitDefs() != 0)
            Defs.insert(I->defs().begin()->getReg());
          ++ClauseLength;
        } else if (!I->isMetaInstruction()) {
          // Allow meta instructions in between bundle candidates, but do not
          // start or end a bundle on one.
          //
          // TODO: It may be better to move meta instructions like dbg_value
          // after the bundle. We're relying on the memory legalizer to unbundle
          // these.
          break;
        }
      }

      Next = std::next(BundleEnd);
      // Only bundle if we actually collected more than one instruction.
      if (ClauseLength > 1) {
        Changed = true;

        // Before register allocation, kills are inserted after potential soft
        // clauses to hint register allocation. Look for kills that look like
        // this, and erase them.
        if (Next != E && Next->isKill()) {

          // TODO: Should maybe back-propagate kill flags to the bundle.
          for (const MachineInstr &BundleMI : make_range(BundleStart, Next))
            collectUsedRegUnits(BundleMI, BundleUsedRegUnits);

          // After flip(), set bits are the units NOT read by the bundle, so
          // ANDing a kill's used units against it isolates units the kill
          // names that the bundle never reads.
          BundleUsedRegUnits.flip();

          while (Next != E && Next->isKill()) {
            MachineInstr &Kill = *Next;
            collectUsedRegUnits(Kill, KillUsedRegUnits);

            KillUsedRegUnits &= BundleUsedRegUnits;

            // Erase the kill if it's a subset of the used registers.
            //
            // TODO: Should we just remove all kills? Is there any real reason to
            // keep them after RA?
            if (KillUsedRegUnits.none()) {
              // Advance Next past the kill before erasing it so the iterator
              // stays valid and the bundle range below includes this slot.
              ++Next;
              Kill.eraseFromParent();
            } else
              break;

            KillUsedRegUnits.reset();
          }

          BundleUsedRegUnits.reset();
        }

        // Turn the half-open range [BundleStart, Next) into a real bundle.
        finalizeBundle(MBB, BundleStart, Next);
      }

      Defs.clear();
    }
  }

  return Changed;
}