1 //===----- RISCVLoadStoreOptimizer.cpp ------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Load/Store Pairing: It identifies pairs of load or store instructions 10 // operating on consecutive memory locations and merges them into a single 11 // paired instruction, leveraging hardware support for paired memory accesses. 12 // Much of the pairing logic is adapted from the AArch64LoadStoreOpt pass. 13 // 14 // NOTE: The AArch64LoadStoreOpt pass performs additional optimizations such as 15 // merging zero store instructions, promoting loads that read directly from a 16 // preceding store, and merging base register updates with load/store 17 // instructions (via pre-/post-indexed addressing). These advanced 18 // transformations are not yet implemented in the RISC-V pass but represent 19 // potential future enhancements for further optimizing RISC-V memory 20 // operations. 21 // 22 //===----------------------------------------------------------------------===// 23 24 #include "RISCV.h" 25 #include "RISCVTargetMachine.h" 26 #include "llvm/Analysis/AliasAnalysis.h" 27 #include "llvm/CodeGen/Passes.h" 28 #include "llvm/MC/TargetRegistry.h" 29 #include "llvm/Support/Debug.h" 30 #include "llvm/Target/TargetOptions.h" 31 32 using namespace llvm; 33 34 #define DEBUG_TYPE "riscv-load-store-opt" 35 #define RISCV_LOAD_STORE_OPT_NAME "RISC-V Load / Store Optimizer" 36 37 // The LdStLimit limits number of instructions how far we search for load/store 38 // pairs. 39 static cl::opt<unsigned> LdStLimit("riscv-load-store-scan-limit", cl::init(128), 40 cl::Hidden); 41 42 namespace { 43 44 struct RISCVLoadStoreOpt : public MachineFunctionPass { 45 static char ID; 46 bool runOnMachineFunction(MachineFunction &Fn) override; 47 48 RISCVLoadStoreOpt() : MachineFunctionPass(ID) {} 49 50 MachineFunctionProperties getRequiredProperties() const override { 51 return MachineFunctionProperties().setNoVRegs(); 52 } 53 54 void getAnalysisUsage(AnalysisUsage &AU) const override { 55 AU.addRequired<AAResultsWrapperPass>(); 56 MachineFunctionPass::getAnalysisUsage(AU); 57 } 58 59 StringRef getPassName() const override { return RISCV_LOAD_STORE_OPT_NAME; } 60 61 // Find and pair load/store instructions. 62 bool tryToPairLdStInst(MachineBasicBlock::iterator &MBBI); 63 64 // Convert load/store pairs to single instructions. 65 bool tryConvertToLdStPair(MachineBasicBlock::iterator First, 66 MachineBasicBlock::iterator Second); 67 68 // Scan the instructions looking for a load/store that can be combined 69 // with the current instruction into a load/store pair. 70 // Return the matching instruction if one is found, else MBB->end(). 71 MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I, 72 bool &MergeForward); 73 74 MachineBasicBlock::iterator 75 mergePairedInsns(MachineBasicBlock::iterator I, 76 MachineBasicBlock::iterator Paired, bool MergeForward); 77 78 private: 79 AliasAnalysis *AA; 80 MachineRegisterInfo *MRI; 81 const RISCVInstrInfo *TII; 82 const RISCVRegisterInfo *TRI; 83 LiveRegUnits ModifiedRegUnits, UsedRegUnits; 84 }; 85 } // end anonymous namespace 86 87 char RISCVLoadStoreOpt::ID = 0; 88 INITIALIZE_PASS(RISCVLoadStoreOpt, DEBUG_TYPE, RISCV_LOAD_STORE_OPT_NAME, false, 89 false) 90 91 bool RISCVLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { 92 if (skipFunction(Fn.getFunction())) 93 return false; 94 const RISCVSubtarget &Subtarget = Fn.getSubtarget<RISCVSubtarget>(); 95 if (!Subtarget.useLoadStorePairs()) 96 return false; 97 98 bool MadeChange = false; 99 TII = Subtarget.getInstrInfo(); 100 TRI = Subtarget.getRegisterInfo(); 101 MRI = &Fn.getRegInfo(); 102 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); 103 ModifiedRegUnits.init(*TRI); 104 UsedRegUnits.init(*TRI); 105 106 for (MachineBasicBlock &MBB : Fn) { 107 LLVM_DEBUG(dbgs() << "MBB: " << MBB.getName() << "\n"); 108 109 for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); 110 MBBI != E;) { 111 if (TII->isPairableLdStInstOpc(MBBI->getOpcode()) && 112 tryToPairLdStInst(MBBI)) 113 MadeChange = true; 114 else 115 ++MBBI; 116 } 117 } 118 return MadeChange; 119 } 120 121 // Find loads and stores that can be merged into a single load or store pair 122 // instruction. 123 bool RISCVLoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) { 124 MachineInstr &MI = *MBBI; 125 126 // If this is volatile, it is not a candidate. 127 if (MI.hasOrderedMemoryRef()) 128 return false; 129 130 if (!TII->isLdStSafeToPair(MI, TRI)) 131 return false; 132 133 // Look ahead for a pairable instruction. 134 MachineBasicBlock::iterator E = MI.getParent()->end(); 135 bool MergeForward; 136 MachineBasicBlock::iterator Paired = findMatchingInsn(MBBI, MergeForward); 137 if (Paired != E) { 138 MBBI = mergePairedInsns(MBBI, Paired, MergeForward); 139 return true; 140 } 141 return false; 142 } 143 144 // Merge two adjacent load/store instructions into a paired instruction 145 // (LDP/SDP/SWP/LWP) if the effective address is 8-byte aligned in case of 146 // SWP/LWP 16-byte aligned in case of LDP/SDP. This function selects the 147 // appropriate paired opcode, verifies that the memory operand is properly 148 // aligned, and checks that the offset is valid. If all conditions are met, it 149 // builds and inserts the paired instruction. 150 bool RISCVLoadStoreOpt::tryConvertToLdStPair( 151 MachineBasicBlock::iterator First, MachineBasicBlock::iterator Second) { 152 unsigned PairOpc; 153 Align RequiredAlignment; 154 switch (First->getOpcode()) { 155 default: 156 llvm_unreachable("Unsupported load/store instruction for pairing"); 157 case RISCV::SW: 158 PairOpc = RISCV::MIPS_SWP; 159 RequiredAlignment = Align(8); 160 break; 161 case RISCV::LW: 162 PairOpc = RISCV::MIPS_LWP; 163 RequiredAlignment = Align(8); 164 break; 165 case RISCV::SD: 166 PairOpc = RISCV::MIPS_SDP; 167 RequiredAlignment = Align(16); 168 break; 169 case RISCV::LD: 170 PairOpc = RISCV::MIPS_LDP; 171 RequiredAlignment = Align(16); 172 break; 173 } 174 175 MachineFunction *MF = First->getMF(); 176 const MachineMemOperand *MMO = *First->memoperands_begin(); 177 Align MMOAlign = MMO->getAlign(); 178 179 if (MMOAlign < RequiredAlignment) 180 return false; 181 182 int64_t Offset = First->getOperand(2).getImm(); 183 if (!isUInt<7>(Offset)) 184 return false; 185 186 MachineInstrBuilder MIB = BuildMI( 187 *MF, First->getDebugLoc() ? First->getDebugLoc() : Second->getDebugLoc(), 188 TII->get(PairOpc)); 189 MIB.add(First->getOperand(0)) 190 .add(Second->getOperand(0)) 191 .add(First->getOperand(1)) 192 .add(First->getOperand(2)) 193 .cloneMergedMemRefs({&*First, &*Second}); 194 195 First->getParent()->insert(First, MIB); 196 197 First->removeFromParent(); 198 Second->removeFromParent(); 199 200 return true; 201 } 202 203 static bool mayAlias(MachineInstr &MIa, 204 SmallVectorImpl<MachineInstr *> &MemInsns, 205 AliasAnalysis *AA) { 206 for (MachineInstr *MIb : MemInsns) 207 if (MIa.mayAlias(AA, *MIb, /*UseTBAA*/ false)) 208 return true; 209 210 return false; 211 } 212 213 // Scan the instructions looking for a load/store that can be combined with the 214 // current instruction into a wider equivalent or a load/store pair. 215 // TODO: Extend pairing logic to consider reordering both instructions 216 // to a safe "middle" position rather than only merging forward/backward. 217 // This requires more sophisticated checks for aliasing, register 218 // liveness, and potential scheduling hazards. 219 MachineBasicBlock::iterator 220 RISCVLoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, 221 bool &MergeForward) { 222 MachineBasicBlock::iterator E = I->getParent()->end(); 223 MachineBasicBlock::iterator MBBI = I; 224 MachineInstr &FirstMI = *I; 225 MBBI = next_nodbg(MBBI, E); 226 227 bool MayLoad = FirstMI.mayLoad(); 228 Register Reg = FirstMI.getOperand(0).getReg(); 229 Register BaseReg = FirstMI.getOperand(1).getReg(); 230 int64_t Offset = FirstMI.getOperand(2).getImm(); 231 int64_t OffsetStride = (*FirstMI.memoperands_begin())->getSize().getValue(); 232 233 MergeForward = false; 234 235 // Track which register units have been modified and used between the first 236 // insn (inclusive) and the second insn. 237 ModifiedRegUnits.clear(); 238 UsedRegUnits.clear(); 239 240 // Remember any instructions that read/write memory between FirstMI and MI. 241 SmallVector<MachineInstr *, 4> MemInsns; 242 243 for (unsigned Count = 0; MBBI != E && Count < LdStLimit; 244 MBBI = next_nodbg(MBBI, E)) { 245 MachineInstr &MI = *MBBI; 246 247 // Don't count transient instructions towards the search limit since there 248 // may be different numbers of them if e.g. debug information is present. 249 if (!MI.isTransient()) 250 ++Count; 251 252 if (MI.getOpcode() == FirstMI.getOpcode() && 253 TII->isLdStSafeToPair(MI, TRI)) { 254 Register MIBaseReg = MI.getOperand(1).getReg(); 255 int64_t MIOffset = MI.getOperand(2).getImm(); 256 257 if (BaseReg == MIBaseReg) { 258 if ((Offset != MIOffset + OffsetStride) && 259 (Offset + OffsetStride != MIOffset)) { 260 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, 261 TRI); 262 MemInsns.push_back(&MI); 263 continue; 264 } 265 266 // If the destination register of one load is the same register or a 267 // sub/super register of the other load, bail and keep looking. 268 if (MayLoad && 269 TRI->isSuperOrSubRegisterEq(Reg, MI.getOperand(0).getReg())) { 270 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, 271 TRI); 272 MemInsns.push_back(&MI); 273 continue; 274 } 275 276 // If the BaseReg has been modified, then we cannot do the optimization. 277 if (!ModifiedRegUnits.available(BaseReg)) 278 return E; 279 280 // If the Rt of the second instruction was not modified or used between 281 // the two instructions and none of the instructions between the second 282 // and first alias with the second, we can combine the second into the 283 // first. 284 if (ModifiedRegUnits.available(MI.getOperand(0).getReg()) && 285 !(MI.mayLoad() && 286 !UsedRegUnits.available(MI.getOperand(0).getReg())) && 287 !mayAlias(MI, MemInsns, AA)) { 288 289 MergeForward = false; 290 return MBBI; 291 } 292 293 // Likewise, if the Rt of the first instruction is not modified or used 294 // between the two instructions and none of the instructions between the 295 // first and the second alias with the first, we can combine the first 296 // into the second. 297 if (!(MayLoad && 298 !UsedRegUnits.available(FirstMI.getOperand(0).getReg())) && 299 !mayAlias(FirstMI, MemInsns, AA)) { 300 301 if (ModifiedRegUnits.available(FirstMI.getOperand(0).getReg())) { 302 MergeForward = true; 303 return MBBI; 304 } 305 } 306 // Unable to combine these instructions due to interference in between. 307 // Keep looking. 308 } 309 } 310 311 // If the instruction wasn't a matching load or store. Stop searching if we 312 // encounter a call instruction that might modify memory. 313 if (MI.isCall()) 314 return E; 315 316 // Update modified / uses register units. 317 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI); 318 319 // Otherwise, if the base register is modified, we have no match, so 320 // return early. 321 if (!ModifiedRegUnits.available(BaseReg)) 322 return E; 323 324 // Update list of instructions that read/write memory. 325 if (MI.mayLoadOrStore()) 326 MemInsns.push_back(&MI); 327 } 328 return E; 329 } 330 331 MachineBasicBlock::iterator 332 RISCVLoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, 333 MachineBasicBlock::iterator Paired, 334 bool MergeForward) { 335 MachineBasicBlock::iterator E = I->getParent()->end(); 336 MachineBasicBlock::iterator NextI = next_nodbg(I, E); 337 // If NextI is the second of the two instructions to be merged, we need 338 // to skip one further. Either way we merge will invalidate the iterator, 339 // and we don't need to scan the new instruction, as it's a pairwise 340 // instruction, which we're not considering for further action anyway. 341 if (NextI == Paired) 342 NextI = next_nodbg(NextI, E); 343 344 // Insert our new paired instruction after whichever of the paired 345 // instructions MergeForward indicates. 346 MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I; 347 MachineBasicBlock::iterator DeletionPoint = MergeForward ? I : Paired; 348 int Offset = I->getOperand(2).getImm(); 349 int PairedOffset = Paired->getOperand(2).getImm(); 350 bool InsertAfter = (Offset < PairedOffset) ^ MergeForward; 351 352 if (!MergeForward) 353 Paired->getOperand(1).setIsKill(false); 354 355 // Kill flags may become invalid when moving stores for pairing. 356 if (I->getOperand(0).isUse()) { 357 if (!MergeForward) { 358 // Check if the Paired store's source register has a kill flag and clear 359 // it only if there are intermediate uses between I and Paired. 360 MachineOperand &PairedRegOp = Paired->getOperand(0); 361 if (PairedRegOp.isKill()) { 362 for (auto It = std::next(I); It != Paired; ++It) { 363 if (It->readsRegister(PairedRegOp.getReg(), TRI)) { 364 PairedRegOp.setIsKill(false); 365 break; 366 } 367 } 368 } 369 } else { 370 // Clear kill flags of the first store's register in the forward 371 // direction. 372 Register Reg = I->getOperand(0).getReg(); 373 for (MachineInstr &MI : make_range(std::next(I), std::next(Paired))) 374 MI.clearRegisterKills(Reg, TRI); 375 } 376 } 377 378 MachineInstr *ToInsert = DeletionPoint->removeFromParent(); 379 MachineBasicBlock &MBB = *InsertionPoint->getParent(); 380 MachineBasicBlock::iterator First, Second; 381 382 if (!InsertAfter) { 383 First = MBB.insert(InsertionPoint, ToInsert); 384 Second = InsertionPoint; 385 } else { 386 Second = MBB.insertAfter(InsertionPoint, ToInsert); 387 First = InsertionPoint; 388 } 389 390 if (tryConvertToLdStPair(First, Second)) { 391 LLVM_DEBUG(dbgs() << "Pairing load/store:\n "); 392 LLVM_DEBUG(prev_nodbg(NextI, MBB.begin())->print(dbgs())); 393 } 394 395 return NextI; 396 } 397 398 // Returns an instance of the Load / Store Optimization pass. 399 FunctionPass *llvm::createRISCVLoadStoreOptPass() { 400 return new RISCVLoadStoreOpt(); 401 } 402