xref: /freebsd/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp (revision 770cf0a5f02dc8983a89c6568d741fbc25baa999)
1 //===----- RISCVLoadStoreOptimizer.cpp ------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Load/Store Pairing: It identifies pairs of load or store instructions
10 // operating on consecutive memory locations and merges them into a single
11 // paired instruction, leveraging hardware support for paired memory accesses.
12 // Much of the pairing logic is adapted from the AArch64LoadStoreOpt pass.
13 //
14 // NOTE: The AArch64LoadStoreOpt pass performs additional optimizations such as
15 // merging zero store instructions, promoting loads that read directly from a
16 // preceding store, and merging base register updates with load/store
17 // instructions (via pre-/post-indexed addressing). These advanced
18 // transformations are not yet implemented in the RISC-V pass but represent
19 // potential future enhancements for further optimizing RISC-V memory
20 // operations.
21 //
22 //===----------------------------------------------------------------------===//
23 
24 #include "RISCV.h"
25 #include "RISCVTargetMachine.h"
26 #include "llvm/Analysis/AliasAnalysis.h"
27 #include "llvm/CodeGen/Passes.h"
28 #include "llvm/MC/TargetRegistry.h"
29 #include "llvm/Support/Debug.h"
30 #include "llvm/Target/TargetOptions.h"
31 
32 using namespace llvm;
33 
34 #define DEBUG_TYPE "riscv-load-store-opt"
35 #define RISCV_LOAD_STORE_OPT_NAME "RISC-V Load / Store Optimizer"
36 
37 // The LdStLimit limits number of instructions how far we search for load/store
38 // pairs.
39 static cl::opt<unsigned> LdStLimit("riscv-load-store-scan-limit", cl::init(128),
40                                    cl::Hidden);
41 
42 namespace {
43 
44 struct RISCVLoadStoreOpt : public MachineFunctionPass {
45   static char ID;
46   bool runOnMachineFunction(MachineFunction &Fn) override;
47 
48   RISCVLoadStoreOpt() : MachineFunctionPass(ID) {}
49 
50   MachineFunctionProperties getRequiredProperties() const override {
51     return MachineFunctionProperties().setNoVRegs();
52   }
53 
54   void getAnalysisUsage(AnalysisUsage &AU) const override {
55     AU.addRequired<AAResultsWrapperPass>();
56     MachineFunctionPass::getAnalysisUsage(AU);
57   }
58 
59   StringRef getPassName() const override { return RISCV_LOAD_STORE_OPT_NAME; }
60 
61   // Find and pair load/store instructions.
62   bool tryToPairLdStInst(MachineBasicBlock::iterator &MBBI);
63 
64   // Convert load/store pairs to single instructions.
65   bool tryConvertToLdStPair(MachineBasicBlock::iterator First,
66                             MachineBasicBlock::iterator Second);
67 
68   // Scan the instructions looking for a load/store that can be combined
69   // with the current instruction into a load/store pair.
70   // Return the matching instruction if one is found, else MBB->end().
71   MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I,
72                                                bool &MergeForward);
73 
74   MachineBasicBlock::iterator
75   mergePairedInsns(MachineBasicBlock::iterator I,
76                    MachineBasicBlock::iterator Paired, bool MergeForward);
77 
78 private:
79   AliasAnalysis *AA;
80   MachineRegisterInfo *MRI;
81   const RISCVInstrInfo *TII;
82   const RISCVRegisterInfo *TRI;
83   LiveRegUnits ModifiedRegUnits, UsedRegUnits;
84 };
85 } // end anonymous namespace
86 
87 char RISCVLoadStoreOpt::ID = 0;
88 INITIALIZE_PASS(RISCVLoadStoreOpt, DEBUG_TYPE, RISCV_LOAD_STORE_OPT_NAME, false,
89                 false)
90 
91 bool RISCVLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
92   if (skipFunction(Fn.getFunction()))
93     return false;
94   const RISCVSubtarget &Subtarget = Fn.getSubtarget<RISCVSubtarget>();
95   if (!Subtarget.useLoadStorePairs())
96     return false;
97 
98   bool MadeChange = false;
99   TII = Subtarget.getInstrInfo();
100   TRI = Subtarget.getRegisterInfo();
101   MRI = &Fn.getRegInfo();
102   AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
103   ModifiedRegUnits.init(*TRI);
104   UsedRegUnits.init(*TRI);
105 
106   for (MachineBasicBlock &MBB : Fn) {
107     LLVM_DEBUG(dbgs() << "MBB: " << MBB.getName() << "\n");
108 
109     for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
110          MBBI != E;) {
111       if (TII->isPairableLdStInstOpc(MBBI->getOpcode()) &&
112           tryToPairLdStInst(MBBI))
113         MadeChange = true;
114       else
115         ++MBBI;
116     }
117   }
118   return MadeChange;
119 }
120 
121 // Find loads and stores that can be merged into a single load or store pair
122 // instruction.
123 bool RISCVLoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
124   MachineInstr &MI = *MBBI;
125 
126   // If this is volatile, it is not a candidate.
127   if (MI.hasOrderedMemoryRef())
128     return false;
129 
130   if (!TII->isLdStSafeToPair(MI, TRI))
131     return false;
132 
133   // Look ahead for a pairable instruction.
134   MachineBasicBlock::iterator E = MI.getParent()->end();
135   bool MergeForward;
136   MachineBasicBlock::iterator Paired = findMatchingInsn(MBBI, MergeForward);
137   if (Paired != E) {
138     MBBI = mergePairedInsns(MBBI, Paired, MergeForward);
139     return true;
140   }
141   return false;
142 }
143 
144 // Merge two adjacent load/store instructions into a paired instruction
145 // (LDP/SDP/SWP/LWP) if the effective address is 8-byte aligned in case of
146 // SWP/LWP 16-byte aligned in case of LDP/SDP. This function selects the
147 // appropriate paired opcode, verifies that the memory operand is properly
148 // aligned, and checks that the offset is valid. If all conditions are met, it
149 // builds and inserts the paired instruction.
150 bool RISCVLoadStoreOpt::tryConvertToLdStPair(
151     MachineBasicBlock::iterator First, MachineBasicBlock::iterator Second) {
152   unsigned PairOpc;
153   Align RequiredAlignment;
154   switch (First->getOpcode()) {
155   default:
156     llvm_unreachable("Unsupported load/store instruction for pairing");
157   case RISCV::SW:
158     PairOpc = RISCV::MIPS_SWP;
159     RequiredAlignment = Align(8);
160     break;
161   case RISCV::LW:
162     PairOpc = RISCV::MIPS_LWP;
163     RequiredAlignment = Align(8);
164     break;
165   case RISCV::SD:
166     PairOpc = RISCV::MIPS_SDP;
167     RequiredAlignment = Align(16);
168     break;
169   case RISCV::LD:
170     PairOpc = RISCV::MIPS_LDP;
171     RequiredAlignment = Align(16);
172     break;
173   }
174 
175   MachineFunction *MF = First->getMF();
176   const MachineMemOperand *MMO = *First->memoperands_begin();
177   Align MMOAlign = MMO->getAlign();
178 
179   if (MMOAlign < RequiredAlignment)
180     return false;
181 
182   int64_t Offset = First->getOperand(2).getImm();
183   if (!isUInt<7>(Offset))
184     return false;
185 
186   MachineInstrBuilder MIB = BuildMI(
187       *MF, First->getDebugLoc() ? First->getDebugLoc() : Second->getDebugLoc(),
188       TII->get(PairOpc));
189   MIB.add(First->getOperand(0))
190       .add(Second->getOperand(0))
191       .add(First->getOperand(1))
192       .add(First->getOperand(2))
193       .cloneMergedMemRefs({&*First, &*Second});
194 
195   First->getParent()->insert(First, MIB);
196 
197   First->removeFromParent();
198   Second->removeFromParent();
199 
200   return true;
201 }
202 
203 static bool mayAlias(MachineInstr &MIa,
204                      SmallVectorImpl<MachineInstr *> &MemInsns,
205                      AliasAnalysis *AA) {
206   for (MachineInstr *MIb : MemInsns)
207     if (MIa.mayAlias(AA, *MIb, /*UseTBAA*/ false))
208       return true;
209 
210   return false;
211 }
212 
213 // Scan the instructions looking for a load/store that can be combined with the
214 // current instruction into a wider equivalent or a load/store pair.
215 // TODO: Extend pairing logic to consider reordering both instructions
216 // to a safe "middle" position rather than only merging forward/backward.
217 // This requires more sophisticated checks for aliasing, register
218 // liveness, and potential scheduling hazards.
219 MachineBasicBlock::iterator
220 RISCVLoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
221                                     bool &MergeForward) {
222   MachineBasicBlock::iterator E = I->getParent()->end();
223   MachineBasicBlock::iterator MBBI = I;
224   MachineInstr &FirstMI = *I;
225   MBBI = next_nodbg(MBBI, E);
226 
227   bool MayLoad = FirstMI.mayLoad();
228   Register Reg = FirstMI.getOperand(0).getReg();
229   Register BaseReg = FirstMI.getOperand(1).getReg();
230   int64_t Offset = FirstMI.getOperand(2).getImm();
231   int64_t OffsetStride = (*FirstMI.memoperands_begin())->getSize().getValue();
232 
233   MergeForward = false;
234 
235   // Track which register units have been modified and used between the first
236   // insn (inclusive) and the second insn.
237   ModifiedRegUnits.clear();
238   UsedRegUnits.clear();
239 
240   // Remember any instructions that read/write memory between FirstMI and MI.
241   SmallVector<MachineInstr *, 4> MemInsns;
242 
243   for (unsigned Count = 0; MBBI != E && Count < LdStLimit;
244        MBBI = next_nodbg(MBBI, E)) {
245     MachineInstr &MI = *MBBI;
246 
247     // Don't count transient instructions towards the search limit since there
248     // may be different numbers of them if e.g. debug information is present.
249     if (!MI.isTransient())
250       ++Count;
251 
252     if (MI.getOpcode() == FirstMI.getOpcode() &&
253         TII->isLdStSafeToPair(MI, TRI)) {
254       Register MIBaseReg = MI.getOperand(1).getReg();
255       int64_t MIOffset = MI.getOperand(2).getImm();
256 
257       if (BaseReg == MIBaseReg) {
258         if ((Offset != MIOffset + OffsetStride) &&
259             (Offset + OffsetStride != MIOffset)) {
260           LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
261                                             TRI);
262           MemInsns.push_back(&MI);
263           continue;
264         }
265 
266         // If the destination register of one load is the same register or a
267         // sub/super register of the other load, bail and keep looking.
268         if (MayLoad &&
269             TRI->isSuperOrSubRegisterEq(Reg, MI.getOperand(0).getReg())) {
270           LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
271                                             TRI);
272           MemInsns.push_back(&MI);
273           continue;
274         }
275 
276         // If the BaseReg has been modified, then we cannot do the optimization.
277         if (!ModifiedRegUnits.available(BaseReg))
278           return E;
279 
280         // If the Rt of the second instruction was not modified or used between
281         // the two instructions and none of the instructions between the second
282         // and first alias with the second, we can combine the second into the
283         // first.
284         if (ModifiedRegUnits.available(MI.getOperand(0).getReg()) &&
285             !(MI.mayLoad() &&
286               !UsedRegUnits.available(MI.getOperand(0).getReg())) &&
287             !mayAlias(MI, MemInsns, AA)) {
288 
289           MergeForward = false;
290           return MBBI;
291         }
292 
293         // Likewise, if the Rt of the first instruction is not modified or used
294         // between the two instructions and none of the instructions between the
295         // first and the second alias with the first, we can combine the first
296         // into the second.
297         if (!(MayLoad &&
298               !UsedRegUnits.available(FirstMI.getOperand(0).getReg())) &&
299             !mayAlias(FirstMI, MemInsns, AA)) {
300 
301           if (ModifiedRegUnits.available(FirstMI.getOperand(0).getReg())) {
302             MergeForward = true;
303             return MBBI;
304           }
305         }
306         // Unable to combine these instructions due to interference in between.
307         // Keep looking.
308       }
309     }
310 
311     // If the instruction wasn't a matching load or store.  Stop searching if we
312     // encounter a call instruction that might modify memory.
313     if (MI.isCall())
314       return E;
315 
316     // Update modified / uses register units.
317     LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
318 
319     // Otherwise, if the base register is modified, we have no match, so
320     // return early.
321     if (!ModifiedRegUnits.available(BaseReg))
322       return E;
323 
324     // Update list of instructions that read/write memory.
325     if (MI.mayLoadOrStore())
326       MemInsns.push_back(&MI);
327   }
328   return E;
329 }
330 
331 MachineBasicBlock::iterator
332 RISCVLoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
333                                     MachineBasicBlock::iterator Paired,
334                                     bool MergeForward) {
335   MachineBasicBlock::iterator E = I->getParent()->end();
336   MachineBasicBlock::iterator NextI = next_nodbg(I, E);
337   // If NextI is the second of the two instructions to be merged, we need
338   // to skip one further. Either way we merge will invalidate the iterator,
339   // and we don't need to scan the new instruction, as it's a pairwise
340   // instruction, which we're not considering for further action anyway.
341   if (NextI == Paired)
342     NextI = next_nodbg(NextI, E);
343 
344   // Insert our new paired instruction after whichever of the paired
345   // instructions MergeForward indicates.
346   MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I;
347   MachineBasicBlock::iterator DeletionPoint = MergeForward ? I : Paired;
348   int Offset = I->getOperand(2).getImm();
349   int PairedOffset = Paired->getOperand(2).getImm();
350   bool InsertAfter = (Offset < PairedOffset) ^ MergeForward;
351 
352   if (!MergeForward)
353     Paired->getOperand(1).setIsKill(false);
354 
355   // Kill flags may become invalid when moving stores for pairing.
356   if (I->getOperand(0).isUse()) {
357     if (!MergeForward) {
358       // Check if the Paired store's source register has a kill flag and clear
359       // it only if there are intermediate uses between I and Paired.
360       MachineOperand &PairedRegOp = Paired->getOperand(0);
361       if (PairedRegOp.isKill()) {
362         for (auto It = std::next(I); It != Paired; ++It) {
363           if (It->readsRegister(PairedRegOp.getReg(), TRI)) {
364             PairedRegOp.setIsKill(false);
365             break;
366           }
367         }
368       }
369     } else {
370       // Clear kill flags of the first store's register in the forward
371       // direction.
372       Register Reg = I->getOperand(0).getReg();
373       for (MachineInstr &MI : make_range(std::next(I), std::next(Paired)))
374         MI.clearRegisterKills(Reg, TRI);
375     }
376   }
377 
378   MachineInstr *ToInsert = DeletionPoint->removeFromParent();
379   MachineBasicBlock &MBB = *InsertionPoint->getParent();
380   MachineBasicBlock::iterator First, Second;
381 
382   if (!InsertAfter) {
383     First = MBB.insert(InsertionPoint, ToInsert);
384     Second = InsertionPoint;
385   } else {
386     Second = MBB.insertAfter(InsertionPoint, ToInsert);
387     First = InsertionPoint;
388   }
389 
390   if (tryConvertToLdStPair(First, Second)) {
391     LLVM_DEBUG(dbgs() << "Pairing load/store:\n    ");
392     LLVM_DEBUG(prev_nodbg(NextI, MBB.begin())->print(dbgs()));
393   }
394 
395   return NextI;
396 }
397 
398 // Returns an instance of the Load / Store Optimization pass.
399 FunctionPass *llvm::createRISCVLoadStoreOptPass() {
400   return new RISCVLoadStoreOpt();
401 }
402