1*700637cbSDimitry Andric //===- RISCVFoldMemOffset.cpp - Fold ADDI into memory offsets ------------===//
2*700637cbSDimitry Andric //
3*700637cbSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*700637cbSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5*700637cbSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*700637cbSDimitry Andric //
7*700637cbSDimitry Andric //===---------------------------------------------------------------------===//
8*700637cbSDimitry Andric //
9*700637cbSDimitry Andric // Look for ADDIs that can be removed by folding their immediate into later
10*700637cbSDimitry Andric // load/store addresses. There may be other arithmetic instructions between the
11*700637cbSDimitry Andric // addi and load/store that we need to reassociate through. If the final result
12*700637cbSDimitry Andric // of the arithmetic is only used by load/store addresses, we can fold the
// offset into all the loads/stores as long as it doesn't create an offset
14*700637cbSDimitry Andric // that is too large.
15*700637cbSDimitry Andric //
16*700637cbSDimitry Andric //===---------------------------------------------------------------------===//
17*700637cbSDimitry Andric
18*700637cbSDimitry Andric #include "RISCV.h"
19*700637cbSDimitry Andric #include "RISCVSubtarget.h"
20*700637cbSDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
21*700637cbSDimitry Andric #include <queue>
22*700637cbSDimitry Andric
23*700637cbSDimitry Andric using namespace llvm;
24*700637cbSDimitry Andric
25*700637cbSDimitry Andric #define DEBUG_TYPE "riscv-fold-mem-offset"
26*700637cbSDimitry Andric #define RISCV_FOLD_MEM_OFFSET_NAME "RISC-V Fold Memory Offset"
27*700637cbSDimitry Andric
28*700637cbSDimitry Andric namespace {
29*700637cbSDimitry Andric
class RISCVFoldMemOffset : public MachineFunctionPass {
public:
  static char ID;

  RISCVFoldMemOffset() : MachineFunctionPass(ID) {}

  bool runOnMachineFunction(MachineFunction &MF) override;

  // Starting from OrigReg (the result of an ADDI whose immediate is
  // InitialOffset), walk the use graph and collect every load/store whose
  // immediate offset can absorb InitialOffset. Returns false if any use
  // blocks the fold; on success, FoldableInstrs maps each foldable memory
  // instruction to its new combined immediate offset.
  bool foldOffset(Register OrigReg, int64_t InitialOffset,
                  const MachineRegisterInfo &MRI,
                  DenseMap<MachineInstr *, int64_t> &FoldableInstrs);

  // This pass only rewrites immediates and erases ADDIs; it never alters
  // control flow.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override { return RISCV_FOLD_MEM_OFFSET_NAME; }
};
49*700637cbSDimitry Andric
// Wrapper class around a std::optional to allow accumulation.
class FoldableOffset {
  std::optional<int64_t> Offset;

public:
  // True once a value has been assigned or accumulated.
  bool hasValue() const { return Offset.has_value(); }
  int64_t getValue() const { return *Offset; }

  // Overwrite any previously accumulated value.
  FoldableOffset &operator=(int64_t RHS) {
    Offset = RHS;
    return *this;
  }

  // Accumulate, treating an unset offset as zero. The addition is done in
  // unsigned arithmetic so wraparound is well-defined rather than UB.
  FoldableOffset &operator+=(int64_t RHS) {
    const int64_t Cur = Offset.value_or(0);
    Offset = static_cast<int64_t>(static_cast<uint64_t>(Cur) +
                                  static_cast<uint64_t>(RHS));
    return *this;
  }

  int64_t operator*() { return *Offset; }
};
72*700637cbSDimitry Andric
73*700637cbSDimitry Andric } // end anonymous namespace
74*700637cbSDimitry Andric
char RISCVFoldMemOffset::ID = 0;

// Register the pass with the PassRegistry (not a CFG-only pass, not an
// analysis).
INITIALIZE_PASS(RISCVFoldMemOffset, DEBUG_TYPE, RISCV_FOLD_MEM_OFFSET_NAME,
                false, false)

// Factory used by the RISC-V target to insert this pass into the codegen
// pipeline.
FunctionPass *llvm::createRISCVFoldMemOffsetPass() {
  return new RISCVFoldMemOffset();
}
82*700637cbSDimitry Andric
// Walk forward from the ADDI looking for arithmetic instructions we can
// analyze or memory instructions that use it as part of their address
// calculation. For each arithmetic instruction we look up how the offset
// contributes to the value in that register and use that information to
// calculate the contribution to the output of this instruction.
// Only addition and left shift are supported.
// FIXME: Add multiplication by constant. The constant will be in a register.
bool RISCVFoldMemOffset::foldOffset(
    Register OrigReg, int64_t InitialOffset, const MachineRegisterInfo &MRI,
    DenseMap<MachineInstr *, int64_t> &FoldableInstrs) {
  // Map to hold how much the offset contributes to the value of this register.
  DenseMap<Register, int64_t> RegToOffsetMap;

  // Insert root offset into the map.
  RegToOffsetMap[OrigReg] = InitialOffset;

  std::queue<Register> Worklist;
  Worklist.push(OrigReg);

  while (!Worklist.empty()) {
    Register Reg = Worklist.front();
    Worklist.pop();

    // A physical-register def means the value escapes tracking.
    if (!Reg.isVirtual())
      return false;

    for (auto &User : MRI.use_nodbg_instructions(Reg)) {
      FoldableOffset Offset;

      switch (User.getOpcode()) {
      default:
        // Any use we can't reason about blocks the entire fold.
        return false;
      case RISCV::ADD:
        // Either source operand (or both) may carry a tracked offset;
        // contributions simply add.
        if (auto I = RegToOffsetMap.find(User.getOperand(1).getReg());
            I != RegToOffsetMap.end())
          Offset = I->second;
        if (auto I = RegToOffsetMap.find(User.getOperand(2).getReg());
            I != RegToOffsetMap.end())
          Offset += I->second;
        break;
      case RISCV::SH1ADD:
        // shNadd computes (rs1 << N) + rs2: an offset arriving through
        // operand 1 is scaled before accumulating operand 2's contribution.
        if (auto I = RegToOffsetMap.find(User.getOperand(1).getReg());
            I != RegToOffsetMap.end())
          Offset = (uint64_t)I->second << 1;
        if (auto I = RegToOffsetMap.find(User.getOperand(2).getReg());
            I != RegToOffsetMap.end())
          Offset += I->second;
        break;
      case RISCV::SH2ADD:
        if (auto I = RegToOffsetMap.find(User.getOperand(1).getReg());
            I != RegToOffsetMap.end())
          Offset = (uint64_t)I->second << 2;
        if (auto I = RegToOffsetMap.find(User.getOperand(2).getReg());
            I != RegToOffsetMap.end())
          Offset += I->second;
        break;
      case RISCV::SH3ADD:
        if (auto I = RegToOffsetMap.find(User.getOperand(1).getReg());
            I != RegToOffsetMap.end())
          Offset = (uint64_t)I->second << 3;
        if (auto I = RegToOffsetMap.find(User.getOperand(2).getReg());
            I != RegToOffsetMap.end())
          Offset += I->second;
        break;
      case RISCV::ADD_UW:
      case RISCV::SH1ADD_UW:
      case RISCV::SH2ADD_UW:
      case RISCV::SH3ADD_UW:
        // Don't fold through the zero extended input.
        if (User.getOperand(1).getReg() == Reg)
          return false;
        // Operand 2 is not zero-extended, so its contribution passes through
        // unchanged.
        if (auto I = RegToOffsetMap.find(User.getOperand(2).getReg());
            I != RegToOffsetMap.end())
          Offset = I->second;
        break;
      case RISCV::SLLI: {
        // A left shift scales the tracked contribution.
        unsigned ShAmt = User.getOperand(2).getImm();
        if (auto I = RegToOffsetMap.find(User.getOperand(1).getReg());
            I != RegToOffsetMap.end())
          Offset = (uint64_t)I->second << ShAmt;
        break;
      }
      case RISCV::LB:
      case RISCV::LBU:
      case RISCV::SB:
      case RISCV::LH:
      case RISCV::LH_INX:
      case RISCV::LHU:
      case RISCV::FLH:
      case RISCV::SH:
      case RISCV::SH_INX:
      case RISCV::FSH:
      case RISCV::LW:
      case RISCV::LW_INX:
      case RISCV::LWU:
      case RISCV::FLW:
      case RISCV::SW:
      case RISCV::SW_INX:
      case RISCV::FSW:
      case RISCV::LD:
      case RISCV::FLD:
      case RISCV::SD:
      case RISCV::FSD: {
        // Memory operand layout here: operand 0 = loaded/stored value,
        // operand 1 = base register, operand 2 = immediate offset.
        // Can't fold into store value.
        if (User.getOperand(0).getReg() == Reg)
          return false;

        // Existing offset must be immediate.
        if (!User.getOperand(2).isImm())
          return false;

        // Require at least one operation between the ADDI and the load/store.
        // We have other optimizations that should handle the simple case.
        if (User.getOperand(1).getReg() == OrigReg)
          return false;

        auto I = RegToOffsetMap.find(User.getOperand(1).getReg());
        if (I == RegToOffsetMap.end())
          return false;

        int64_t LocalOffset = User.getOperand(2).getImm();
        assert(isInt<12>(LocalOffset));
        // Combined offset must still fit the 12-bit signed immediate field.
        int64_t CombinedOffset = (uint64_t)LocalOffset + (uint64_t)I->second;
        if (!isInt<12>(CombinedOffset))
          return false;

        FoldableInstrs[&User] = CombinedOffset;
        continue;
      }
      }

      // If we reach here we should have an accumulated offset.
      assert(Offset.hasValue() && "Expected an offset");

      // If the offset is new or changed, add the destination register to the
      // work list.
      int64_t OffsetVal = Offset.getValue();
      auto P =
          RegToOffsetMap.try_emplace(User.getOperand(0).getReg(), OffsetVal);
      if (P.second) {
        Worklist.push(User.getOperand(0).getReg());
      } else if (P.first->second != OffsetVal) {
        P.first->second = OffsetVal;
        Worklist.push(User.getOperand(0).getReg());
      }
    }
  }

  return true;
}
233*700637cbSDimitry Andric
runOnMachineFunction(MachineFunction & MF)234*700637cbSDimitry Andric bool RISCVFoldMemOffset::runOnMachineFunction(MachineFunction &MF) {
235*700637cbSDimitry Andric if (skipFunction(MF.getFunction()))
236*700637cbSDimitry Andric return false;
237*700637cbSDimitry Andric
238*700637cbSDimitry Andric // This optimization may increase size by preventing compression.
239*700637cbSDimitry Andric if (MF.getFunction().hasOptSize())
240*700637cbSDimitry Andric return false;
241*700637cbSDimitry Andric
242*700637cbSDimitry Andric MachineRegisterInfo &MRI = MF.getRegInfo();
243*700637cbSDimitry Andric
244*700637cbSDimitry Andric bool MadeChange = false;
245*700637cbSDimitry Andric for (MachineBasicBlock &MBB : MF) {
246*700637cbSDimitry Andric for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
247*700637cbSDimitry Andric // FIXME: We can support ADDIW from an LUI+ADDIW pair if the result is
248*700637cbSDimitry Andric // equivalent to LUI+ADDI.
249*700637cbSDimitry Andric if (MI.getOpcode() != RISCV::ADDI)
250*700637cbSDimitry Andric continue;
251*700637cbSDimitry Andric
252*700637cbSDimitry Andric // We only want to optimize register ADDIs.
253*700637cbSDimitry Andric if (!MI.getOperand(1).isReg() || !MI.getOperand(2).isImm())
254*700637cbSDimitry Andric continue;
255*700637cbSDimitry Andric
256*700637cbSDimitry Andric // Ignore 'li'.
257*700637cbSDimitry Andric if (MI.getOperand(1).getReg() == RISCV::X0)
258*700637cbSDimitry Andric continue;
259*700637cbSDimitry Andric
260*700637cbSDimitry Andric int64_t Offset = MI.getOperand(2).getImm();
261*700637cbSDimitry Andric assert(isInt<12>(Offset));
262*700637cbSDimitry Andric
263*700637cbSDimitry Andric DenseMap<MachineInstr *, int64_t> FoldableInstrs;
264*700637cbSDimitry Andric
265*700637cbSDimitry Andric if (!foldOffset(MI.getOperand(0).getReg(), Offset, MRI, FoldableInstrs))
266*700637cbSDimitry Andric continue;
267*700637cbSDimitry Andric
268*700637cbSDimitry Andric if (FoldableInstrs.empty())
269*700637cbSDimitry Andric continue;
270*700637cbSDimitry Andric
271*700637cbSDimitry Andric // We can fold this ADDI.
272*700637cbSDimitry Andric // Rewrite all the instructions.
273*700637cbSDimitry Andric for (auto [MemMI, NewOffset] : FoldableInstrs)
274*700637cbSDimitry Andric MemMI->getOperand(2).setImm(NewOffset);
275*700637cbSDimitry Andric
276*700637cbSDimitry Andric MRI.replaceRegWith(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
277*700637cbSDimitry Andric MRI.clearKillFlags(MI.getOperand(1).getReg());
278*700637cbSDimitry Andric MI.eraseFromParent();
279*700637cbSDimitry Andric }
280*700637cbSDimitry Andric }
281*700637cbSDimitry Andric
282*700637cbSDimitry Andric return MadeChange;
283*700637cbSDimitry Andric }
284