xref: /freebsd/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp (revision 0b57cec536236d46e3dba9bd041533462f33dbb7)
1*0b57cec5SDimitry Andric //===----- RISCVMergeBaseOffset.cpp - Optimise address calculations  ------===//
2*0b57cec5SDimitry Andric //
3*0b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*0b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5*0b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*0b57cec5SDimitry Andric //
7*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
8*0b57cec5SDimitry Andric //
9*0b57cec5SDimitry Andric // Merge the offset of address calculation into the offset field
10*0b57cec5SDimitry Andric // of instructions in a global address lowering sequence. This pass transforms:
11*0b57cec5SDimitry Andric //   lui  vreg1, %hi(s)
12*0b57cec5SDimitry Andric //   addi vreg2, vreg1, %lo(s)
13*0b57cec5SDimitry Andric //   addi vreg3, verg2, Offset
14*0b57cec5SDimitry Andric //
15*0b57cec5SDimitry Andric //   Into:
16*0b57cec5SDimitry Andric //   lui  vreg1, %hi(s+Offset)
17*0b57cec5SDimitry Andric //   addi vreg2, vreg1, %lo(s+Offset)
18*0b57cec5SDimitry Andric //
19*0b57cec5SDimitry Andric // The transformation is carried out under certain conditions:
20*0b57cec5SDimitry Andric // 1) The offset field in the base of global address lowering sequence is zero.
21*0b57cec5SDimitry Andric // 2) The lowered global address has only one use.
22*0b57cec5SDimitry Andric //
23*0b57cec5SDimitry Andric // The offset field can be in a different form. This pass handles all of them.
24*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
25*0b57cec5SDimitry Andric 
26*0b57cec5SDimitry Andric #include "RISCV.h"
27*0b57cec5SDimitry Andric #include "RISCVTargetMachine.h"
28*0b57cec5SDimitry Andric #include "llvm/CodeGen/Passes.h"
29*0b57cec5SDimitry Andric #include "llvm/Support/Debug.h"
30*0b57cec5SDimitry Andric #include "llvm/Support/TargetRegistry.h"
31*0b57cec5SDimitry Andric #include "llvm/Target/TargetOptions.h"
32*0b57cec5SDimitry Andric #include <set>
33*0b57cec5SDimitry Andric using namespace llvm;
34*0b57cec5SDimitry Andric 
35*0b57cec5SDimitry Andric #define DEBUG_TYPE "riscv-merge-base-offset"
36*0b57cec5SDimitry Andric #define RISCV_MERGE_BASE_OFFSET_NAME "RISCV Merge Base Offset"
37*0b57cec5SDimitry Andric namespace {
38*0b57cec5SDimitry Andric 
39*0b57cec5SDimitry Andric struct RISCVMergeBaseOffsetOpt : public MachineFunctionPass {
40*0b57cec5SDimitry Andric   static char ID;
41*0b57cec5SDimitry Andric   const MachineFunction *MF;
42*0b57cec5SDimitry Andric   bool runOnMachineFunction(MachineFunction &Fn) override;
43*0b57cec5SDimitry Andric   bool detectLuiAddiGlobal(MachineInstr &LUI, MachineInstr *&ADDI);
44*0b57cec5SDimitry Andric 
45*0b57cec5SDimitry Andric   bool detectAndFoldOffset(MachineInstr &HiLUI, MachineInstr &LoADDI);
46*0b57cec5SDimitry Andric   void foldOffset(MachineInstr &HiLUI, MachineInstr &LoADDI, MachineInstr &Tail,
47*0b57cec5SDimitry Andric                   int64_t Offset);
48*0b57cec5SDimitry Andric   bool matchLargeOffset(MachineInstr &TailAdd, unsigned GSReg, int64_t &Offset);
49*0b57cec5SDimitry Andric   RISCVMergeBaseOffsetOpt() : MachineFunctionPass(ID) {}
50*0b57cec5SDimitry Andric 
51*0b57cec5SDimitry Andric   MachineFunctionProperties getRequiredProperties() const override {
52*0b57cec5SDimitry Andric     return MachineFunctionProperties().set(
53*0b57cec5SDimitry Andric         MachineFunctionProperties::Property::IsSSA);
54*0b57cec5SDimitry Andric   }
55*0b57cec5SDimitry Andric 
56*0b57cec5SDimitry Andric   StringRef getPassName() const override {
57*0b57cec5SDimitry Andric     return RISCV_MERGE_BASE_OFFSET_NAME;
58*0b57cec5SDimitry Andric   }
59*0b57cec5SDimitry Andric 
60*0b57cec5SDimitry Andric private:
61*0b57cec5SDimitry Andric   MachineRegisterInfo *MRI;
62*0b57cec5SDimitry Andric   std::set<MachineInstr *> DeadInstrs;
63*0b57cec5SDimitry Andric };
64*0b57cec5SDimitry Andric } // end anonymous namespace
65*0b57cec5SDimitry Andric 
66*0b57cec5SDimitry Andric char RISCVMergeBaseOffsetOpt::ID = 0;
67*0b57cec5SDimitry Andric INITIALIZE_PASS(RISCVMergeBaseOffsetOpt, "riscv-merge-base-offset",
68*0b57cec5SDimitry Andric                 RISCV_MERGE_BASE_OFFSET_NAME, false, false)
69*0b57cec5SDimitry Andric 
70*0b57cec5SDimitry Andric // Detect the pattern:
71*0b57cec5SDimitry Andric //   lui   vreg1, %hi(s)
72*0b57cec5SDimitry Andric //   addi  vreg2, vreg1, %lo(s)
73*0b57cec5SDimitry Andric //
74*0b57cec5SDimitry Andric //   Pattern only accepted if:
75*0b57cec5SDimitry Andric //     1) ADDI has only one use.
76*0b57cec5SDimitry Andric //     2) LUI has only one use; which is the ADDI.
77*0b57cec5SDimitry Andric //     3) Both ADDI and LUI have GlobalAddress type which indicates that these
78*0b57cec5SDimitry Andric //        are generated from global address lowering.
79*0b57cec5SDimitry Andric //     4) Offset value in the Global Address is 0.
80*0b57cec5SDimitry Andric bool RISCVMergeBaseOffsetOpt::detectLuiAddiGlobal(MachineInstr &HiLUI,
81*0b57cec5SDimitry Andric                                                   MachineInstr *&LoADDI) {
82*0b57cec5SDimitry Andric   if (HiLUI.getOpcode() != RISCV::LUI ||
83*0b57cec5SDimitry Andric       HiLUI.getOperand(1).getTargetFlags() != RISCVII::MO_HI ||
84*0b57cec5SDimitry Andric       HiLUI.getOperand(1).getType() != MachineOperand::MO_GlobalAddress ||
85*0b57cec5SDimitry Andric       HiLUI.getOperand(1).getOffset() != 0 ||
86*0b57cec5SDimitry Andric       !MRI->hasOneUse(HiLUI.getOperand(0).getReg()))
87*0b57cec5SDimitry Andric     return false;
88*0b57cec5SDimitry Andric   unsigned HiLuiDestReg = HiLUI.getOperand(0).getReg();
89*0b57cec5SDimitry Andric   LoADDI = MRI->use_begin(HiLuiDestReg)->getParent();
90*0b57cec5SDimitry Andric   if (LoADDI->getOpcode() != RISCV::ADDI ||
91*0b57cec5SDimitry Andric       LoADDI->getOperand(2).getTargetFlags() != RISCVII::MO_LO ||
92*0b57cec5SDimitry Andric       LoADDI->getOperand(2).getType() != MachineOperand::MO_GlobalAddress ||
93*0b57cec5SDimitry Andric       LoADDI->getOperand(2).getOffset() != 0 ||
94*0b57cec5SDimitry Andric       !MRI->hasOneUse(LoADDI->getOperand(0).getReg()))
95*0b57cec5SDimitry Andric     return false;
96*0b57cec5SDimitry Andric   return true;
97*0b57cec5SDimitry Andric }
98*0b57cec5SDimitry Andric 
99*0b57cec5SDimitry Andric // Update the offset in HiLUI and LoADDI instructions.
100*0b57cec5SDimitry Andric // Delete the tail instruction and update all the uses to use the
101*0b57cec5SDimitry Andric // output from LoADDI.
102*0b57cec5SDimitry Andric void RISCVMergeBaseOffsetOpt::foldOffset(MachineInstr &HiLUI,
103*0b57cec5SDimitry Andric                                          MachineInstr &LoADDI,
104*0b57cec5SDimitry Andric                                          MachineInstr &Tail, int64_t Offset) {
105*0b57cec5SDimitry Andric   // Put the offset back in HiLUI and the LoADDI
106*0b57cec5SDimitry Andric   HiLUI.getOperand(1).setOffset(Offset);
107*0b57cec5SDimitry Andric   LoADDI.getOperand(2).setOffset(Offset);
108*0b57cec5SDimitry Andric   // Delete the tail instruction.
109*0b57cec5SDimitry Andric   DeadInstrs.insert(&Tail);
110*0b57cec5SDimitry Andric   MRI->replaceRegWith(Tail.getOperand(0).getReg(),
111*0b57cec5SDimitry Andric                       LoADDI.getOperand(0).getReg());
112*0b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "  Merged offset " << Offset << " into base.\n"
113*0b57cec5SDimitry Andric                     << "     " << HiLUI << "     " << LoADDI;);
114*0b57cec5SDimitry Andric }
115*0b57cec5SDimitry Andric 
116*0b57cec5SDimitry Andric // Detect patterns for large offsets that are passed into an ADD instruction.
117*0b57cec5SDimitry Andric //
118*0b57cec5SDimitry Andric //                     Base address lowering is of the form:
119*0b57cec5SDimitry Andric //                        HiLUI:  lui   vreg1, %hi(s)
120*0b57cec5SDimitry Andric //                       LoADDI:  addi  vreg2, vreg1, %lo(s)
121*0b57cec5SDimitry Andric //                       /                                  \
122*0b57cec5SDimitry Andric //                      /                                    \
123*0b57cec5SDimitry Andric //                     /                                      \
124*0b57cec5SDimitry Andric //                    /  The large offset can be of two forms: \
125*0b57cec5SDimitry Andric //  1) Offset that has non zero bits in lower      2) Offset that has non zero
126*0b57cec5SDimitry Andric //     12 bits and upper 20 bits                      bits in upper 20 bits only
127*0b57cec5SDimitry Andric //   OffseLUI: lui   vreg3, 4
128*0b57cec5SDimitry Andric // OffsetTail: addi  voff, vreg3, 188                OffsetTail: lui  voff, 128
129*0b57cec5SDimitry Andric //                    \                                        /
130*0b57cec5SDimitry Andric //                     \                                      /
131*0b57cec5SDimitry Andric //                      \                                    /
132*0b57cec5SDimitry Andric //                       \                                  /
133*0b57cec5SDimitry Andric //                         TailAdd: add  vreg4, vreg2, voff
134*0b57cec5SDimitry Andric bool RISCVMergeBaseOffsetOpt::matchLargeOffset(MachineInstr &TailAdd,
135*0b57cec5SDimitry Andric                                                unsigned GAReg,
136*0b57cec5SDimitry Andric                                                int64_t &Offset) {
137*0b57cec5SDimitry Andric   assert((TailAdd.getOpcode() == RISCV::ADD) && "Expected ADD instruction!");
138*0b57cec5SDimitry Andric   unsigned Rs = TailAdd.getOperand(1).getReg();
139*0b57cec5SDimitry Andric   unsigned Rt = TailAdd.getOperand(2).getReg();
140*0b57cec5SDimitry Andric   unsigned Reg = Rs == GAReg ? Rt : Rs;
141*0b57cec5SDimitry Andric 
142*0b57cec5SDimitry Andric   // Can't fold if the register has more than one use.
143*0b57cec5SDimitry Andric   if (!MRI->hasOneUse(Reg))
144*0b57cec5SDimitry Andric     return false;
145*0b57cec5SDimitry Andric   // This can point to an ADDI or a LUI:
146*0b57cec5SDimitry Andric   MachineInstr &OffsetTail = *MRI->getVRegDef(Reg);
147*0b57cec5SDimitry Andric   if (OffsetTail.getOpcode() == RISCV::ADDI) {
148*0b57cec5SDimitry Andric     // The offset value has non zero bits in both %hi and %lo parts.
149*0b57cec5SDimitry Andric     // Detect an ADDI that feeds from a LUI instruction.
150*0b57cec5SDimitry Andric     MachineOperand &AddiImmOp = OffsetTail.getOperand(2);
151*0b57cec5SDimitry Andric     if (AddiImmOp.getTargetFlags() != RISCVII::MO_None)
152*0b57cec5SDimitry Andric       return false;
153*0b57cec5SDimitry Andric     int64_t OffLo = AddiImmOp.getImm();
154*0b57cec5SDimitry Andric     MachineInstr &OffsetLui =
155*0b57cec5SDimitry Andric         *MRI->getVRegDef(OffsetTail.getOperand(1).getReg());
156*0b57cec5SDimitry Andric     MachineOperand &LuiImmOp = OffsetLui.getOperand(1);
157*0b57cec5SDimitry Andric     if (OffsetLui.getOpcode() != RISCV::LUI ||
158*0b57cec5SDimitry Andric         LuiImmOp.getTargetFlags() != RISCVII::MO_None ||
159*0b57cec5SDimitry Andric         !MRI->hasOneUse(OffsetLui.getOperand(0).getReg()))
160*0b57cec5SDimitry Andric       return false;
161*0b57cec5SDimitry Andric     int64_t OffHi = OffsetLui.getOperand(1).getImm();
162*0b57cec5SDimitry Andric     Offset = (OffHi << 12) + OffLo;
163*0b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "  Offset Instrs: " << OffsetTail
164*0b57cec5SDimitry Andric                       << "                 " << OffsetLui);
165*0b57cec5SDimitry Andric     DeadInstrs.insert(&OffsetTail);
166*0b57cec5SDimitry Andric     DeadInstrs.insert(&OffsetLui);
167*0b57cec5SDimitry Andric     return true;
168*0b57cec5SDimitry Andric   } else if (OffsetTail.getOpcode() == RISCV::LUI) {
169*0b57cec5SDimitry Andric     // The offset value has all zero bits in the lower 12 bits. Only LUI
170*0b57cec5SDimitry Andric     // exists.
171*0b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "  Offset Instr: " << OffsetTail);
172*0b57cec5SDimitry Andric     Offset = OffsetTail.getOperand(1).getImm() << 12;
173*0b57cec5SDimitry Andric     DeadInstrs.insert(&OffsetTail);
174*0b57cec5SDimitry Andric     return true;
175*0b57cec5SDimitry Andric   }
176*0b57cec5SDimitry Andric   return false;
177*0b57cec5SDimitry Andric }
178*0b57cec5SDimitry Andric 
179*0b57cec5SDimitry Andric bool RISCVMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr &HiLUI,
180*0b57cec5SDimitry Andric                                                   MachineInstr &LoADDI) {
181*0b57cec5SDimitry Andric   unsigned DestReg = LoADDI.getOperand(0).getReg();
182*0b57cec5SDimitry Andric   assert(MRI->hasOneUse(DestReg) && "expected one use for LoADDI");
183*0b57cec5SDimitry Andric   // LoADDI has only one use.
184*0b57cec5SDimitry Andric   MachineInstr &Tail = *MRI->use_begin(DestReg)->getParent();
185*0b57cec5SDimitry Andric   switch (Tail.getOpcode()) {
186*0b57cec5SDimitry Andric   default:
187*0b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "Don't know how to get offset from this instr:"
188*0b57cec5SDimitry Andric                       << Tail);
189*0b57cec5SDimitry Andric     return false;
190*0b57cec5SDimitry Andric   case RISCV::ADDI: {
191*0b57cec5SDimitry Andric     // Offset is simply an immediate operand.
192*0b57cec5SDimitry Andric     int64_t Offset = Tail.getOperand(2).getImm();
193*0b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "  Offset Instr: " << Tail);
194*0b57cec5SDimitry Andric     foldOffset(HiLUI, LoADDI, Tail, Offset);
195*0b57cec5SDimitry Andric     return true;
196*0b57cec5SDimitry Andric   } break;
197*0b57cec5SDimitry Andric   case RISCV::ADD: {
198*0b57cec5SDimitry Andric     // The offset is too large to fit in the immediate field of ADDI.
199*0b57cec5SDimitry Andric     // This can be in two forms:
200*0b57cec5SDimitry Andric     // 1) LUI hi_Offset followed by:
201*0b57cec5SDimitry Andric     //    ADDI lo_offset
202*0b57cec5SDimitry Andric     //    This happens in case the offset has non zero bits in
203*0b57cec5SDimitry Andric     //    both hi 20 and lo 12 bits.
204*0b57cec5SDimitry Andric     // 2) LUI (offset20)
205*0b57cec5SDimitry Andric     //    This happens in case the lower 12 bits of the offset are zeros.
206*0b57cec5SDimitry Andric     int64_t Offset;
207*0b57cec5SDimitry Andric     if (!matchLargeOffset(Tail, DestReg, Offset))
208*0b57cec5SDimitry Andric       return false;
209*0b57cec5SDimitry Andric     foldOffset(HiLUI, LoADDI, Tail, Offset);
210*0b57cec5SDimitry Andric     return true;
211*0b57cec5SDimitry Andric   } break;
212*0b57cec5SDimitry Andric   case RISCV::LB:
213*0b57cec5SDimitry Andric   case RISCV::LH:
214*0b57cec5SDimitry Andric   case RISCV::LW:
215*0b57cec5SDimitry Andric   case RISCV::LBU:
216*0b57cec5SDimitry Andric   case RISCV::LHU:
217*0b57cec5SDimitry Andric   case RISCV::LWU:
218*0b57cec5SDimitry Andric   case RISCV::LD:
219*0b57cec5SDimitry Andric   case RISCV::FLW:
220*0b57cec5SDimitry Andric   case RISCV::FLD:
221*0b57cec5SDimitry Andric   case RISCV::SB:
222*0b57cec5SDimitry Andric   case RISCV::SH:
223*0b57cec5SDimitry Andric   case RISCV::SW:
224*0b57cec5SDimitry Andric   case RISCV::SD:
225*0b57cec5SDimitry Andric   case RISCV::FSW:
226*0b57cec5SDimitry Andric   case RISCV::FSD: {
227*0b57cec5SDimitry Andric     // Transforms the sequence:            Into:
228*0b57cec5SDimitry Andric     // HiLUI:  lui vreg1, %hi(foo)          --->  lui vreg1, %hi(foo+8)
229*0b57cec5SDimitry Andric     // LoADDI: addi vreg2, vreg1, %lo(foo)  --->  lw vreg3, lo(foo+8)(vreg1)
230*0b57cec5SDimitry Andric     // Tail:   lw vreg3, 8(vreg2)
231*0b57cec5SDimitry Andric     if (Tail.getOperand(1).isFI())
232*0b57cec5SDimitry Andric       return false;
233*0b57cec5SDimitry Andric     // Register defined by LoADDI should be used in the base part of the
234*0b57cec5SDimitry Andric     // load\store instruction. Otherwise, no folding possible.
235*0b57cec5SDimitry Andric     unsigned BaseAddrReg = Tail.getOperand(1).getReg();
236*0b57cec5SDimitry Andric     if (DestReg != BaseAddrReg)
237*0b57cec5SDimitry Andric       return false;
238*0b57cec5SDimitry Andric     MachineOperand &TailImmOp = Tail.getOperand(2);
239*0b57cec5SDimitry Andric     int64_t Offset = TailImmOp.getImm();
240*0b57cec5SDimitry Andric     // Update the offsets in global address lowering.
241*0b57cec5SDimitry Andric     HiLUI.getOperand(1).setOffset(Offset);
242*0b57cec5SDimitry Andric     // Update the immediate in the Tail instruction to add the offset.
243*0b57cec5SDimitry Andric     Tail.RemoveOperand(2);
244*0b57cec5SDimitry Andric     MachineOperand &ImmOp = LoADDI.getOperand(2);
245*0b57cec5SDimitry Andric     ImmOp.setOffset(Offset);
246*0b57cec5SDimitry Andric     Tail.addOperand(ImmOp);
247*0b57cec5SDimitry Andric     // Update the base reg in the Tail instruction to feed from LUI.
248*0b57cec5SDimitry Andric     // Output of HiLUI is only used in LoADDI, no need to use
249*0b57cec5SDimitry Andric     // MRI->replaceRegWith().
250*0b57cec5SDimitry Andric     Tail.getOperand(1).setReg(HiLUI.getOperand(0).getReg());
251*0b57cec5SDimitry Andric     DeadInstrs.insert(&LoADDI);
252*0b57cec5SDimitry Andric     return true;
253*0b57cec5SDimitry Andric   } break;
254*0b57cec5SDimitry Andric   }
255*0b57cec5SDimitry Andric   return false;
256*0b57cec5SDimitry Andric }
257*0b57cec5SDimitry Andric 
258*0b57cec5SDimitry Andric bool RISCVMergeBaseOffsetOpt::runOnMachineFunction(MachineFunction &Fn) {
259*0b57cec5SDimitry Andric   if (skipFunction(Fn.getFunction()))
260*0b57cec5SDimitry Andric     return false;
261*0b57cec5SDimitry Andric 
262*0b57cec5SDimitry Andric   DeadInstrs.clear();
263*0b57cec5SDimitry Andric   MRI = &Fn.getRegInfo();
264*0b57cec5SDimitry Andric   for (MachineBasicBlock &MBB : Fn) {
265*0b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "MBB: " << MBB.getName() << "\n");
266*0b57cec5SDimitry Andric     for (MachineInstr &HiLUI : MBB) {
267*0b57cec5SDimitry Andric       MachineInstr *LoADDI = nullptr;
268*0b57cec5SDimitry Andric       if (!detectLuiAddiGlobal(HiLUI, LoADDI))
269*0b57cec5SDimitry Andric         continue;
270*0b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "  Found lowered global address with one use: "
271*0b57cec5SDimitry Andric                         << *LoADDI->getOperand(2).getGlobal() << "\n");
272*0b57cec5SDimitry Andric       // If the use count is only one, merge the offset
273*0b57cec5SDimitry Andric       detectAndFoldOffset(HiLUI, *LoADDI);
274*0b57cec5SDimitry Andric     }
275*0b57cec5SDimitry Andric   }
276*0b57cec5SDimitry Andric   // Delete dead instructions.
277*0b57cec5SDimitry Andric   for (auto *MI : DeadInstrs)
278*0b57cec5SDimitry Andric     MI->eraseFromParent();
279*0b57cec5SDimitry Andric   return true;
280*0b57cec5SDimitry Andric }
281*0b57cec5SDimitry Andric 
282*0b57cec5SDimitry Andric /// Returns an instance of the Merge Base Offset Optimization pass.
283*0b57cec5SDimitry Andric FunctionPass *llvm::createRISCVMergeBaseOffsetOptPass() {
284*0b57cec5SDimitry Andric   return new RISCVMergeBaseOffsetOpt();
285*0b57cec5SDimitry Andric }
286