xref: /freebsd/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp (revision 349cc55c9796c4596a5b9904cd3281af295f878f)
//===----- RISCVMergeBaseOffset.cpp - Optimise address calculations  ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Merge the offset of address calculation into the offset field
// of instructions in a global address lowering sequence. This pass transforms:
//   lui  vreg1, %hi(s)
//   addi vreg2, vreg1, %lo(s)
//   addi vreg3, vreg2, Offset
//
//   Into:
//   lui  vreg1, %hi(s+Offset)
//   addi vreg2, vreg1, %lo(s+Offset)
//
// The transformation is carried out under certain conditions:
// 1) The offset field in the base of global address lowering sequence is zero.
// 2) The lowered global address has only one use.
//
// The offset field can be in a different form. This pass handles all of them.
//===----------------------------------------------------------------------===//
250b57cec5SDimitry Andric 
260b57cec5SDimitry Andric #include "RISCV.h"
270b57cec5SDimitry Andric #include "RISCVTargetMachine.h"
280b57cec5SDimitry Andric #include "llvm/CodeGen/Passes.h"
29*349cc55cSDimitry Andric #include "llvm/MC/TargetRegistry.h"
300b57cec5SDimitry Andric #include "llvm/Support/Debug.h"
310b57cec5SDimitry Andric #include "llvm/Target/TargetOptions.h"
320b57cec5SDimitry Andric #include <set>
330b57cec5SDimitry Andric using namespace llvm;
340b57cec5SDimitry Andric 
350b57cec5SDimitry Andric #define DEBUG_TYPE "riscv-merge-base-offset"
360b57cec5SDimitry Andric #define RISCV_MERGE_BASE_OFFSET_NAME "RISCV Merge Base Offset"
370b57cec5SDimitry Andric namespace {
380b57cec5SDimitry Andric 
390b57cec5SDimitry Andric struct RISCVMergeBaseOffsetOpt : public MachineFunctionPass {
400b57cec5SDimitry Andric   static char ID;
410b57cec5SDimitry Andric   bool runOnMachineFunction(MachineFunction &Fn) override;
420b57cec5SDimitry Andric   bool detectLuiAddiGlobal(MachineInstr &LUI, MachineInstr *&ADDI);
430b57cec5SDimitry Andric 
440b57cec5SDimitry Andric   bool detectAndFoldOffset(MachineInstr &HiLUI, MachineInstr &LoADDI);
450b57cec5SDimitry Andric   void foldOffset(MachineInstr &HiLUI, MachineInstr &LoADDI, MachineInstr &Tail,
460b57cec5SDimitry Andric                   int64_t Offset);
478bcb0991SDimitry Andric   bool matchLargeOffset(MachineInstr &TailAdd, Register GSReg, int64_t &Offset);
480b57cec5SDimitry Andric   RISCVMergeBaseOffsetOpt() : MachineFunctionPass(ID) {}
490b57cec5SDimitry Andric 
500b57cec5SDimitry Andric   MachineFunctionProperties getRequiredProperties() const override {
510b57cec5SDimitry Andric     return MachineFunctionProperties().set(
520b57cec5SDimitry Andric         MachineFunctionProperties::Property::IsSSA);
530b57cec5SDimitry Andric   }
540b57cec5SDimitry Andric 
55*349cc55cSDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override {
56*349cc55cSDimitry Andric     AU.setPreservesCFG();
57*349cc55cSDimitry Andric     MachineFunctionPass::getAnalysisUsage(AU);
58*349cc55cSDimitry Andric   }
59*349cc55cSDimitry Andric 
600b57cec5SDimitry Andric   StringRef getPassName() const override {
610b57cec5SDimitry Andric     return RISCV_MERGE_BASE_OFFSET_NAME;
620b57cec5SDimitry Andric   }
630b57cec5SDimitry Andric 
640b57cec5SDimitry Andric private:
650b57cec5SDimitry Andric   MachineRegisterInfo *MRI;
660b57cec5SDimitry Andric   std::set<MachineInstr *> DeadInstrs;
670b57cec5SDimitry Andric };
680b57cec5SDimitry Andric } // end anonymous namespace
690b57cec5SDimitry Andric 
700b57cec5SDimitry Andric char RISCVMergeBaseOffsetOpt::ID = 0;
71e8d8bef9SDimitry Andric INITIALIZE_PASS(RISCVMergeBaseOffsetOpt, DEBUG_TYPE,
720b57cec5SDimitry Andric                 RISCV_MERGE_BASE_OFFSET_NAME, false, false)
730b57cec5SDimitry Andric 
740b57cec5SDimitry Andric // Detect the pattern:
750b57cec5SDimitry Andric //   lui   vreg1, %hi(s)
760b57cec5SDimitry Andric //   addi  vreg2, vreg1, %lo(s)
770b57cec5SDimitry Andric //
780b57cec5SDimitry Andric //   Pattern only accepted if:
790b57cec5SDimitry Andric //     1) ADDI has only one use.
800b57cec5SDimitry Andric //     2) LUI has only one use; which is the ADDI.
810b57cec5SDimitry Andric //     3) Both ADDI and LUI have GlobalAddress type which indicates that these
820b57cec5SDimitry Andric //        are generated from global address lowering.
830b57cec5SDimitry Andric //     4) Offset value in the Global Address is 0.
840b57cec5SDimitry Andric bool RISCVMergeBaseOffsetOpt::detectLuiAddiGlobal(MachineInstr &HiLUI,
850b57cec5SDimitry Andric                                                   MachineInstr *&LoADDI) {
860b57cec5SDimitry Andric   if (HiLUI.getOpcode() != RISCV::LUI ||
870b57cec5SDimitry Andric       HiLUI.getOperand(1).getTargetFlags() != RISCVII::MO_HI ||
880b57cec5SDimitry Andric       HiLUI.getOperand(1).getType() != MachineOperand::MO_GlobalAddress ||
890b57cec5SDimitry Andric       HiLUI.getOperand(1).getOffset() != 0 ||
900b57cec5SDimitry Andric       !MRI->hasOneUse(HiLUI.getOperand(0).getReg()))
910b57cec5SDimitry Andric     return false;
928bcb0991SDimitry Andric   Register HiLuiDestReg = HiLUI.getOperand(0).getReg();
930b57cec5SDimitry Andric   LoADDI = MRI->use_begin(HiLuiDestReg)->getParent();
940b57cec5SDimitry Andric   if (LoADDI->getOpcode() != RISCV::ADDI ||
950b57cec5SDimitry Andric       LoADDI->getOperand(2).getTargetFlags() != RISCVII::MO_LO ||
960b57cec5SDimitry Andric       LoADDI->getOperand(2).getType() != MachineOperand::MO_GlobalAddress ||
970b57cec5SDimitry Andric       LoADDI->getOperand(2).getOffset() != 0 ||
980b57cec5SDimitry Andric       !MRI->hasOneUse(LoADDI->getOperand(0).getReg()))
990b57cec5SDimitry Andric     return false;
1000b57cec5SDimitry Andric   return true;
1010b57cec5SDimitry Andric }
1020b57cec5SDimitry Andric 
1030b57cec5SDimitry Andric // Update the offset in HiLUI and LoADDI instructions.
1040b57cec5SDimitry Andric // Delete the tail instruction and update all the uses to use the
1050b57cec5SDimitry Andric // output from LoADDI.
1060b57cec5SDimitry Andric void RISCVMergeBaseOffsetOpt::foldOffset(MachineInstr &HiLUI,
1070b57cec5SDimitry Andric                                          MachineInstr &LoADDI,
1080b57cec5SDimitry Andric                                          MachineInstr &Tail, int64_t Offset) {
1090b57cec5SDimitry Andric   // Put the offset back in HiLUI and the LoADDI
1100b57cec5SDimitry Andric   HiLUI.getOperand(1).setOffset(Offset);
1110b57cec5SDimitry Andric   LoADDI.getOperand(2).setOffset(Offset);
1120b57cec5SDimitry Andric   // Delete the tail instruction.
1130b57cec5SDimitry Andric   DeadInstrs.insert(&Tail);
1140b57cec5SDimitry Andric   MRI->replaceRegWith(Tail.getOperand(0).getReg(),
1150b57cec5SDimitry Andric                       LoADDI.getOperand(0).getReg());
1160b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "  Merged offset " << Offset << " into base.\n"
1170b57cec5SDimitry Andric                     << "     " << HiLUI << "     " << LoADDI;);
1180b57cec5SDimitry Andric }
1190b57cec5SDimitry Andric 
1200b57cec5SDimitry Andric // Detect patterns for large offsets that are passed into an ADD instruction.
1210b57cec5SDimitry Andric //
1220b57cec5SDimitry Andric //                     Base address lowering is of the form:
1230b57cec5SDimitry Andric //                        HiLUI:  lui   vreg1, %hi(s)
1240b57cec5SDimitry Andric //                       LoADDI:  addi  vreg2, vreg1, %lo(s)
1250b57cec5SDimitry Andric //                       /                                  \
1260b57cec5SDimitry Andric //                      /                                    \
1270b57cec5SDimitry Andric //                     /                                      \
1280b57cec5SDimitry Andric //                    /  The large offset can be of two forms: \
1290b57cec5SDimitry Andric //  1) Offset that has non zero bits in lower      2) Offset that has non zero
1300b57cec5SDimitry Andric //     12 bits and upper 20 bits                      bits in upper 20 bits only
1310b57cec5SDimitry Andric //   OffseLUI: lui   vreg3, 4
1320b57cec5SDimitry Andric // OffsetTail: addi  voff, vreg3, 188                OffsetTail: lui  voff, 128
1330b57cec5SDimitry Andric //                    \                                        /
1340b57cec5SDimitry Andric //                     \                                      /
1350b57cec5SDimitry Andric //                      \                                    /
1360b57cec5SDimitry Andric //                       \                                  /
1370b57cec5SDimitry Andric //                         TailAdd: add  vreg4, vreg2, voff
1380b57cec5SDimitry Andric bool RISCVMergeBaseOffsetOpt::matchLargeOffset(MachineInstr &TailAdd,
1398bcb0991SDimitry Andric                                                Register GAReg,
1400b57cec5SDimitry Andric                                                int64_t &Offset) {
1410b57cec5SDimitry Andric   assert((TailAdd.getOpcode() == RISCV::ADD) && "Expected ADD instruction!");
1428bcb0991SDimitry Andric   Register Rs = TailAdd.getOperand(1).getReg();
1438bcb0991SDimitry Andric   Register Rt = TailAdd.getOperand(2).getReg();
1448bcb0991SDimitry Andric   Register Reg = Rs == GAReg ? Rt : Rs;
1450b57cec5SDimitry Andric 
1460b57cec5SDimitry Andric   // Can't fold if the register has more than one use.
1470b57cec5SDimitry Andric   if (!MRI->hasOneUse(Reg))
1480b57cec5SDimitry Andric     return false;
1490b57cec5SDimitry Andric   // This can point to an ADDI or a LUI:
1500b57cec5SDimitry Andric   MachineInstr &OffsetTail = *MRI->getVRegDef(Reg);
1510b57cec5SDimitry Andric   if (OffsetTail.getOpcode() == RISCV::ADDI) {
1520b57cec5SDimitry Andric     // The offset value has non zero bits in both %hi and %lo parts.
1530b57cec5SDimitry Andric     // Detect an ADDI that feeds from a LUI instruction.
1540b57cec5SDimitry Andric     MachineOperand &AddiImmOp = OffsetTail.getOperand(2);
1550b57cec5SDimitry Andric     if (AddiImmOp.getTargetFlags() != RISCVII::MO_None)
1560b57cec5SDimitry Andric       return false;
1570b57cec5SDimitry Andric     int64_t OffLo = AddiImmOp.getImm();
1580b57cec5SDimitry Andric     MachineInstr &OffsetLui =
1590b57cec5SDimitry Andric         *MRI->getVRegDef(OffsetTail.getOperand(1).getReg());
1600b57cec5SDimitry Andric     MachineOperand &LuiImmOp = OffsetLui.getOperand(1);
1610b57cec5SDimitry Andric     if (OffsetLui.getOpcode() != RISCV::LUI ||
1620b57cec5SDimitry Andric         LuiImmOp.getTargetFlags() != RISCVII::MO_None ||
1630b57cec5SDimitry Andric         !MRI->hasOneUse(OffsetLui.getOperand(0).getReg()))
1640b57cec5SDimitry Andric       return false;
1650b57cec5SDimitry Andric     int64_t OffHi = OffsetLui.getOperand(1).getImm();
1660b57cec5SDimitry Andric     Offset = (OffHi << 12) + OffLo;
1670b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "  Offset Instrs: " << OffsetTail
1680b57cec5SDimitry Andric                       << "                 " << OffsetLui);
1690b57cec5SDimitry Andric     DeadInstrs.insert(&OffsetTail);
1700b57cec5SDimitry Andric     DeadInstrs.insert(&OffsetLui);
1710b57cec5SDimitry Andric     return true;
1720b57cec5SDimitry Andric   } else if (OffsetTail.getOpcode() == RISCV::LUI) {
1730b57cec5SDimitry Andric     // The offset value has all zero bits in the lower 12 bits. Only LUI
1740b57cec5SDimitry Andric     // exists.
1750b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "  Offset Instr: " << OffsetTail);
1760b57cec5SDimitry Andric     Offset = OffsetTail.getOperand(1).getImm() << 12;
1770b57cec5SDimitry Andric     DeadInstrs.insert(&OffsetTail);
1780b57cec5SDimitry Andric     return true;
1790b57cec5SDimitry Andric   }
1800b57cec5SDimitry Andric   return false;
1810b57cec5SDimitry Andric }
1820b57cec5SDimitry Andric 
1830b57cec5SDimitry Andric bool RISCVMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr &HiLUI,
1840b57cec5SDimitry Andric                                                   MachineInstr &LoADDI) {
1858bcb0991SDimitry Andric   Register DestReg = LoADDI.getOperand(0).getReg();
1860b57cec5SDimitry Andric   assert(MRI->hasOneUse(DestReg) && "expected one use for LoADDI");
1870b57cec5SDimitry Andric   // LoADDI has only one use.
1880b57cec5SDimitry Andric   MachineInstr &Tail = *MRI->use_begin(DestReg)->getParent();
1890b57cec5SDimitry Andric   switch (Tail.getOpcode()) {
1900b57cec5SDimitry Andric   default:
1910b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "Don't know how to get offset from this instr:"
1920b57cec5SDimitry Andric                       << Tail);
1930b57cec5SDimitry Andric     return false;
1940b57cec5SDimitry Andric   case RISCV::ADDI: {
1950b57cec5SDimitry Andric     // Offset is simply an immediate operand.
1960b57cec5SDimitry Andric     int64_t Offset = Tail.getOperand(2).getImm();
1970b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "  Offset Instr: " << Tail);
1980b57cec5SDimitry Andric     foldOffset(HiLUI, LoADDI, Tail, Offset);
1990b57cec5SDimitry Andric     return true;
200*349cc55cSDimitry Andric   }
2010b57cec5SDimitry Andric   case RISCV::ADD: {
2020b57cec5SDimitry Andric     // The offset is too large to fit in the immediate field of ADDI.
2030b57cec5SDimitry Andric     // This can be in two forms:
2040b57cec5SDimitry Andric     // 1) LUI hi_Offset followed by:
2050b57cec5SDimitry Andric     //    ADDI lo_offset
2060b57cec5SDimitry Andric     //    This happens in case the offset has non zero bits in
2070b57cec5SDimitry Andric     //    both hi 20 and lo 12 bits.
2080b57cec5SDimitry Andric     // 2) LUI (offset20)
2090b57cec5SDimitry Andric     //    This happens in case the lower 12 bits of the offset are zeros.
2100b57cec5SDimitry Andric     int64_t Offset;
2110b57cec5SDimitry Andric     if (!matchLargeOffset(Tail, DestReg, Offset))
2120b57cec5SDimitry Andric       return false;
2130b57cec5SDimitry Andric     foldOffset(HiLUI, LoADDI, Tail, Offset);
2140b57cec5SDimitry Andric     return true;
215*349cc55cSDimitry Andric   }
2160b57cec5SDimitry Andric   case RISCV::LB:
2170b57cec5SDimitry Andric   case RISCV::LH:
2180b57cec5SDimitry Andric   case RISCV::LW:
2190b57cec5SDimitry Andric   case RISCV::LBU:
2200b57cec5SDimitry Andric   case RISCV::LHU:
2210b57cec5SDimitry Andric   case RISCV::LWU:
2220b57cec5SDimitry Andric   case RISCV::LD:
223e8d8bef9SDimitry Andric   case RISCV::FLH:
2240b57cec5SDimitry Andric   case RISCV::FLW:
2250b57cec5SDimitry Andric   case RISCV::FLD:
2260b57cec5SDimitry Andric   case RISCV::SB:
2270b57cec5SDimitry Andric   case RISCV::SH:
2280b57cec5SDimitry Andric   case RISCV::SW:
2290b57cec5SDimitry Andric   case RISCV::SD:
230e8d8bef9SDimitry Andric   case RISCV::FSH:
2310b57cec5SDimitry Andric   case RISCV::FSW:
2320b57cec5SDimitry Andric   case RISCV::FSD: {
2330b57cec5SDimitry Andric     // Transforms the sequence:            Into:
2340b57cec5SDimitry Andric     // HiLUI:  lui vreg1, %hi(foo)          --->  lui vreg1, %hi(foo+8)
2350b57cec5SDimitry Andric     // LoADDI: addi vreg2, vreg1, %lo(foo)  --->  lw vreg3, lo(foo+8)(vreg1)
2360b57cec5SDimitry Andric     // Tail:   lw vreg3, 8(vreg2)
2370b57cec5SDimitry Andric     if (Tail.getOperand(1).isFI())
2380b57cec5SDimitry Andric       return false;
2390b57cec5SDimitry Andric     // Register defined by LoADDI should be used in the base part of the
2400b57cec5SDimitry Andric     // load\store instruction. Otherwise, no folding possible.
2418bcb0991SDimitry Andric     Register BaseAddrReg = Tail.getOperand(1).getReg();
2420b57cec5SDimitry Andric     if (DestReg != BaseAddrReg)
2430b57cec5SDimitry Andric       return false;
2440b57cec5SDimitry Andric     MachineOperand &TailImmOp = Tail.getOperand(2);
2450b57cec5SDimitry Andric     int64_t Offset = TailImmOp.getImm();
2460b57cec5SDimitry Andric     // Update the offsets in global address lowering.
2470b57cec5SDimitry Andric     HiLUI.getOperand(1).setOffset(Offset);
2480b57cec5SDimitry Andric     // Update the immediate in the Tail instruction to add the offset.
2490b57cec5SDimitry Andric     Tail.RemoveOperand(2);
2500b57cec5SDimitry Andric     MachineOperand &ImmOp = LoADDI.getOperand(2);
2510b57cec5SDimitry Andric     ImmOp.setOffset(Offset);
2520b57cec5SDimitry Andric     Tail.addOperand(ImmOp);
2530b57cec5SDimitry Andric     // Update the base reg in the Tail instruction to feed from LUI.
2540b57cec5SDimitry Andric     // Output of HiLUI is only used in LoADDI, no need to use
2550b57cec5SDimitry Andric     // MRI->replaceRegWith().
2560b57cec5SDimitry Andric     Tail.getOperand(1).setReg(HiLUI.getOperand(0).getReg());
2570b57cec5SDimitry Andric     DeadInstrs.insert(&LoADDI);
2580b57cec5SDimitry Andric     return true;
259*349cc55cSDimitry Andric   }
2600b57cec5SDimitry Andric   }
2610b57cec5SDimitry Andric   return false;
2620b57cec5SDimitry Andric }
2630b57cec5SDimitry Andric 
2640b57cec5SDimitry Andric bool RISCVMergeBaseOffsetOpt::runOnMachineFunction(MachineFunction &Fn) {
2650b57cec5SDimitry Andric   if (skipFunction(Fn.getFunction()))
2660b57cec5SDimitry Andric     return false;
2670b57cec5SDimitry Andric 
268*349cc55cSDimitry Andric   bool MadeChange = false;
2690b57cec5SDimitry Andric   DeadInstrs.clear();
2700b57cec5SDimitry Andric   MRI = &Fn.getRegInfo();
2710b57cec5SDimitry Andric   for (MachineBasicBlock &MBB : Fn) {
2720b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "MBB: " << MBB.getName() << "\n");
2730b57cec5SDimitry Andric     for (MachineInstr &HiLUI : MBB) {
2740b57cec5SDimitry Andric       MachineInstr *LoADDI = nullptr;
2750b57cec5SDimitry Andric       if (!detectLuiAddiGlobal(HiLUI, LoADDI))
2760b57cec5SDimitry Andric         continue;
2770b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "  Found lowered global address with one use: "
2780b57cec5SDimitry Andric                         << *LoADDI->getOperand(2).getGlobal() << "\n");
2790b57cec5SDimitry Andric       // If the use count is only one, merge the offset
280*349cc55cSDimitry Andric       MadeChange |= detectAndFoldOffset(HiLUI, *LoADDI);
2810b57cec5SDimitry Andric     }
2820b57cec5SDimitry Andric   }
2830b57cec5SDimitry Andric   // Delete dead instructions.
2840b57cec5SDimitry Andric   for (auto *MI : DeadInstrs)
2850b57cec5SDimitry Andric     MI->eraseFromParent();
286*349cc55cSDimitry Andric   return MadeChange;
2870b57cec5SDimitry Andric }
2880b57cec5SDimitry Andric 
2890b57cec5SDimitry Andric /// Returns an instance of the Merge Base Offset Optimization pass.
2900b57cec5SDimitry Andric FunctionPass *llvm::createRISCVMergeBaseOffsetOptPass() {
2910b57cec5SDimitry Andric   return new RISCVMergeBaseOffsetOpt();
2920b57cec5SDimitry Andric }
293