1*0b57cec5SDimitry Andric //===----- RISCVMergeBaseOffset.cpp - Optimise address calculations ------===// 2*0b57cec5SDimitry Andric // 3*0b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*0b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5*0b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*0b57cec5SDimitry Andric // 7*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 8*0b57cec5SDimitry Andric // 9*0b57cec5SDimitry Andric // Merge the offset of address calculation into the offset field 10*0b57cec5SDimitry Andric // of instructions in a global address lowering sequence. This pass transforms: 11*0b57cec5SDimitry Andric // lui vreg1, %hi(s) 12*0b57cec5SDimitry Andric // addi vreg2, vreg1, %lo(s) 13*0b57cec5SDimitry Andric // addi vreg3, verg2, Offset 14*0b57cec5SDimitry Andric // 15*0b57cec5SDimitry Andric // Into: 16*0b57cec5SDimitry Andric // lui vreg1, %hi(s+Offset) 17*0b57cec5SDimitry Andric // addi vreg2, vreg1, %lo(s+Offset) 18*0b57cec5SDimitry Andric // 19*0b57cec5SDimitry Andric // The transformation is carried out under certain conditions: 20*0b57cec5SDimitry Andric // 1) The offset field in the base of global address lowering sequence is zero. 21*0b57cec5SDimitry Andric // 2) The lowered global address has only one use. 22*0b57cec5SDimitry Andric // 23*0b57cec5SDimitry Andric // The offset field can be in a different form. This pass handles all of them. 24*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 25*0b57cec5SDimitry Andric 26*0b57cec5SDimitry Andric #include "RISCV.h" 27*0b57cec5SDimitry Andric #include "RISCVTargetMachine.h" 28*0b57cec5SDimitry Andric #include "llvm/CodeGen/Passes.h" 29*0b57cec5SDimitry Andric #include "llvm/Support/Debug.h" 30*0b57cec5SDimitry Andric #include "llvm/Support/TargetRegistry.h" 31*0b57cec5SDimitry Andric #include "llvm/Target/TargetOptions.h" 32*0b57cec5SDimitry Andric #include <set> 33*0b57cec5SDimitry Andric using namespace llvm; 34*0b57cec5SDimitry Andric 35*0b57cec5SDimitry Andric #define DEBUG_TYPE "riscv-merge-base-offset" 36*0b57cec5SDimitry Andric #define RISCV_MERGE_BASE_OFFSET_NAME "RISCV Merge Base Offset" 37*0b57cec5SDimitry Andric namespace { 38*0b57cec5SDimitry Andric 39*0b57cec5SDimitry Andric struct RISCVMergeBaseOffsetOpt : public MachineFunctionPass { 40*0b57cec5SDimitry Andric static char ID; 41*0b57cec5SDimitry Andric const MachineFunction *MF; 42*0b57cec5SDimitry Andric bool runOnMachineFunction(MachineFunction &Fn) override; 43*0b57cec5SDimitry Andric bool detectLuiAddiGlobal(MachineInstr &LUI, MachineInstr *&ADDI); 44*0b57cec5SDimitry Andric 45*0b57cec5SDimitry Andric bool detectAndFoldOffset(MachineInstr &HiLUI, MachineInstr &LoADDI); 46*0b57cec5SDimitry Andric void foldOffset(MachineInstr &HiLUI, MachineInstr &LoADDI, MachineInstr &Tail, 47*0b57cec5SDimitry Andric int64_t Offset); 48*0b57cec5SDimitry Andric bool matchLargeOffset(MachineInstr &TailAdd, unsigned GSReg, int64_t &Offset); 49*0b57cec5SDimitry Andric RISCVMergeBaseOffsetOpt() : MachineFunctionPass(ID) {} 50*0b57cec5SDimitry Andric 51*0b57cec5SDimitry Andric MachineFunctionProperties getRequiredProperties() const override { 52*0b57cec5SDimitry Andric return MachineFunctionProperties().set( 53*0b57cec5SDimitry Andric MachineFunctionProperties::Property::IsSSA); 54*0b57cec5SDimitry Andric } 55*0b57cec5SDimitry Andric 56*0b57cec5SDimitry Andric StringRef getPassName() const override { 57*0b57cec5SDimitry Andric return RISCV_MERGE_BASE_OFFSET_NAME; 58*0b57cec5SDimitry Andric } 59*0b57cec5SDimitry Andric 60*0b57cec5SDimitry Andric private: 61*0b57cec5SDimitry Andric MachineRegisterInfo *MRI; 62*0b57cec5SDimitry Andric std::set<MachineInstr *> DeadInstrs; 63*0b57cec5SDimitry Andric }; 64*0b57cec5SDimitry Andric } // end anonymous namespace 65*0b57cec5SDimitry Andric 66*0b57cec5SDimitry Andric char RISCVMergeBaseOffsetOpt::ID = 0; 67*0b57cec5SDimitry Andric INITIALIZE_PASS(RISCVMergeBaseOffsetOpt, "riscv-merge-base-offset", 68*0b57cec5SDimitry Andric RISCV_MERGE_BASE_OFFSET_NAME, false, false) 69*0b57cec5SDimitry Andric 70*0b57cec5SDimitry Andric // Detect the pattern: 71*0b57cec5SDimitry Andric // lui vreg1, %hi(s) 72*0b57cec5SDimitry Andric // addi vreg2, vreg1, %lo(s) 73*0b57cec5SDimitry Andric // 74*0b57cec5SDimitry Andric // Pattern only accepted if: 75*0b57cec5SDimitry Andric // 1) ADDI has only one use. 76*0b57cec5SDimitry Andric // 2) LUI has only one use; which is the ADDI. 77*0b57cec5SDimitry Andric // 3) Both ADDI and LUI have GlobalAddress type which indicates that these 78*0b57cec5SDimitry Andric // are generated from global address lowering. 79*0b57cec5SDimitry Andric // 4) Offset value in the Global Address is 0. 80*0b57cec5SDimitry Andric bool RISCVMergeBaseOffsetOpt::detectLuiAddiGlobal(MachineInstr &HiLUI, 81*0b57cec5SDimitry Andric MachineInstr *&LoADDI) { 82*0b57cec5SDimitry Andric if (HiLUI.getOpcode() != RISCV::LUI || 83*0b57cec5SDimitry Andric HiLUI.getOperand(1).getTargetFlags() != RISCVII::MO_HI || 84*0b57cec5SDimitry Andric HiLUI.getOperand(1).getType() != MachineOperand::MO_GlobalAddress || 85*0b57cec5SDimitry Andric HiLUI.getOperand(1).getOffset() != 0 || 86*0b57cec5SDimitry Andric !MRI->hasOneUse(HiLUI.getOperand(0).getReg())) 87*0b57cec5SDimitry Andric return false; 88*0b57cec5SDimitry Andric unsigned HiLuiDestReg = HiLUI.getOperand(0).getReg(); 89*0b57cec5SDimitry Andric LoADDI = MRI->use_begin(HiLuiDestReg)->getParent(); 90*0b57cec5SDimitry Andric if (LoADDI->getOpcode() != RISCV::ADDI || 91*0b57cec5SDimitry Andric LoADDI->getOperand(2).getTargetFlags() != RISCVII::MO_LO || 92*0b57cec5SDimitry Andric LoADDI->getOperand(2).getType() != MachineOperand::MO_GlobalAddress || 93*0b57cec5SDimitry Andric LoADDI->getOperand(2).getOffset() != 0 || 94*0b57cec5SDimitry Andric !MRI->hasOneUse(LoADDI->getOperand(0).getReg())) 95*0b57cec5SDimitry Andric return false; 96*0b57cec5SDimitry Andric return true; 97*0b57cec5SDimitry Andric } 98*0b57cec5SDimitry Andric 99*0b57cec5SDimitry Andric // Update the offset in HiLUI and LoADDI instructions. 100*0b57cec5SDimitry Andric // Delete the tail instruction and update all the uses to use the 101*0b57cec5SDimitry Andric // output from LoADDI. 102*0b57cec5SDimitry Andric void RISCVMergeBaseOffsetOpt::foldOffset(MachineInstr &HiLUI, 103*0b57cec5SDimitry Andric MachineInstr &LoADDI, 104*0b57cec5SDimitry Andric MachineInstr &Tail, int64_t Offset) { 105*0b57cec5SDimitry Andric // Put the offset back in HiLUI and the LoADDI 106*0b57cec5SDimitry Andric HiLUI.getOperand(1).setOffset(Offset); 107*0b57cec5SDimitry Andric LoADDI.getOperand(2).setOffset(Offset); 108*0b57cec5SDimitry Andric // Delete the tail instruction. 109*0b57cec5SDimitry Andric DeadInstrs.insert(&Tail); 110*0b57cec5SDimitry Andric MRI->replaceRegWith(Tail.getOperand(0).getReg(), 111*0b57cec5SDimitry Andric LoADDI.getOperand(0).getReg()); 112*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Merged offset " << Offset << " into base.\n" 113*0b57cec5SDimitry Andric << " " << HiLUI << " " << LoADDI;); 114*0b57cec5SDimitry Andric } 115*0b57cec5SDimitry Andric 116*0b57cec5SDimitry Andric // Detect patterns for large offsets that are passed into an ADD instruction. 117*0b57cec5SDimitry Andric // 118*0b57cec5SDimitry Andric // Base address lowering is of the form: 119*0b57cec5SDimitry Andric // HiLUI: lui vreg1, %hi(s) 120*0b57cec5SDimitry Andric // LoADDI: addi vreg2, vreg1, %lo(s) 121*0b57cec5SDimitry Andric // / \ 122*0b57cec5SDimitry Andric // / \ 123*0b57cec5SDimitry Andric // / \ 124*0b57cec5SDimitry Andric // / The large offset can be of two forms: \ 125*0b57cec5SDimitry Andric // 1) Offset that has non zero bits in lower 2) Offset that has non zero 126*0b57cec5SDimitry Andric // 12 bits and upper 20 bits bits in upper 20 bits only 127*0b57cec5SDimitry Andric // OffseLUI: lui vreg3, 4 128*0b57cec5SDimitry Andric // OffsetTail: addi voff, vreg3, 188 OffsetTail: lui voff, 128 129*0b57cec5SDimitry Andric // \ / 130*0b57cec5SDimitry Andric // \ / 131*0b57cec5SDimitry Andric // \ / 132*0b57cec5SDimitry Andric // \ / 133*0b57cec5SDimitry Andric // TailAdd: add vreg4, vreg2, voff 134*0b57cec5SDimitry Andric bool RISCVMergeBaseOffsetOpt::matchLargeOffset(MachineInstr &TailAdd, 135*0b57cec5SDimitry Andric unsigned GAReg, 136*0b57cec5SDimitry Andric int64_t &Offset) { 137*0b57cec5SDimitry Andric assert((TailAdd.getOpcode() == RISCV::ADD) && "Expected ADD instruction!"); 138*0b57cec5SDimitry Andric unsigned Rs = TailAdd.getOperand(1).getReg(); 139*0b57cec5SDimitry Andric unsigned Rt = TailAdd.getOperand(2).getReg(); 140*0b57cec5SDimitry Andric unsigned Reg = Rs == GAReg ? Rt : Rs; 141*0b57cec5SDimitry Andric 142*0b57cec5SDimitry Andric // Can't fold if the register has more than one use. 143*0b57cec5SDimitry Andric if (!MRI->hasOneUse(Reg)) 144*0b57cec5SDimitry Andric return false; 145*0b57cec5SDimitry Andric // This can point to an ADDI or a LUI: 146*0b57cec5SDimitry Andric MachineInstr &OffsetTail = *MRI->getVRegDef(Reg); 147*0b57cec5SDimitry Andric if (OffsetTail.getOpcode() == RISCV::ADDI) { 148*0b57cec5SDimitry Andric // The offset value has non zero bits in both %hi and %lo parts. 149*0b57cec5SDimitry Andric // Detect an ADDI that feeds from a LUI instruction. 150*0b57cec5SDimitry Andric MachineOperand &AddiImmOp = OffsetTail.getOperand(2); 151*0b57cec5SDimitry Andric if (AddiImmOp.getTargetFlags() != RISCVII::MO_None) 152*0b57cec5SDimitry Andric return false; 153*0b57cec5SDimitry Andric int64_t OffLo = AddiImmOp.getImm(); 154*0b57cec5SDimitry Andric MachineInstr &OffsetLui = 155*0b57cec5SDimitry Andric *MRI->getVRegDef(OffsetTail.getOperand(1).getReg()); 156*0b57cec5SDimitry Andric MachineOperand &LuiImmOp = OffsetLui.getOperand(1); 157*0b57cec5SDimitry Andric if (OffsetLui.getOpcode() != RISCV::LUI || 158*0b57cec5SDimitry Andric LuiImmOp.getTargetFlags() != RISCVII::MO_None || 159*0b57cec5SDimitry Andric !MRI->hasOneUse(OffsetLui.getOperand(0).getReg())) 160*0b57cec5SDimitry Andric return false; 161*0b57cec5SDimitry Andric int64_t OffHi = OffsetLui.getOperand(1).getImm(); 162*0b57cec5SDimitry Andric Offset = (OffHi << 12) + OffLo; 163*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Offset Instrs: " << OffsetTail 164*0b57cec5SDimitry Andric << " " << OffsetLui); 165*0b57cec5SDimitry Andric DeadInstrs.insert(&OffsetTail); 166*0b57cec5SDimitry Andric DeadInstrs.insert(&OffsetLui); 167*0b57cec5SDimitry Andric return true; 168*0b57cec5SDimitry Andric } else if (OffsetTail.getOpcode() == RISCV::LUI) { 169*0b57cec5SDimitry Andric // The offset value has all zero bits in the lower 12 bits. Only LUI 170*0b57cec5SDimitry Andric // exists. 171*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Offset Instr: " << OffsetTail); 172*0b57cec5SDimitry Andric Offset = OffsetTail.getOperand(1).getImm() << 12; 173*0b57cec5SDimitry Andric DeadInstrs.insert(&OffsetTail); 174*0b57cec5SDimitry Andric return true; 175*0b57cec5SDimitry Andric } 176*0b57cec5SDimitry Andric return false; 177*0b57cec5SDimitry Andric } 178*0b57cec5SDimitry Andric 179*0b57cec5SDimitry Andric bool RISCVMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr &HiLUI, 180*0b57cec5SDimitry Andric MachineInstr &LoADDI) { 181*0b57cec5SDimitry Andric unsigned DestReg = LoADDI.getOperand(0).getReg(); 182*0b57cec5SDimitry Andric assert(MRI->hasOneUse(DestReg) && "expected one use for LoADDI"); 183*0b57cec5SDimitry Andric // LoADDI has only one use. 184*0b57cec5SDimitry Andric MachineInstr &Tail = *MRI->use_begin(DestReg)->getParent(); 185*0b57cec5SDimitry Andric switch (Tail.getOpcode()) { 186*0b57cec5SDimitry Andric default: 187*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Don't know how to get offset from this instr:" 188*0b57cec5SDimitry Andric << Tail); 189*0b57cec5SDimitry Andric return false; 190*0b57cec5SDimitry Andric case RISCV::ADDI: { 191*0b57cec5SDimitry Andric // Offset is simply an immediate operand. 192*0b57cec5SDimitry Andric int64_t Offset = Tail.getOperand(2).getImm(); 193*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Offset Instr: " << Tail); 194*0b57cec5SDimitry Andric foldOffset(HiLUI, LoADDI, Tail, Offset); 195*0b57cec5SDimitry Andric return true; 196*0b57cec5SDimitry Andric } break; 197*0b57cec5SDimitry Andric case RISCV::ADD: { 198*0b57cec5SDimitry Andric // The offset is too large to fit in the immediate field of ADDI. 199*0b57cec5SDimitry Andric // This can be in two forms: 200*0b57cec5SDimitry Andric // 1) LUI hi_Offset followed by: 201*0b57cec5SDimitry Andric // ADDI lo_offset 202*0b57cec5SDimitry Andric // This happens in case the offset has non zero bits in 203*0b57cec5SDimitry Andric // both hi 20 and lo 12 bits. 204*0b57cec5SDimitry Andric // 2) LUI (offset20) 205*0b57cec5SDimitry Andric // This happens in case the lower 12 bits of the offset are zeros. 206*0b57cec5SDimitry Andric int64_t Offset; 207*0b57cec5SDimitry Andric if (!matchLargeOffset(Tail, DestReg, Offset)) 208*0b57cec5SDimitry Andric return false; 209*0b57cec5SDimitry Andric foldOffset(HiLUI, LoADDI, Tail, Offset); 210*0b57cec5SDimitry Andric return true; 211*0b57cec5SDimitry Andric } break; 212*0b57cec5SDimitry Andric case RISCV::LB: 213*0b57cec5SDimitry Andric case RISCV::LH: 214*0b57cec5SDimitry Andric case RISCV::LW: 215*0b57cec5SDimitry Andric case RISCV::LBU: 216*0b57cec5SDimitry Andric case RISCV::LHU: 217*0b57cec5SDimitry Andric case RISCV::LWU: 218*0b57cec5SDimitry Andric case RISCV::LD: 219*0b57cec5SDimitry Andric case RISCV::FLW: 220*0b57cec5SDimitry Andric case RISCV::FLD: 221*0b57cec5SDimitry Andric case RISCV::SB: 222*0b57cec5SDimitry Andric case RISCV::SH: 223*0b57cec5SDimitry Andric case RISCV::SW: 224*0b57cec5SDimitry Andric case RISCV::SD: 225*0b57cec5SDimitry Andric case RISCV::FSW: 226*0b57cec5SDimitry Andric case RISCV::FSD: { 227*0b57cec5SDimitry Andric // Transforms the sequence: Into: 228*0b57cec5SDimitry Andric // HiLUI: lui vreg1, %hi(foo) ---> lui vreg1, %hi(foo+8) 229*0b57cec5SDimitry Andric // LoADDI: addi vreg2, vreg1, %lo(foo) ---> lw vreg3, lo(foo+8)(vreg1) 230*0b57cec5SDimitry Andric // Tail: lw vreg3, 8(vreg2) 231*0b57cec5SDimitry Andric if (Tail.getOperand(1).isFI()) 232*0b57cec5SDimitry Andric return false; 233*0b57cec5SDimitry Andric // Register defined by LoADDI should be used in the base part of the 234*0b57cec5SDimitry Andric // load\store instruction. Otherwise, no folding possible. 235*0b57cec5SDimitry Andric unsigned BaseAddrReg = Tail.getOperand(1).getReg(); 236*0b57cec5SDimitry Andric if (DestReg != BaseAddrReg) 237*0b57cec5SDimitry Andric return false; 238*0b57cec5SDimitry Andric MachineOperand &TailImmOp = Tail.getOperand(2); 239*0b57cec5SDimitry Andric int64_t Offset = TailImmOp.getImm(); 240*0b57cec5SDimitry Andric // Update the offsets in global address lowering. 241*0b57cec5SDimitry Andric HiLUI.getOperand(1).setOffset(Offset); 242*0b57cec5SDimitry Andric // Update the immediate in the Tail instruction to add the offset. 243*0b57cec5SDimitry Andric Tail.RemoveOperand(2); 244*0b57cec5SDimitry Andric MachineOperand &ImmOp = LoADDI.getOperand(2); 245*0b57cec5SDimitry Andric ImmOp.setOffset(Offset); 246*0b57cec5SDimitry Andric Tail.addOperand(ImmOp); 247*0b57cec5SDimitry Andric // Update the base reg in the Tail instruction to feed from LUI. 248*0b57cec5SDimitry Andric // Output of HiLUI is only used in LoADDI, no need to use 249*0b57cec5SDimitry Andric // MRI->replaceRegWith(). 250*0b57cec5SDimitry Andric Tail.getOperand(1).setReg(HiLUI.getOperand(0).getReg()); 251*0b57cec5SDimitry Andric DeadInstrs.insert(&LoADDI); 252*0b57cec5SDimitry Andric return true; 253*0b57cec5SDimitry Andric } break; 254*0b57cec5SDimitry Andric } 255*0b57cec5SDimitry Andric return false; 256*0b57cec5SDimitry Andric } 257*0b57cec5SDimitry Andric 258*0b57cec5SDimitry Andric bool RISCVMergeBaseOffsetOpt::runOnMachineFunction(MachineFunction &Fn) { 259*0b57cec5SDimitry Andric if (skipFunction(Fn.getFunction())) 260*0b57cec5SDimitry Andric return false; 261*0b57cec5SDimitry Andric 262*0b57cec5SDimitry Andric DeadInstrs.clear(); 263*0b57cec5SDimitry Andric MRI = &Fn.getRegInfo(); 264*0b57cec5SDimitry Andric for (MachineBasicBlock &MBB : Fn) { 265*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "MBB: " << MBB.getName() << "\n"); 266*0b57cec5SDimitry Andric for (MachineInstr &HiLUI : MBB) { 267*0b57cec5SDimitry Andric MachineInstr *LoADDI = nullptr; 268*0b57cec5SDimitry Andric if (!detectLuiAddiGlobal(HiLUI, LoADDI)) 269*0b57cec5SDimitry Andric continue; 270*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Found lowered global address with one use: " 271*0b57cec5SDimitry Andric << *LoADDI->getOperand(2).getGlobal() << "\n"); 272*0b57cec5SDimitry Andric // If the use count is only one, merge the offset 273*0b57cec5SDimitry Andric detectAndFoldOffset(HiLUI, *LoADDI); 274*0b57cec5SDimitry Andric } 275*0b57cec5SDimitry Andric } 276*0b57cec5SDimitry Andric // Delete dead instructions. 277*0b57cec5SDimitry Andric for (auto *MI : DeadInstrs) 278*0b57cec5SDimitry Andric MI->eraseFromParent(); 279*0b57cec5SDimitry Andric return true; 280*0b57cec5SDimitry Andric } 281*0b57cec5SDimitry Andric 282*0b57cec5SDimitry Andric /// Returns an instance of the Merge Base Offset Optimization pass. 283*0b57cec5SDimitry Andric FunctionPass *llvm::createRISCVMergeBaseOffsetOptPass() { 284*0b57cec5SDimitry Andric return new RISCVMergeBaseOffsetOpt(); 285*0b57cec5SDimitry Andric } 286