1 //===----- RISCVMergeBaseOffset.cpp - Optimise address calculations ------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Merge the offset of address calculation into the offset field 10 // of instructions in a global address lowering sequence. This pass transforms: 11 // lui vreg1, %hi(s) 12 // addi vreg2, vreg1, %lo(s) 13 // addi vreg3, verg2, Offset 14 // 15 // Into: 16 // lui vreg1, %hi(s+Offset) 17 // addi vreg2, vreg1, %lo(s+Offset) 18 // 19 // The transformation is carried out under certain conditions: 20 // 1) The offset field in the base of global address lowering sequence is zero. 21 // 2) The lowered global address has only one use. 22 // 23 // The offset field can be in a different form. This pass handles all of them. 24 //===----------------------------------------------------------------------===// 25 26 #include "RISCV.h" 27 #include "RISCVTargetMachine.h" 28 #include "llvm/CodeGen/MachineFunctionPass.h" 29 #include "llvm/CodeGen/Passes.h" 30 #include "llvm/MC/TargetRegistry.h" 31 #include "llvm/Support/Debug.h" 32 #include "llvm/Target/TargetOptions.h" 33 #include <set> 34 using namespace llvm; 35 36 #define DEBUG_TYPE "riscv-merge-base-offset" 37 #define RISCV_MERGE_BASE_OFFSET_NAME "RISCV Merge Base Offset" 38 namespace { 39 40 struct RISCVMergeBaseOffsetOpt : public MachineFunctionPass { 41 private: 42 const RISCVSubtarget *ST = nullptr; 43 44 public: 45 static char ID; 46 bool runOnMachineFunction(MachineFunction &Fn) override; 47 bool detectLuiAddiGlobal(MachineInstr &LUI, MachineInstr *&ADDI); 48 49 bool detectAndFoldOffset(MachineInstr &HiLUI, MachineInstr &LoADDI); 50 void foldOffset(MachineInstr &HiLUI, MachineInstr &LoADDI, MachineInstr &Tail, 51 int64_t Offset); 52 bool matchLargeOffset(MachineInstr &TailAdd, Register GSReg, int64_t &Offset); 53 bool matchShiftedOffset(MachineInstr &TailShXAdd, Register GSReg, 54 int64_t &Offset); 55 56 RISCVMergeBaseOffsetOpt() : MachineFunctionPass(ID) {} 57 58 MachineFunctionProperties getRequiredProperties() const override { 59 return MachineFunctionProperties().set( 60 MachineFunctionProperties::Property::IsSSA); 61 } 62 63 void getAnalysisUsage(AnalysisUsage &AU) const override { 64 AU.setPreservesCFG(); 65 MachineFunctionPass::getAnalysisUsage(AU); 66 } 67 68 StringRef getPassName() const override { 69 return RISCV_MERGE_BASE_OFFSET_NAME; 70 } 71 72 private: 73 MachineRegisterInfo *MRI; 74 std::set<MachineInstr *> DeadInstrs; 75 }; 76 } // end anonymous namespace 77 78 char RISCVMergeBaseOffsetOpt::ID = 0; 79 INITIALIZE_PASS(RISCVMergeBaseOffsetOpt, DEBUG_TYPE, 80 RISCV_MERGE_BASE_OFFSET_NAME, false, false) 81 82 // Detect the pattern: 83 // lui vreg1, %hi(s) 84 // addi vreg2, vreg1, %lo(s) 85 // 86 // Pattern only accepted if: 87 // 1) ADDI has only one use. 88 // 2) LUI has only one use; which is the ADDI. 89 // 3) Both ADDI and LUI have GlobalAddress type which indicates that these 90 // are generated from global address lowering. 91 // 4) Offset value in the Global Address is 0. 92 bool RISCVMergeBaseOffsetOpt::detectLuiAddiGlobal(MachineInstr &HiLUI, 93 MachineInstr *&LoADDI) { 94 if (HiLUI.getOpcode() != RISCV::LUI || 95 HiLUI.getOperand(1).getTargetFlags() != RISCVII::MO_HI || 96 !HiLUI.getOperand(1).isGlobal() || 97 HiLUI.getOperand(1).getOffset() != 0 || 98 !MRI->hasOneUse(HiLUI.getOperand(0).getReg())) 99 return false; 100 Register HiLuiDestReg = HiLUI.getOperand(0).getReg(); 101 LoADDI = &*MRI->use_instr_begin(HiLuiDestReg); 102 if (LoADDI->getOpcode() != RISCV::ADDI || 103 LoADDI->getOperand(2).getTargetFlags() != RISCVII::MO_LO || 104 !LoADDI->getOperand(2).isGlobal() || 105 LoADDI->getOperand(2).getOffset() != 0) 106 return false; 107 return true; 108 } 109 110 // Update the offset in HiLUI and LoADDI instructions. 111 // Delete the tail instruction and update all the uses to use the 112 // output from LoADDI. 113 void RISCVMergeBaseOffsetOpt::foldOffset(MachineInstr &HiLUI, 114 MachineInstr &LoADDI, 115 MachineInstr &Tail, int64_t Offset) { 116 assert(isInt<32>(Offset) && "Unexpected offset"); 117 // Put the offset back in HiLUI and the LoADDI 118 HiLUI.getOperand(1).setOffset(Offset); 119 LoADDI.getOperand(2).setOffset(Offset); 120 // Delete the tail instruction. 121 DeadInstrs.insert(&Tail); 122 MRI->replaceRegWith(Tail.getOperand(0).getReg(), 123 LoADDI.getOperand(0).getReg()); 124 LLVM_DEBUG(dbgs() << " Merged offset " << Offset << " into base.\n" 125 << " " << HiLUI << " " << LoADDI;); 126 } 127 128 // Detect patterns for large offsets that are passed into an ADD instruction. 129 // 130 // Base address lowering is of the form: 131 // HiLUI: lui vreg1, %hi(s) 132 // LoADDI: addi vreg2, vreg1, %lo(s) 133 // / \ 134 // / \ 135 // / \ 136 // / The large offset can be of two forms: \ 137 // 1) Offset that has non zero bits in lower 2) Offset that has non zero 138 // 12 bits and upper 20 bits bits in upper 20 bits only 139 // OffseLUI: lui vreg3, 4 140 // OffsetTail: addi voff, vreg3, 188 OffsetTail: lui voff, 128 141 // \ / 142 // \ / 143 // \ / 144 // \ / 145 // TailAdd: add vreg4, vreg2, voff 146 bool RISCVMergeBaseOffsetOpt::matchLargeOffset(MachineInstr &TailAdd, 147 Register GAReg, 148 int64_t &Offset) { 149 assert((TailAdd.getOpcode() == RISCV::ADD) && "Expected ADD instruction!"); 150 Register Rs = TailAdd.getOperand(1).getReg(); 151 Register Rt = TailAdd.getOperand(2).getReg(); 152 Register Reg = Rs == GAReg ? Rt : Rs; 153 154 // Can't fold if the register has more than one use. 155 if (!MRI->hasOneUse(Reg)) 156 return false; 157 // This can point to an ADDI or a LUI: 158 MachineInstr &OffsetTail = *MRI->getVRegDef(Reg); 159 if (OffsetTail.getOpcode() == RISCV::ADDI || 160 OffsetTail.getOpcode() == RISCV::ADDIW) { 161 // The offset value has non zero bits in both %hi and %lo parts. 162 // Detect an ADDI that feeds from a LUI instruction. 163 MachineOperand &AddiImmOp = OffsetTail.getOperand(2); 164 if (AddiImmOp.getTargetFlags() != RISCVII::MO_None) 165 return false; 166 int64_t OffLo = AddiImmOp.getImm(); 167 MachineInstr &OffsetLui = 168 *MRI->getVRegDef(OffsetTail.getOperand(1).getReg()); 169 MachineOperand &LuiImmOp = OffsetLui.getOperand(1); 170 if (OffsetLui.getOpcode() != RISCV::LUI || 171 LuiImmOp.getTargetFlags() != RISCVII::MO_None || 172 !MRI->hasOneUse(OffsetLui.getOperand(0).getReg())) 173 return false; 174 Offset = SignExtend64<32>(LuiImmOp.getImm() << 12); 175 Offset += OffLo; 176 // RV32 ignores the upper 32 bits. ADDIW sign extends the result. 177 if (!ST->is64Bit() || OffsetTail.getOpcode() == RISCV::ADDIW) 178 Offset = SignExtend64<32>(Offset); 179 // We can only fold simm32 offsets. 180 if (!isInt<32>(Offset)) 181 return false; 182 LLVM_DEBUG(dbgs() << " Offset Instrs: " << OffsetTail 183 << " " << OffsetLui); 184 DeadInstrs.insert(&OffsetTail); 185 DeadInstrs.insert(&OffsetLui); 186 return true; 187 } else if (OffsetTail.getOpcode() == RISCV::LUI) { 188 // The offset value has all zero bits in the lower 12 bits. Only LUI 189 // exists. 190 LLVM_DEBUG(dbgs() << " Offset Instr: " << OffsetTail); 191 Offset = SignExtend64<32>(OffsetTail.getOperand(1).getImm() << 12); 192 DeadInstrs.insert(&OffsetTail); 193 return true; 194 } 195 return false; 196 } 197 198 // Detect patterns for offsets that are passed into a SHXADD instruction. 199 // The offset has 1,2, or 3 trailing zeros and fits in simm13, simm14, simm15. 200 // The constant is created with addi voff, x0, C, and shXadd is used to 201 // fill insert the trailing zeros and do the addition. 202 // 203 // HiLUI: lui vreg1, %hi(s) 204 // LoADDI: addi vreg2, vreg1, %lo(s) 205 // OffsetTail: addi voff, x0, C 206 // TailAdd: shXadd vreg4, voff, vreg2 207 bool RISCVMergeBaseOffsetOpt::matchShiftedOffset(MachineInstr &TailShXAdd, 208 Register GAReg, 209 int64_t &Offset) { 210 assert((TailShXAdd.getOpcode() == RISCV::SH1ADD || 211 TailShXAdd.getOpcode() == RISCV::SH2ADD || 212 TailShXAdd.getOpcode() == RISCV::SH3ADD) && 213 "Expected SHXADD instruction!"); 214 215 // The first source is the shifted operand. 216 Register Rs1 = TailShXAdd.getOperand(1).getReg(); 217 218 if (GAReg != TailShXAdd.getOperand(2).getReg()) 219 return false; 220 221 // Can't fold if the register has more than one use. 222 if (!MRI->hasOneUse(Rs1)) 223 return false; 224 // This can point to an ADDI X0, C. 225 MachineInstr &OffsetTail = *MRI->getVRegDef(Rs1); 226 if (OffsetTail.getOpcode() != RISCV::ADDI) 227 return false; 228 if (!OffsetTail.getOperand(1).isReg() || 229 OffsetTail.getOperand(1).getReg() != RISCV::X0 || 230 !OffsetTail.getOperand(2).isImm()) 231 return false; 232 233 Offset = OffsetTail.getOperand(2).getImm(); 234 assert(isInt<12>(Offset) && "Unexpected offset"); 235 236 unsigned ShAmt; 237 switch (TailShXAdd.getOpcode()) { 238 default: llvm_unreachable("Unexpected opcode"); 239 case RISCV::SH1ADD: ShAmt = 1; break; 240 case RISCV::SH2ADD: ShAmt = 2; break; 241 case RISCV::SH3ADD: ShAmt = 3; break; 242 } 243 244 Offset = (uint64_t)Offset << ShAmt; 245 246 LLVM_DEBUG(dbgs() << " Offset Instr: " << OffsetTail); 247 DeadInstrs.insert(&OffsetTail); 248 return true; 249 } 250 251 bool RISCVMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr &HiLUI, 252 MachineInstr &LoADDI) { 253 Register DestReg = LoADDI.getOperand(0).getReg(); 254 255 // First, look for arithmetic instructions we can get an offset from. 256 // We might be able to remove the arithmetic instructions by folding the 257 // offset into the LUI+ADDI. 258 if (MRI->hasOneUse(DestReg)) { 259 // LoADDI has only one use. 260 MachineInstr &Tail = *MRI->use_instr_begin(DestReg); 261 switch (Tail.getOpcode()) { 262 default: 263 LLVM_DEBUG(dbgs() << "Don't know how to get offset from this instr:" 264 << Tail); 265 break; 266 case RISCV::ADDI: { 267 // Offset is simply an immediate operand. 268 int64_t Offset = Tail.getOperand(2).getImm(); 269 270 // We might have two ADDIs in a row. 271 Register TailDestReg = Tail.getOperand(0).getReg(); 272 if (MRI->hasOneUse(TailDestReg)) { 273 MachineInstr &TailTail = *MRI->use_instr_begin(TailDestReg); 274 if (TailTail.getOpcode() == RISCV::ADDI) { 275 Offset += TailTail.getOperand(2).getImm(); 276 LLVM_DEBUG(dbgs() << " Offset Instrs: " << Tail << TailTail); 277 DeadInstrs.insert(&Tail); 278 foldOffset(HiLUI, LoADDI, TailTail, Offset); 279 return true; 280 } 281 } 282 283 LLVM_DEBUG(dbgs() << " Offset Instr: " << Tail); 284 foldOffset(HiLUI, LoADDI, Tail, Offset); 285 return true; 286 } 287 case RISCV::ADD: { 288 // The offset is too large to fit in the immediate field of ADDI. 289 // This can be in two forms: 290 // 1) LUI hi_Offset followed by: 291 // ADDI lo_offset 292 // This happens in case the offset has non zero bits in 293 // both hi 20 and lo 12 bits. 294 // 2) LUI (offset20) 295 // This happens in case the lower 12 bits of the offset are zeros. 296 int64_t Offset; 297 if (!matchLargeOffset(Tail, DestReg, Offset)) 298 return false; 299 foldOffset(HiLUI, LoADDI, Tail, Offset); 300 return true; 301 } 302 case RISCV::SH1ADD: 303 case RISCV::SH2ADD: 304 case RISCV::SH3ADD: { 305 // The offset is too large to fit in the immediate field of ADDI. 306 // It may be encoded as (SH2ADD (ADDI X0, C), DestReg) or 307 // (SH3ADD (ADDI X0, C), DestReg). 308 int64_t Offset; 309 if (!matchShiftedOffset(Tail, DestReg, Offset)) 310 return false; 311 foldOffset(HiLUI, LoADDI, Tail, Offset); 312 return true; 313 } 314 } 315 } 316 317 // We didn't find an arithmetic instruction. If all the uses are memory ops 318 // with the same offset, we can transform 319 // HiLUI: lui vreg1, %hi(foo) ---> lui vreg1, %hi(foo+8) 320 // LoADDI: addi vreg2, vreg1, %lo(foo) ---> lw vreg3, lo(foo+8)(vreg1) 321 // Tail: lw vreg3, 8(vreg2) 322 323 Optional<int64_t> CommonOffset; 324 for (const MachineInstr &UseMI : MRI->use_instructions(DestReg)) { 325 switch (UseMI.getOpcode()) { 326 default: 327 LLVM_DEBUG(dbgs() << "Not a load or store instruction: " << UseMI); 328 return false; 329 case RISCV::LB: 330 case RISCV::LH: 331 case RISCV::LW: 332 case RISCV::LBU: 333 case RISCV::LHU: 334 case RISCV::LWU: 335 case RISCV::LD: 336 case RISCV::FLH: 337 case RISCV::FLW: 338 case RISCV::FLD: 339 case RISCV::SB: 340 case RISCV::SH: 341 case RISCV::SW: 342 case RISCV::SD: 343 case RISCV::FSH: 344 case RISCV::FSW: 345 case RISCV::FSD: { 346 if (UseMI.getOperand(1).isFI()) 347 return false; 348 // Register defined by LoADDI should not be the value register. 349 if (DestReg == UseMI.getOperand(0).getReg()) 350 return false; 351 assert(DestReg == UseMI.getOperand(1).getReg() && 352 "Expected base address use"); 353 // All load/store instructions must use the same offset. 354 int64_t Offset = UseMI.getOperand(2).getImm(); 355 if (CommonOffset && Offset != CommonOffset) 356 return false; 357 CommonOffset = Offset; 358 } 359 } 360 } 361 362 // We found a common offset. 363 // Update the offsets in global address lowering. 364 HiLUI.getOperand(1).setOffset(*CommonOffset); 365 MachineOperand &ImmOp = LoADDI.getOperand(2); 366 ImmOp.setOffset(*CommonOffset); 367 368 // Update the immediate in the load/store instructions to add the offset. 369 for (MachineInstr &UseMI : 370 llvm::make_early_inc_range(MRI->use_instructions(DestReg))) { 371 UseMI.removeOperand(2); 372 UseMI.addOperand(ImmOp); 373 // Update the base reg in the Tail instruction to feed from LUI. 374 // Output of HiLUI is only used in LoADDI, no need to use 375 // MRI->replaceRegWith(). 376 UseMI.getOperand(1).setReg(HiLUI.getOperand(0).getReg()); 377 } 378 379 DeadInstrs.insert(&LoADDI); 380 return true; 381 } 382 383 bool RISCVMergeBaseOffsetOpt::runOnMachineFunction(MachineFunction &Fn) { 384 if (skipFunction(Fn.getFunction())) 385 return false; 386 387 ST = &Fn.getSubtarget<RISCVSubtarget>(); 388 389 bool MadeChange = false; 390 DeadInstrs.clear(); 391 MRI = &Fn.getRegInfo(); 392 for (MachineBasicBlock &MBB : Fn) { 393 LLVM_DEBUG(dbgs() << "MBB: " << MBB.getName() << "\n"); 394 for (MachineInstr &HiLUI : MBB) { 395 MachineInstr *LoADDI = nullptr; 396 if (!detectLuiAddiGlobal(HiLUI, LoADDI)) 397 continue; 398 LLVM_DEBUG(dbgs() << " Found lowered global address: " 399 << *LoADDI->getOperand(2).getGlobal() << "\n"); 400 MadeChange |= detectAndFoldOffset(HiLUI, *LoADDI); 401 } 402 } 403 // Delete dead instructions. 404 for (auto *MI : DeadInstrs) 405 MI->eraseFromParent(); 406 return MadeChange; 407 } 408 409 /// Returns an instance of the Merge Base Offset Optimization pass. 410 FunctionPass *llvm::createRISCVMergeBaseOffsetOptPass() { 411 return new RISCVMergeBaseOffsetOpt(); 412 } 413