1 //===--------- PPCPreEmitPeephole.cpp - Late peephole optimizations -------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // A pre-emit peephole for catching opportunities introduced by late passes such 10 // as MachineBlockPlacement. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "PPC.h" 15 #include "PPCInstrInfo.h" 16 #include "PPCSubtarget.h" 17 #include "llvm/ADT/DenseMap.h" 18 #include "llvm/ADT/Statistic.h" 19 #include "llvm/CodeGen/LivePhysRegs.h" 20 #include "llvm/CodeGen/MachineBasicBlock.h" 21 #include "llvm/CodeGen/MachineFunctionPass.h" 22 #include "llvm/CodeGen/MachineInstrBuilder.h" 23 #include "llvm/CodeGen/MachineRegisterInfo.h" 24 #include "llvm/Support/CommandLine.h" 25 #include "llvm/ADT/Statistic.h" 26 #include "llvm/Support/Debug.h" 27 28 using namespace llvm; 29 30 #define DEBUG_TYPE "ppc-pre-emit-peephole" 31 32 STATISTIC(NumRRConvertedInPreEmit, 33 "Number of r+r instructions converted to r+i in pre-emit peephole"); 34 STATISTIC(NumRemovedInPreEmit, 35 "Number of instructions deleted in pre-emit peephole"); 36 STATISTIC(NumberOfSelfCopies, 37 "Number of self copy instructions eliminated"); 38 STATISTIC(NumFrameOffFoldInPreEmit, 39 "Number of folding frame offset by using r+r in pre-emit peephole"); 40 41 static cl::opt<bool> 42 RunPreEmitPeephole("ppc-late-peephole", cl::Hidden, cl::init(true), 43 cl::desc("Run pre-emit peephole optimizations.")); 44 45 namespace { 46 class PPCPreEmitPeephole : public MachineFunctionPass { 47 public: 48 static char ID; 49 PPCPreEmitPeephole() : MachineFunctionPass(ID) { 50 initializePPCPreEmitPeepholePass(*PassRegistry::getPassRegistry()); 51 } 52 53 void getAnalysisUsage(AnalysisUsage &AU) const override { 54 MachineFunctionPass::getAnalysisUsage(AU); 55 } 56 57 MachineFunctionProperties getRequiredProperties() const override { 58 return MachineFunctionProperties().set( 59 MachineFunctionProperties::Property::NoVRegs); 60 } 61 62 // This function removes any redundant load immediates. It has two level 63 // loops - The outer loop finds the load immediates BBI that could be used 64 // to replace following redundancy. The inner loop scans instructions that 65 // after BBI to find redundancy and update kill/dead flags accordingly. If 66 // AfterBBI is the same as BBI, it is redundant, otherwise any instructions 67 // that modify the def register of BBI would break the scanning. 68 // DeadOrKillToUnset is a pointer to the previous operand that had the 69 // kill/dead flag set. It keeps track of the def register of BBI, the use 70 // registers of AfterBBIs and the def registers of AfterBBIs. 71 bool removeRedundantLIs(MachineBasicBlock &MBB, 72 const TargetRegisterInfo *TRI) { 73 LLVM_DEBUG(dbgs() << "Remove redundant load immediates from MBB:\n"; 74 MBB.dump(); dbgs() << "\n"); 75 76 DenseSet<MachineInstr *> InstrsToErase; 77 for (auto BBI = MBB.instr_begin(); BBI != MBB.instr_end(); ++BBI) { 78 // Skip load immediate that is marked to be erased later because it 79 // cannot be used to replace any other instructions. 80 if (InstrsToErase.find(&*BBI) != InstrsToErase.end()) 81 continue; 82 // Skip non-load immediate. 83 unsigned Opc = BBI->getOpcode(); 84 if (Opc != PPC::LI && Opc != PPC::LI8 && Opc != PPC::LIS && 85 Opc != PPC::LIS8) 86 continue; 87 // Skip load immediate, where the operand is a relocation (e.g., $r3 = 88 // LI target-flags(ppc-lo) %const.0). 89 if (!BBI->getOperand(1).isImm()) 90 continue; 91 assert(BBI->getOperand(0).isReg() && 92 "Expected a register for the first operand"); 93 94 LLVM_DEBUG(dbgs() << "Scanning after load immediate: "; BBI->dump();); 95 96 Register Reg = BBI->getOperand(0).getReg(); 97 int64_t Imm = BBI->getOperand(1).getImm(); 98 MachineOperand *DeadOrKillToUnset = nullptr; 99 if (BBI->getOperand(0).isDead()) { 100 DeadOrKillToUnset = &BBI->getOperand(0); 101 LLVM_DEBUG(dbgs() << " Kill flag of " << *DeadOrKillToUnset 102 << " from load immediate " << *BBI 103 << " is a unsetting candidate\n"); 104 } 105 // This loop scans instructions after BBI to see if there is any 106 // redundant load immediate. 107 for (auto AfterBBI = std::next(BBI); AfterBBI != MBB.instr_end(); 108 ++AfterBBI) { 109 // Track the operand that kill Reg. We would unset the kill flag of 110 // the operand if there is a following redundant load immediate. 111 int KillIdx = AfterBBI->findRegisterUseOperandIdx(Reg, true, TRI); 112 113 // We can't just clear implicit kills, so if we encounter one, stop 114 // looking further. 115 if (KillIdx != -1 && AfterBBI->getOperand(KillIdx).isImplicit()) { 116 LLVM_DEBUG(dbgs() 117 << "Encountered an implicit kill, cannot proceed: "); 118 LLVM_DEBUG(AfterBBI->dump()); 119 break; 120 } 121 122 if (KillIdx != -1) { 123 assert(!DeadOrKillToUnset && "Shouldn't kill same register twice"); 124 DeadOrKillToUnset = &AfterBBI->getOperand(KillIdx); 125 LLVM_DEBUG(dbgs() 126 << " Kill flag of " << *DeadOrKillToUnset << " from " 127 << *AfterBBI << " is a unsetting candidate\n"); 128 } 129 130 if (!AfterBBI->modifiesRegister(Reg, TRI)) 131 continue; 132 // Finish scanning because Reg is overwritten by a non-load 133 // instruction. 134 if (AfterBBI->getOpcode() != Opc) 135 break; 136 assert(AfterBBI->getOperand(0).isReg() && 137 "Expected a register for the first operand"); 138 // Finish scanning because Reg is overwritten by a relocation or a 139 // different value. 140 if (!AfterBBI->getOperand(1).isImm() || 141 AfterBBI->getOperand(1).getImm() != Imm) 142 break; 143 144 // It loads same immediate value to the same Reg, which is redundant. 145 // We would unset kill flag in previous Reg usage to extend live range 146 // of Reg first, then remove the redundancy. 147 if (DeadOrKillToUnset) { 148 LLVM_DEBUG(dbgs() 149 << " Unset dead/kill flag of " << *DeadOrKillToUnset 150 << " from " << *DeadOrKillToUnset->getParent()); 151 if (DeadOrKillToUnset->isDef()) 152 DeadOrKillToUnset->setIsDead(false); 153 else 154 DeadOrKillToUnset->setIsKill(false); 155 } 156 DeadOrKillToUnset = 157 AfterBBI->findRegisterDefOperand(Reg, true, true, TRI); 158 if (DeadOrKillToUnset) 159 LLVM_DEBUG(dbgs() 160 << " Dead flag of " << *DeadOrKillToUnset << " from " 161 << *AfterBBI << " is a unsetting candidate\n"); 162 InstrsToErase.insert(&*AfterBBI); 163 LLVM_DEBUG(dbgs() << " Remove redundant load immediate: "; 164 AfterBBI->dump()); 165 } 166 } 167 168 for (MachineInstr *MI : InstrsToErase) { 169 MI->eraseFromParent(); 170 } 171 NumRemovedInPreEmit += InstrsToErase.size(); 172 return !InstrsToErase.empty(); 173 } 174 175 bool runOnMachineFunction(MachineFunction &MF) override { 176 if (skipFunction(MF.getFunction()) || !RunPreEmitPeephole) { 177 // Remove UNENCODED_NOP even when this pass is disabled. 178 // This needs to be done unconditionally so we don't emit zeros 179 // in the instruction stream. 180 SmallVector<MachineInstr *, 4> InstrsToErase; 181 for (MachineBasicBlock &MBB : MF) 182 for (MachineInstr &MI : MBB) 183 if (MI.getOpcode() == PPC::UNENCODED_NOP) 184 InstrsToErase.push_back(&MI); 185 for (MachineInstr *MI : InstrsToErase) 186 MI->eraseFromParent(); 187 return false; 188 } 189 bool Changed = false; 190 const PPCInstrInfo *TII = MF.getSubtarget<PPCSubtarget>().getInstrInfo(); 191 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); 192 SmallVector<MachineInstr *, 4> InstrsToErase; 193 for (MachineBasicBlock &MBB : MF) { 194 Changed |= removeRedundantLIs(MBB, TRI); 195 for (MachineInstr &MI : MBB) { 196 unsigned Opc = MI.getOpcode(); 197 if (Opc == PPC::UNENCODED_NOP) { 198 InstrsToErase.push_back(&MI); 199 continue; 200 } 201 // Detect self copies - these can result from running AADB. 202 if (PPCInstrInfo::isSameClassPhysRegCopy(Opc)) { 203 const MCInstrDesc &MCID = TII->get(Opc); 204 if (MCID.getNumOperands() == 3 && 205 MI.getOperand(0).getReg() == MI.getOperand(1).getReg() && 206 MI.getOperand(0).getReg() == MI.getOperand(2).getReg()) { 207 NumberOfSelfCopies++; 208 LLVM_DEBUG(dbgs() << "Deleting self-copy instruction: "); 209 LLVM_DEBUG(MI.dump()); 210 InstrsToErase.push_back(&MI); 211 continue; 212 } 213 else if (MCID.getNumOperands() == 2 && 214 MI.getOperand(0).getReg() == MI.getOperand(1).getReg()) { 215 NumberOfSelfCopies++; 216 LLVM_DEBUG(dbgs() << "Deleting self-copy instruction: "); 217 LLVM_DEBUG(MI.dump()); 218 InstrsToErase.push_back(&MI); 219 continue; 220 } 221 } 222 MachineInstr *DefMIToErase = nullptr; 223 if (TII->convertToImmediateForm(MI, &DefMIToErase)) { 224 Changed = true; 225 NumRRConvertedInPreEmit++; 226 LLVM_DEBUG(dbgs() << "Converted instruction to imm form: "); 227 LLVM_DEBUG(MI.dump()); 228 if (DefMIToErase) { 229 InstrsToErase.push_back(DefMIToErase); 230 } 231 } 232 if (TII->foldFrameOffset(MI)) { 233 Changed = true; 234 NumFrameOffFoldInPreEmit++; 235 LLVM_DEBUG(dbgs() << "Frame offset folding by using index form: "); 236 LLVM_DEBUG(MI.dump()); 237 } 238 } 239 240 // Eliminate conditional branch based on a constant CR bit by 241 // CRSET or CRUNSET. We eliminate the conditional branch or 242 // convert it into an unconditional branch. Also, if the CR bit 243 // is not used by other instructions, we eliminate CRSET as well. 244 auto I = MBB.getFirstInstrTerminator(); 245 if (I == MBB.instr_end()) 246 continue; 247 MachineInstr *Br = &*I; 248 if (Br->getOpcode() != PPC::BC && Br->getOpcode() != PPC::BCn) 249 continue; 250 MachineInstr *CRSetMI = nullptr; 251 Register CRBit = Br->getOperand(0).getReg(); 252 unsigned CRReg = getCRFromCRBit(CRBit); 253 bool SeenUse = false; 254 MachineBasicBlock::reverse_iterator It = Br, Er = MBB.rend(); 255 for (It++; It != Er; It++) { 256 if (It->modifiesRegister(CRBit, TRI)) { 257 if ((It->getOpcode() == PPC::CRUNSET || 258 It->getOpcode() == PPC::CRSET) && 259 It->getOperand(0).getReg() == CRBit) 260 CRSetMI = &*It; 261 break; 262 } 263 if (It->readsRegister(CRBit, TRI)) 264 SeenUse = true; 265 } 266 if (!CRSetMI) continue; 267 268 unsigned CRSetOp = CRSetMI->getOpcode(); 269 if ((Br->getOpcode() == PPC::BCn && CRSetOp == PPC::CRSET) || 270 (Br->getOpcode() == PPC::BC && CRSetOp == PPC::CRUNSET)) { 271 // Remove this branch since it cannot be taken. 272 InstrsToErase.push_back(Br); 273 MBB.removeSuccessor(Br->getOperand(1).getMBB()); 274 } 275 else { 276 // This conditional branch is always taken. So, remove all branches 277 // and insert an unconditional branch to the destination of this. 278 MachineBasicBlock::iterator It = Br, Er = MBB.end(); 279 for (; It != Er; It++) { 280 if (It->isDebugInstr()) continue; 281 assert(It->isTerminator() && "Non-terminator after a terminator"); 282 InstrsToErase.push_back(&*It); 283 } 284 if (!MBB.isLayoutSuccessor(Br->getOperand(1).getMBB())) { 285 ArrayRef<MachineOperand> NoCond; 286 TII->insertBranch(MBB, Br->getOperand(1).getMBB(), nullptr, 287 NoCond, Br->getDebugLoc()); 288 } 289 for (auto &Succ : MBB.successors()) 290 if (Succ != Br->getOperand(1).getMBB()) { 291 MBB.removeSuccessor(Succ); 292 break; 293 } 294 } 295 296 // If the CRBit is not used by another instruction, we can eliminate 297 // CRSET/CRUNSET instruction. 298 if (!SeenUse) { 299 // We need to check use of the CRBit in successors. 300 for (auto &SuccMBB : MBB.successors()) 301 if (SuccMBB->isLiveIn(CRBit) || SuccMBB->isLiveIn(CRReg)) { 302 SeenUse = true; 303 break; 304 } 305 if (!SeenUse) 306 InstrsToErase.push_back(CRSetMI); 307 } 308 } 309 for (MachineInstr *MI : InstrsToErase) { 310 LLVM_DEBUG(dbgs() << "PPC pre-emit peephole: erasing instruction: "); 311 LLVM_DEBUG(MI->dump()); 312 MI->eraseFromParent(); 313 NumRemovedInPreEmit++; 314 } 315 return Changed; 316 } 317 }; 318 } 319 320 INITIALIZE_PASS(PPCPreEmitPeephole, DEBUG_TYPE, "PowerPC Pre-Emit Peephole", 321 false, false) 322 char PPCPreEmitPeephole::ID = 0; 323 324 FunctionPass *llvm::createPPCPreEmitPeepholePass() { 325 return new PPCPreEmitPeephole(); 326 } 327