1 //===-------------- MIRCanonicalizer.cpp - MIR Canonicalizer --------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // The purpose of this pass is to employ a canonical code transformation so 10 // that code compiled with slightly different IR passes can be diffed more 11 // effectively than otherwise. This is done by renaming vregs in a given 12 // LiveRange in a canonical way. This pass also does a pseudo-scheduling to 13 // move defs closer to their use inorder to reduce diffs caused by slightly 14 // different schedules. 15 // 16 // Basic Usage: 17 // 18 // llc -o - -run-pass mir-canonicalizer example.mir 19 // 20 // Reorders instructions canonically. 21 // Renames virtual register operands canonically. 22 // Strips certain MIR artifacts (optionally). 23 // 24 //===----------------------------------------------------------------------===// 25 26 #include "MIRVRegNamerUtils.h" 27 #include "llvm/ADT/PostOrderIterator.h" 28 #include "llvm/ADT/STLExtras.h" 29 #include "llvm/CodeGen/MachineFunctionPass.h" 30 #include "llvm/CodeGen/MachineInstrBuilder.h" 31 #include "llvm/CodeGen/MachineRegisterInfo.h" 32 #include "llvm/CodeGen/Passes.h" 33 #include "llvm/InitializePasses.h" 34 #include "llvm/Support/Debug.h" 35 #include "llvm/Support/raw_ostream.h" 36 37 #include <queue> 38 39 using namespace llvm; 40 41 #define DEBUG_TYPE "mir-canonicalizer" 42 43 static cl::opt<unsigned> 44 CanonicalizeFunctionNumber("canon-nth-function", cl::Hidden, cl::init(~0u), 45 cl::value_desc("N"), 46 cl::desc("Function number to canonicalize.")); 47 48 namespace { 49 50 class MIRCanonicalizer : public MachineFunctionPass { 51 public: 52 static char ID; 53 MIRCanonicalizer() : MachineFunctionPass(ID) {} 54 55 StringRef getPassName() const override { 56 return "Rename register operands in a canonical ordering."; 57 } 58 59 void getAnalysisUsage(AnalysisUsage &AU) const override { 60 AU.setPreservesCFG(); 61 MachineFunctionPass::getAnalysisUsage(AU); 62 } 63 64 bool runOnMachineFunction(MachineFunction &MF) override; 65 }; 66 67 } // end anonymous namespace 68 69 char MIRCanonicalizer::ID; 70 71 char &llvm::MIRCanonicalizerID = MIRCanonicalizer::ID; 72 73 INITIALIZE_PASS_BEGIN(MIRCanonicalizer, "mir-canonicalizer", 74 "Rename Register Operands Canonically", false, false) 75 76 INITIALIZE_PASS_END(MIRCanonicalizer, "mir-canonicalizer", 77 "Rename Register Operands Canonically", false, false) 78 79 static std::vector<MachineBasicBlock *> GetRPOList(MachineFunction &MF) { 80 if (MF.empty()) 81 return {}; 82 ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin()); 83 std::vector<MachineBasicBlock *> RPOList; 84 append_range(RPOList, RPOT); 85 86 return RPOList; 87 } 88 89 static bool 90 rescheduleLexographically(std::vector<MachineInstr *> instructions, 91 MachineBasicBlock *MBB, 92 std::function<MachineBasicBlock::iterator()> getPos) { 93 94 bool Changed = false; 95 using StringInstrPair = std::pair<std::string, MachineInstr *>; 96 std::vector<StringInstrPair> StringInstrMap; 97 98 for (auto *II : instructions) { 99 std::string S; 100 raw_string_ostream OS(S); 101 II->print(OS); 102 OS.flush(); 103 104 // Trim the assignment, or start from the beginning in the case of a store. 105 const size_t i = S.find('='); 106 StringInstrMap.push_back({(i == std::string::npos) ? S : S.substr(i), II}); 107 } 108 109 llvm::sort(StringInstrMap, 110 [](const StringInstrPair &a, const StringInstrPair &b) -> bool { 111 return (a.first < b.first); 112 }); 113 114 for (auto &II : StringInstrMap) { 115 116 LLVM_DEBUG({ 117 dbgs() << "Splicing "; 118 II.second->dump(); 119 dbgs() << " right before: "; 120 getPos()->dump(); 121 }); 122 123 Changed = true; 124 MBB->splice(getPos(), MBB, II.second); 125 } 126 127 return Changed; 128 } 129 130 static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount, 131 MachineBasicBlock *MBB) { 132 133 bool Changed = false; 134 135 // Calculates the distance of MI from the beginning of its parent BB. 136 auto getInstrIdx = [](const MachineInstr &MI) { 137 unsigned i = 0; 138 for (auto &CurMI : *MI.getParent()) { 139 if (&CurMI == &MI) 140 return i; 141 i++; 142 } 143 return ~0U; 144 }; 145 146 // Pre-Populate vector of instructions to reschedule so that we don't 147 // clobber the iterator. 148 std::vector<MachineInstr *> Instructions; 149 for (auto &MI : *MBB) { 150 Instructions.push_back(&MI); 151 } 152 153 std::map<MachineInstr *, std::vector<MachineInstr *>> MultiUsers; 154 std::map<unsigned, MachineInstr *> MultiUserLookup; 155 unsigned UseToBringDefCloserToCount = 0; 156 std::vector<MachineInstr *> PseudoIdempotentInstructions; 157 std::vector<unsigned> PhysRegDefs; 158 for (auto *II : Instructions) { 159 for (unsigned i = 1; i < II->getNumOperands(); i++) { 160 MachineOperand &MO = II->getOperand(i); 161 if (!MO.isReg()) 162 continue; 163 164 if (Register::isVirtualRegister(MO.getReg())) 165 continue; 166 167 if (!MO.isDef()) 168 continue; 169 170 PhysRegDefs.push_back(MO.getReg()); 171 } 172 } 173 174 for (auto *II : Instructions) { 175 if (II->getNumOperands() == 0) 176 continue; 177 if (II->mayLoadOrStore()) 178 continue; 179 180 MachineOperand &MO = II->getOperand(0); 181 if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg())) 182 continue; 183 if (!MO.isDef()) 184 continue; 185 186 bool IsPseudoIdempotent = true; 187 for (unsigned i = 1; i < II->getNumOperands(); i++) { 188 189 if (II->getOperand(i).isImm()) { 190 continue; 191 } 192 193 if (II->getOperand(i).isReg()) { 194 if (!Register::isVirtualRegister(II->getOperand(i).getReg())) 195 if (!llvm::is_contained(PhysRegDefs, II->getOperand(i).getReg())) { 196 continue; 197 } 198 } 199 200 IsPseudoIdempotent = false; 201 break; 202 } 203 204 if (IsPseudoIdempotent) { 205 PseudoIdempotentInstructions.push_back(II); 206 continue; 207 } 208 209 LLVM_DEBUG(dbgs() << "Operand " << 0 << " of "; II->dump(); MO.dump();); 210 211 MachineInstr *Def = II; 212 unsigned Distance = ~0U; 213 MachineInstr *UseToBringDefCloserTo = nullptr; 214 MachineRegisterInfo *MRI = &MBB->getParent()->getRegInfo(); 215 for (auto &UO : MRI->use_nodbg_operands(MO.getReg())) { 216 MachineInstr *UseInst = UO.getParent(); 217 218 const unsigned DefLoc = getInstrIdx(*Def); 219 const unsigned UseLoc = getInstrIdx(*UseInst); 220 const unsigned Delta = (UseLoc - DefLoc); 221 222 if (UseInst->getParent() != Def->getParent()) 223 continue; 224 if (DefLoc >= UseLoc) 225 continue; 226 227 if (Delta < Distance) { 228 Distance = Delta; 229 UseToBringDefCloserTo = UseInst; 230 MultiUserLookup[UseToBringDefCloserToCount++] = UseToBringDefCloserTo; 231 } 232 } 233 234 const auto BBE = MBB->instr_end(); 235 MachineBasicBlock::iterator DefI = BBE; 236 MachineBasicBlock::iterator UseI = BBE; 237 238 for (auto BBI = MBB->instr_begin(); BBI != BBE; ++BBI) { 239 240 if (DefI != BBE && UseI != BBE) 241 break; 242 243 if (&*BBI == Def) { 244 DefI = BBI; 245 continue; 246 } 247 248 if (&*BBI == UseToBringDefCloserTo) { 249 UseI = BBI; 250 continue; 251 } 252 } 253 254 if (DefI == BBE || UseI == BBE) 255 continue; 256 257 LLVM_DEBUG({ 258 dbgs() << "Splicing "; 259 DefI->dump(); 260 dbgs() << " right before: "; 261 UseI->dump(); 262 }); 263 264 MultiUsers[UseToBringDefCloserTo].push_back(Def); 265 Changed = true; 266 MBB->splice(UseI, MBB, DefI); 267 } 268 269 // Sort the defs for users of multiple defs lexographically. 270 for (const auto &E : MultiUserLookup) { 271 272 auto UseI = llvm::find_if(MBB->instrs(), [&](MachineInstr &MI) -> bool { 273 return &MI == E.second; 274 }); 275 276 if (UseI == MBB->instr_end()) 277 continue; 278 279 LLVM_DEBUG( 280 dbgs() << "Rescheduling Multi-Use Instructions Lexographically.";); 281 Changed |= rescheduleLexographically( 282 MultiUsers[E.second], MBB, 283 [&]() -> MachineBasicBlock::iterator { return UseI; }); 284 } 285 286 PseudoIdempotentInstCount = PseudoIdempotentInstructions.size(); 287 LLVM_DEBUG( 288 dbgs() << "Rescheduling Idempotent Instructions Lexographically.";); 289 Changed |= rescheduleLexographically( 290 PseudoIdempotentInstructions, MBB, 291 [&]() -> MachineBasicBlock::iterator { return MBB->begin(); }); 292 293 return Changed; 294 } 295 296 static bool propagateLocalCopies(MachineBasicBlock *MBB) { 297 bool Changed = false; 298 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); 299 300 std::vector<MachineInstr *> Copies; 301 for (MachineInstr &MI : MBB->instrs()) { 302 if (MI.isCopy()) 303 Copies.push_back(&MI); 304 } 305 306 for (MachineInstr *MI : Copies) { 307 308 if (!MI->getOperand(0).isReg()) 309 continue; 310 if (!MI->getOperand(1).isReg()) 311 continue; 312 313 const Register Dst = MI->getOperand(0).getReg(); 314 const Register Src = MI->getOperand(1).getReg(); 315 316 if (!Register::isVirtualRegister(Dst)) 317 continue; 318 if (!Register::isVirtualRegister(Src)) 319 continue; 320 // Not folding COPY instructions if regbankselect has not set the RCs. 321 // Why are we only considering Register Classes? Because the verifier 322 // sometimes gets upset if the register classes don't match even if the 323 // types do. A future patch might add COPY folding for matching types in 324 // pre-registerbankselect code. 325 if (!MRI.getRegClassOrNull(Dst)) 326 continue; 327 if (MRI.getRegClass(Dst) != MRI.getRegClass(Src)) 328 continue; 329 330 std::vector<MachineOperand *> Uses; 331 for (MachineOperand &MO : MRI.use_operands(Dst)) 332 Uses.push_back(&MO); 333 for (auto *MO : Uses) 334 MO->setReg(Src); 335 336 Changed = true; 337 MI->eraseFromParent(); 338 } 339 340 return Changed; 341 } 342 343 static bool doDefKillClear(MachineBasicBlock *MBB) { 344 bool Changed = false; 345 346 for (auto &MI : *MBB) { 347 for (auto &MO : MI.operands()) { 348 if (!MO.isReg()) 349 continue; 350 if (!MO.isDef() && MO.isKill()) { 351 Changed = true; 352 MO.setIsKill(false); 353 } 354 355 if (MO.isDef() && MO.isDead()) { 356 Changed = true; 357 MO.setIsDead(false); 358 } 359 } 360 } 361 362 return Changed; 363 } 364 365 static bool runOnBasicBlock(MachineBasicBlock *MBB, 366 unsigned BasicBlockNum, VRegRenamer &Renamer) { 367 LLVM_DEBUG({ 368 dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << " \n\n"; 369 dbgs() << "\n\n================================================\n\n"; 370 }); 371 372 bool Changed = false; 373 374 LLVM_DEBUG(dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << "\n\n";); 375 376 LLVM_DEBUG(dbgs() << "MBB Before Canonical Copy Propagation:\n"; 377 MBB->dump();); 378 Changed |= propagateLocalCopies(MBB); 379 LLVM_DEBUG(dbgs() << "MBB After Canonical Copy Propagation:\n"; MBB->dump();); 380 381 LLVM_DEBUG(dbgs() << "MBB Before Scheduling:\n"; MBB->dump();); 382 unsigned IdempotentInstCount = 0; 383 Changed |= rescheduleCanonically(IdempotentInstCount, MBB); 384 LLVM_DEBUG(dbgs() << "MBB After Scheduling:\n"; MBB->dump();); 385 386 Changed |= Renamer.renameVRegs(MBB, BasicBlockNum); 387 388 // TODO: Consider dropping this. Dropping kill defs is probably not 389 // semantically sound. 390 Changed |= doDefKillClear(MBB); 391 392 LLVM_DEBUG(dbgs() << "Updated MachineBasicBlock:\n"; MBB->dump(); 393 dbgs() << "\n";); 394 LLVM_DEBUG( 395 dbgs() << "\n\n================================================\n\n"); 396 return Changed; 397 } 398 399 bool MIRCanonicalizer::runOnMachineFunction(MachineFunction &MF) { 400 401 static unsigned functionNum = 0; 402 if (CanonicalizeFunctionNumber != ~0U) { 403 if (CanonicalizeFunctionNumber != functionNum++) 404 return false; 405 LLVM_DEBUG(dbgs() << "\n Canonicalizing Function " << MF.getName() 406 << "\n";); 407 } 408 409 // we need a valid vreg to create a vreg type for skipping all those 410 // stray vreg numbers so reach alignment/canonical vreg values. 411 std::vector<MachineBasicBlock *> RPOList = GetRPOList(MF); 412 413 LLVM_DEBUG( 414 dbgs() << "\n\n NEW MACHINE FUNCTION: " << MF.getName() << " \n\n"; 415 dbgs() << "\n\n================================================\n\n"; 416 dbgs() << "Total Basic Blocks: " << RPOList.size() << "\n"; 417 for (auto MBB 418 : RPOList) { dbgs() << MBB->getName() << "\n"; } dbgs() 419 << "\n\n================================================\n\n";); 420 421 unsigned BBNum = 0; 422 bool Changed = false; 423 MachineRegisterInfo &MRI = MF.getRegInfo(); 424 VRegRenamer Renamer(MRI); 425 for (auto MBB : RPOList) 426 Changed |= runOnBasicBlock(MBB, BBNum++, Renamer); 427 428 return Changed; 429 } 430