//===-------------- MIRCanonicalizer.cpp - MIR Canonicalizer --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// The purpose of this pass is to employ a canonical code transformation so
// that code compiled with slightly different IR passes can be diffed more
// effectively than otherwise. This is done by renaming vregs in a given
// LiveRange in a canonical way. This pass also does a pseudo-scheduling to
// move defs closer to their use in order to reduce diffs caused by slightly
// different schedules.
//
// Basic Usage:
//
// llc -o - -run-pass mir-canonicalizer example.mir
//
// Reorders instructions canonically.
// Renames virtual register operands canonically.
// Strips certain MIR artifacts (optionally).
23 // 24 //===----------------------------------------------------------------------===// 25 26 #include "MIRVRegNamerUtils.h" 27 #include "llvm/ADT/PostOrderIterator.h" 28 #include "llvm/ADT/STLExtras.h" 29 #include "llvm/CodeGen/MachineFunctionPass.h" 30 #include "llvm/CodeGen/MachineInstrBuilder.h" 31 #include "llvm/CodeGen/MachineRegisterInfo.h" 32 #include "llvm/CodeGen/Passes.h" 33 #include "llvm/Support/Debug.h" 34 #include "llvm/Support/raw_ostream.h" 35 36 #include <queue> 37 38 using namespace llvm; 39 40 namespace llvm { 41 extern char &MIRCanonicalizerID; 42 } // namespace llvm 43 44 #define DEBUG_TYPE "mir-canonicalizer" 45 46 static cl::opt<unsigned> 47 CanonicalizeFunctionNumber("canon-nth-function", cl::Hidden, cl::init(~0u), 48 cl::value_desc("N"), 49 cl::desc("Function number to canonicalize.")); 50 51 static cl::opt<unsigned> CanonicalizeBasicBlockNumber( 52 "canon-nth-basicblock", cl::Hidden, cl::init(~0u), cl::value_desc("N"), 53 cl::desc("BasicBlock number to canonicalize.")); 54 55 namespace { 56 57 class MIRCanonicalizer : public MachineFunctionPass { 58 public: 59 static char ID; 60 MIRCanonicalizer() : MachineFunctionPass(ID) {} 61 62 StringRef getPassName() const override { 63 return "Rename register operands in a canonical ordering."; 64 } 65 66 void getAnalysisUsage(AnalysisUsage &AU) const override { 67 AU.setPreservesCFG(); 68 MachineFunctionPass::getAnalysisUsage(AU); 69 } 70 71 bool runOnMachineFunction(MachineFunction &MF) override; 72 }; 73 74 } // end anonymous namespace 75 76 char MIRCanonicalizer::ID; 77 78 char &llvm::MIRCanonicalizerID = MIRCanonicalizer::ID; 79 80 INITIALIZE_PASS_BEGIN(MIRCanonicalizer, "mir-canonicalizer", 81 "Rename Register Operands Canonically", false, false) 82 83 INITIALIZE_PASS_END(MIRCanonicalizer, "mir-canonicalizer", 84 "Rename Register Operands Canonically", false, false) 85 86 static std::vector<MachineBasicBlock *> GetRPOList(MachineFunction &MF) { 87 if (MF.empty()) 88 return {}; 89 
ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin()); 90 std::vector<MachineBasicBlock *> RPOList; 91 for (auto MBB : RPOT) { 92 RPOList.push_back(MBB); 93 } 94 95 return RPOList; 96 } 97 98 static bool 99 rescheduleLexographically(std::vector<MachineInstr *> instructions, 100 MachineBasicBlock *MBB, 101 std::function<MachineBasicBlock::iterator()> getPos) { 102 103 bool Changed = false; 104 using StringInstrPair = std::pair<std::string, MachineInstr *>; 105 std::vector<StringInstrPair> StringInstrMap; 106 107 for (auto *II : instructions) { 108 std::string S; 109 raw_string_ostream OS(S); 110 II->print(OS); 111 OS.flush(); 112 113 // Trim the assignment, or start from the begining in the case of a store. 114 const size_t i = S.find("="); 115 StringInstrMap.push_back({(i == std::string::npos) ? S : S.substr(i), II}); 116 } 117 118 llvm::sort(StringInstrMap, 119 [](const StringInstrPair &a, const StringInstrPair &b) -> bool { 120 return (a.first < b.first); 121 }); 122 123 for (auto &II : StringInstrMap) { 124 125 LLVM_DEBUG({ 126 dbgs() << "Splicing "; 127 II.second->dump(); 128 dbgs() << " right before: "; 129 getPos()->dump(); 130 }); 131 132 Changed = true; 133 MBB->splice(getPos(), MBB, II.second); 134 } 135 136 return Changed; 137 } 138 139 static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount, 140 MachineBasicBlock *MBB) { 141 142 bool Changed = false; 143 144 // Calculates the distance of MI from the begining of its parent BB. 145 auto getInstrIdx = [](const MachineInstr &MI) { 146 unsigned i = 0; 147 for (auto &CurMI : *MI.getParent()) { 148 if (&CurMI == &MI) 149 return i; 150 i++; 151 } 152 return ~0U; 153 }; 154 155 // Pre-Populate vector of instructions to reschedule so that we don't 156 // clobber the iterator. 
157 std::vector<MachineInstr *> Instructions; 158 for (auto &MI : *MBB) { 159 Instructions.push_back(&MI); 160 } 161 162 std::map<MachineInstr *, std::vector<MachineInstr *>> MultiUsers; 163 std::map<unsigned, MachineInstr *> MultiUserLookup; 164 unsigned UseToBringDefCloserToCount = 0; 165 std::vector<MachineInstr *> PseudoIdempotentInstructions; 166 std::vector<unsigned> PhysRegDefs; 167 for (auto *II : Instructions) { 168 for (unsigned i = 1; i < II->getNumOperands(); i++) { 169 MachineOperand &MO = II->getOperand(i); 170 if (!MO.isReg()) 171 continue; 172 173 if (Register::isVirtualRegister(MO.getReg())) 174 continue; 175 176 if (!MO.isDef()) 177 continue; 178 179 PhysRegDefs.push_back(MO.getReg()); 180 } 181 } 182 183 for (auto *II : Instructions) { 184 if (II->getNumOperands() == 0) 185 continue; 186 if (II->mayLoadOrStore()) 187 continue; 188 189 MachineOperand &MO = II->getOperand(0); 190 if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg())) 191 continue; 192 if (!MO.isDef()) 193 continue; 194 195 bool IsPseudoIdempotent = true; 196 for (unsigned i = 1; i < II->getNumOperands(); i++) { 197 198 if (II->getOperand(i).isImm()) { 199 continue; 200 } 201 202 if (II->getOperand(i).isReg()) { 203 if (!Register::isVirtualRegister(II->getOperand(i).getReg())) 204 if (llvm::find(PhysRegDefs, II->getOperand(i).getReg()) == 205 PhysRegDefs.end()) { 206 continue; 207 } 208 } 209 210 IsPseudoIdempotent = false; 211 break; 212 } 213 214 if (IsPseudoIdempotent) { 215 PseudoIdempotentInstructions.push_back(II); 216 continue; 217 } 218 219 LLVM_DEBUG(dbgs() << "Operand " << 0 << " of "; II->dump(); MO.dump();); 220 221 MachineInstr *Def = II; 222 unsigned Distance = ~0U; 223 MachineInstr *UseToBringDefCloserTo = nullptr; 224 MachineRegisterInfo *MRI = &MBB->getParent()->getRegInfo(); 225 for (auto &UO : MRI->use_nodbg_operands(MO.getReg())) { 226 MachineInstr *UseInst = UO.getParent(); 227 228 const unsigned DefLoc = getInstrIdx(*Def); 229 const unsigned UseLoc = 
getInstrIdx(*UseInst); 230 const unsigned Delta = (UseLoc - DefLoc); 231 232 if (UseInst->getParent() != Def->getParent()) 233 continue; 234 if (DefLoc >= UseLoc) 235 continue; 236 237 if (Delta < Distance) { 238 Distance = Delta; 239 UseToBringDefCloserTo = UseInst; 240 MultiUserLookup[UseToBringDefCloserToCount++] = UseToBringDefCloserTo; 241 } 242 } 243 244 const auto BBE = MBB->instr_end(); 245 MachineBasicBlock::iterator DefI = BBE; 246 MachineBasicBlock::iterator UseI = BBE; 247 248 for (auto BBI = MBB->instr_begin(); BBI != BBE; ++BBI) { 249 250 if (DefI != BBE && UseI != BBE) 251 break; 252 253 if (&*BBI == Def) { 254 DefI = BBI; 255 continue; 256 } 257 258 if (&*BBI == UseToBringDefCloserTo) { 259 UseI = BBI; 260 continue; 261 } 262 } 263 264 if (DefI == BBE || UseI == BBE) 265 continue; 266 267 LLVM_DEBUG({ 268 dbgs() << "Splicing "; 269 DefI->dump(); 270 dbgs() << " right before: "; 271 UseI->dump(); 272 }); 273 274 MultiUsers[UseToBringDefCloserTo].push_back(Def); 275 Changed = true; 276 MBB->splice(UseI, MBB, DefI); 277 } 278 279 // Sort the defs for users of multiple defs lexographically. 
280 for (const auto &E : MultiUserLookup) { 281 282 auto UseI = 283 std::find_if(MBB->instr_begin(), MBB->instr_end(), 284 [&](MachineInstr &MI) -> bool { return &MI == E.second; }); 285 286 if (UseI == MBB->instr_end()) 287 continue; 288 289 LLVM_DEBUG( 290 dbgs() << "Rescheduling Multi-Use Instructions Lexographically.";); 291 Changed |= rescheduleLexographically( 292 MultiUsers[E.second], MBB, 293 [&]() -> MachineBasicBlock::iterator { return UseI; }); 294 } 295 296 PseudoIdempotentInstCount = PseudoIdempotentInstructions.size(); 297 LLVM_DEBUG( 298 dbgs() << "Rescheduling Idempotent Instructions Lexographically.";); 299 Changed |= rescheduleLexographically( 300 PseudoIdempotentInstructions, MBB, 301 [&]() -> MachineBasicBlock::iterator { return MBB->begin(); }); 302 303 return Changed; 304 } 305 306 static bool propagateLocalCopies(MachineBasicBlock *MBB) { 307 bool Changed = false; 308 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); 309 310 std::vector<MachineInstr *> Copies; 311 for (MachineInstr &MI : MBB->instrs()) { 312 if (MI.isCopy()) 313 Copies.push_back(&MI); 314 } 315 316 for (MachineInstr *MI : Copies) { 317 318 if (!MI->getOperand(0).isReg()) 319 continue; 320 if (!MI->getOperand(1).isReg()) 321 continue; 322 323 const Register Dst = MI->getOperand(0).getReg(); 324 const Register Src = MI->getOperand(1).getReg(); 325 326 if (!Register::isVirtualRegister(Dst)) 327 continue; 328 if (!Register::isVirtualRegister(Src)) 329 continue; 330 // Not folding COPY instructions if regbankselect has not set the RCs. 331 // Why are we only considering Register Classes? Because the verifier 332 // sometimes gets upset if the register classes don't match even if the 333 // types do. A future patch might add COPY folding for matching types in 334 // pre-registerbankselect code. 
335 if (!MRI.getRegClassOrNull(Dst)) 336 continue; 337 if (MRI.getRegClass(Dst) != MRI.getRegClass(Src)) 338 continue; 339 340 std::vector<MachineOperand *> Uses; 341 for (auto UI = MRI.use_begin(Dst); UI != MRI.use_end(); ++UI) 342 Uses.push_back(&*UI); 343 for (auto *MO : Uses) 344 MO->setReg(Src); 345 346 Changed = true; 347 MI->eraseFromParent(); 348 } 349 350 return Changed; 351 } 352 353 static bool doDefKillClear(MachineBasicBlock *MBB) { 354 bool Changed = false; 355 356 for (auto &MI : *MBB) { 357 for (auto &MO : MI.operands()) { 358 if (!MO.isReg()) 359 continue; 360 if (!MO.isDef() && MO.isKill()) { 361 Changed = true; 362 MO.setIsKill(false); 363 } 364 365 if (MO.isDef() && MO.isDead()) { 366 Changed = true; 367 MO.setIsDead(false); 368 } 369 } 370 } 371 372 return Changed; 373 } 374 375 static bool runOnBasicBlock(MachineBasicBlock *MBB, 376 std::vector<StringRef> &bbNames, 377 unsigned &basicBlockNum, NamedVRegCursor &NVC) { 378 379 if (CanonicalizeBasicBlockNumber != ~0U) { 380 if (CanonicalizeBasicBlockNumber != basicBlockNum++) 381 return false; 382 LLVM_DEBUG(dbgs() << "\n Canonicalizing BasicBlock " << MBB->getName() 383 << "\n";); 384 } 385 386 if (llvm::find(bbNames, MBB->getName()) != bbNames.end()) { 387 LLVM_DEBUG({ 388 dbgs() << "Found potentially duplicate BasicBlocks: " << MBB->getName() 389 << "\n"; 390 }); 391 return false; 392 } 393 394 LLVM_DEBUG({ 395 dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << " \n\n"; 396 dbgs() << "\n\n================================================\n\n"; 397 }); 398 399 bool Changed = false; 400 MachineFunction &MF = *MBB->getParent(); 401 MachineRegisterInfo &MRI = MF.getRegInfo(); 402 403 bbNames.push_back(MBB->getName()); 404 LLVM_DEBUG(dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << "\n\n";); 405 406 LLVM_DEBUG(dbgs() << "MBB Before Canonical Copy Propagation:\n"; 407 MBB->dump();); 408 Changed |= propagateLocalCopies(MBB); 409 LLVM_DEBUG(dbgs() << "MBB After Canonical Copy 
Propagation:\n"; MBB->dump();); 410 411 LLVM_DEBUG(dbgs() << "MBB Before Scheduling:\n"; MBB->dump();); 412 unsigned IdempotentInstCount = 0; 413 Changed |= rescheduleCanonically(IdempotentInstCount, MBB); 414 LLVM_DEBUG(dbgs() << "MBB After Scheduling:\n"; MBB->dump();); 415 416 Changed |= NVC.renameVRegs(MBB); 417 418 // Here we renumber the def vregs for the idempotent instructions from the top 419 // of the MachineBasicBlock so that they are named in the order that we sorted 420 // them alphabetically. Eventually we wont need SkipVRegs because we will use 421 // named vregs instead. 422 if (IdempotentInstCount) 423 NVC.skipVRegs(); 424 425 auto MII = MBB->begin(); 426 for (unsigned i = 0; i < IdempotentInstCount && MII != MBB->end(); ++i) { 427 MachineInstr &MI = *MII++; 428 Changed = true; 429 Register vRegToRename = MI.getOperand(0).getReg(); 430 auto Rename = NVC.createVirtualRegister(vRegToRename); 431 432 std::vector<MachineOperand *> RenameMOs; 433 for (auto &MO : MRI.reg_operands(vRegToRename)) { 434 RenameMOs.push_back(&MO); 435 } 436 437 for (auto *MO : RenameMOs) { 438 MO->setReg(Rename); 439 } 440 } 441 442 Changed |= doDefKillClear(MBB); 443 444 LLVM_DEBUG(dbgs() << "Updated MachineBasicBlock:\n"; MBB->dump(); 445 dbgs() << "\n";); 446 LLVM_DEBUG( 447 dbgs() << "\n\n================================================\n\n"); 448 return Changed; 449 } 450 451 bool MIRCanonicalizer::runOnMachineFunction(MachineFunction &MF) { 452 453 static unsigned functionNum = 0; 454 if (CanonicalizeFunctionNumber != ~0U) { 455 if (CanonicalizeFunctionNumber != functionNum++) 456 return false; 457 LLVM_DEBUG(dbgs() << "\n Canonicalizing Function " << MF.getName() 458 << "\n";); 459 } 460 461 // we need a valid vreg to create a vreg type for skipping all those 462 // stray vreg numbers so reach alignment/canonical vreg values. 
463 std::vector<MachineBasicBlock *> RPOList = GetRPOList(MF); 464 465 LLVM_DEBUG( 466 dbgs() << "\n\n NEW MACHINE FUNCTION: " << MF.getName() << " \n\n"; 467 dbgs() << "\n\n================================================\n\n"; 468 dbgs() << "Total Basic Blocks: " << RPOList.size() << "\n"; 469 for (auto MBB 470 : RPOList) { dbgs() << MBB->getName() << "\n"; } dbgs() 471 << "\n\n================================================\n\n";); 472 473 std::vector<StringRef> BBNames; 474 475 unsigned BBNum = 0; 476 477 bool Changed = false; 478 479 MachineRegisterInfo &MRI = MF.getRegInfo(); 480 NamedVRegCursor NVC(MRI); 481 for (auto MBB : RPOList) 482 Changed |= runOnBasicBlock(MBB, BBNames, BBNum, NVC); 483 484 return Changed; 485 } 486