1 //===-------------- MIRCanonicalizer.cpp - MIR Canonicalizer --------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // The purpose of this pass is to employ a canonical code transformation so 10 // that code compiled with slightly different IR passes can be diffed more 11 // effectively than otherwise. This is done by renaming vregs in a given 12 // LiveRange in a canonical way. This pass also does a pseudo-scheduling to 13 // move defs closer to their use inorder to reduce diffs caused by slightly 14 // different schedules. 15 // 16 // Basic Usage: 17 // 18 // llc -o - -run-pass mir-canonicalizer example.mir 19 // 20 // Reorders instructions canonically. 21 // Renames virtual register operands canonically. 22 // Strips certain MIR artifacts (optionally). 23 // 24 //===----------------------------------------------------------------------===// 25 26 #include "MIRVRegNamerUtils.h" 27 #include "llvm/ADT/PostOrderIterator.h" 28 #include "llvm/ADT/STLExtras.h" 29 #include "llvm/CodeGen/MachineFunctionPass.h" 30 #include "llvm/CodeGen/MachineInstrBuilder.h" 31 #include "llvm/CodeGen/MachineRegisterInfo.h" 32 #include "llvm/CodeGen/Passes.h" 33 #include "llvm/InitializePasses.h" 34 #include "llvm/Support/Debug.h" 35 #include "llvm/Support/raw_ostream.h" 36 37 #include <queue> 38 39 using namespace llvm; 40 41 namespace llvm { 42 extern char &MIRCanonicalizerID; 43 } // namespace llvm 44 45 #define DEBUG_TYPE "mir-canonicalizer" 46 47 static cl::opt<unsigned> 48 CanonicalizeFunctionNumber("canon-nth-function", cl::Hidden, cl::init(~0u), 49 cl::value_desc("N"), 50 cl::desc("Function number to canonicalize.")); 51 52 namespace { 53 54 class MIRCanonicalizer : public MachineFunctionPass { 55 public: 56 static char ID; 57 MIRCanonicalizer() : MachineFunctionPass(ID) {} 58 59 StringRef getPassName() const override { 60 return "Rename register operands in a canonical ordering."; 61 } 62 63 void getAnalysisUsage(AnalysisUsage &AU) const override { 64 AU.setPreservesCFG(); 65 MachineFunctionPass::getAnalysisUsage(AU); 66 } 67 68 bool runOnMachineFunction(MachineFunction &MF) override; 69 }; 70 71 } // end anonymous namespace 72 73 char MIRCanonicalizer::ID; 74 75 char &llvm::MIRCanonicalizerID = MIRCanonicalizer::ID; 76 77 INITIALIZE_PASS_BEGIN(MIRCanonicalizer, "mir-canonicalizer", 78 "Rename Register Operands Canonically", false, false) 79 80 INITIALIZE_PASS_END(MIRCanonicalizer, "mir-canonicalizer", 81 "Rename Register Operands Canonically", false, false) 82 83 static std::vector<MachineBasicBlock *> GetRPOList(MachineFunction &MF) { 84 if (MF.empty()) 85 return {}; 86 ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin()); 87 std::vector<MachineBasicBlock *> RPOList; 88 for (auto MBB : RPOT) { 89 RPOList.push_back(MBB); 90 } 91 92 return RPOList; 93 } 94 95 static bool 96 rescheduleLexographically(std::vector<MachineInstr *> instructions, 97 MachineBasicBlock *MBB, 98 std::function<MachineBasicBlock::iterator()> getPos) { 99 100 bool Changed = false; 101 using StringInstrPair = std::pair<std::string, MachineInstr *>; 102 std::vector<StringInstrPair> StringInstrMap; 103 104 for (auto *II : instructions) { 105 std::string S; 106 raw_string_ostream OS(S); 107 II->print(OS); 108 OS.flush(); 109 110 // Trim the assignment, or start from the beginning in the case of a store. 111 const size_t i = S.find("="); 112 StringInstrMap.push_back({(i == std::string::npos) ? S : S.substr(i), II}); 113 } 114 115 llvm::sort(StringInstrMap, 116 [](const StringInstrPair &a, const StringInstrPair &b) -> bool { 117 return (a.first < b.first); 118 }); 119 120 for (auto &II : StringInstrMap) { 121 122 LLVM_DEBUG({ 123 dbgs() << "Splicing "; 124 II.second->dump(); 125 dbgs() << " right before: "; 126 getPos()->dump(); 127 }); 128 129 Changed = true; 130 MBB->splice(getPos(), MBB, II.second); 131 } 132 133 return Changed; 134 } 135 136 static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount, 137 MachineBasicBlock *MBB) { 138 139 bool Changed = false; 140 141 // Calculates the distance of MI from the beginning of its parent BB. 142 auto getInstrIdx = [](const MachineInstr &MI) { 143 unsigned i = 0; 144 for (auto &CurMI : *MI.getParent()) { 145 if (&CurMI == &MI) 146 return i; 147 i++; 148 } 149 return ~0U; 150 }; 151 152 // Pre-Populate vector of instructions to reschedule so that we don't 153 // clobber the iterator. 154 std::vector<MachineInstr *> Instructions; 155 for (auto &MI : *MBB) { 156 Instructions.push_back(&MI); 157 } 158 159 std::map<MachineInstr *, std::vector<MachineInstr *>> MultiUsers; 160 std::map<unsigned, MachineInstr *> MultiUserLookup; 161 unsigned UseToBringDefCloserToCount = 0; 162 std::vector<MachineInstr *> PseudoIdempotentInstructions; 163 std::vector<unsigned> PhysRegDefs; 164 for (auto *II : Instructions) { 165 for (unsigned i = 1; i < II->getNumOperands(); i++) { 166 MachineOperand &MO = II->getOperand(i); 167 if (!MO.isReg()) 168 continue; 169 170 if (Register::isVirtualRegister(MO.getReg())) 171 continue; 172 173 if (!MO.isDef()) 174 continue; 175 176 PhysRegDefs.push_back(MO.getReg()); 177 } 178 } 179 180 for (auto *II : Instructions) { 181 if (II->getNumOperands() == 0) 182 continue; 183 if (II->mayLoadOrStore()) 184 continue; 185 186 MachineOperand &MO = II->getOperand(0); 187 if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg())) 188 continue; 189 if (!MO.isDef()) 190 continue; 191 192 bool IsPseudoIdempotent = true; 193 for (unsigned i = 1; i < II->getNumOperands(); i++) { 194 195 if (II->getOperand(i).isImm()) { 196 continue; 197 } 198 199 if (II->getOperand(i).isReg()) { 200 if (!Register::isVirtualRegister(II->getOperand(i).getReg())) 201 if (llvm::find(PhysRegDefs, II->getOperand(i).getReg()) == 202 PhysRegDefs.end()) { 203 continue; 204 } 205 } 206 207 IsPseudoIdempotent = false; 208 break; 209 } 210 211 if (IsPseudoIdempotent) { 212 PseudoIdempotentInstructions.push_back(II); 213 continue; 214 } 215 216 LLVM_DEBUG(dbgs() << "Operand " << 0 << " of "; II->dump(); MO.dump();); 217 218 MachineInstr *Def = II; 219 unsigned Distance = ~0U; 220 MachineInstr *UseToBringDefCloserTo = nullptr; 221 MachineRegisterInfo *MRI = &MBB->getParent()->getRegInfo(); 222 for (auto &UO : MRI->use_nodbg_operands(MO.getReg())) { 223 MachineInstr *UseInst = UO.getParent(); 224 225 const unsigned DefLoc = getInstrIdx(*Def); 226 const unsigned UseLoc = getInstrIdx(*UseInst); 227 const unsigned Delta = (UseLoc - DefLoc); 228 229 if (UseInst->getParent() != Def->getParent()) 230 continue; 231 if (DefLoc >= UseLoc) 232 continue; 233 234 if (Delta < Distance) { 235 Distance = Delta; 236 UseToBringDefCloserTo = UseInst; 237 MultiUserLookup[UseToBringDefCloserToCount++] = UseToBringDefCloserTo; 238 } 239 } 240 241 const auto BBE = MBB->instr_end(); 242 MachineBasicBlock::iterator DefI = BBE; 243 MachineBasicBlock::iterator UseI = BBE; 244 245 for (auto BBI = MBB->instr_begin(); BBI != BBE; ++BBI) { 246 247 if (DefI != BBE && UseI != BBE) 248 break; 249 250 if (&*BBI == Def) { 251 DefI = BBI; 252 continue; 253 } 254 255 if (&*BBI == UseToBringDefCloserTo) { 256 UseI = BBI; 257 continue; 258 } 259 } 260 261 if (DefI == BBE || UseI == BBE) 262 continue; 263 264 LLVM_DEBUG({ 265 dbgs() << "Splicing "; 266 DefI->dump(); 267 dbgs() << " right before: "; 268 UseI->dump(); 269 }); 270 271 MultiUsers[UseToBringDefCloserTo].push_back(Def); 272 Changed = true; 273 MBB->splice(UseI, MBB, DefI); 274 } 275 276 // Sort the defs for users of multiple defs lexographically. 277 for (const auto &E : MultiUserLookup) { 278 279 auto UseI = 280 std::find_if(MBB->instr_begin(), MBB->instr_end(), 281 [&](MachineInstr &MI) -> bool { return &MI == E.second; }); 282 283 if (UseI == MBB->instr_end()) 284 continue; 285 286 LLVM_DEBUG( 287 dbgs() << "Rescheduling Multi-Use Instructions Lexographically.";); 288 Changed |= rescheduleLexographically( 289 MultiUsers[E.second], MBB, 290 [&]() -> MachineBasicBlock::iterator { return UseI; }); 291 } 292 293 PseudoIdempotentInstCount = PseudoIdempotentInstructions.size(); 294 LLVM_DEBUG( 295 dbgs() << "Rescheduling Idempotent Instructions Lexographically.";); 296 Changed |= rescheduleLexographically( 297 PseudoIdempotentInstructions, MBB, 298 [&]() -> MachineBasicBlock::iterator { return MBB->begin(); }); 299 300 return Changed; 301 } 302 303 static bool propagateLocalCopies(MachineBasicBlock *MBB) { 304 bool Changed = false; 305 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); 306 307 std::vector<MachineInstr *> Copies; 308 for (MachineInstr &MI : MBB->instrs()) { 309 if (MI.isCopy()) 310 Copies.push_back(&MI); 311 } 312 313 for (MachineInstr *MI : Copies) { 314 315 if (!MI->getOperand(0).isReg()) 316 continue; 317 if (!MI->getOperand(1).isReg()) 318 continue; 319 320 const Register Dst = MI->getOperand(0).getReg(); 321 const Register Src = MI->getOperand(1).getReg(); 322 323 if (!Register::isVirtualRegister(Dst)) 324 continue; 325 if (!Register::isVirtualRegister(Src)) 326 continue; 327 // Not folding COPY instructions if regbankselect has not set the RCs. 328 // Why are we only considering Register Classes? Because the verifier 329 // sometimes gets upset if the register classes don't match even if the 330 // types do. A future patch might add COPY folding for matching types in 331 // pre-registerbankselect code. 332 if (!MRI.getRegClassOrNull(Dst)) 333 continue; 334 if (MRI.getRegClass(Dst) != MRI.getRegClass(Src)) 335 continue; 336 337 std::vector<MachineOperand *> Uses; 338 for (auto UI = MRI.use_begin(Dst); UI != MRI.use_end(); ++UI) 339 Uses.push_back(&*UI); 340 for (auto *MO : Uses) 341 MO->setReg(Src); 342 343 Changed = true; 344 MI->eraseFromParent(); 345 } 346 347 return Changed; 348 } 349 350 static bool doDefKillClear(MachineBasicBlock *MBB) { 351 bool Changed = false; 352 353 for (auto &MI : *MBB) { 354 for (auto &MO : MI.operands()) { 355 if (!MO.isReg()) 356 continue; 357 if (!MO.isDef() && MO.isKill()) { 358 Changed = true; 359 MO.setIsKill(false); 360 } 361 362 if (MO.isDef() && MO.isDead()) { 363 Changed = true; 364 MO.setIsDead(false); 365 } 366 } 367 } 368 369 return Changed; 370 } 371 372 static bool runOnBasicBlock(MachineBasicBlock *MBB, 373 unsigned BasicBlockNum, VRegRenamer &Renamer) { 374 LLVM_DEBUG({ 375 dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << " \n\n"; 376 dbgs() << "\n\n================================================\n\n"; 377 }); 378 379 bool Changed = false; 380 381 LLVM_DEBUG(dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << "\n\n";); 382 383 LLVM_DEBUG(dbgs() << "MBB Before Canonical Copy Propagation:\n"; 384 MBB->dump();); 385 Changed |= propagateLocalCopies(MBB); 386 LLVM_DEBUG(dbgs() << "MBB After Canonical Copy Propagation:\n"; MBB->dump();); 387 388 LLVM_DEBUG(dbgs() << "MBB Before Scheduling:\n"; MBB->dump();); 389 unsigned IdempotentInstCount = 0; 390 Changed |= rescheduleCanonically(IdempotentInstCount, MBB); 391 LLVM_DEBUG(dbgs() << "MBB After Scheduling:\n"; MBB->dump();); 392 393 Changed |= Renamer.renameVRegs(MBB, BasicBlockNum); 394 395 // TODO: Consider dropping this. Dropping kill defs is probably not 396 // semantically sound. 397 Changed |= doDefKillClear(MBB); 398 399 LLVM_DEBUG(dbgs() << "Updated MachineBasicBlock:\n"; MBB->dump(); 400 dbgs() << "\n";); 401 LLVM_DEBUG( 402 dbgs() << "\n\n================================================\n\n"); 403 return Changed; 404 } 405 406 bool MIRCanonicalizer::runOnMachineFunction(MachineFunction &MF) { 407 408 static unsigned functionNum = 0; 409 if (CanonicalizeFunctionNumber != ~0U) { 410 if (CanonicalizeFunctionNumber != functionNum++) 411 return false; 412 LLVM_DEBUG(dbgs() << "\n Canonicalizing Function " << MF.getName() 413 << "\n";); 414 } 415 416 // we need a valid vreg to create a vreg type for skipping all those 417 // stray vreg numbers so reach alignment/canonical vreg values. 418 std::vector<MachineBasicBlock *> RPOList = GetRPOList(MF); 419 420 LLVM_DEBUG( 421 dbgs() << "\n\n NEW MACHINE FUNCTION: " << MF.getName() << " \n\n"; 422 dbgs() << "\n\n================================================\n\n"; 423 dbgs() << "Total Basic Blocks: " << RPOList.size() << "\n"; 424 for (auto MBB 425 : RPOList) { dbgs() << MBB->getName() << "\n"; } dbgs() 426 << "\n\n================================================\n\n";); 427 428 unsigned BBNum = 0; 429 bool Changed = false; 430 MachineRegisterInfo &MRI = MF.getRegInfo(); 431 VRegRenamer Renamer(MRI); 432 for (auto MBB : RPOList) 433 Changed |= runOnBasicBlock(MBB, BBNum++, Renamer); 434 435 return Changed; 436 } 437