//===-------------- MIRCanonicalizer.cpp - MIR Canonicalizer --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// The purpose of this pass is to employ a canonical code transformation so
// that code compiled with slightly different IR passes can be diffed more
// effectively than otherwise. This is done by renaming vregs in a given
// LiveRange in a canonical way. This pass also does a pseudo-scheduling to
// move defs closer to their use in order to reduce diffs caused by slightly
// different schedules.
//
// Basic Usage:
//
// llc -o - -run-pass mir-canonicalizer example.mir
//
// Reorders instructions canonically.
// Renames virtual register operands canonically.
// Strips certain MIR artifacts (optionally).
23 // 24 //===----------------------------------------------------------------------===// 25 26 #include "MIRVRegNamerUtils.h" 27 #include "llvm/ADT/PostOrderIterator.h" 28 #include "llvm/ADT/STLExtras.h" 29 #include "llvm/CodeGen/MachineFunctionPass.h" 30 #include "llvm/CodeGen/MachineInstrBuilder.h" 31 #include "llvm/CodeGen/MachineRegisterInfo.h" 32 #include "llvm/CodeGen/Passes.h" 33 #include "llvm/InitializePasses.h" 34 #include "llvm/Support/Debug.h" 35 #include "llvm/Support/raw_ostream.h" 36 37 #include <queue> 38 39 using namespace llvm; 40 41 namespace llvm { 42 extern char &MIRCanonicalizerID; 43 } // namespace llvm 44 45 #define DEBUG_TYPE "mir-canonicalizer" 46 47 static cl::opt<unsigned> 48 CanonicalizeFunctionNumber("canon-nth-function", cl::Hidden, cl::init(~0u), 49 cl::value_desc("N"), 50 cl::desc("Function number to canonicalize.")); 51 52 namespace { 53 54 class MIRCanonicalizer : public MachineFunctionPass { 55 public: 56 static char ID; 57 MIRCanonicalizer() : MachineFunctionPass(ID) {} 58 59 StringRef getPassName() const override { 60 return "Rename register operands in a canonical ordering."; 61 } 62 63 void getAnalysisUsage(AnalysisUsage &AU) const override { 64 AU.setPreservesCFG(); 65 MachineFunctionPass::getAnalysisUsage(AU); 66 } 67 68 bool runOnMachineFunction(MachineFunction &MF) override; 69 }; 70 71 } // end anonymous namespace 72 73 char MIRCanonicalizer::ID; 74 75 char &llvm::MIRCanonicalizerID = MIRCanonicalizer::ID; 76 77 INITIALIZE_PASS_BEGIN(MIRCanonicalizer, "mir-canonicalizer", 78 "Rename Register Operands Canonically", false, false) 79 80 INITIALIZE_PASS_END(MIRCanonicalizer, "mir-canonicalizer", 81 "Rename Register Operands Canonically", false, false) 82 83 static std::vector<MachineBasicBlock *> GetRPOList(MachineFunction &MF) { 84 if (MF.empty()) 85 return {}; 86 ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin()); 87 std::vector<MachineBasicBlock *> RPOList; 88 append_range(RPOList, RPOT); 89 90 return 
RPOList; 91 } 92 93 static bool 94 rescheduleLexographically(std::vector<MachineInstr *> instructions, 95 MachineBasicBlock *MBB, 96 std::function<MachineBasicBlock::iterator()> getPos) { 97 98 bool Changed = false; 99 using StringInstrPair = std::pair<std::string, MachineInstr *>; 100 std::vector<StringInstrPair> StringInstrMap; 101 102 for (auto *II : instructions) { 103 std::string S; 104 raw_string_ostream OS(S); 105 II->print(OS); 106 OS.flush(); 107 108 // Trim the assignment, or start from the beginning in the case of a store. 109 const size_t i = S.find('='); 110 StringInstrMap.push_back({(i == std::string::npos) ? S : S.substr(i), II}); 111 } 112 113 llvm::sort(StringInstrMap, 114 [](const StringInstrPair &a, const StringInstrPair &b) -> bool { 115 return (a.first < b.first); 116 }); 117 118 for (auto &II : StringInstrMap) { 119 120 LLVM_DEBUG({ 121 dbgs() << "Splicing "; 122 II.second->dump(); 123 dbgs() << " right before: "; 124 getPos()->dump(); 125 }); 126 127 Changed = true; 128 MBB->splice(getPos(), MBB, II.second); 129 } 130 131 return Changed; 132 } 133 134 static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount, 135 MachineBasicBlock *MBB) { 136 137 bool Changed = false; 138 139 // Calculates the distance of MI from the beginning of its parent BB. 140 auto getInstrIdx = [](const MachineInstr &MI) { 141 unsigned i = 0; 142 for (auto &CurMI : *MI.getParent()) { 143 if (&CurMI == &MI) 144 return i; 145 i++; 146 } 147 return ~0U; 148 }; 149 150 // Pre-Populate vector of instructions to reschedule so that we don't 151 // clobber the iterator. 
152 std::vector<MachineInstr *> Instructions; 153 for (auto &MI : *MBB) { 154 Instructions.push_back(&MI); 155 } 156 157 std::map<MachineInstr *, std::vector<MachineInstr *>> MultiUsers; 158 std::map<unsigned, MachineInstr *> MultiUserLookup; 159 unsigned UseToBringDefCloserToCount = 0; 160 std::vector<MachineInstr *> PseudoIdempotentInstructions; 161 std::vector<unsigned> PhysRegDefs; 162 for (auto *II : Instructions) { 163 for (unsigned i = 1; i < II->getNumOperands(); i++) { 164 MachineOperand &MO = II->getOperand(i); 165 if (!MO.isReg()) 166 continue; 167 168 if (Register::isVirtualRegister(MO.getReg())) 169 continue; 170 171 if (!MO.isDef()) 172 continue; 173 174 PhysRegDefs.push_back(MO.getReg()); 175 } 176 } 177 178 for (auto *II : Instructions) { 179 if (II->getNumOperands() == 0) 180 continue; 181 if (II->mayLoadOrStore()) 182 continue; 183 184 MachineOperand &MO = II->getOperand(0); 185 if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg())) 186 continue; 187 if (!MO.isDef()) 188 continue; 189 190 bool IsPseudoIdempotent = true; 191 for (unsigned i = 1; i < II->getNumOperands(); i++) { 192 193 if (II->getOperand(i).isImm()) { 194 continue; 195 } 196 197 if (II->getOperand(i).isReg()) { 198 if (!Register::isVirtualRegister(II->getOperand(i).getReg())) 199 if (!llvm::is_contained(PhysRegDefs, II->getOperand(i).getReg())) { 200 continue; 201 } 202 } 203 204 IsPseudoIdempotent = false; 205 break; 206 } 207 208 if (IsPseudoIdempotent) { 209 PseudoIdempotentInstructions.push_back(II); 210 continue; 211 } 212 213 LLVM_DEBUG(dbgs() << "Operand " << 0 << " of "; II->dump(); MO.dump();); 214 215 MachineInstr *Def = II; 216 unsigned Distance = ~0U; 217 MachineInstr *UseToBringDefCloserTo = nullptr; 218 MachineRegisterInfo *MRI = &MBB->getParent()->getRegInfo(); 219 for (auto &UO : MRI->use_nodbg_operands(MO.getReg())) { 220 MachineInstr *UseInst = UO.getParent(); 221 222 const unsigned DefLoc = getInstrIdx(*Def); 223 const unsigned UseLoc = 
getInstrIdx(*UseInst); 224 const unsigned Delta = (UseLoc - DefLoc); 225 226 if (UseInst->getParent() != Def->getParent()) 227 continue; 228 if (DefLoc >= UseLoc) 229 continue; 230 231 if (Delta < Distance) { 232 Distance = Delta; 233 UseToBringDefCloserTo = UseInst; 234 MultiUserLookup[UseToBringDefCloserToCount++] = UseToBringDefCloserTo; 235 } 236 } 237 238 const auto BBE = MBB->instr_end(); 239 MachineBasicBlock::iterator DefI = BBE; 240 MachineBasicBlock::iterator UseI = BBE; 241 242 for (auto BBI = MBB->instr_begin(); BBI != BBE; ++BBI) { 243 244 if (DefI != BBE && UseI != BBE) 245 break; 246 247 if (&*BBI == Def) { 248 DefI = BBI; 249 continue; 250 } 251 252 if (&*BBI == UseToBringDefCloserTo) { 253 UseI = BBI; 254 continue; 255 } 256 } 257 258 if (DefI == BBE || UseI == BBE) 259 continue; 260 261 LLVM_DEBUG({ 262 dbgs() << "Splicing "; 263 DefI->dump(); 264 dbgs() << " right before: "; 265 UseI->dump(); 266 }); 267 268 MultiUsers[UseToBringDefCloserTo].push_back(Def); 269 Changed = true; 270 MBB->splice(UseI, MBB, DefI); 271 } 272 273 // Sort the defs for users of multiple defs lexographically. 
274 for (const auto &E : MultiUserLookup) { 275 276 auto UseI = llvm::find_if(MBB->instrs(), [&](MachineInstr &MI) -> bool { 277 return &MI == E.second; 278 }); 279 280 if (UseI == MBB->instr_end()) 281 continue; 282 283 LLVM_DEBUG( 284 dbgs() << "Rescheduling Multi-Use Instructions Lexographically.";); 285 Changed |= rescheduleLexographically( 286 MultiUsers[E.second], MBB, 287 [&]() -> MachineBasicBlock::iterator { return UseI; }); 288 } 289 290 PseudoIdempotentInstCount = PseudoIdempotentInstructions.size(); 291 LLVM_DEBUG( 292 dbgs() << "Rescheduling Idempotent Instructions Lexographically.";); 293 Changed |= rescheduleLexographically( 294 PseudoIdempotentInstructions, MBB, 295 [&]() -> MachineBasicBlock::iterator { return MBB->begin(); }); 296 297 return Changed; 298 } 299 300 static bool propagateLocalCopies(MachineBasicBlock *MBB) { 301 bool Changed = false; 302 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); 303 304 std::vector<MachineInstr *> Copies; 305 for (MachineInstr &MI : MBB->instrs()) { 306 if (MI.isCopy()) 307 Copies.push_back(&MI); 308 } 309 310 for (MachineInstr *MI : Copies) { 311 312 if (!MI->getOperand(0).isReg()) 313 continue; 314 if (!MI->getOperand(1).isReg()) 315 continue; 316 317 const Register Dst = MI->getOperand(0).getReg(); 318 const Register Src = MI->getOperand(1).getReg(); 319 320 if (!Register::isVirtualRegister(Dst)) 321 continue; 322 if (!Register::isVirtualRegister(Src)) 323 continue; 324 // Not folding COPY instructions if regbankselect has not set the RCs. 325 // Why are we only considering Register Classes? Because the verifier 326 // sometimes gets upset if the register classes don't match even if the 327 // types do. A future patch might add COPY folding for matching types in 328 // pre-registerbankselect code. 
329 if (!MRI.getRegClassOrNull(Dst)) 330 continue; 331 if (MRI.getRegClass(Dst) != MRI.getRegClass(Src)) 332 continue; 333 334 std::vector<MachineOperand *> Uses; 335 for (auto UI = MRI.use_begin(Dst); UI != MRI.use_end(); ++UI) 336 Uses.push_back(&*UI); 337 for (auto *MO : Uses) 338 MO->setReg(Src); 339 340 Changed = true; 341 MI->eraseFromParent(); 342 } 343 344 return Changed; 345 } 346 347 static bool doDefKillClear(MachineBasicBlock *MBB) { 348 bool Changed = false; 349 350 for (auto &MI : *MBB) { 351 for (auto &MO : MI.operands()) { 352 if (!MO.isReg()) 353 continue; 354 if (!MO.isDef() && MO.isKill()) { 355 Changed = true; 356 MO.setIsKill(false); 357 } 358 359 if (MO.isDef() && MO.isDead()) { 360 Changed = true; 361 MO.setIsDead(false); 362 } 363 } 364 } 365 366 return Changed; 367 } 368 369 static bool runOnBasicBlock(MachineBasicBlock *MBB, 370 unsigned BasicBlockNum, VRegRenamer &Renamer) { 371 LLVM_DEBUG({ 372 dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << " \n\n"; 373 dbgs() << "\n\n================================================\n\n"; 374 }); 375 376 bool Changed = false; 377 378 LLVM_DEBUG(dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << "\n\n";); 379 380 LLVM_DEBUG(dbgs() << "MBB Before Canonical Copy Propagation:\n"; 381 MBB->dump();); 382 Changed |= propagateLocalCopies(MBB); 383 LLVM_DEBUG(dbgs() << "MBB After Canonical Copy Propagation:\n"; MBB->dump();); 384 385 LLVM_DEBUG(dbgs() << "MBB Before Scheduling:\n"; MBB->dump();); 386 unsigned IdempotentInstCount = 0; 387 Changed |= rescheduleCanonically(IdempotentInstCount, MBB); 388 LLVM_DEBUG(dbgs() << "MBB After Scheduling:\n"; MBB->dump();); 389 390 Changed |= Renamer.renameVRegs(MBB, BasicBlockNum); 391 392 // TODO: Consider dropping this. Dropping kill defs is probably not 393 // semantically sound. 
394 Changed |= doDefKillClear(MBB); 395 396 LLVM_DEBUG(dbgs() << "Updated MachineBasicBlock:\n"; MBB->dump(); 397 dbgs() << "\n";); 398 LLVM_DEBUG( 399 dbgs() << "\n\n================================================\n\n"); 400 return Changed; 401 } 402 403 bool MIRCanonicalizer::runOnMachineFunction(MachineFunction &MF) { 404 405 static unsigned functionNum = 0; 406 if (CanonicalizeFunctionNumber != ~0U) { 407 if (CanonicalizeFunctionNumber != functionNum++) 408 return false; 409 LLVM_DEBUG(dbgs() << "\n Canonicalizing Function " << MF.getName() 410 << "\n";); 411 } 412 413 // we need a valid vreg to create a vreg type for skipping all those 414 // stray vreg numbers so reach alignment/canonical vreg values. 415 std::vector<MachineBasicBlock *> RPOList = GetRPOList(MF); 416 417 LLVM_DEBUG( 418 dbgs() << "\n\n NEW MACHINE FUNCTION: " << MF.getName() << " \n\n"; 419 dbgs() << "\n\n================================================\n\n"; 420 dbgs() << "Total Basic Blocks: " << RPOList.size() << "\n"; 421 for (auto MBB 422 : RPOList) { dbgs() << MBB->getName() << "\n"; } dbgs() 423 << "\n\n================================================\n\n";); 424 425 unsigned BBNum = 0; 426 bool Changed = false; 427 MachineRegisterInfo &MRI = MF.getRegInfo(); 428 VRegRenamer Renamer(MRI); 429 for (auto MBB : RPOList) 430 Changed |= runOnBasicBlock(MBB, BBNum++, Renamer); 431 432 return Changed; 433 } 434