xref: /freebsd/contrib/llvm-project/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp (revision 85868e8a1daeaae7a0e48effb2ea2310ae3b02c6)
1 //===-------------- MIRCanonicalizer.cpp - MIR Canonicalizer --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // The purpose of this pass is to employ a canonical code transformation so
10 // that code compiled with slightly different IR passes can be diffed more
11 // effectively than otherwise. This is done by renaming vregs in a given
12 // LiveRange in a canonical way. This pass also does a pseudo-scheduling to
13 // move defs closer to their use in order to reduce diffs caused by slightly
14 // different schedules.
15 //
16 // Basic Usage:
17 //
18 // llc -o - -run-pass mir-canonicalizer example.mir
19 //
20 // Reorders instructions canonically.
21 // Renames virtual register operands canonically.
22 // Strips certain MIR artifacts (optionally).
23 //
24 //===----------------------------------------------------------------------===//
25 
26 #include "MIRVRegNamerUtils.h"
27 #include "llvm/ADT/PostOrderIterator.h"
28 #include "llvm/ADT/STLExtras.h"
29 #include "llvm/CodeGen/MachineFunctionPass.h"
30 #include "llvm/CodeGen/MachineInstrBuilder.h"
31 #include "llvm/CodeGen/MachineRegisterInfo.h"
32 #include "llvm/CodeGen/Passes.h"
33 #include "llvm/Support/Debug.h"
34 #include "llvm/Support/raw_ostream.h"
35 
36 #include <queue>
37 
38 using namespace llvm;
39 
40 namespace llvm {
41 extern char &MIRCanonicalizerID;
42 } // namespace llvm
43 
44 #define DEBUG_TYPE "mir-canonicalizer"
45 
46 static cl::opt<unsigned>
47     CanonicalizeFunctionNumber("canon-nth-function", cl::Hidden, cl::init(~0u),
48                                cl::value_desc("N"),
49                                cl::desc("Function number to canonicalize."));
50 
51 static cl::opt<unsigned> CanonicalizeBasicBlockNumber(
52     "canon-nth-basicblock", cl::Hidden, cl::init(~0u), cl::value_desc("N"),
53     cl::desc("BasicBlock number to canonicalize."));
54 
55 namespace {
56 
57 class MIRCanonicalizer : public MachineFunctionPass {
58 public:
59   static char ID;
60   MIRCanonicalizer() : MachineFunctionPass(ID) {}
61 
62   StringRef getPassName() const override {
63     return "Rename register operands in a canonical ordering.";
64   }
65 
66   void getAnalysisUsage(AnalysisUsage &AU) const override {
67     AU.setPreservesCFG();
68     MachineFunctionPass::getAnalysisUsage(AU);
69   }
70 
71   bool runOnMachineFunction(MachineFunction &MF) override;
72 };
73 
74 } // end anonymous namespace
75 
76 char MIRCanonicalizer::ID;
77 
78 char &llvm::MIRCanonicalizerID = MIRCanonicalizer::ID;
79 
80 INITIALIZE_PASS_BEGIN(MIRCanonicalizer, "mir-canonicalizer",
81                       "Rename Register Operands Canonically", false, false)
82 
83 INITIALIZE_PASS_END(MIRCanonicalizer, "mir-canonicalizer",
84                     "Rename Register Operands Canonically", false, false)
85 
86 static std::vector<MachineBasicBlock *> GetRPOList(MachineFunction &MF) {
87   if (MF.empty())
88     return {};
89   ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin());
90   std::vector<MachineBasicBlock *> RPOList;
91   for (auto MBB : RPOT) {
92     RPOList.push_back(MBB);
93   }
94 
95   return RPOList;
96 }
97 
98 static bool
99 rescheduleLexographically(std::vector<MachineInstr *> instructions,
100                           MachineBasicBlock *MBB,
101                           std::function<MachineBasicBlock::iterator()> getPos) {
102 
103   bool Changed = false;
104   using StringInstrPair = std::pair<std::string, MachineInstr *>;
105   std::vector<StringInstrPair> StringInstrMap;
106 
107   for (auto *II : instructions) {
108     std::string S;
109     raw_string_ostream OS(S);
110     II->print(OS);
111     OS.flush();
112 
113     // Trim the assignment, or start from the begining in the case of a store.
114     const size_t i = S.find("=");
115     StringInstrMap.push_back({(i == std::string::npos) ? S : S.substr(i), II});
116   }
117 
118   llvm::sort(StringInstrMap,
119              [](const StringInstrPair &a, const StringInstrPair &b) -> bool {
120                return (a.first < b.first);
121              });
122 
123   for (auto &II : StringInstrMap) {
124 
125     LLVM_DEBUG({
126       dbgs() << "Splicing ";
127       II.second->dump();
128       dbgs() << " right before: ";
129       getPos()->dump();
130     });
131 
132     Changed = true;
133     MBB->splice(getPos(), MBB, II.second);
134   }
135 
136   return Changed;
137 }
138 
139 static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount,
140                                   MachineBasicBlock *MBB) {
141 
142   bool Changed = false;
143 
144   // Calculates the distance of MI from the begining of its parent BB.
145   auto getInstrIdx = [](const MachineInstr &MI) {
146     unsigned i = 0;
147     for (auto &CurMI : *MI.getParent()) {
148       if (&CurMI == &MI)
149         return i;
150       i++;
151     }
152     return ~0U;
153   };
154 
155   // Pre-Populate vector of instructions to reschedule so that we don't
156   // clobber the iterator.
157   std::vector<MachineInstr *> Instructions;
158   for (auto &MI : *MBB) {
159     Instructions.push_back(&MI);
160   }
161 
162   std::map<MachineInstr *, std::vector<MachineInstr *>> MultiUsers;
163   std::map<unsigned, MachineInstr *> MultiUserLookup;
164   unsigned UseToBringDefCloserToCount = 0;
165   std::vector<MachineInstr *> PseudoIdempotentInstructions;
166   std::vector<unsigned> PhysRegDefs;
167   for (auto *II : Instructions) {
168     for (unsigned i = 1; i < II->getNumOperands(); i++) {
169       MachineOperand &MO = II->getOperand(i);
170       if (!MO.isReg())
171         continue;
172 
173       if (Register::isVirtualRegister(MO.getReg()))
174         continue;
175 
176       if (!MO.isDef())
177         continue;
178 
179       PhysRegDefs.push_back(MO.getReg());
180     }
181   }
182 
183   for (auto *II : Instructions) {
184     if (II->getNumOperands() == 0)
185       continue;
186     if (II->mayLoadOrStore())
187       continue;
188 
189     MachineOperand &MO = II->getOperand(0);
190     if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg()))
191       continue;
192     if (!MO.isDef())
193       continue;
194 
195     bool IsPseudoIdempotent = true;
196     for (unsigned i = 1; i < II->getNumOperands(); i++) {
197 
198       if (II->getOperand(i).isImm()) {
199         continue;
200       }
201 
202       if (II->getOperand(i).isReg()) {
203         if (!Register::isVirtualRegister(II->getOperand(i).getReg()))
204           if (llvm::find(PhysRegDefs, II->getOperand(i).getReg()) ==
205               PhysRegDefs.end()) {
206             continue;
207           }
208       }
209 
210       IsPseudoIdempotent = false;
211       break;
212     }
213 
214     if (IsPseudoIdempotent) {
215       PseudoIdempotentInstructions.push_back(II);
216       continue;
217     }
218 
219     LLVM_DEBUG(dbgs() << "Operand " << 0 << " of "; II->dump(); MO.dump(););
220 
221     MachineInstr *Def = II;
222     unsigned Distance = ~0U;
223     MachineInstr *UseToBringDefCloserTo = nullptr;
224     MachineRegisterInfo *MRI = &MBB->getParent()->getRegInfo();
225     for (auto &UO : MRI->use_nodbg_operands(MO.getReg())) {
226       MachineInstr *UseInst = UO.getParent();
227 
228       const unsigned DefLoc = getInstrIdx(*Def);
229       const unsigned UseLoc = getInstrIdx(*UseInst);
230       const unsigned Delta = (UseLoc - DefLoc);
231 
232       if (UseInst->getParent() != Def->getParent())
233         continue;
234       if (DefLoc >= UseLoc)
235         continue;
236 
237       if (Delta < Distance) {
238         Distance = Delta;
239         UseToBringDefCloserTo = UseInst;
240         MultiUserLookup[UseToBringDefCloserToCount++] = UseToBringDefCloserTo;
241       }
242     }
243 
244     const auto BBE = MBB->instr_end();
245     MachineBasicBlock::iterator DefI = BBE;
246     MachineBasicBlock::iterator UseI = BBE;
247 
248     for (auto BBI = MBB->instr_begin(); BBI != BBE; ++BBI) {
249 
250       if (DefI != BBE && UseI != BBE)
251         break;
252 
253       if (&*BBI == Def) {
254         DefI = BBI;
255         continue;
256       }
257 
258       if (&*BBI == UseToBringDefCloserTo) {
259         UseI = BBI;
260         continue;
261       }
262     }
263 
264     if (DefI == BBE || UseI == BBE)
265       continue;
266 
267     LLVM_DEBUG({
268       dbgs() << "Splicing ";
269       DefI->dump();
270       dbgs() << " right before: ";
271       UseI->dump();
272     });
273 
274     MultiUsers[UseToBringDefCloserTo].push_back(Def);
275     Changed = true;
276     MBB->splice(UseI, MBB, DefI);
277   }
278 
279   // Sort the defs for users of multiple defs lexographically.
280   for (const auto &E : MultiUserLookup) {
281 
282     auto UseI =
283         std::find_if(MBB->instr_begin(), MBB->instr_end(),
284                      [&](MachineInstr &MI) -> bool { return &MI == E.second; });
285 
286     if (UseI == MBB->instr_end())
287       continue;
288 
289     LLVM_DEBUG(
290         dbgs() << "Rescheduling Multi-Use Instructions Lexographically.";);
291     Changed |= rescheduleLexographically(
292         MultiUsers[E.second], MBB,
293         [&]() -> MachineBasicBlock::iterator { return UseI; });
294   }
295 
296   PseudoIdempotentInstCount = PseudoIdempotentInstructions.size();
297   LLVM_DEBUG(
298       dbgs() << "Rescheduling Idempotent Instructions Lexographically.";);
299   Changed |= rescheduleLexographically(
300       PseudoIdempotentInstructions, MBB,
301       [&]() -> MachineBasicBlock::iterator { return MBB->begin(); });
302 
303   return Changed;
304 }
305 
306 static bool propagateLocalCopies(MachineBasicBlock *MBB) {
307   bool Changed = false;
308   MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
309 
310   std::vector<MachineInstr *> Copies;
311   for (MachineInstr &MI : MBB->instrs()) {
312     if (MI.isCopy())
313       Copies.push_back(&MI);
314   }
315 
316   for (MachineInstr *MI : Copies) {
317 
318     if (!MI->getOperand(0).isReg())
319       continue;
320     if (!MI->getOperand(1).isReg())
321       continue;
322 
323     const Register Dst = MI->getOperand(0).getReg();
324     const Register Src = MI->getOperand(1).getReg();
325 
326     if (!Register::isVirtualRegister(Dst))
327       continue;
328     if (!Register::isVirtualRegister(Src))
329       continue;
330     // Not folding COPY instructions if regbankselect has not set the RCs.
331     // Why are we only considering Register Classes? Because the verifier
332     // sometimes gets upset if the register classes don't match even if the
333     // types do. A future patch might add COPY folding for matching types in
334     // pre-registerbankselect code.
335     if (!MRI.getRegClassOrNull(Dst))
336       continue;
337     if (MRI.getRegClass(Dst) != MRI.getRegClass(Src))
338       continue;
339 
340     std::vector<MachineOperand *> Uses;
341     for (auto UI = MRI.use_begin(Dst); UI != MRI.use_end(); ++UI)
342       Uses.push_back(&*UI);
343     for (auto *MO : Uses)
344       MO->setReg(Src);
345 
346     Changed = true;
347     MI->eraseFromParent();
348   }
349 
350   return Changed;
351 }
352 
353 static bool doDefKillClear(MachineBasicBlock *MBB) {
354   bool Changed = false;
355 
356   for (auto &MI : *MBB) {
357     for (auto &MO : MI.operands()) {
358       if (!MO.isReg())
359         continue;
360       if (!MO.isDef() && MO.isKill()) {
361         Changed = true;
362         MO.setIsKill(false);
363       }
364 
365       if (MO.isDef() && MO.isDead()) {
366         Changed = true;
367         MO.setIsDead(false);
368       }
369     }
370   }
371 
372   return Changed;
373 }
374 
375 static bool runOnBasicBlock(MachineBasicBlock *MBB,
376                             std::vector<StringRef> &bbNames,
377                             unsigned &basicBlockNum, NamedVRegCursor &NVC) {
378 
379   if (CanonicalizeBasicBlockNumber != ~0U) {
380     if (CanonicalizeBasicBlockNumber != basicBlockNum++)
381       return false;
382     LLVM_DEBUG(dbgs() << "\n Canonicalizing BasicBlock " << MBB->getName()
383                       << "\n";);
384   }
385 
386   if (llvm::find(bbNames, MBB->getName()) != bbNames.end()) {
387     LLVM_DEBUG({
388       dbgs() << "Found potentially duplicate BasicBlocks: " << MBB->getName()
389              << "\n";
390     });
391     return false;
392   }
393 
394   LLVM_DEBUG({
395     dbgs() << "\n\n  NEW BASIC BLOCK: " << MBB->getName() << "  \n\n";
396     dbgs() << "\n\n================================================\n\n";
397   });
398 
399   bool Changed = false;
400   MachineFunction &MF = *MBB->getParent();
401   MachineRegisterInfo &MRI = MF.getRegInfo();
402 
403   bbNames.push_back(MBB->getName());
404   LLVM_DEBUG(dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << "\n\n";);
405 
406   LLVM_DEBUG(dbgs() << "MBB Before Canonical Copy Propagation:\n";
407              MBB->dump(););
408   Changed |= propagateLocalCopies(MBB);
409   LLVM_DEBUG(dbgs() << "MBB After Canonical Copy Propagation:\n"; MBB->dump(););
410 
411   LLVM_DEBUG(dbgs() << "MBB Before Scheduling:\n"; MBB->dump(););
412   unsigned IdempotentInstCount = 0;
413   Changed |= rescheduleCanonically(IdempotentInstCount, MBB);
414   LLVM_DEBUG(dbgs() << "MBB After Scheduling:\n"; MBB->dump(););
415 
416   Changed |= NVC.renameVRegs(MBB);
417 
418   // Here we renumber the def vregs for the idempotent instructions from the top
419   // of the MachineBasicBlock so that they are named in the order that we sorted
420   // them alphabetically. Eventually we wont need SkipVRegs because we will use
421   // named vregs instead.
422   if (IdempotentInstCount)
423     NVC.skipVRegs();
424 
425   auto MII = MBB->begin();
426   for (unsigned i = 0; i < IdempotentInstCount && MII != MBB->end(); ++i) {
427     MachineInstr &MI = *MII++;
428     Changed = true;
429     Register vRegToRename = MI.getOperand(0).getReg();
430     auto Rename = NVC.createVirtualRegister(vRegToRename);
431 
432     std::vector<MachineOperand *> RenameMOs;
433     for (auto &MO : MRI.reg_operands(vRegToRename)) {
434       RenameMOs.push_back(&MO);
435     }
436 
437     for (auto *MO : RenameMOs) {
438       MO->setReg(Rename);
439     }
440   }
441 
442   Changed |= doDefKillClear(MBB);
443 
444   LLVM_DEBUG(dbgs() << "Updated MachineBasicBlock:\n"; MBB->dump();
445              dbgs() << "\n";);
446   LLVM_DEBUG(
447       dbgs() << "\n\n================================================\n\n");
448   return Changed;
449 }
450 
451 bool MIRCanonicalizer::runOnMachineFunction(MachineFunction &MF) {
452 
453   static unsigned functionNum = 0;
454   if (CanonicalizeFunctionNumber != ~0U) {
455     if (CanonicalizeFunctionNumber != functionNum++)
456       return false;
457     LLVM_DEBUG(dbgs() << "\n Canonicalizing Function " << MF.getName()
458                       << "\n";);
459   }
460 
461   // we need a valid vreg to create a vreg type for skipping all those
462   // stray vreg numbers so reach alignment/canonical vreg values.
463   std::vector<MachineBasicBlock *> RPOList = GetRPOList(MF);
464 
465   LLVM_DEBUG(
466       dbgs() << "\n\n  NEW MACHINE FUNCTION: " << MF.getName() << "  \n\n";
467       dbgs() << "\n\n================================================\n\n";
468       dbgs() << "Total Basic Blocks: " << RPOList.size() << "\n";
469       for (auto MBB
470            : RPOList) { dbgs() << MBB->getName() << "\n"; } dbgs()
471       << "\n\n================================================\n\n";);
472 
473   std::vector<StringRef> BBNames;
474 
475   unsigned BBNum = 0;
476 
477   bool Changed = false;
478 
479   MachineRegisterInfo &MRI = MF.getRegInfo();
480   NamedVRegCursor NVC(MRI);
481   for (auto MBB : RPOList)
482     Changed |= runOnBasicBlock(MBB, BBNames, BBNum, NVC);
483 
484   return Changed;
485 }
486