xref: /freebsd/contrib/llvm-project/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp (revision 38a52bd3b5cac3da6f7f6eef3dd050e6aa08ebb3)
1 //===-------------- MIRCanonicalizer.cpp - MIR Canonicalizer --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // The purpose of this pass is to employ a canonical code transformation so
10 // that code compiled with slightly different IR passes can be diffed more
11 // effectively than otherwise. This is done by renaming vregs in a given
12 // LiveRange in a canonical way. This pass also does a pseudo-scheduling to
13 // move defs closer to their use inorder to reduce diffs caused by slightly
14 // different schedules.
15 //
16 // Basic Usage:
17 //
18 // llc -o - -run-pass mir-canonicalizer example.mir
19 //
20 // Reorders instructions canonically.
21 // Renames virtual register operands canonically.
22 // Strips certain MIR artifacts (optionally).
23 //
24 //===----------------------------------------------------------------------===//
25 
26 #include "MIRVRegNamerUtils.h"
27 #include "llvm/ADT/PostOrderIterator.h"
28 #include "llvm/ADT/STLExtras.h"
29 #include "llvm/CodeGen/MachineFunctionPass.h"
30 #include "llvm/CodeGen/MachineInstrBuilder.h"
31 #include "llvm/CodeGen/MachineRegisterInfo.h"
32 #include "llvm/CodeGen/Passes.h"
33 #include "llvm/InitializePasses.h"
34 #include "llvm/Support/Debug.h"
35 #include "llvm/Support/raw_ostream.h"
36 
37 #include <queue>
38 
39 using namespace llvm;
40 
41 #define DEBUG_TYPE "mir-canonicalizer"
42 
43 static cl::opt<unsigned>
44     CanonicalizeFunctionNumber("canon-nth-function", cl::Hidden, cl::init(~0u),
45                                cl::value_desc("N"),
46                                cl::desc("Function number to canonicalize."));
47 
48 namespace {
49 
50 class MIRCanonicalizer : public MachineFunctionPass {
51 public:
52   static char ID;
53   MIRCanonicalizer() : MachineFunctionPass(ID) {}
54 
55   StringRef getPassName() const override {
56     return "Rename register operands in a canonical ordering.";
57   }
58 
59   void getAnalysisUsage(AnalysisUsage &AU) const override {
60     AU.setPreservesCFG();
61     MachineFunctionPass::getAnalysisUsage(AU);
62   }
63 
64   bool runOnMachineFunction(MachineFunction &MF) override;
65 };
66 
67 } // end anonymous namespace
68 
69 char MIRCanonicalizer::ID;
70 
71 char &llvm::MIRCanonicalizerID = MIRCanonicalizer::ID;
72 
73 INITIALIZE_PASS_BEGIN(MIRCanonicalizer, "mir-canonicalizer",
74                       "Rename Register Operands Canonically", false, false)
75 
76 INITIALIZE_PASS_END(MIRCanonicalizer, "mir-canonicalizer",
77                     "Rename Register Operands Canonically", false, false)
78 
79 static std::vector<MachineBasicBlock *> GetRPOList(MachineFunction &MF) {
80   if (MF.empty())
81     return {};
82   ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin());
83   std::vector<MachineBasicBlock *> RPOList;
84   append_range(RPOList, RPOT);
85 
86   return RPOList;
87 }
88 
89 static bool
90 rescheduleLexographically(std::vector<MachineInstr *> instructions,
91                           MachineBasicBlock *MBB,
92                           std::function<MachineBasicBlock::iterator()> getPos) {
93 
94   bool Changed = false;
95   using StringInstrPair = std::pair<std::string, MachineInstr *>;
96   std::vector<StringInstrPair> StringInstrMap;
97 
98   for (auto *II : instructions) {
99     std::string S;
100     raw_string_ostream OS(S);
101     II->print(OS);
102     OS.flush();
103 
104     // Trim the assignment, or start from the beginning in the case of a store.
105     const size_t i = S.find('=');
106     StringInstrMap.push_back({(i == std::string::npos) ? S : S.substr(i), II});
107   }
108 
109   llvm::sort(StringInstrMap,
110              [](const StringInstrPair &a, const StringInstrPair &b) -> bool {
111                return (a.first < b.first);
112              });
113 
114   for (auto &II : StringInstrMap) {
115 
116     LLVM_DEBUG({
117       dbgs() << "Splicing ";
118       II.second->dump();
119       dbgs() << " right before: ";
120       getPos()->dump();
121     });
122 
123     Changed = true;
124     MBB->splice(getPos(), MBB, II.second);
125   }
126 
127   return Changed;
128 }
129 
130 static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount,
131                                   MachineBasicBlock *MBB) {
132 
133   bool Changed = false;
134 
135   // Calculates the distance of MI from the beginning of its parent BB.
136   auto getInstrIdx = [](const MachineInstr &MI) {
137     unsigned i = 0;
138     for (auto &CurMI : *MI.getParent()) {
139       if (&CurMI == &MI)
140         return i;
141       i++;
142     }
143     return ~0U;
144   };
145 
146   // Pre-Populate vector of instructions to reschedule so that we don't
147   // clobber the iterator.
148   std::vector<MachineInstr *> Instructions;
149   for (auto &MI : *MBB) {
150     Instructions.push_back(&MI);
151   }
152 
153   std::map<MachineInstr *, std::vector<MachineInstr *>> MultiUsers;
154   std::map<unsigned, MachineInstr *> MultiUserLookup;
155   unsigned UseToBringDefCloserToCount = 0;
156   std::vector<MachineInstr *> PseudoIdempotentInstructions;
157   std::vector<unsigned> PhysRegDefs;
158   for (auto *II : Instructions) {
159     for (unsigned i = 1; i < II->getNumOperands(); i++) {
160       MachineOperand &MO = II->getOperand(i);
161       if (!MO.isReg())
162         continue;
163 
164       if (Register::isVirtualRegister(MO.getReg()))
165         continue;
166 
167       if (!MO.isDef())
168         continue;
169 
170       PhysRegDefs.push_back(MO.getReg());
171     }
172   }
173 
174   for (auto *II : Instructions) {
175     if (II->getNumOperands() == 0)
176       continue;
177     if (II->mayLoadOrStore())
178       continue;
179 
180     MachineOperand &MO = II->getOperand(0);
181     if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg()))
182       continue;
183     if (!MO.isDef())
184       continue;
185 
186     bool IsPseudoIdempotent = true;
187     for (unsigned i = 1; i < II->getNumOperands(); i++) {
188 
189       if (II->getOperand(i).isImm()) {
190         continue;
191       }
192 
193       if (II->getOperand(i).isReg()) {
194         if (!Register::isVirtualRegister(II->getOperand(i).getReg()))
195           if (!llvm::is_contained(PhysRegDefs, II->getOperand(i).getReg())) {
196             continue;
197           }
198       }
199 
200       IsPseudoIdempotent = false;
201       break;
202     }
203 
204     if (IsPseudoIdempotent) {
205       PseudoIdempotentInstructions.push_back(II);
206       continue;
207     }
208 
209     LLVM_DEBUG(dbgs() << "Operand " << 0 << " of "; II->dump(); MO.dump(););
210 
211     MachineInstr *Def = II;
212     unsigned Distance = ~0U;
213     MachineInstr *UseToBringDefCloserTo = nullptr;
214     MachineRegisterInfo *MRI = &MBB->getParent()->getRegInfo();
215     for (auto &UO : MRI->use_nodbg_operands(MO.getReg())) {
216       MachineInstr *UseInst = UO.getParent();
217 
218       const unsigned DefLoc = getInstrIdx(*Def);
219       const unsigned UseLoc = getInstrIdx(*UseInst);
220       const unsigned Delta = (UseLoc - DefLoc);
221 
222       if (UseInst->getParent() != Def->getParent())
223         continue;
224       if (DefLoc >= UseLoc)
225         continue;
226 
227       if (Delta < Distance) {
228         Distance = Delta;
229         UseToBringDefCloserTo = UseInst;
230         MultiUserLookup[UseToBringDefCloserToCount++] = UseToBringDefCloserTo;
231       }
232     }
233 
234     const auto BBE = MBB->instr_end();
235     MachineBasicBlock::iterator DefI = BBE;
236     MachineBasicBlock::iterator UseI = BBE;
237 
238     for (auto BBI = MBB->instr_begin(); BBI != BBE; ++BBI) {
239 
240       if (DefI != BBE && UseI != BBE)
241         break;
242 
243       if (&*BBI == Def) {
244         DefI = BBI;
245         continue;
246       }
247 
248       if (&*BBI == UseToBringDefCloserTo) {
249         UseI = BBI;
250         continue;
251       }
252     }
253 
254     if (DefI == BBE || UseI == BBE)
255       continue;
256 
257     LLVM_DEBUG({
258       dbgs() << "Splicing ";
259       DefI->dump();
260       dbgs() << " right before: ";
261       UseI->dump();
262     });
263 
264     MultiUsers[UseToBringDefCloserTo].push_back(Def);
265     Changed = true;
266     MBB->splice(UseI, MBB, DefI);
267   }
268 
269   // Sort the defs for users of multiple defs lexographically.
270   for (const auto &E : MultiUserLookup) {
271 
272     auto UseI = llvm::find_if(MBB->instrs(), [&](MachineInstr &MI) -> bool {
273       return &MI == E.second;
274     });
275 
276     if (UseI == MBB->instr_end())
277       continue;
278 
279     LLVM_DEBUG(
280         dbgs() << "Rescheduling Multi-Use Instructions Lexographically.";);
281     Changed |= rescheduleLexographically(
282         MultiUsers[E.second], MBB,
283         [&]() -> MachineBasicBlock::iterator { return UseI; });
284   }
285 
286   PseudoIdempotentInstCount = PseudoIdempotentInstructions.size();
287   LLVM_DEBUG(
288       dbgs() << "Rescheduling Idempotent Instructions Lexographically.";);
289   Changed |= rescheduleLexographically(
290       PseudoIdempotentInstructions, MBB,
291       [&]() -> MachineBasicBlock::iterator { return MBB->begin(); });
292 
293   return Changed;
294 }
295 
296 static bool propagateLocalCopies(MachineBasicBlock *MBB) {
297   bool Changed = false;
298   MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
299 
300   std::vector<MachineInstr *> Copies;
301   for (MachineInstr &MI : MBB->instrs()) {
302     if (MI.isCopy())
303       Copies.push_back(&MI);
304   }
305 
306   for (MachineInstr *MI : Copies) {
307 
308     if (!MI->getOperand(0).isReg())
309       continue;
310     if (!MI->getOperand(1).isReg())
311       continue;
312 
313     const Register Dst = MI->getOperand(0).getReg();
314     const Register Src = MI->getOperand(1).getReg();
315 
316     if (!Register::isVirtualRegister(Dst))
317       continue;
318     if (!Register::isVirtualRegister(Src))
319       continue;
320     // Not folding COPY instructions if regbankselect has not set the RCs.
321     // Why are we only considering Register Classes? Because the verifier
322     // sometimes gets upset if the register classes don't match even if the
323     // types do. A future patch might add COPY folding for matching types in
324     // pre-registerbankselect code.
325     if (!MRI.getRegClassOrNull(Dst))
326       continue;
327     if (MRI.getRegClass(Dst) != MRI.getRegClass(Src))
328       continue;
329 
330     std::vector<MachineOperand *> Uses;
331     for (MachineOperand &MO : MRI.use_operands(Dst))
332       Uses.push_back(&MO);
333     for (auto *MO : Uses)
334       MO->setReg(Src);
335 
336     Changed = true;
337     MI->eraseFromParent();
338   }
339 
340   return Changed;
341 }
342 
343 static bool doDefKillClear(MachineBasicBlock *MBB) {
344   bool Changed = false;
345 
346   for (auto &MI : *MBB) {
347     for (auto &MO : MI.operands()) {
348       if (!MO.isReg())
349         continue;
350       if (!MO.isDef() && MO.isKill()) {
351         Changed = true;
352         MO.setIsKill(false);
353       }
354 
355       if (MO.isDef() && MO.isDead()) {
356         Changed = true;
357         MO.setIsDead(false);
358       }
359     }
360   }
361 
362   return Changed;
363 }
364 
365 static bool runOnBasicBlock(MachineBasicBlock *MBB,
366                             unsigned BasicBlockNum, VRegRenamer &Renamer) {
367   LLVM_DEBUG({
368     dbgs() << "\n\n  NEW BASIC BLOCK: " << MBB->getName() << "  \n\n";
369     dbgs() << "\n\n================================================\n\n";
370   });
371 
372   bool Changed = false;
373 
374   LLVM_DEBUG(dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << "\n\n";);
375 
376   LLVM_DEBUG(dbgs() << "MBB Before Canonical Copy Propagation:\n";
377              MBB->dump(););
378   Changed |= propagateLocalCopies(MBB);
379   LLVM_DEBUG(dbgs() << "MBB After Canonical Copy Propagation:\n"; MBB->dump(););
380 
381   LLVM_DEBUG(dbgs() << "MBB Before Scheduling:\n"; MBB->dump(););
382   unsigned IdempotentInstCount = 0;
383   Changed |= rescheduleCanonically(IdempotentInstCount, MBB);
384   LLVM_DEBUG(dbgs() << "MBB After Scheduling:\n"; MBB->dump(););
385 
386   Changed |= Renamer.renameVRegs(MBB, BasicBlockNum);
387 
388   // TODO: Consider dropping this. Dropping kill defs is probably not
389   // semantically sound.
390   Changed |= doDefKillClear(MBB);
391 
392   LLVM_DEBUG(dbgs() << "Updated MachineBasicBlock:\n"; MBB->dump();
393              dbgs() << "\n";);
394   LLVM_DEBUG(
395       dbgs() << "\n\n================================================\n\n");
396   return Changed;
397 }
398 
399 bool MIRCanonicalizer::runOnMachineFunction(MachineFunction &MF) {
400 
401   static unsigned functionNum = 0;
402   if (CanonicalizeFunctionNumber != ~0U) {
403     if (CanonicalizeFunctionNumber != functionNum++)
404       return false;
405     LLVM_DEBUG(dbgs() << "\n Canonicalizing Function " << MF.getName()
406                       << "\n";);
407   }
408 
409   // we need a valid vreg to create a vreg type for skipping all those
410   // stray vreg numbers so reach alignment/canonical vreg values.
411   std::vector<MachineBasicBlock *> RPOList = GetRPOList(MF);
412 
413   LLVM_DEBUG(
414       dbgs() << "\n\n  NEW MACHINE FUNCTION: " << MF.getName() << "  \n\n";
415       dbgs() << "\n\n================================================\n\n";
416       dbgs() << "Total Basic Blocks: " << RPOList.size() << "\n";
417       for (auto MBB
418            : RPOList) { dbgs() << MBB->getName() << "\n"; } dbgs()
419       << "\n\n================================================\n\n";);
420 
421   unsigned BBNum = 0;
422   bool Changed = false;
423   MachineRegisterInfo &MRI = MF.getRegInfo();
424   VRegRenamer Renamer(MRI);
425   for (auto MBB : RPOList)
426     Changed |= runOnBasicBlock(MBB, BBNum++, Renamer);
427 
428   return Changed;
429 }
430