1 //===-------------- MIRCanonicalizer.cpp - MIR Canonicalizer --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // The purpose of this pass is to employ a canonical code transformation so
10 // that code compiled with slightly different IR passes can be diffed more
11 // effectively than otherwise. This is done by renaming vregs in a given
12 // LiveRange in a canonical way. This pass also does a pseudo-scheduling to
13 // move defs closer to their use inorder to reduce diffs caused by slightly
14 // different schedules.
15 //
16 // Basic Usage:
17 //
18 // llc -o - -run-pass mir-canonicalizer example.mir
19 //
20 // Reorders instructions canonically.
21 // Renames virtual register operands canonically.
22 // Strips certain MIR artifacts (optionally).
23 //
24 //===----------------------------------------------------------------------===//
25
26 #include "MIRVRegNamerUtils.h"
27 #include "llvm/ADT/PostOrderIterator.h"
28 #include "llvm/ADT/STLExtras.h"
29 #include "llvm/CodeGen/MachineFunctionPass.h"
30 #include "llvm/CodeGen/MachineRegisterInfo.h"
31 #include "llvm/InitializePasses.h"
32 #include "llvm/Pass.h"
33 #include "llvm/Support/Debug.h"
34 #include "llvm/Support/raw_ostream.h"
35
36 using namespace llvm;
37
38 #define DEBUG_TYPE "mir-canonicalizer"
39
40 static cl::opt<unsigned>
41 CanonicalizeFunctionNumber("canon-nth-function", cl::Hidden, cl::init(~0u),
42 cl::value_desc("N"),
43 cl::desc("Function number to canonicalize."));
44
45 namespace {
46
47 class MIRCanonicalizer : public MachineFunctionPass {
48 public:
49 static char ID;
MIRCanonicalizer()50 MIRCanonicalizer() : MachineFunctionPass(ID) {}
51
getPassName() const52 StringRef getPassName() const override {
53 return "Rename register operands in a canonical ordering.";
54 }
55
getAnalysisUsage(AnalysisUsage & AU) const56 void getAnalysisUsage(AnalysisUsage &AU) const override {
57 AU.setPreservesCFG();
58 MachineFunctionPass::getAnalysisUsage(AU);
59 }
60
61 bool runOnMachineFunction(MachineFunction &MF) override;
62 };
63
64 } // end anonymous namespace
65
66 char MIRCanonicalizer::ID;
67
68 char &llvm::MIRCanonicalizerID = MIRCanonicalizer::ID;
69
70 INITIALIZE_PASS_BEGIN(MIRCanonicalizer, "mir-canonicalizer",
71 "Rename Register Operands Canonically", false, false)
72
73 INITIALIZE_PASS_END(MIRCanonicalizer, "mir-canonicalizer",
74 "Rename Register Operands Canonically", false, false)
75
GetRPOList(MachineFunction & MF)76 static std::vector<MachineBasicBlock *> GetRPOList(MachineFunction &MF) {
77 if (MF.empty())
78 return {};
79 ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin());
80 std::vector<MachineBasicBlock *> RPOList;
81 append_range(RPOList, RPOT);
82
83 return RPOList;
84 }
85
86 static bool
rescheduleLexographically(std::vector<MachineInstr * > instructions,MachineBasicBlock * MBB,std::function<MachineBasicBlock::iterator ()> getPos)87 rescheduleLexographically(std::vector<MachineInstr *> instructions,
88 MachineBasicBlock *MBB,
89 std::function<MachineBasicBlock::iterator()> getPos) {
90
91 bool Changed = false;
92 using StringInstrPair = std::pair<std::string, MachineInstr *>;
93 std::vector<StringInstrPair> StringInstrMap;
94
95 for (auto *II : instructions) {
96 std::string S;
97 raw_string_ostream OS(S);
98 II->print(OS);
99 OS.flush();
100
101 // Trim the assignment, or start from the beginning in the case of a store.
102 const size_t i = S.find('=');
103 StringInstrMap.push_back({(i == std::string::npos) ? S : S.substr(i), II});
104 }
105
106 llvm::sort(StringInstrMap, llvm::less_first());
107
108 for (auto &II : StringInstrMap) {
109
110 LLVM_DEBUG({
111 dbgs() << "Splicing ";
112 II.second->dump();
113 dbgs() << " right before: ";
114 getPos()->dump();
115 });
116
117 Changed = true;
118 MBB->splice(getPos(), MBB, II.second);
119 }
120
121 return Changed;
122 }
123
rescheduleCanonically(unsigned & PseudoIdempotentInstCount,MachineBasicBlock * MBB)124 static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount,
125 MachineBasicBlock *MBB) {
126
127 bool Changed = false;
128
129 // Calculates the distance of MI from the beginning of its parent BB.
130 auto getInstrIdx = [](const MachineInstr &MI) {
131 unsigned i = 0;
132 for (const auto &CurMI : *MI.getParent()) {
133 if (&CurMI == &MI)
134 return i;
135 i++;
136 }
137 return ~0U;
138 };
139
140 // Pre-Populate vector of instructions to reschedule so that we don't
141 // clobber the iterator.
142 std::vector<MachineInstr *> Instructions;
143 for (auto &MI : *MBB) {
144 Instructions.push_back(&MI);
145 }
146
147 std::map<MachineInstr *, std::vector<MachineInstr *>> MultiUsers;
148 std::map<unsigned, MachineInstr *> MultiUserLookup;
149 unsigned UseToBringDefCloserToCount = 0;
150 std::vector<MachineInstr *> PseudoIdempotentInstructions;
151 std::vector<unsigned> PhysRegDefs;
152 for (auto *II : Instructions) {
153 for (unsigned i = 1; i < II->getNumOperands(); i++) {
154 MachineOperand &MO = II->getOperand(i);
155 if (!MO.isReg())
156 continue;
157
158 if (MO.getReg().isVirtual())
159 continue;
160
161 if (!MO.isDef())
162 continue;
163
164 PhysRegDefs.push_back(MO.getReg());
165 }
166 }
167
168 for (auto *II : Instructions) {
169 if (II->getNumOperands() == 0)
170 continue;
171 if (II->mayLoadOrStore())
172 continue;
173
174 MachineOperand &MO = II->getOperand(0);
175 if (!MO.isReg() || !MO.getReg().isVirtual())
176 continue;
177 if (!MO.isDef())
178 continue;
179
180 bool IsPseudoIdempotent = true;
181 for (unsigned i = 1; i < II->getNumOperands(); i++) {
182
183 if (II->getOperand(i).isImm()) {
184 continue;
185 }
186
187 if (II->getOperand(i).isReg()) {
188 if (!II->getOperand(i).getReg().isVirtual())
189 if (!llvm::is_contained(PhysRegDefs, II->getOperand(i).getReg())) {
190 continue;
191 }
192 }
193
194 IsPseudoIdempotent = false;
195 break;
196 }
197
198 if (IsPseudoIdempotent) {
199 PseudoIdempotentInstructions.push_back(II);
200 continue;
201 }
202
203 LLVM_DEBUG(dbgs() << "Operand " << 0 << " of "; II->dump(); MO.dump(););
204
205 MachineInstr *Def = II;
206 unsigned Distance = ~0U;
207 MachineInstr *UseToBringDefCloserTo = nullptr;
208 MachineRegisterInfo *MRI = &MBB->getParent()->getRegInfo();
209 for (auto &UO : MRI->use_nodbg_operands(MO.getReg())) {
210 MachineInstr *UseInst = UO.getParent();
211
212 const unsigned DefLoc = getInstrIdx(*Def);
213 const unsigned UseLoc = getInstrIdx(*UseInst);
214 const unsigned Delta = (UseLoc - DefLoc);
215
216 if (UseInst->getParent() != Def->getParent())
217 continue;
218 if (DefLoc >= UseLoc)
219 continue;
220
221 if (Delta < Distance) {
222 Distance = Delta;
223 UseToBringDefCloserTo = UseInst;
224 MultiUserLookup[UseToBringDefCloserToCount++] = UseToBringDefCloserTo;
225 }
226 }
227
228 const auto BBE = MBB->instr_end();
229 MachineBasicBlock::iterator DefI = BBE;
230 MachineBasicBlock::iterator UseI = BBE;
231
232 for (auto BBI = MBB->instr_begin(); BBI != BBE; ++BBI) {
233
234 if (DefI != BBE && UseI != BBE)
235 break;
236
237 if (&*BBI == Def) {
238 DefI = BBI;
239 continue;
240 }
241
242 if (&*BBI == UseToBringDefCloserTo) {
243 UseI = BBI;
244 continue;
245 }
246 }
247
248 if (DefI == BBE || UseI == BBE)
249 continue;
250
251 LLVM_DEBUG({
252 dbgs() << "Splicing ";
253 DefI->dump();
254 dbgs() << " right before: ";
255 UseI->dump();
256 });
257
258 MultiUsers[UseToBringDefCloserTo].push_back(Def);
259 Changed = true;
260 MBB->splice(UseI, MBB, DefI);
261 }
262
263 // Sort the defs for users of multiple defs lexographically.
264 for (const auto &E : MultiUserLookup) {
265
266 auto UseI = llvm::find_if(MBB->instrs(), [&](MachineInstr &MI) -> bool {
267 return &MI == E.second;
268 });
269
270 if (UseI == MBB->instr_end())
271 continue;
272
273 LLVM_DEBUG(
274 dbgs() << "Rescheduling Multi-Use Instructions Lexographically.";);
275 Changed |= rescheduleLexographically(
276 MultiUsers[E.second], MBB,
277 [&]() -> MachineBasicBlock::iterator { return UseI; });
278 }
279
280 PseudoIdempotentInstCount = PseudoIdempotentInstructions.size();
281 LLVM_DEBUG(
282 dbgs() << "Rescheduling Idempotent Instructions Lexographically.";);
283 Changed |= rescheduleLexographically(
284 PseudoIdempotentInstructions, MBB,
285 [&]() -> MachineBasicBlock::iterator { return MBB->begin(); });
286
287 return Changed;
288 }
289
propagateLocalCopies(MachineBasicBlock * MBB)290 static bool propagateLocalCopies(MachineBasicBlock *MBB) {
291 bool Changed = false;
292 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
293
294 std::vector<MachineInstr *> Copies;
295 for (MachineInstr &MI : MBB->instrs()) {
296 if (MI.isCopy())
297 Copies.push_back(&MI);
298 }
299
300 for (MachineInstr *MI : Copies) {
301
302 if (!MI->getOperand(0).isReg())
303 continue;
304 if (!MI->getOperand(1).isReg())
305 continue;
306
307 const Register Dst = MI->getOperand(0).getReg();
308 const Register Src = MI->getOperand(1).getReg();
309
310 if (!Dst.isVirtual())
311 continue;
312 if (!Src.isVirtual())
313 continue;
314 // Not folding COPY instructions if regbankselect has not set the RCs.
315 // Why are we only considering Register Classes? Because the verifier
316 // sometimes gets upset if the register classes don't match even if the
317 // types do. A future patch might add COPY folding for matching types in
318 // pre-registerbankselect code.
319 if (!MRI.getRegClassOrNull(Dst))
320 continue;
321 if (MRI.getRegClass(Dst) != MRI.getRegClass(Src))
322 continue;
323
324 std::vector<MachineOperand *> Uses;
325 for (MachineOperand &MO : MRI.use_operands(Dst))
326 Uses.push_back(&MO);
327 for (auto *MO : Uses)
328 MO->setReg(Src);
329
330 Changed = true;
331 MI->eraseFromParent();
332 }
333
334 return Changed;
335 }
336
doDefKillClear(MachineBasicBlock * MBB)337 static bool doDefKillClear(MachineBasicBlock *MBB) {
338 bool Changed = false;
339
340 for (auto &MI : *MBB) {
341 for (auto &MO : MI.operands()) {
342 if (!MO.isReg())
343 continue;
344 if (!MO.isDef() && MO.isKill()) {
345 Changed = true;
346 MO.setIsKill(false);
347 }
348
349 if (MO.isDef() && MO.isDead()) {
350 Changed = true;
351 MO.setIsDead(false);
352 }
353 }
354 }
355
356 return Changed;
357 }
358
runOnBasicBlock(MachineBasicBlock * MBB,unsigned BasicBlockNum,VRegRenamer & Renamer)359 static bool runOnBasicBlock(MachineBasicBlock *MBB,
360 unsigned BasicBlockNum, VRegRenamer &Renamer) {
361 LLVM_DEBUG({
362 dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << " \n\n";
363 dbgs() << "\n\n================================================\n\n";
364 });
365
366 bool Changed = false;
367
368 LLVM_DEBUG(dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << "\n\n";);
369
370 LLVM_DEBUG(dbgs() << "MBB Before Canonical Copy Propagation:\n";
371 MBB->dump(););
372 Changed |= propagateLocalCopies(MBB);
373 LLVM_DEBUG(dbgs() << "MBB After Canonical Copy Propagation:\n"; MBB->dump(););
374
375 LLVM_DEBUG(dbgs() << "MBB Before Scheduling:\n"; MBB->dump(););
376 unsigned IdempotentInstCount = 0;
377 Changed |= rescheduleCanonically(IdempotentInstCount, MBB);
378 LLVM_DEBUG(dbgs() << "MBB After Scheduling:\n"; MBB->dump(););
379
380 Changed |= Renamer.renameVRegs(MBB, BasicBlockNum);
381
382 // TODO: Consider dropping this. Dropping kill defs is probably not
383 // semantically sound.
384 Changed |= doDefKillClear(MBB);
385
386 LLVM_DEBUG(dbgs() << "Updated MachineBasicBlock:\n"; MBB->dump();
387 dbgs() << "\n";);
388 LLVM_DEBUG(
389 dbgs() << "\n\n================================================\n\n");
390 return Changed;
391 }
392
runOnMachineFunction(MachineFunction & MF)393 bool MIRCanonicalizer::runOnMachineFunction(MachineFunction &MF) {
394
395 static unsigned functionNum = 0;
396 if (CanonicalizeFunctionNumber != ~0U) {
397 if (CanonicalizeFunctionNumber != functionNum++)
398 return false;
399 LLVM_DEBUG(dbgs() << "\n Canonicalizing Function " << MF.getName()
400 << "\n";);
401 }
402
403 // we need a valid vreg to create a vreg type for skipping all those
404 // stray vreg numbers so reach alignment/canonical vreg values.
405 std::vector<MachineBasicBlock *> RPOList = GetRPOList(MF);
406
407 LLVM_DEBUG(
408 dbgs() << "\n\n NEW MACHINE FUNCTION: " << MF.getName() << " \n\n";
409 dbgs() << "\n\n================================================\n\n";
410 dbgs() << "Total Basic Blocks: " << RPOList.size() << "\n";
411 for (auto MBB
412 : RPOList) { dbgs() << MBB->getName() << "\n"; } dbgs()
413 << "\n\n================================================\n\n";);
414
415 unsigned BBNum = 0;
416 bool Changed = false;
417 MachineRegisterInfo &MRI = MF.getRegInfo();
418 VRegRenamer Renamer(MRI);
419 for (auto *MBB : RPOList)
420 Changed |= runOnBasicBlock(MBB, BBNum++, Renamer);
421
422 return Changed;
423 }
424