xref: /freebsd/contrib/llvm-project/llvm/lib/CodeGen/PeepholeOptimizer.cpp (revision 770cf0a5f02dc8983a89c6568d741fbc25baa999)
1 //===- PeepholeOptimizer.cpp - Peephole Optimizations ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Perform peephole optimizations on the machine code:
10 //
11 // - Optimize Extensions
12 //
13 //     Optimization of sign / zero extension instructions. It may be extended to
14 //     handle other instructions with similar properties.
15 //
16 //     On some targets, some instructions, e.g. X86 sign / zero extension, may
17 //     leave the source value in the lower part of the result. This optimization
18 //     will replace some uses of the pre-extension value with uses of the
19 //     sub-register of the results.
20 //
21 // - Optimize Comparisons
22 //
23 //     Optimization of comparison instructions. For instance, in this code:
24 //
25 //       sub r1, 1
26 //       cmp r1, 0
27 //       bz  L1
28 //
29 //     If the "sub" instruction all ready sets (or could be modified to set) the
30 //     same flag that the "cmp" instruction sets and that "bz" uses, then we can
31 //     eliminate the "cmp" instruction.
32 //
33 //     Another instance, in this code:
34 //
35 //       sub r1, r3 | sub r1, imm
36 //       cmp r3, r1 or cmp r1, r3 | cmp r1, imm
37 //       bge L1
38 //
39 //     If the branch instruction can use flag from "sub", then we can replace
40 //     "sub" with "subs" and eliminate the "cmp" instruction.
41 //
42 // - Optimize Loads:
43 //
44 //     Loads that can be folded into a later instruction. A load is foldable
45 //     if it loads to virtual registers and the virtual register defined has
46 //     a single use.
47 //
48 // - Optimize Copies and Bitcast (more generally, target specific copies):
49 //
50 //     Rewrite copies and bitcasts to avoid cross register bank copies
51 //     when possible.
52 //     E.g., Consider the following example, where capital and lower
53 //     letters denote different register file:
54 //     b = copy A <-- cross-bank copy
55 //     C = copy b <-- cross-bank copy
56 //   =>
57 //     b = copy A <-- cross-bank copy
58 //     C = copy A <-- same-bank copy
59 //
60 //     E.g., for bitcast:
61 //     b = bitcast A <-- cross-bank copy
62 //     C = bitcast b <-- cross-bank copy
63 //   =>
64 //     b = bitcast A <-- cross-bank copy
65 //     C = copy A    <-- same-bank copy
66 //===----------------------------------------------------------------------===//
67 
68 #include "llvm/CodeGen/PeepholeOptimizer.h"
69 #include "llvm/ADT/DenseMap.h"
70 #include "llvm/ADT/SmallPtrSet.h"
71 #include "llvm/ADT/SmallSet.h"
72 #include "llvm/ADT/SmallVector.h"
73 #include "llvm/ADT/Statistic.h"
74 #include "llvm/CodeGen/MachineBasicBlock.h"
75 #include "llvm/CodeGen/MachineDominators.h"
76 #include "llvm/CodeGen/MachineFunction.h"
77 #include "llvm/CodeGen/MachineFunctionPass.h"
78 #include "llvm/CodeGen/MachineInstr.h"
79 #include "llvm/CodeGen/MachineInstrBuilder.h"
80 #include "llvm/CodeGen/MachineLoopInfo.h"
81 #include "llvm/CodeGen/MachineOperand.h"
82 #include "llvm/CodeGen/MachinePassManager.h"
83 #include "llvm/CodeGen/MachineRegisterInfo.h"
84 #include "llvm/CodeGen/TargetInstrInfo.h"
85 #include "llvm/CodeGen/TargetOpcodes.h"
86 #include "llvm/CodeGen/TargetRegisterInfo.h"
87 #include "llvm/CodeGen/TargetSubtargetInfo.h"
88 #include "llvm/InitializePasses.h"
89 #include "llvm/MC/LaneBitmask.h"
90 #include "llvm/MC/MCInstrDesc.h"
91 #include "llvm/Pass.h"
92 #include "llvm/Support/CommandLine.h"
93 #include "llvm/Support/Debug.h"
94 #include "llvm/Support/raw_ostream.h"
95 #include <cassert>
96 #include <cstdint>
97 #include <utility>
98 
99 using namespace llvm;
100 using RegSubRegPair = TargetInstrInfo::RegSubRegPair;
101 using RegSubRegPairAndIdx = TargetInstrInfo::RegSubRegPairAndIdx;
102 
103 #define DEBUG_TYPE "peephole-opt"
104 
105 // Optimize Extensions
106 static cl::opt<bool> Aggressive("aggressive-ext-opt", cl::Hidden,
107                                 cl::desc("Aggressive extension optimization"));
108 
109 static cl::opt<bool>
110     DisablePeephole("disable-peephole", cl::Hidden, cl::init(false),
111                     cl::desc("Disable the peephole optimizer"));
112 
113 /// Specifiy whether or not the value tracking looks through
114 /// complex instructions. When this is true, the value tracker
115 /// bails on everything that is not a copy or a bitcast.
116 static cl::opt<bool>
117     DisableAdvCopyOpt("disable-adv-copy-opt", cl::Hidden, cl::init(false),
118                       cl::desc("Disable advanced copy optimization"));
119 
120 static cl::opt<bool> DisableNAPhysCopyOpt(
121     "disable-non-allocatable-phys-copy-opt", cl::Hidden, cl::init(false),
122     cl::desc("Disable non-allocatable physical register copy optimization"));
123 
124 // Limit the number of PHI instructions to process
125 // in PeepholeOptimizer::getNextSource.
126 static cl::opt<unsigned>
127     RewritePHILimit("rewrite-phi-limit", cl::Hidden, cl::init(10),
128                     cl::desc("Limit the length of PHI chains to lookup"));
129 
130 // Limit the length of recurrence chain when evaluating the benefit of
131 // commuting operands.
132 static cl::opt<unsigned> MaxRecurrenceChain(
133     "recurrence-chain-limit", cl::Hidden, cl::init(3),
134     cl::desc("Maximum length of recurrence chain when evaluating the benefit "
135              "of commuting operands"));
136 
137 STATISTIC(NumReuse, "Number of extension results reused");
138 STATISTIC(NumCmps, "Number of compares eliminated");
139 STATISTIC(NumImmFold, "Number of move immediate folded");
140 STATISTIC(NumLoadFold, "Number of loads folded");
141 STATISTIC(NumSelects, "Number of selects optimized");
142 STATISTIC(NumUncoalescableCopies, "Number of uncoalescable copies optimized");
143 STATISTIC(NumRewrittenCopies, "Number of copies rewritten");
144 STATISTIC(NumNAPhysCopies, "Number of non-allocatable physical copies removed");
145 
146 namespace {
147 
148 class ValueTrackerResult;
149 class RecurrenceInstr;
150 
151 /// Interface to query instructions amenable to copy rewriting.
152 class Rewriter {
153 protected:
154   MachineInstr &CopyLike;
155   int CurrentSrcIdx = 0; ///< The index of the source being rewritten.
156 public:
157   Rewriter(MachineInstr &CopyLike) : CopyLike(CopyLike) {}
158   virtual ~Rewriter() = default;
159 
160   /// Get the next rewritable source (SrcReg, SrcSubReg) and
161   /// the related value that it affects (DstReg, DstSubReg).
162   /// A source is considered rewritable if its register class and the
163   /// register class of the related DstReg may not be register
164   /// coalescer friendly. In other words, given a copy-like instruction
165   /// not all the arguments may be returned at rewritable source, since
166   /// some arguments are none to be register coalescer friendly.
167   ///
168   /// Each call of this method moves the current source to the next
169   /// rewritable source.
170   /// For instance, let CopyLike be the instruction to rewrite.
171   /// CopyLike has one definition and one source:
172   /// dst.dstSubIdx = CopyLike src.srcSubIdx.
173   ///
174   /// The first call will give the first rewritable source, i.e.,
175   /// the only source this instruction has:
176   /// (SrcReg, SrcSubReg) = (src, srcSubIdx).
177   /// This source defines the whole definition, i.e.,
178   /// (DstReg, DstSubReg) = (dst, dstSubIdx).
179   ///
180   /// The second and subsequent calls will return false, as there is only one
181   /// rewritable source.
182   ///
183   /// \return True if a rewritable source has been found, false otherwise.
184   /// The output arguments are valid if and only if true is returned.
185   virtual bool getNextRewritableSource(RegSubRegPair &Src,
186                                        RegSubRegPair &Dst) = 0;
187 
188   /// Rewrite the current source with \p NewReg and \p NewSubReg if possible.
189   /// \return True if the rewriting was possible, false otherwise.
190   virtual bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) = 0;
191 };
192 
193 /// Rewriter for COPY instructions.
194 class CopyRewriter : public Rewriter {
195 public:
196   CopyRewriter(MachineInstr &MI) : Rewriter(MI) {
197     assert(MI.isCopy() && "Expected copy instruction");
198   }
199   virtual ~CopyRewriter() = default;
200 
201   bool getNextRewritableSource(RegSubRegPair &Src,
202                                RegSubRegPair &Dst) override {
203     if (++CurrentSrcIdx > 1)
204       return false;
205 
206     // The rewritable source is the argument.
207     const MachineOperand &MOSrc = CopyLike.getOperand(CurrentSrcIdx);
208     Src = RegSubRegPair(MOSrc.getReg(), MOSrc.getSubReg());
209     // What we track are the alternative sources of the definition.
210     const MachineOperand &MODef = CopyLike.getOperand(0);
211     Dst = RegSubRegPair(MODef.getReg(), MODef.getSubReg());
212     return true;
213   }
214 
215   bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override {
216     MachineOperand &MOSrc = CopyLike.getOperand(CurrentSrcIdx);
217     MOSrc.setReg(NewReg);
218     MOSrc.setSubReg(NewSubReg);
219     return true;
220   }
221 };
222 
223 /// Helper class to rewrite uncoalescable copy like instructions
224 /// into new COPY (coalescable friendly) instructions.
225 class UncoalescableRewriter : public Rewriter {
226   int NumDefs; ///< Number of defs in the bitcast.
227 
228 public:
229   UncoalescableRewriter(MachineInstr &MI) : Rewriter(MI) {
230     NumDefs = MI.getDesc().getNumDefs();
231   }
232 
233   /// \see See Rewriter::getNextRewritableSource()
234   /// All such sources need to be considered rewritable in order to
235   /// rewrite a uncoalescable copy-like instruction. This method return
236   /// each definition that must be checked if rewritable.
237   bool getNextRewritableSource(RegSubRegPair &Src,
238                                RegSubRegPair &Dst) override {
239     // Find the next non-dead definition and continue from there.
240     if (CurrentSrcIdx == NumDefs)
241       return false;
242 
243     while (CopyLike.getOperand(CurrentSrcIdx).isDead()) {
244       ++CurrentSrcIdx;
245       if (CurrentSrcIdx == NumDefs)
246         return false;
247     }
248 
249     // What we track are the alternative sources of the definition.
250     Src = RegSubRegPair(0, 0);
251     const MachineOperand &MODef = CopyLike.getOperand(CurrentSrcIdx);
252     Dst = RegSubRegPair(MODef.getReg(), MODef.getSubReg());
253 
254     CurrentSrcIdx++;
255     return true;
256   }
257 
258   bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override {
259     return false;
260   }
261 };
262 
263 /// Specialized rewriter for INSERT_SUBREG instruction.
264 class InsertSubregRewriter : public Rewriter {
265 public:
266   InsertSubregRewriter(MachineInstr &MI) : Rewriter(MI) {
267     assert(MI.isInsertSubreg() && "Invalid instruction");
268   }
269 
270   /// \see See Rewriter::getNextRewritableSource()
271   /// Here CopyLike has the following form:
272   /// dst = INSERT_SUBREG Src1, Src2.src2SubIdx, subIdx.
273   /// Src1 has the same register class has dst, hence, there is
274   /// nothing to rewrite.
275   /// Src2.src2SubIdx, may not be register coalescer friendly.
276   /// Therefore, the first call to this method returns:
277   /// (SrcReg, SrcSubReg) = (Src2, src2SubIdx).
278   /// (DstReg, DstSubReg) = (dst, subIdx).
279   ///
280   /// Subsequence calls will return false.
281   bool getNextRewritableSource(RegSubRegPair &Src,
282                                RegSubRegPair &Dst) override {
283     // If we already get the only source we can rewrite, return false.
284     if (CurrentSrcIdx == 2)
285       return false;
286     // We are looking at v2 = INSERT_SUBREG v0, v1, sub0.
287     CurrentSrcIdx = 2;
288     const MachineOperand &MOInsertedReg = CopyLike.getOperand(2);
289     Src = RegSubRegPair(MOInsertedReg.getReg(), MOInsertedReg.getSubReg());
290     const MachineOperand &MODef = CopyLike.getOperand(0);
291 
292     // We want to track something that is compatible with the
293     // partial definition.
294     if (MODef.getSubReg())
295       // Bail if we have to compose sub-register indices.
296       return false;
297     Dst = RegSubRegPair(MODef.getReg(),
298                         (unsigned)CopyLike.getOperand(3).getImm());
299     return true;
300   }
301 
302   bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override {
303     if (CurrentSrcIdx != 2)
304       return false;
305     // We are rewriting the inserted reg.
306     MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx);
307     MO.setReg(NewReg);
308     MO.setSubReg(NewSubReg);
309     return true;
310   }
311 };
312 
313 /// Specialized rewriter for EXTRACT_SUBREG instruction.
314 class ExtractSubregRewriter : public Rewriter {
315   const TargetInstrInfo &TII;
316 
317 public:
318   ExtractSubregRewriter(MachineInstr &MI, const TargetInstrInfo &TII)
319       : Rewriter(MI), TII(TII) {
320     assert(MI.isExtractSubreg() && "Invalid instruction");
321   }
322 
323   /// \see Rewriter::getNextRewritableSource()
324   /// Here CopyLike has the following form:
325   /// dst.dstSubIdx = EXTRACT_SUBREG Src, subIdx.
326   /// There is only one rewritable source: Src.subIdx,
327   /// which defines dst.dstSubIdx.
328   bool getNextRewritableSource(RegSubRegPair &Src,
329                                RegSubRegPair &Dst) override {
330     // If we already get the only source we can rewrite, return false.
331     if (CurrentSrcIdx == 1)
332       return false;
333     // We are looking at v1 = EXTRACT_SUBREG v0, sub0.
334     CurrentSrcIdx = 1;
335     const MachineOperand &MOExtractedReg = CopyLike.getOperand(1);
336     // If we have to compose sub-register indices, bail out.
337     if (MOExtractedReg.getSubReg())
338       return false;
339 
340     Src =
341         RegSubRegPair(MOExtractedReg.getReg(), CopyLike.getOperand(2).getImm());
342 
343     // We want to track something that is compatible with the definition.
344     const MachineOperand &MODef = CopyLike.getOperand(0);
345     Dst = RegSubRegPair(MODef.getReg(), MODef.getSubReg());
346     return true;
347   }
348 
349   bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override {
350     // The only source we can rewrite is the input register.
351     if (CurrentSrcIdx != 1)
352       return false;
353 
354     CopyLike.getOperand(CurrentSrcIdx).setReg(NewReg);
355 
356     // If we find a source that does not require to extract something,
357     // rewrite the operation with a copy.
358     if (!NewSubReg) {
359       // Move the current index to an invalid position.
360       // We do not want another call to this method to be able
361       // to do any change.
362       CurrentSrcIdx = -1;
363       // Rewrite the operation as a COPY.
364       // Get rid of the sub-register index.
365       CopyLike.removeOperand(2);
366       // Morph the operation into a COPY.
367       CopyLike.setDesc(TII.get(TargetOpcode::COPY));
368       return true;
369     }
370     CopyLike.getOperand(CurrentSrcIdx + 1).setImm(NewSubReg);
371     return true;
372   }
373 };
374 
375 /// Specialized rewriter for REG_SEQUENCE instruction.
376 class RegSequenceRewriter : public Rewriter {
377 public:
378   RegSequenceRewriter(MachineInstr &MI) : Rewriter(MI) {
379     assert(MI.isRegSequence() && "Invalid instruction");
380     CurrentSrcIdx = -1;
381   }
382 
383   /// \see Rewriter::getNextRewritableSource()
384   /// Here CopyLike has the following form:
385   /// dst = REG_SEQUENCE Src1.src1SubIdx, subIdx1, Src2.src2SubIdx, subIdx2.
386   /// Each call will return a different source, walking all the available
387   /// source.
388   ///
389   /// The first call returns:
390   /// (SrcReg, SrcSubReg) = (Src1, src1SubIdx).
391   /// (DstReg, DstSubReg) = (dst, subIdx1).
392   ///
393   /// The second call returns:
394   /// (SrcReg, SrcSubReg) = (Src2, src2SubIdx).
395   /// (DstReg, DstSubReg) = (dst, subIdx2).
396   ///
397   /// And so on, until all the sources have been traversed, then
398   /// it returns false.
399   bool getNextRewritableSource(RegSubRegPair &Src,
400                                RegSubRegPair &Dst) override {
401     // We are looking at v0 = REG_SEQUENCE v1, sub1, v2, sub2, etc.
402     CurrentSrcIdx += 2;
403     if (static_cast<unsigned>(CurrentSrcIdx) >= CopyLike.getNumOperands())
404       return false;
405 
406     const MachineOperand &MOInsertedReg = CopyLike.getOperand(CurrentSrcIdx);
407     Src.Reg = MOInsertedReg.getReg();
408     Src.SubReg = MOInsertedReg.getSubReg();
409 
410     // We want to track something that is compatible with the related
411     // partial definition.
412     Dst.SubReg = CopyLike.getOperand(CurrentSrcIdx + 1).getImm();
413 
414     const MachineOperand &MODef = CopyLike.getOperand(0);
415     Dst.Reg = MODef.getReg();
416     assert(MODef.getSubReg() == 0 && "cannot have subregister def in SSA");
417     return true;
418   }
419 
420   bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override {
421     MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx);
422     MO.setReg(NewReg);
423     MO.setSubReg(NewSubReg);
424     return true;
425   }
426 };
427 
428 class PeepholeOptimizer : private MachineFunction::Delegate {
429   const TargetInstrInfo *TII = nullptr;
430   const TargetRegisterInfo *TRI = nullptr;
431   MachineRegisterInfo *MRI = nullptr;
432   MachineDominatorTree *DT = nullptr; // Machine dominator tree
433   MachineLoopInfo *MLI = nullptr;
434 
435 public:
436   PeepholeOptimizer(MachineDominatorTree *DT, MachineLoopInfo *MLI)
437       : DT(DT), MLI(MLI) {}
438 
439   bool run(MachineFunction &MF);
440   /// Track Def -> Use info used for rewriting copies.
441   using RewriteMapTy = SmallDenseMap<RegSubRegPair, ValueTrackerResult>;
442 
443   /// Sequence of instructions that formulate recurrence cycle.
444   using RecurrenceCycle = SmallVector<RecurrenceInstr, 4>;
445 
446 private:
447   bool optimizeCmpInstr(MachineInstr &MI);
448   bool optimizeExtInstr(MachineInstr &MI, MachineBasicBlock &MBB,
449                         SmallPtrSetImpl<MachineInstr *> &LocalMIs);
450   bool optimizeSelect(MachineInstr &MI,
451                       SmallPtrSetImpl<MachineInstr *> &LocalMIs);
452   bool optimizeCondBranch(MachineInstr &MI);
453 
454   bool optimizeCoalescableCopyImpl(Rewriter &&CpyRewriter);
455   bool optimizeCoalescableCopy(MachineInstr &MI);
456   bool optimizeUncoalescableCopy(MachineInstr &MI,
457                                  SmallPtrSetImpl<MachineInstr *> &LocalMIs);
458   bool optimizeRecurrence(MachineInstr &PHI);
459   bool findNextSource(const TargetRegisterClass *DefRC, unsigned DefSubReg,
460                       RegSubRegPair RegSubReg, RewriteMapTy &RewriteMap);
461   bool isMoveImmediate(MachineInstr &MI, SmallSet<Register, 4> &ImmDefRegs,
462                        DenseMap<Register, MachineInstr *> &ImmDefMIs);
463   bool foldImmediate(MachineInstr &MI, SmallSet<Register, 4> &ImmDefRegs,
464                      DenseMap<Register, MachineInstr *> &ImmDefMIs,
465                      bool &Deleted);
466 
467   /// Finds recurrence cycles, but only ones that formulated around
468   /// a def operand and a use operand that are tied. If there is a use
469   /// operand commutable with the tied use operand, find recurrence cycle
470   /// along that operand as well.
471   bool findTargetRecurrence(Register Reg,
472                             const SmallSet<Register, 2> &TargetReg,
473                             RecurrenceCycle &RC);
474 
475   /// If copy instruction \p MI is a virtual register copy or a copy of a
476   /// constant physical register to a virtual register, track it in the
477   /// set CopySrcMIs. If this virtual register was previously seen as a
478   /// copy, replace the uses of this copy with the previously seen copy's
479   /// destination register.
480   bool foldRedundantCopy(MachineInstr &MI);
481 
482   /// Is the register \p Reg a non-allocatable physical register?
483   bool isNAPhysCopy(Register Reg);
484 
485   /// If copy instruction \p MI is a non-allocatable virtual<->physical
486   /// register copy, track it in the \p NAPhysToVirtMIs map. If this
487   /// non-allocatable physical register was previously copied to a virtual
488   /// registered and hasn't been clobbered, the virt->phys copy can be
489   /// deleted.
490   bool
491   foldRedundantNAPhysCopy(MachineInstr &MI,
492                           DenseMap<Register, MachineInstr *> &NAPhysToVirtMIs);
493 
494   bool isLoadFoldable(MachineInstr &MI,
495                       SmallSet<Register, 16> &FoldAsLoadDefCandidates);
496 
497   /// Check whether \p MI is understood by the register coalescer
498   /// but may require some rewriting.
499   static bool isCoalescableCopy(const MachineInstr &MI) {
500     // SubregToRegs are not interesting, because they are already register
501     // coalescer friendly.
502     return MI.isCopy() ||
503            (!DisableAdvCopyOpt && (MI.isRegSequence() || MI.isInsertSubreg() ||
504                                    MI.isExtractSubreg()));
505   }
506 
507   /// Check whether \p MI is a copy like instruction that is
508   /// not recognized by the register coalescer.
509   static bool isUncoalescableCopy(const MachineInstr &MI) {
510     return MI.isBitcast() || (!DisableAdvCopyOpt && (MI.isRegSequenceLike() ||
511                                                      MI.isInsertSubregLike() ||
512                                                      MI.isExtractSubregLike()));
513   }
514 
515   MachineInstr &rewriteSource(MachineInstr &CopyLike, RegSubRegPair Def,
516                               RewriteMapTy &RewriteMap);
517 
518   // Set of copies to virtual registers keyed by source register.  Never
519   // holds any physreg which requires def tracking.
520   DenseMap<RegSubRegPair, MachineInstr *> CopySrcMIs;
521 
522   // MachineFunction::Delegate implementation. Used to maintain CopySrcMIs.
523   void MF_HandleInsertion(MachineInstr &MI) override {}
524 
525   bool getCopySrc(MachineInstr &MI, RegSubRegPair &SrcPair) {
526     if (!MI.isCopy())
527       return false;
528 
529     Register SrcReg = MI.getOperand(1).getReg();
530     unsigned SrcSubReg = MI.getOperand(1).getSubReg();
531     if (!SrcReg.isVirtual() && !MRI->isConstantPhysReg(SrcReg))
532       return false;
533 
534     SrcPair = RegSubRegPair(SrcReg, SrcSubReg);
535     return true;
536   }
537 
538   // If a COPY instruction is to be deleted or changed, we should also remove
539   // it from CopySrcMIs.
540   void deleteChangedCopy(MachineInstr &MI) {
541     RegSubRegPair SrcPair;
542     if (!getCopySrc(MI, SrcPair))
543       return;
544 
545     auto It = CopySrcMIs.find(SrcPair);
546     if (It != CopySrcMIs.end() && It->second == &MI)
547       CopySrcMIs.erase(It);
548   }
549 
550   void MF_HandleRemoval(MachineInstr &MI) override { deleteChangedCopy(MI); }
551 
552   void MF_HandleChangeDesc(MachineInstr &MI, const MCInstrDesc &TID) override {
553     deleteChangedCopy(MI);
554   }
555 };
556 
557 class PeepholeOptimizerLegacy : public MachineFunctionPass {
558 public:
559   static char ID; // Pass identification
560 
561   PeepholeOptimizerLegacy() : MachineFunctionPass(ID) {
562     initializePeepholeOptimizerLegacyPass(*PassRegistry::getPassRegistry());
563   }
564 
565   bool runOnMachineFunction(MachineFunction &MF) override;
566 
567   void getAnalysisUsage(AnalysisUsage &AU) const override {
568     AU.setPreservesCFG();
569     MachineFunctionPass::getAnalysisUsage(AU);
570     AU.addRequired<MachineLoopInfoWrapperPass>();
571     AU.addPreserved<MachineLoopInfoWrapperPass>();
572     if (Aggressive) {
573       AU.addRequired<MachineDominatorTreeWrapperPass>();
574       AU.addPreserved<MachineDominatorTreeWrapperPass>();
575     }
576   }
577 
578   MachineFunctionProperties getRequiredProperties() const override {
579     return MachineFunctionProperties().setIsSSA();
580   }
581 };
582 
583 /// Helper class to hold instructions that are inside recurrence cycles.
584 /// The recurrence cycle is formulated around 1) a def operand and its
585 /// tied use operand, or 2) a def operand and a use operand that is commutable
586 /// with another use operand which is tied to the def operand. In the latter
587 /// case, index of the tied use operand and the commutable use operand are
588 /// maintained with CommutePair.
589 class RecurrenceInstr {
590 public:
591   using IndexPair = std::pair<unsigned, unsigned>;
592 
593   RecurrenceInstr(MachineInstr *MI) : MI(MI) {}
594   RecurrenceInstr(MachineInstr *MI, unsigned Idx1, unsigned Idx2)
595       : MI(MI), CommutePair(std::make_pair(Idx1, Idx2)) {}
596 
597   MachineInstr *getMI() const { return MI; }
598   std::optional<IndexPair> getCommutePair() const { return CommutePair; }
599 
600 private:
601   MachineInstr *MI;
602   std::optional<IndexPair> CommutePair;
603 };
604 
605 /// Helper class to hold a reply for ValueTracker queries.
606 /// Contains the returned sources for a given search and the instructions
607 /// where the sources were tracked from.
608 class ValueTrackerResult {
609 private:
610   /// Track all sources found by one ValueTracker query.
611   SmallVector<RegSubRegPair, 2> RegSrcs;
612 
613   /// Instruction using the sources in 'RegSrcs'.
614   const MachineInstr *Inst = nullptr;
615 
616 public:
617   ValueTrackerResult() = default;
618 
619   ValueTrackerResult(Register Reg, unsigned SubReg) { addSource(Reg, SubReg); }
620 
621   bool isValid() const { return getNumSources() > 0; }
622 
623   void setInst(const MachineInstr *I) { Inst = I; }
624   const MachineInstr *getInst() const { return Inst; }
625 
626   void clear() {
627     RegSrcs.clear();
628     Inst = nullptr;
629   }
630 
631   void addSource(Register SrcReg, unsigned SrcSubReg) {
632     RegSrcs.push_back(RegSubRegPair(SrcReg, SrcSubReg));
633   }
634 
635   void setSource(int Idx, Register SrcReg, unsigned SrcSubReg) {
636     assert(Idx < getNumSources() && "Reg pair source out of index");
637     RegSrcs[Idx] = RegSubRegPair(SrcReg, SrcSubReg);
638   }
639 
640   int getNumSources() const { return RegSrcs.size(); }
641 
642   RegSubRegPair getSrc(int Idx) const { return RegSrcs[Idx]; }
643 
644   Register getSrcReg(int Idx) const {
645     assert(Idx < getNumSources() && "Reg source out of index");
646     return RegSrcs[Idx].Reg;
647   }
648 
649   unsigned getSrcSubReg(int Idx) const {
650     assert(Idx < getNumSources() && "SubReg source out of index");
651     return RegSrcs[Idx].SubReg;
652   }
653 
654   bool operator==(const ValueTrackerResult &Other) const {
655     if (Other.getInst() != getInst())
656       return false;
657 
658     if (Other.getNumSources() != getNumSources())
659       return false;
660 
661     for (int i = 0, e = Other.getNumSources(); i != e; ++i)
662       if (Other.getSrcReg(i) != getSrcReg(i) ||
663           Other.getSrcSubReg(i) != getSrcSubReg(i))
664         return false;
665     return true;
666   }
667 };
668 
669 /// Helper class to track the possible sources of a value defined by
670 /// a (chain of) copy related instructions.
671 /// Given a definition (instruction and definition index), this class
672 /// follows the use-def chain to find successive suitable sources.
673 /// The given source can be used to rewrite the definition into
674 /// def = COPY src.
675 ///
676 /// For instance, let us consider the following snippet:
677 /// v0 =
678 /// v2 = INSERT_SUBREG v1, v0, sub0
679 /// def = COPY v2.sub0
680 ///
681 /// Using a ValueTracker for def = COPY v2.sub0 will give the following
682 /// suitable sources:
683 /// v2.sub0 and v0.
684 /// Then, def can be rewritten into def = COPY v0.
685 class ValueTracker {
686 private:
687   /// The current point into the use-def chain.
688   const MachineInstr *Def = nullptr;
689 
690   /// The index of the definition in Def.
691   unsigned DefIdx = 0;
692 
693   /// The sub register index of the definition.
694   unsigned DefSubReg;
695 
696   /// The register where the value can be found.
697   Register Reg;
698 
699   /// MachineRegisterInfo used to perform tracking.
700   const MachineRegisterInfo &MRI;
701 
702   /// Optional TargetInstrInfo used to perform some complex tracking.
703   const TargetInstrInfo *TII;
704 
705   /// Dispatcher to the right underlying implementation of getNextSource.
706   ValueTrackerResult getNextSourceImpl();
707 
708   /// Specialized version of getNextSource for Copy instructions.
709   ValueTrackerResult getNextSourceFromCopy();
710 
711   /// Specialized version of getNextSource for Bitcast instructions.
712   ValueTrackerResult getNextSourceFromBitcast();
713 
714   /// Specialized version of getNextSource for RegSequence instructions.
715   ValueTrackerResult getNextSourceFromRegSequence();
716 
717   /// Specialized version of getNextSource for InsertSubreg instructions.
718   ValueTrackerResult getNextSourceFromInsertSubreg();
719 
720   /// Specialized version of getNextSource for ExtractSubreg instructions.
721   ValueTrackerResult getNextSourceFromExtractSubreg();
722 
723   /// Specialized version of getNextSource for SubregToReg instructions.
724   ValueTrackerResult getNextSourceFromSubregToReg();
725 
726   /// Specialized version of getNextSource for PHI instructions.
727   ValueTrackerResult getNextSourceFromPHI();
728 
729 public:
730   /// Create a ValueTracker instance for the value defined by \p Reg.
731   /// \p DefSubReg represents the sub register index the value tracker will
732   /// track. It does not need to match the sub register index used in the
733   /// definition of \p Reg.
734   /// If \p Reg is a physical register, a value tracker constructed with
735   /// this constructor will not find any alternative source.
736   /// Indeed, when \p Reg is a physical register that constructor does not
737   /// know which definition of \p Reg it should track.
738   /// Use the next constructor to track a physical register.
739   ValueTracker(Register Reg, unsigned DefSubReg, const MachineRegisterInfo &MRI,
740                const TargetInstrInfo *TII = nullptr)
741       : DefSubReg(DefSubReg), Reg(Reg), MRI(MRI), TII(TII) {
742     if (!Reg.isPhysical()) {
743       Def = MRI.getVRegDef(Reg);
744       DefIdx = MRI.def_begin(Reg).getOperandNo();
745     }
746   }
747 
748   /// Following the use-def chain, get the next available source
749   /// for the tracked value.
750   /// \return A ValueTrackerResult containing a set of registers
751   /// and sub registers with tracked values. A ValueTrackerResult with
752   /// an empty set of registers means no source was found.
753   ValueTrackerResult getNextSource();
754 };
755 
756 } // end anonymous namespace
757 
758 char PeepholeOptimizerLegacy::ID = 0;
759 
760 char &llvm::PeepholeOptimizerLegacyID = PeepholeOptimizerLegacy::ID;
761 
762 INITIALIZE_PASS_BEGIN(PeepholeOptimizerLegacy, DEBUG_TYPE,
763                       "Peephole Optimizations", false, false)
764 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
765 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
766 INITIALIZE_PASS_END(PeepholeOptimizerLegacy, DEBUG_TYPE,
767                     "Peephole Optimizations", false, false)
768 
769 /// If instruction is a copy-like instruction, i.e. it reads a single register
770 /// and writes a single register and it does not modify the source, and if the
771 /// source value is preserved as a sub-register of the result, then replace all
772 /// reachable uses of the source with the subreg of the result.
773 ///
774 /// Do not generate an EXTRACT that is used only in a debug use, as this changes
775 /// the code. Since this code does not currently share EXTRACTs, just ignore all
776 /// debug uses.
777 bool PeepholeOptimizer::optimizeExtInstr(
778     MachineInstr &MI, MachineBasicBlock &MBB,
779     SmallPtrSetImpl<MachineInstr *> &LocalMIs) {
780   Register SrcReg, DstReg;
781   unsigned SubIdx;
782   if (!TII->isCoalescableExtInstr(MI, SrcReg, DstReg, SubIdx))
783     return false;
784 
785   if (DstReg.isPhysical() || SrcReg.isPhysical())
786     return false;
787 
788   if (MRI->hasOneNonDBGUse(SrcReg))
789     // No other uses.
790     return false;
791 
792   // Ensure DstReg can get a register class that actually supports
793   // sub-registers. Don't change the class until we commit.
794   const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg);
795   DstRC = TRI->getSubClassWithSubReg(DstRC, SubIdx);
796   if (!DstRC)
797     return false;
798 
799   // The ext instr may be operating on a sub-register of SrcReg as well.
800   // PPC::EXTSW is a 32 -> 64-bit sign extension, but it reads a 64-bit
801   // register.
802   // If UseSrcSubIdx is Set, SubIdx also applies to SrcReg, and only uses of
803   // SrcReg:SubIdx should be replaced.
804   bool UseSrcSubIdx =
805       TRI->getSubClassWithSubReg(MRI->getRegClass(SrcReg), SubIdx) != nullptr;
806 
807   // The source has other uses. See if we can replace the other uses with use of
808   // the result of the extension.
809   SmallPtrSet<MachineBasicBlock *, 4> ReachedBBs;
810   for (MachineInstr &UI : MRI->use_nodbg_instructions(DstReg))
811     ReachedBBs.insert(UI.getParent());
812 
813   // Uses that are in the same BB of uses of the result of the instruction.
814   SmallVector<MachineOperand *, 8> Uses;
815 
816   // Uses that the result of the instruction can reach.
817   SmallVector<MachineOperand *, 8> ExtendedUses;
818 
819   bool ExtendLife = true;
820   for (MachineOperand &UseMO : MRI->use_nodbg_operands(SrcReg)) {
821     MachineInstr *UseMI = UseMO.getParent();
822     if (UseMI == &MI)
823       continue;
824 
825     if (UseMI->isPHI()) {
826       ExtendLife = false;
827       continue;
828     }
829 
830     // Only accept uses of SrcReg:SubIdx.
831     if (UseSrcSubIdx && UseMO.getSubReg() != SubIdx)
832       continue;
833 
834     // It's an error to translate this:
835     //
836     //    %reg1025 = <sext> %reg1024
837     //     ...
838     //    %reg1026 = SUBREG_TO_REG 0, %reg1024, 4
839     //
840     // into this:
841     //
842     //    %reg1025 = <sext> %reg1024
843     //     ...
844     //    %reg1027 = COPY %reg1025:4
845     //    %reg1026 = SUBREG_TO_REG 0, %reg1027, 4
846     //
847     // The problem here is that SUBREG_TO_REG is there to assert that an
848     // implicit zext occurs. It doesn't insert a zext instruction. If we allow
849     // the COPY here, it will give us the value after the <sext>, not the
850     // original value of %reg1024 before <sext>.
851     if (UseMI->getOpcode() == TargetOpcode::SUBREG_TO_REG)
852       continue;
853 
854     MachineBasicBlock *UseMBB = UseMI->getParent();
855     if (UseMBB == &MBB) {
856       // Local uses that come after the extension.
857       if (!LocalMIs.count(UseMI))
858         Uses.push_back(&UseMO);
859     } else if (ReachedBBs.count(UseMBB)) {
860       // Non-local uses where the result of the extension is used. Always
861       // replace these unless it's a PHI.
862       Uses.push_back(&UseMO);
863     } else if (Aggressive && DT->dominates(&MBB, UseMBB)) {
864       // We may want to extend the live range of the extension result in order
865       // to replace these uses.
866       ExtendedUses.push_back(&UseMO);
867     } else {
868       // Both will be live out of the def MBB anyway. Don't extend live range of
869       // the extension result.
870       ExtendLife = false;
871       break;
872     }
873   }
874 
875   if (ExtendLife && !ExtendedUses.empty())
876     // Extend the liveness of the extension result.
877     Uses.append(ExtendedUses.begin(), ExtendedUses.end());
878 
879   // Now replace all uses.
880   bool Changed = false;
881   if (!Uses.empty()) {
882     SmallPtrSet<MachineBasicBlock *, 4> PHIBBs;
883 
884     // Look for PHI uses of the extended result, we don't want to extend the
885     // liveness of a PHI input. It breaks all kinds of assumptions down
886     // stream. A PHI use is expected to be the kill of its source values.
887     for (MachineInstr &UI : MRI->use_nodbg_instructions(DstReg))
888       if (UI.isPHI())
889         PHIBBs.insert(UI.getParent());
890 
891     const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
892     for (MachineOperand *UseMO : Uses) {
893       MachineInstr *UseMI = UseMO->getParent();
894       MachineBasicBlock *UseMBB = UseMI->getParent();
895       if (PHIBBs.count(UseMBB))
896         continue;
897 
898       // About to add uses of DstReg, clear DstReg's kill flags.
899       if (!Changed) {
900         MRI->clearKillFlags(DstReg);
901         MRI->constrainRegClass(DstReg, DstRC);
902       }
903 
904       // SubReg defs are illegal in machine SSA phase,
905       // we should not generate SubReg defs.
906       //
907       // For example, for the instructions:
908       //
909       // %1:g8rc_and_g8rc_nox0 = EXTSW %0:g8rc
910       // %3:gprc_and_gprc_nor0 = COPY %0.sub_32:g8rc
911       //
912       // We should generate:
913       //
914       // %1:g8rc_and_g8rc_nox0 = EXTSW %0:g8rc
915       // %6:gprc_and_gprc_nor0 = COPY %1.sub_32:g8rc_and_g8rc_nox0
916       // %3:gprc_and_gprc_nor0 = COPY %6:gprc_and_gprc_nor0
917       //
918       if (UseSrcSubIdx)
919         RC = MRI->getRegClass(UseMI->getOperand(0).getReg());
920 
921       Register NewVR = MRI->createVirtualRegister(RC);
922       BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(),
923               TII->get(TargetOpcode::COPY), NewVR)
924           .addReg(DstReg, 0, SubIdx);
925       if (UseSrcSubIdx)
926         UseMO->setSubReg(0);
927 
928       UseMO->setReg(NewVR);
929       ++NumReuse;
930       Changed = true;
931     }
932   }
933 
934   return Changed;
935 }
936 
937 /// If the instruction is a compare and the previous instruction it's comparing
938 /// against already sets (or could be modified to set) the same flag as the
939 /// compare, then we can remove the comparison and use the flag from the
940 /// previous instruction.
941 bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr &MI) {
942   // If this instruction is a comparison against zero and isn't comparing a
943   // physical register, we can try to optimize it.
944   Register SrcReg, SrcReg2;
945   int64_t CmpMask, CmpValue;
946   if (!TII->analyzeCompare(MI, SrcReg, SrcReg2, CmpMask, CmpValue) ||
947       SrcReg.isPhysical() || SrcReg2.isPhysical())
948     return false;
949 
950   // Attempt to optimize the comparison instruction.
951   LLVM_DEBUG(dbgs() << "Attempting to optimize compare: " << MI);
952   if (TII->optimizeCompareInstr(MI, SrcReg, SrcReg2, CmpMask, CmpValue, MRI)) {
953     LLVM_DEBUG(dbgs() << "  -> Successfully optimized compare!\n");
954     ++NumCmps;
955     return true;
956   }
957 
958   return false;
959 }
960 
961 /// Optimize a select instruction.
962 bool PeepholeOptimizer::optimizeSelect(
963     MachineInstr &MI, SmallPtrSetImpl<MachineInstr *> &LocalMIs) {
964   unsigned TrueOp = 0;
965   unsigned FalseOp = 0;
966   bool Optimizable = false;
967   SmallVector<MachineOperand, 4> Cond;
968   if (TII->analyzeSelect(MI, Cond, TrueOp, FalseOp, Optimizable))
969     return false;
970   if (!Optimizable)
971     return false;
972   if (!TII->optimizeSelect(MI, LocalMIs))
973     return false;
974   LLVM_DEBUG(dbgs() << "Deleting select: " << MI);
975   MI.eraseFromParent();
976   ++NumSelects;
977   return true;
978 }
979 
980 /// Check if a simpler conditional branch can be generated.
981 bool PeepholeOptimizer::optimizeCondBranch(MachineInstr &MI) {
982   return TII->optimizeCondBranch(MI);
983 }
984 
985 /// Try to find a better source value that shares the same register file to
986 /// replace \p RegSubReg in an instruction like
987 /// `DefRC.DefSubReg = COPY RegSubReg`
988 ///
989 /// When true is returned, the \p RewriteMap can be used by the client to
990 /// retrieve all Def -> Use along the way up to the next source. Any found
991 /// Use that is not itself a key for another entry, is the next source to
992 /// use. During the search for the next source, multiple sources can be found
993 /// given multiple incoming sources of a PHI instruction. In this case, we
994 /// look in each PHI source for the next source; all found next sources must
995 /// share the same register file as \p Reg and \p SubReg. The client should
996 /// then be capable to rewrite all intermediate PHIs to get the next source.
997 /// \return False if no alternative sources are available. True otherwise.
998 bool PeepholeOptimizer::findNextSource(const TargetRegisterClass *DefRC,
999                                        unsigned DefSubReg,
1000                                        RegSubRegPair RegSubReg,
1001                                        RewriteMapTy &RewriteMap) {
1002   // Do not try to find a new source for a physical register.
1003   // So far we do not have any motivating example for doing that.
1004   // Thus, instead of maintaining untested code, we will revisit that if
1005   // that changes at some point.
1006   Register Reg = RegSubReg.Reg;
1007   SmallVector<RegSubRegPair, 4> SrcToLook;
1008   RegSubRegPair CurSrcPair = RegSubReg;
1009   SrcToLook.push_back(CurSrcPair);
1010 
1011   unsigned PHICount = 0;
1012   do {
1013     CurSrcPair = SrcToLook.pop_back_val();
1014     // As explained above, do not handle physical registers
1015     if (CurSrcPair.Reg.isPhysical())
1016       return false;
1017 
1018     ValueTracker ValTracker(CurSrcPair.Reg, CurSrcPair.SubReg, *MRI, TII);
1019 
1020     // Follow the chain of copies until we find a more suitable source, a phi
1021     // or have to abort.
1022     while (true) {
1023       ValueTrackerResult Res = ValTracker.getNextSource();
1024       // Abort at the end of a chain (without finding a suitable source).
1025       if (!Res.isValid())
1026         return false;
1027 
1028       // Insert the Def -> Use entry for the recently found source.
1029       auto [InsertPt, WasInserted] = RewriteMap.try_emplace(CurSrcPair, Res);
1030 
1031       if (!WasInserted) {
1032         const ValueTrackerResult &CurSrcRes = InsertPt->second;
1033 
1034         assert(CurSrcRes == Res && "ValueTrackerResult found must match");
1035         // An existent entry with multiple sources is a PHI cycle we must avoid.
1036         // Otherwise it's an entry with a valid next source we already found.
1037         if (CurSrcRes.getNumSources() > 1) {
1038           LLVM_DEBUG(dbgs()
1039                      << "findNextSource: found PHI cycle, aborting...\n");
1040           return false;
1041         }
1042         break;
1043       }
1044 
1045       // ValueTrackerResult usually have one source unless it's the result from
1046       // a PHI instruction. Add the found PHI edges to be looked up further.
1047       unsigned NumSrcs = Res.getNumSources();
1048       if (NumSrcs > 1) {
1049         PHICount++;
1050         if (PHICount >= RewritePHILimit) {
1051           LLVM_DEBUG(dbgs() << "findNextSource: PHI limit reached\n");
1052           return false;
1053         }
1054 
1055         for (unsigned i = 0; i < NumSrcs; ++i)
1056           SrcToLook.push_back(Res.getSrc(i));
1057         break;
1058       }
1059 
1060       CurSrcPair = Res.getSrc(0);
1061       // Do not extend the live-ranges of physical registers as they add
1062       // constraints to the register allocator. Moreover, if we want to extend
1063       // the live-range of a physical register, unlike SSA virtual register,
1064       // we will have to check that they aren't redefine before the related use.
1065       if (CurSrcPair.Reg.isPhysical())
1066         return false;
1067 
1068       // Keep following the chain if the value isn't any better yet.
1069       const TargetRegisterClass *SrcRC = MRI->getRegClass(CurSrcPair.Reg);
1070       if (!TRI->shouldRewriteCopySrc(DefRC, DefSubReg, SrcRC,
1071                                      CurSrcPair.SubReg))
1072         continue;
1073 
1074       // We currently cannot deal with subreg operands on PHI instructions
1075       // (see insertPHI()).
1076       if (PHICount > 0 && CurSrcPair.SubReg != 0)
1077         continue;
1078 
1079       // We found a suitable source, and are done with this chain.
1080       break;
1081     }
1082   } while (!SrcToLook.empty());
1083 
1084   // If we did not find a more suitable source, there is nothing to optimize.
1085   return CurSrcPair.Reg != Reg;
1086 }
1087 
1088 /// Insert a PHI instruction with incoming edges \p SrcRegs that are
1089 /// guaranteed to have the same register class. This is necessary whenever we
1090 /// successfully traverse a PHI instruction and find suitable sources coming
1091 /// from its edges. By inserting a new PHI, we provide a rewritten PHI def
1092 /// suitable to be used in a new COPY instruction.
1093 static MachineInstr &insertPHI(MachineRegisterInfo &MRI,
1094                                const TargetInstrInfo &TII,
1095                                const SmallVectorImpl<RegSubRegPair> &SrcRegs,
1096                                MachineInstr &OrigPHI) {
1097   assert(!SrcRegs.empty() && "No sources to create a PHI instruction?");
1098 
1099   const TargetRegisterClass *NewRC = MRI.getRegClass(SrcRegs[0].Reg);
1100   // NewRC is only correct if no subregisters are involved. findNextSource()
1101   // should have rejected those cases already.
1102   assert(SrcRegs[0].SubReg == 0 && "should not have subreg operand");
1103   Register NewVR = MRI.createVirtualRegister(NewRC);
1104   MachineBasicBlock *MBB = OrigPHI.getParent();
1105   MachineInstrBuilder MIB = BuildMI(*MBB, &OrigPHI, OrigPHI.getDebugLoc(),
1106                                     TII.get(TargetOpcode::PHI), NewVR);
1107 
1108   unsigned MBBOpIdx = 2;
1109   for (const RegSubRegPair &RegPair : SrcRegs) {
1110     MIB.addReg(RegPair.Reg, 0, RegPair.SubReg);
1111     MIB.addMBB(OrigPHI.getOperand(MBBOpIdx).getMBB());
1112     // Since we're extended the lifetime of RegPair.Reg, clear the
1113     // kill flags to account for that and make RegPair.Reg reaches
1114     // the new PHI.
1115     MRI.clearKillFlags(RegPair.Reg);
1116     MBBOpIdx += 2;
1117   }
1118 
1119   return *MIB;
1120 }
1121 
1122 /// Given a \p Def.Reg and Def.SubReg  pair, use \p RewriteMap to find
1123 /// the new source to use for rewrite. If \p HandleMultipleSources is true and
1124 /// multiple sources for a given \p Def are found along the way, we found a
1125 /// PHI instructions that needs to be rewritten.
1126 /// TODO: HandleMultipleSources should be removed once we test PHI handling
1127 /// with coalescable copies.
1128 static RegSubRegPair
1129 getNewSource(MachineRegisterInfo *MRI, const TargetInstrInfo *TII,
1130              RegSubRegPair Def,
1131              const PeepholeOptimizer::RewriteMapTy &RewriteMap,
1132              bool HandleMultipleSources = true) {
1133   RegSubRegPair LookupSrc(Def.Reg, Def.SubReg);
1134   while (true) {
1135     ValueTrackerResult Res = RewriteMap.lookup(LookupSrc);
1136     // If there are no entries on the map, LookupSrc is the new source.
1137     if (!Res.isValid())
1138       return LookupSrc;
1139 
1140     // There's only one source for this definition, keep searching...
1141     unsigned NumSrcs = Res.getNumSources();
1142     if (NumSrcs == 1) {
1143       LookupSrc.Reg = Res.getSrcReg(0);
1144       LookupSrc.SubReg = Res.getSrcSubReg(0);
1145       continue;
1146     }
1147 
1148     // TODO: Remove once multiple srcs w/ coalescable copies are supported.
1149     if (!HandleMultipleSources)
1150       break;
1151 
1152     // Multiple sources, recurse into each source to find a new source
1153     // for it. Then, rewrite the PHI accordingly to its new edges.
1154     SmallVector<RegSubRegPair, 4> NewPHISrcs;
1155     for (unsigned i = 0; i < NumSrcs; ++i) {
1156       RegSubRegPair PHISrc(Res.getSrcReg(i), Res.getSrcSubReg(i));
1157       NewPHISrcs.push_back(
1158           getNewSource(MRI, TII, PHISrc, RewriteMap, HandleMultipleSources));
1159     }
1160 
1161     // Build the new PHI node and return its def register as the new source.
1162     MachineInstr &OrigPHI = const_cast<MachineInstr &>(*Res.getInst());
1163     MachineInstr &NewPHI = insertPHI(*MRI, *TII, NewPHISrcs, OrigPHI);
1164     LLVM_DEBUG(dbgs() << "-- getNewSource\n");
1165     LLVM_DEBUG(dbgs() << "   Replacing: " << OrigPHI);
1166     LLVM_DEBUG(dbgs() << "        With: " << NewPHI);
1167     const MachineOperand &MODef = NewPHI.getOperand(0);
1168     return RegSubRegPair(MODef.getReg(), MODef.getSubReg());
1169   }
1170 
1171   return RegSubRegPair(0, 0);
1172 }
1173 
1174 bool PeepholeOptimizer::optimizeCoalescableCopyImpl(Rewriter &&CpyRewriter) {
1175   bool Changed = false;
1176   // Get the right rewriter for the current copy.
1177   // Rewrite each rewritable source.
1178   RegSubRegPair Dst;
1179   RegSubRegPair TrackPair;
1180   while (CpyRewriter.getNextRewritableSource(TrackPair, Dst)) {
1181     if (Dst.Reg.isPhysical()) {
1182       // Do not try to find a new source for a physical register.
1183       // So far we do not have any motivating example for doing that.
1184       // Thus, instead of maintaining untested code, we will revisit that if
1185       // that changes at some point.
1186       continue;
1187     }
1188 
1189     const TargetRegisterClass *DefRC = MRI->getRegClass(Dst.Reg);
1190 
1191     // Keep track of PHI nodes and its incoming edges when looking for sources.
1192     RewriteMapTy RewriteMap;
1193     // Try to find a more suitable source. If we failed to do so, or get the
1194     // actual source, move to the next source.
1195     if (!findNextSource(DefRC, Dst.SubReg, TrackPair, RewriteMap))
1196       continue;
1197 
1198     // Get the new source to rewrite. TODO: Only enable handling of multiple
1199     // sources (PHIs) once we have a motivating example and testcases for it.
1200     RegSubRegPair NewSrc = getNewSource(MRI, TII, TrackPair, RewriteMap,
1201                                         /*HandleMultipleSources=*/false);
1202     assert(TrackPair.Reg != NewSrc.Reg &&
1203            "should not rewrite source to original value");
1204     if (!NewSrc.Reg)
1205       continue;
1206 
1207     // Rewrite source.
1208     if (CpyRewriter.RewriteCurrentSource(NewSrc.Reg, NewSrc.SubReg)) {
1209       // We may have extended the live-range of NewSrc, account for that.
1210       MRI->clearKillFlags(NewSrc.Reg);
1211       Changed = true;
1212     }
1213   }
1214 
1215   // TODO: We could have a clean-up method to tidy the instruction.
1216   // E.g., v0 = INSERT_SUBREG v1, v1.sub0, sub0
1217   // => v0 = COPY v1
1218   // Currently we haven't seen motivating example for that and we
1219   // want to avoid untested code.
1220   NumRewrittenCopies += Changed;
1221   return Changed;
1222 }
1223 
1224 /// Optimize generic copy instructions to avoid cross register bank copy.
1225 /// The optimization looks through a chain of copies and tries to find a source
1226 /// that has a compatible register class.
1227 /// Two register classes are considered to be compatible if they share the same
1228 /// register bank.
1229 /// New copies issued by this optimization are register allocator
1230 /// friendly. This optimization does not remove any copy as it may
1231 /// overconstrain the register allocator, but replaces some operands
1232 /// when possible.
1233 /// \pre isCoalescableCopy(*MI) is true.
1234 /// \return True, when \p MI has been rewritten. False otherwise.
1235 bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr &MI) {
1236   assert(isCoalescableCopy(MI) && "Invalid argument");
1237   assert(MI.getDesc().getNumDefs() == 1 &&
1238          "Coalescer can understand multiple defs?!");
1239   const MachineOperand &MODef = MI.getOperand(0);
1240   // Do not rewrite physical definitions.
1241   if (MODef.getReg().isPhysical())
1242     return false;
1243 
1244   switch (MI.getOpcode()) {
1245   case TargetOpcode::COPY:
1246     return optimizeCoalescableCopyImpl(CopyRewriter(MI));
1247   case TargetOpcode::INSERT_SUBREG:
1248     return optimizeCoalescableCopyImpl(InsertSubregRewriter(MI));
1249   case TargetOpcode::EXTRACT_SUBREG:
1250     return optimizeCoalescableCopyImpl(ExtractSubregRewriter(MI, *TII));
1251   case TargetOpcode::REG_SEQUENCE:
1252     return optimizeCoalescableCopyImpl(RegSequenceRewriter(MI));
1253   default:
1254     // Handle uncoalescable copy-like instructions.
1255     if (MI.isBitcast() || MI.isRegSequenceLike() || MI.isInsertSubregLike() ||
1256         MI.isExtractSubregLike())
1257       return optimizeCoalescableCopyImpl(UncoalescableRewriter(MI));
1258     return false;
1259   }
1260 }
1261 
1262 /// Rewrite the source found through \p Def, by using the \p RewriteMap
1263 /// and create a new COPY instruction. More info about RewriteMap in
1264 /// PeepholeOptimizer::findNextSource. Right now this is only used to handle
1265 /// Uncoalescable copies, since they are copy like instructions that aren't
1266 /// recognized by the register allocator.
1267 MachineInstr &PeepholeOptimizer::rewriteSource(MachineInstr &CopyLike,
1268                                                RegSubRegPair Def,
1269                                                RewriteMapTy &RewriteMap) {
1270   assert(!Def.Reg.isPhysical() && "We do not rewrite physical registers");
1271 
1272   // Find the new source to use in the COPY rewrite.
1273   RegSubRegPair NewSrc = getNewSource(MRI, TII, Def, RewriteMap);
1274 
1275   // Insert the COPY.
1276   const TargetRegisterClass *DefRC = MRI->getRegClass(Def.Reg);
1277   Register NewVReg = MRI->createVirtualRegister(DefRC);
1278 
1279   MachineInstr *NewCopy =
1280       BuildMI(*CopyLike.getParent(), &CopyLike, CopyLike.getDebugLoc(),
1281               TII->get(TargetOpcode::COPY), NewVReg)
1282           .addReg(NewSrc.Reg, 0, NewSrc.SubReg);
1283 
1284   if (Def.SubReg) {
1285     NewCopy->getOperand(0).setSubReg(Def.SubReg);
1286     NewCopy->getOperand(0).setIsUndef();
1287   }
1288 
1289   LLVM_DEBUG(dbgs() << "-- RewriteSource\n");
1290   LLVM_DEBUG(dbgs() << "   Replacing: " << CopyLike);
1291   LLVM_DEBUG(dbgs() << "        With: " << *NewCopy);
1292   MRI->replaceRegWith(Def.Reg, NewVReg);
1293   MRI->clearKillFlags(NewVReg);
1294 
1295   // We extended the lifetime of NewSrc.Reg, clear the kill flags to
1296   // account for that.
1297   MRI->clearKillFlags(NewSrc.Reg);
1298 
1299   return *NewCopy;
1300 }
1301 
1302 /// Optimize copy-like instructions to create
1303 /// register coalescer friendly instruction.
1304 /// The optimization tries to kill-off the \p MI by looking
1305 /// through a chain of copies to find a source that has a compatible
1306 /// register class.
1307 /// If such a source is found, it replace \p MI by a generic COPY
1308 /// operation.
1309 /// \pre isUncoalescableCopy(*MI) is true.
1310 /// \return True, when \p MI has been optimized. In that case, \p MI has
1311 /// been removed from its parent.
1312 /// All COPY instructions created, are inserted in \p LocalMIs.
1313 bool PeepholeOptimizer::optimizeUncoalescableCopy(
1314     MachineInstr &MI, SmallPtrSetImpl<MachineInstr *> &LocalMIs) {
1315   assert(isUncoalescableCopy(MI) && "Invalid argument");
1316   UncoalescableRewriter CpyRewriter(MI);
1317 
1318   // Rewrite each rewritable source by generating new COPYs. This works
1319   // differently from optimizeCoalescableCopy since it first makes sure that all
1320   // definitions can be rewritten.
1321   RewriteMapTy RewriteMap;
1322   RegSubRegPair Src;
1323   RegSubRegPair Def;
1324   SmallVector<RegSubRegPair, 4> RewritePairs;
1325   while (CpyRewriter.getNextRewritableSource(Src, Def)) {
1326     // If a physical register is here, this is probably for a good reason.
1327     // Do not rewrite that.
1328     if (Def.Reg.isPhysical())
1329       return false;
1330 
1331     // FIXME: Uncoalescable copies are treated differently by
1332     // UncoalescableRewriter, and this probably should not share
1333     // API. getNextRewritableSource really finds rewritable defs.
1334     const TargetRegisterClass *DefRC = MRI->getRegClass(Def.Reg);
1335 
1336     // If we do not know how to rewrite this definition, there is no point
1337     // in trying to kill this instruction.
1338     if (!findNextSource(DefRC, Def.SubReg, Def, RewriteMap))
1339       return false;
1340 
1341     RewritePairs.push_back(Def);
1342   }
1343 
1344   // The change is possible for all defs, do it.
1345   for (const RegSubRegPair &Def : RewritePairs) {
1346     // Rewrite the "copy" in a way the register coalescer understands.
1347     MachineInstr &NewCopy = rewriteSource(MI, Def, RewriteMap);
1348     LocalMIs.insert(&NewCopy);
1349   }
1350 
1351   // MI is now dead.
1352   LLVM_DEBUG(dbgs() << "Deleting uncoalescable copy: " << MI);
1353   MI.eraseFromParent();
1354   ++NumUncoalescableCopies;
1355   return true;
1356 }
1357 
1358 /// Check whether MI is a candidate for folding into a later instruction.
1359 /// We only fold loads to virtual registers and the virtual register defined
1360 /// has a single user.
1361 bool PeepholeOptimizer::isLoadFoldable(
1362     MachineInstr &MI, SmallSet<Register, 16> &FoldAsLoadDefCandidates) {
1363   if (!MI.canFoldAsLoad() || !MI.mayLoad())
1364     return false;
1365   const MCInstrDesc &MCID = MI.getDesc();
1366   if (MCID.getNumDefs() != 1)
1367     return false;
1368 
1369   Register Reg = MI.getOperand(0).getReg();
1370   // To reduce compilation time, we check MRI->hasOneNonDBGUser when inserting
1371   // loads. It should be checked when processing uses of the load, since
1372   // uses can be removed during peephole.
1373   if (Reg.isVirtual() && !MI.getOperand(0).getSubReg() &&
1374       MRI->hasOneNonDBGUser(Reg)) {
1375     FoldAsLoadDefCandidates.insert(Reg);
1376     return true;
1377   }
1378   return false;
1379 }
1380 
1381 bool PeepholeOptimizer::isMoveImmediate(
1382     MachineInstr &MI, SmallSet<Register, 4> &ImmDefRegs,
1383     DenseMap<Register, MachineInstr *> &ImmDefMIs) {
1384   const MCInstrDesc &MCID = MI.getDesc();
1385   if (MCID.getNumDefs() != 1 || !MI.getOperand(0).isReg())
1386     return false;
1387   Register Reg = MI.getOperand(0).getReg();
1388   if (!Reg.isVirtual())
1389     return false;
1390 
1391   int64_t ImmVal;
1392   if (!MI.isMoveImmediate() && !TII->getConstValDefinedInReg(MI, Reg, ImmVal))
1393     return false;
1394 
1395   ImmDefMIs.insert(std::make_pair(Reg, &MI));
1396   ImmDefRegs.insert(Reg);
1397   return true;
1398 }
1399 
1400 /// Try folding register operands that are defined by move immediate
1401 /// instructions, i.e. a trivial constant folding optimization, if
1402 /// and only if the def and use are in the same BB.
1403 bool PeepholeOptimizer::foldImmediate(
1404     MachineInstr &MI, SmallSet<Register, 4> &ImmDefRegs,
1405     DenseMap<Register, MachineInstr *> &ImmDefMIs, bool &Deleted) {
1406   Deleted = false;
1407   for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) {
1408     MachineOperand &MO = MI.getOperand(i);
1409     if (!MO.isReg() || MO.isDef())
1410       continue;
1411     Register Reg = MO.getReg();
1412     if (!Reg.isVirtual())
1413       continue;
1414     if (ImmDefRegs.count(Reg) == 0)
1415       continue;
1416     DenseMap<Register, MachineInstr *>::iterator II = ImmDefMIs.find(Reg);
1417     assert(II != ImmDefMIs.end() && "couldn't find immediate definition");
1418     if (TII->foldImmediate(MI, *II->second, Reg, MRI)) {
1419       ++NumImmFold;
1420       // foldImmediate can delete ImmDefMI if MI was its only user. If ImmDefMI
1421       // is not deleted, and we happened to get a same MI, we can delete MI and
1422       // replace its users.
1423       if (MRI->getVRegDef(Reg) &&
1424           MI.isIdenticalTo(*II->second, MachineInstr::IgnoreVRegDefs)) {
1425         Register DstReg = MI.getOperand(0).getReg();
1426         if (DstReg.isVirtual() &&
1427             MRI->getRegClass(DstReg) == MRI->getRegClass(Reg)) {
1428           MRI->replaceRegWith(DstReg, Reg);
1429           MI.eraseFromParent();
1430           Deleted = true;
1431         }
1432       }
1433       return true;
1434     }
1435   }
1436   return false;
1437 }
1438 
1439 // FIXME: This is very simple and misses some cases which should be handled when
1440 // motivating examples are found.
1441 //
1442 // The copy rewriting logic should look at uses as well as defs and be able to
1443 // eliminate copies across blocks.
1444 //
1445 // Later copies that are subregister extracts will also not be eliminated since
1446 // only the first copy is considered.
1447 //
1448 // e.g.
1449 // %1 = COPY %0
1450 // %2 = COPY %0:sub1
1451 //
1452 // Should replace %2 uses with %1:sub1
1453 bool PeepholeOptimizer::foldRedundantCopy(MachineInstr &MI) {
1454   assert(MI.isCopy() && "expected a COPY machine instruction");
1455 
1456   RegSubRegPair SrcPair;
1457   if (!getCopySrc(MI, SrcPair))
1458     return false;
1459 
1460   Register DstReg = MI.getOperand(0).getReg();
1461   if (!DstReg.isVirtual())
1462     return false;
1463 
1464   if (CopySrcMIs.insert(std::make_pair(SrcPair, &MI)).second) {
1465     // First copy of this reg seen.
1466     return false;
1467   }
1468 
1469   MachineInstr *PrevCopy = CopySrcMIs.find(SrcPair)->second;
1470 
1471   assert(SrcPair.SubReg == PrevCopy->getOperand(1).getSubReg() &&
1472          "Unexpected mismatching subreg!");
1473 
1474   Register PrevDstReg = PrevCopy->getOperand(0).getReg();
1475 
1476   // Only replace if the copy register class is the same.
1477   //
1478   // TODO: If we have multiple copies to different register classes, we may want
1479   // to track multiple copies of the same source register.
1480   if (MRI->getRegClass(DstReg) != MRI->getRegClass(PrevDstReg))
1481     return false;
1482 
1483   MRI->replaceRegWith(DstReg, PrevDstReg);
1484 
1485   // Lifetime of the previous copy has been extended.
1486   MRI->clearKillFlags(PrevDstReg);
1487   return true;
1488 }
1489 
1490 bool PeepholeOptimizer::isNAPhysCopy(Register Reg) {
1491   return Reg.isPhysical() && !MRI->isAllocatable(Reg);
1492 }
1493 
1494 bool PeepholeOptimizer::foldRedundantNAPhysCopy(
1495     MachineInstr &MI, DenseMap<Register, MachineInstr *> &NAPhysToVirtMIs) {
1496   assert(MI.isCopy() && "expected a COPY machine instruction");
1497 
1498   if (DisableNAPhysCopyOpt)
1499     return false;
1500 
1501   Register DstReg = MI.getOperand(0).getReg();
1502   Register SrcReg = MI.getOperand(1).getReg();
1503   if (isNAPhysCopy(SrcReg) && DstReg.isVirtual()) {
1504     // %vreg = COPY $physreg
1505     // Avoid using a datastructure which can track multiple live non-allocatable
1506     // phys->virt copies since LLVM doesn't seem to do this.
1507     NAPhysToVirtMIs.insert({SrcReg, &MI});
1508     return false;
1509   }
1510 
1511   if (!(SrcReg.isVirtual() && isNAPhysCopy(DstReg)))
1512     return false;
1513 
1514   // $physreg = COPY %vreg
1515   auto PrevCopy = NAPhysToVirtMIs.find(DstReg);
1516   if (PrevCopy == NAPhysToVirtMIs.end()) {
1517     // We can't remove the copy: there was an intervening clobber of the
1518     // non-allocatable physical register after the copy to virtual.
1519     LLVM_DEBUG(dbgs() << "NAPhysCopy: intervening clobber forbids erasing "
1520                       << MI);
1521     return false;
1522   }
1523 
1524   Register PrevDstReg = PrevCopy->second->getOperand(0).getReg();
1525   if (PrevDstReg == SrcReg) {
1526     // Remove the virt->phys copy: we saw the virtual register definition, and
1527     // the non-allocatable physical register's state hasn't changed since then.
1528     LLVM_DEBUG(dbgs() << "NAPhysCopy: erasing " << MI);
1529     ++NumNAPhysCopies;
1530     return true;
1531   }
1532 
1533   // Potential missed optimization opportunity: we saw a different virtual
1534   // register get a copy of the non-allocatable physical register, and we only
1535   // track one such copy. Avoid getting confused by this new non-allocatable
1536   // physical register definition, and remove it from the tracked copies.
1537   LLVM_DEBUG(dbgs() << "NAPhysCopy: missed opportunity " << MI);
1538   NAPhysToVirtMIs.erase(PrevCopy);
1539   return false;
1540 }
1541 
1542 /// \bried Returns true if \p MO is a virtual register operand.
1543 static bool isVirtualRegisterOperand(MachineOperand &MO) {
1544   return MO.isReg() && MO.getReg().isVirtual();
1545 }
1546 
1547 bool PeepholeOptimizer::findTargetRecurrence(
1548     Register Reg, const SmallSet<Register, 2> &TargetRegs,
1549     RecurrenceCycle &RC) {
1550   // Recurrence found if Reg is in TargetRegs.
1551   if (TargetRegs.count(Reg))
1552     return true;
1553 
1554   // TODO: Curerntly, we only allow the last instruction of the recurrence
1555   // cycle (the instruction that feeds the PHI instruction) to have more than
1556   // one uses to guarantee that commuting operands does not tie registers
1557   // with overlapping live range. Once we have actual live range info of
1558   // each register, this constraint can be relaxed.
1559   if (!MRI->hasOneNonDBGUse(Reg))
1560     return false;
1561 
1562   // Give up if the reccurrence chain length is longer than the limit.
1563   if (RC.size() >= MaxRecurrenceChain)
1564     return false;
1565 
1566   MachineInstr &MI = *(MRI->use_instr_nodbg_begin(Reg));
1567   unsigned Idx = MI.findRegisterUseOperandIdx(Reg, /*TRI=*/nullptr);
1568 
1569   // Only interested in recurrences whose instructions have only one def, which
1570   // is a virtual register.
1571   if (MI.getDesc().getNumDefs() != 1)
1572     return false;
1573 
1574   MachineOperand &DefOp = MI.getOperand(0);
1575   if (!isVirtualRegisterOperand(DefOp))
1576     return false;
1577 
1578   // Check if def operand of MI is tied to any use operand. We are only
1579   // interested in the case that all the instructions in the recurrence chain
1580   // have there def operand tied with one of the use operand.
1581   unsigned TiedUseIdx;
1582   if (!MI.isRegTiedToUseOperand(0, &TiedUseIdx))
1583     return false;
1584 
1585   if (Idx == TiedUseIdx) {
1586     RC.push_back(RecurrenceInstr(&MI));
1587     return findTargetRecurrence(DefOp.getReg(), TargetRegs, RC);
1588   } else {
1589     // If Idx is not TiedUseIdx, check if Idx is commutable with TiedUseIdx.
1590     unsigned CommIdx = TargetInstrInfo::CommuteAnyOperandIndex;
1591     if (TII->findCommutedOpIndices(MI, Idx, CommIdx) && CommIdx == TiedUseIdx) {
1592       RC.push_back(RecurrenceInstr(&MI, Idx, CommIdx));
1593       return findTargetRecurrence(DefOp.getReg(), TargetRegs, RC);
1594     }
1595   }
1596 
1597   return false;
1598 }
1599 
1600 /// Phi instructions will eventually be lowered to copy instructions.
1601 /// If phi is in a loop header, a recurrence may formulated around the source
1602 /// and destination of the phi. For such case commuting operands of the
1603 /// instructions in the recurrence may enable coalescing of the copy instruction
1604 /// generated from the phi. For example, if there is a recurrence of
1605 ///
1606 /// LoopHeader:
1607 ///   %1 = phi(%0, %100)
1608 /// LoopLatch:
1609 ///   %0<def, tied1> = ADD %2<def, tied0>, %1
1610 ///
1611 /// , the fact that %0 and %2 are in the same tied operands set makes
1612 /// the coalescing of copy instruction generated from the phi in
1613 /// LoopHeader(i.e. %1 = COPY %0) impossible, because %1 and
1614 /// %2 have overlapping live range. This introduces additional move
1615 /// instruction to the final assembly. However, if we commute %2 and
1616 /// %1 of ADD instruction, the redundant move instruction can be
1617 /// avoided.
1618 bool PeepholeOptimizer::optimizeRecurrence(MachineInstr &PHI) {
1619   SmallSet<Register, 2> TargetRegs;
1620   for (unsigned Idx = 1; Idx < PHI.getNumOperands(); Idx += 2) {
1621     MachineOperand &MO = PHI.getOperand(Idx);
1622     assert(isVirtualRegisterOperand(MO) && "Invalid PHI instruction");
1623     TargetRegs.insert(MO.getReg());
1624   }
1625 
1626   bool Changed = false;
1627   RecurrenceCycle RC;
1628   if (findTargetRecurrence(PHI.getOperand(0).getReg(), TargetRegs, RC)) {
1629     // Commutes operands of instructions in RC if necessary so that the copy to
1630     // be generated from PHI can be coalesced.
1631     LLVM_DEBUG(dbgs() << "Optimize recurrence chain from " << PHI);
1632     for (auto &RI : RC) {
1633       LLVM_DEBUG(dbgs() << "\tInst: " << *(RI.getMI()));
1634       auto CP = RI.getCommutePair();
1635       if (CP) {
1636         Changed = true;
1637         TII->commuteInstruction(*(RI.getMI()), false, (*CP).first,
1638                                 (*CP).second);
1639         LLVM_DEBUG(dbgs() << "\t\tCommuted: " << *(RI.getMI()));
1640       }
1641     }
1642   }
1643 
1644   return Changed;
1645 }
1646 
1647 PreservedAnalyses
1648 PeepholeOptimizerPass::run(MachineFunction &MF,
1649                            MachineFunctionAnalysisManager &MFAM) {
1650   MFPropsModifier _(*this, MF);
1651   auto *DT =
1652       Aggressive ? &MFAM.getResult<MachineDominatorTreeAnalysis>(MF) : nullptr;
1653   auto *MLI = &MFAM.getResult<MachineLoopAnalysis>(MF);
1654   PeepholeOptimizer Impl(DT, MLI);
1655   bool Changed = Impl.run(MF);
1656   if (!Changed)
1657     return PreservedAnalyses::all();
1658 
1659   auto PA = getMachineFunctionPassPreservedAnalyses();
1660   PA.preserve<MachineDominatorTreeAnalysis>();
1661   PA.preserve<MachineLoopAnalysis>();
1662   PA.preserveSet<CFGAnalyses>();
1663   return PA;
1664 }
1665 
1666 bool PeepholeOptimizerLegacy::runOnMachineFunction(MachineFunction &MF) {
1667   if (skipFunction(MF.getFunction()))
1668     return false;
1669   auto *DT = Aggressive
1670                  ? &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree()
1671                  : nullptr;
1672   auto *MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
1673   PeepholeOptimizer Impl(DT, MLI);
1674   return Impl.run(MF);
1675 }
1676 
1677 bool PeepholeOptimizer::run(MachineFunction &MF) {
1678 
1679   LLVM_DEBUG(dbgs() << "********** PEEPHOLE OPTIMIZER **********\n");
1680   LLVM_DEBUG(dbgs() << "********** Function: " << MF.getName() << '\n');
1681 
1682   if (DisablePeephole)
1683     return false;
1684 
1685   TII = MF.getSubtarget().getInstrInfo();
1686   TRI = MF.getSubtarget().getRegisterInfo();
1687   MRI = &MF.getRegInfo();
1688   MF.setDelegate(this);
1689 
1690   bool Changed = false;
1691 
1692   for (MachineBasicBlock &MBB : MF) {
1693     bool SeenMoveImm = false;
1694 
1695     // During this forward scan, at some point it needs to answer the question
1696     // "given a pointer to an MI in the current BB, is it located before or
1697     // after the current instruction".
1698     // To perform this, the following set keeps track of the MIs already seen
1699     // during the scan, if a MI is not in the set, it is assumed to be located
1700     // after. Newly created MIs have to be inserted in the set as well.
1701     SmallPtrSet<MachineInstr *, 16> LocalMIs;
1702     SmallSet<Register, 4> ImmDefRegs;
1703     DenseMap<Register, MachineInstr *> ImmDefMIs;
1704     SmallSet<Register, 16> FoldAsLoadDefCandidates;
1705 
1706     // Track when a non-allocatable physical register is copied to a virtual
1707     // register so that useless moves can be removed.
1708     //
1709     // $physreg is the map index; MI is the last valid `%vreg = COPY $physreg`
1710     // without any intervening re-definition of $physreg.
1711     DenseMap<Register, MachineInstr *> NAPhysToVirtMIs;
1712 
1713     CopySrcMIs.clear();
1714 
1715     bool IsLoopHeader = MLI->isLoopHeader(&MBB);
1716 
1717     for (MachineBasicBlock::iterator MII = MBB.begin(), MIE = MBB.end();
1718          MII != MIE;) {
1719       MachineInstr *MI = &*MII;
1720       // We may be erasing MI below, increment MII now.
1721       ++MII;
1722       LocalMIs.insert(MI);
1723 
1724       // Skip debug instructions. They should not affect this peephole
1725       // optimization.
1726       if (MI->isDebugInstr())
1727         continue;
1728 
1729       if (MI->isPosition())
1730         continue;
1731 
1732       if (IsLoopHeader && MI->isPHI()) {
1733         if (optimizeRecurrence(*MI)) {
1734           Changed = true;
1735           continue;
1736         }
1737       }
1738 
1739       if (!MI->isCopy()) {
1740         for (const MachineOperand &MO : MI->operands()) {
1741           // Visit all operands: definitions can be implicit or explicit.
1742           if (MO.isReg()) {
1743             Register Reg = MO.getReg();
1744             if (MO.isDef() && isNAPhysCopy(Reg)) {
1745               const auto &Def = NAPhysToVirtMIs.find(Reg);
1746               if (Def != NAPhysToVirtMIs.end()) {
1747                 // A new definition of the non-allocatable physical register
1748                 // invalidates previous copies.
1749                 LLVM_DEBUG(dbgs()
1750                            << "NAPhysCopy: invalidating because of " << *MI);
1751                 NAPhysToVirtMIs.erase(Def);
1752               }
1753             }
1754           } else if (MO.isRegMask()) {
1755             const uint32_t *RegMask = MO.getRegMask();
1756             for (auto &RegMI : NAPhysToVirtMIs) {
1757               Register Def = RegMI.first;
1758               if (MachineOperand::clobbersPhysReg(RegMask, Def)) {
1759                 LLVM_DEBUG(dbgs()
1760                            << "NAPhysCopy: invalidating because of " << *MI);
1761                 NAPhysToVirtMIs.erase(Def);
1762               }
1763             }
1764           }
1765         }
1766       }
1767 
1768       if (MI->isImplicitDef() || MI->isKill())
1769         continue;
1770 
1771       if (MI->isInlineAsm() || MI->hasUnmodeledSideEffects()) {
1772         // Blow away all non-allocatable physical registers knowledge since we
1773         // don't know what's correct anymore.
1774         //
1775         // FIXME: handle explicit asm clobbers.
1776         LLVM_DEBUG(dbgs() << "NAPhysCopy: blowing away all info due to "
1777                           << *MI);
1778         NAPhysToVirtMIs.clear();
1779       }
1780 
1781       if ((isUncoalescableCopy(*MI) &&
1782            optimizeUncoalescableCopy(*MI, LocalMIs)) ||
1783           (MI->isCompare() && optimizeCmpInstr(*MI)) ||
1784           (MI->isSelect() && optimizeSelect(*MI, LocalMIs))) {
1785         // MI is deleted.
1786         LocalMIs.erase(MI);
1787         Changed = true;
1788         continue;
1789       }
1790 
1791       if (MI->isConditionalBranch() && optimizeCondBranch(*MI)) {
1792         Changed = true;
1793         continue;
1794       }
1795 
1796       if (isCoalescableCopy(*MI) && optimizeCoalescableCopy(*MI)) {
1797         // MI is just rewritten.
1798         Changed = true;
1799         continue;
1800       }
1801 
1802       if (MI->isCopy() && (foldRedundantCopy(*MI) ||
1803                            foldRedundantNAPhysCopy(*MI, NAPhysToVirtMIs))) {
1804         LocalMIs.erase(MI);
1805         LLVM_DEBUG(dbgs() << "Deleting redundant copy: " << *MI << "\n");
1806         MI->eraseFromParent();
1807         Changed = true;
1808         continue;
1809       }
1810 
1811       if (isMoveImmediate(*MI, ImmDefRegs, ImmDefMIs)) {
1812         SeenMoveImm = true;
1813       } else {
1814         Changed |= optimizeExtInstr(*MI, MBB, LocalMIs);
1815         // optimizeExtInstr might have created new instructions after MI
1816         // and before the already incremented MII. Adjust MII so that the
1817         // next iteration sees the new instructions.
1818         MII = MI;
1819         ++MII;
1820         if (SeenMoveImm) {
1821           bool Deleted;
1822           Changed |= foldImmediate(*MI, ImmDefRegs, ImmDefMIs, Deleted);
1823           if (Deleted) {
1824             LocalMIs.erase(MI);
1825             continue;
1826           }
1827         }
1828       }
1829 
1830       // Check whether MI is a load candidate for folding into a later
1831       // instruction. If MI is not a candidate, check whether we can fold an
1832       // earlier load into MI.
1833       if (!isLoadFoldable(*MI, FoldAsLoadDefCandidates) &&
1834           !FoldAsLoadDefCandidates.empty()) {
1835 
1836         // We visit each operand even after successfully folding a previous
1837         // one.  This allows us to fold multiple loads into a single
1838         // instruction.  We do assume that optimizeLoadInstr doesn't insert
1839         // foldable uses earlier in the argument list.  Since we don't restart
1840         // iteration, we'd miss such cases.
1841         const MCInstrDesc &MIDesc = MI->getDesc();
1842         for (unsigned i = MIDesc.getNumDefs(); i != MI->getNumOperands(); ++i) {
1843           const MachineOperand &MOp = MI->getOperand(i);
1844           if (!MOp.isReg())
1845             continue;
1846           Register FoldAsLoadDefReg = MOp.getReg();
1847           if (FoldAsLoadDefCandidates.count(FoldAsLoadDefReg)) {
1848             // We need to fold load after optimizeCmpInstr, since
1849             // optimizeCmpInstr can enable folding by converting SUB to CMP.
1850             // Save FoldAsLoadDefReg because optimizeLoadInstr() resets it and
1851             // we need it for markUsesInDebugValueAsUndef().
1852             Register FoldedReg = FoldAsLoadDefReg;
1853             MachineInstr *DefMI = nullptr;
1854             if (MachineInstr *FoldMI =
1855                     TII->optimizeLoadInstr(*MI, MRI, FoldAsLoadDefReg, DefMI)) {
1856               // Update LocalMIs since we replaced MI with FoldMI and deleted
1857               // DefMI.
1858               LLVM_DEBUG(dbgs() << "Replacing: " << *MI);
1859               LLVM_DEBUG(dbgs() << "     With: " << *FoldMI);
1860               LocalMIs.erase(MI);
1861               LocalMIs.erase(DefMI);
1862               LocalMIs.insert(FoldMI);
1863               // Update the call info.
1864               if (MI->shouldUpdateAdditionalCallInfo())
1865                 MI->getMF()->moveAdditionalCallInfo(MI, FoldMI);
1866               MI->eraseFromParent();
1867               DefMI->eraseFromParent();
1868               MRI->markUsesInDebugValueAsUndef(FoldedReg);
1869               FoldAsLoadDefCandidates.erase(FoldedReg);
1870               ++NumLoadFold;
1871 
1872               // MI is replaced with FoldMI so we can continue trying to fold
1873               Changed = true;
1874               MI = FoldMI;
1875             }
1876           }
1877         }
1878       }
1879 
1880       // If we run into an instruction we can't fold across, discard
1881       // the load candidates.  Note: We might be able to fold *into* this
1882       // instruction, so this needs to be after the folding logic.
1883       if (MI->isLoadFoldBarrier()) {
1884         LLVM_DEBUG(dbgs() << "Encountered load fold barrier on " << *MI);
1885         FoldAsLoadDefCandidates.clear();
1886       }
1887     }
1888   }
1889 
1890   MF.resetDelegate(this);
1891   return Changed;
1892 }
1893 
1894 ValueTrackerResult ValueTracker::getNextSourceFromCopy() {
1895   assert(Def->isCopy() && "Invalid definition");
1896   // Copy instruction are supposed to be: Def = Src.
1897   // If someone breaks this assumption, bad things will happen everywhere.
1898   // There may be implicit uses preventing the copy to be moved across
1899   // some target specific register definitions
1900   assert(Def->getNumOperands() - Def->getNumImplicitOperands() == 2 &&
1901          "Invalid number of operands");
1902   assert(!Def->hasImplicitDef() && "Only implicit uses are allowed");
1903   assert(!Def->getOperand(DefIdx).getSubReg() && "no subregister defs in SSA");
1904 
1905   // Otherwise, we want the whole source.
1906   const MachineOperand &Src = Def->getOperand(1);
1907   if (Src.isUndef())
1908     return ValueTrackerResult();
1909   return ValueTrackerResult(Src.getReg(), Src.getSubReg());
1910 }
1911 
1912 ValueTrackerResult ValueTracker::getNextSourceFromBitcast() {
1913   assert(Def->isBitcast() && "Invalid definition");
1914 
1915   // Bail if there are effects that a plain copy will not expose.
1916   if (Def->mayRaiseFPException() || Def->hasUnmodeledSideEffects())
1917     return ValueTrackerResult();
1918 
1919   // Bitcasts with more than one def are not supported.
1920   if (Def->getDesc().getNumDefs() != 1)
1921     return ValueTrackerResult();
1922 
1923   assert(!Def->getOperand(DefIdx).getSubReg() && "no subregister defs in SSA");
1924 
1925   unsigned SrcIdx = Def->getNumOperands();
1926   for (unsigned OpIdx = DefIdx + 1, EndOpIdx = SrcIdx; OpIdx != EndOpIdx;
1927        ++OpIdx) {
1928     const MachineOperand &MO = Def->getOperand(OpIdx);
1929     if (!MO.isReg() || !MO.getReg())
1930       continue;
1931     // Ignore dead implicit defs.
1932     if (MO.isImplicit() && MO.isDead())
1933       continue;
1934     assert(!MO.isDef() && "We should have skipped all the definitions by now");
1935     if (SrcIdx != EndOpIdx)
1936       // Multiple sources?
1937       return ValueTrackerResult();
1938     SrcIdx = OpIdx;
1939   }
1940 
1941   // In some rare case, Def has no input, SrcIdx is out of bound,
1942   // getOperand(SrcIdx) will fail below.
1943   if (SrcIdx >= Def->getNumOperands())
1944     return ValueTrackerResult();
1945 
1946   const MachineOperand &DefOp = Def->getOperand(DefIdx);
1947 
1948   // Stop when any user of the bitcast is a SUBREG_TO_REG, replacing with a COPY
1949   // will break the assumed guarantees for the upper bits.
1950   for (const MachineInstr &UseMI : MRI.use_nodbg_instructions(DefOp.getReg())) {
1951     if (UseMI.isSubregToReg())
1952       return ValueTrackerResult();
1953   }
1954 
1955   const MachineOperand &Src = Def->getOperand(SrcIdx);
1956   if (Src.isUndef())
1957     return ValueTrackerResult();
1958   return ValueTrackerResult(Src.getReg(), Src.getSubReg());
1959 }
1960 
1961 ValueTrackerResult ValueTracker::getNextSourceFromRegSequence() {
1962   assert((Def->isRegSequence() || Def->isRegSequenceLike()) &&
1963          "Invalid definition");
1964 
1965   assert(!Def->getOperand(DefIdx).getSubReg() && "illegal subregister def");
1966 
1967   SmallVector<RegSubRegPairAndIdx, 8> RegSeqInputRegs;
1968   if (!TII->getRegSequenceInputs(*Def, DefIdx, RegSeqInputRegs))
1969     return ValueTrackerResult();
1970 
1971   // We are looking at:
1972   // Def = REG_SEQUENCE v0, sub0, v1, sub1, ...
1973   //
1974   // Check if one of the operands exactly defines the subreg we are interested
1975   // in.
1976   for (const RegSubRegPairAndIdx &RegSeqInput : RegSeqInputRegs) {
1977     if (RegSeqInput.SubIdx == DefSubReg)
1978       return ValueTrackerResult(RegSeqInput.Reg, RegSeqInput.SubReg);
1979   }
1980 
1981   const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
1982 
1983   // If we did not find an exact match, see if we can do a composition to
1984   // extract a sub-subregister.
1985   for (const RegSubRegPairAndIdx &RegSeqInput : RegSeqInputRegs) {
1986     LaneBitmask DefMask = TRI->getSubRegIndexLaneMask(DefSubReg);
1987     LaneBitmask ThisOpRegMask = TRI->getSubRegIndexLaneMask(RegSeqInput.SubIdx);
1988 
1989     // Check that this extract reads a subset of this single reg_sequence input.
1990     //
1991     // FIXME: We should be able to filter this in terms of the indexes directly
1992     // without checking the lanemasks.
1993     if ((DefMask & ThisOpRegMask) != DefMask)
1994       continue;
1995 
1996     unsigned ReverseDefCompose =
1997         TRI->reverseComposeSubRegIndices(RegSeqInput.SubIdx, DefSubReg);
1998     if (!ReverseDefCompose)
1999       continue;
2000 
2001     unsigned ComposedDefInSrcReg1 =
2002         TRI->composeSubRegIndices(RegSeqInput.SubReg, ReverseDefCompose);
2003 
2004     // TODO: We should be able to defer checking if the result register class
2005     // supports the index to continue looking for a rewritable source.
2006     //
2007     // TODO: Should we modify the register class to support the index?
2008     const TargetRegisterClass *SrcRC = MRI.getRegClass(RegSeqInput.Reg);
2009     const TargetRegisterClass *SrcWithSubRC =
2010         TRI->getSubClassWithSubReg(SrcRC, ComposedDefInSrcReg1);
2011     if (SrcRC != SrcWithSubRC)
2012       return ValueTrackerResult();
2013 
2014     return ValueTrackerResult(RegSeqInput.Reg, ComposedDefInSrcReg1);
2015   }
2016 
2017   // If the subreg we are tracking is super-defined by another subreg,
2018   // we could follow this value. However, this would require to compose
2019   // the subreg and we do not do that for now.
2020   return ValueTrackerResult();
2021 }
2022 
2023 ValueTrackerResult ValueTracker::getNextSourceFromInsertSubreg() {
2024   assert((Def->isInsertSubreg() || Def->isInsertSubregLike()) &&
2025          "Invalid definition");
2026   assert(!Def->getOperand(DefIdx).getSubReg() && "no subreg defs in SSA");
2027 
2028   RegSubRegPair BaseReg;
2029   RegSubRegPairAndIdx InsertedReg;
2030   if (!TII->getInsertSubregInputs(*Def, DefIdx, BaseReg, InsertedReg))
2031     return ValueTrackerResult();
2032 
2033   // We are looking at:
2034   // Def = INSERT_SUBREG v0, v1, sub1
2035   // There are two cases:
2036   // 1. DefSubReg == sub1, get v1.
2037   // 2. DefSubReg != sub1, the value may be available through v0.
2038 
2039   // #1 Check if the inserted register matches the required sub index.
2040   if (InsertedReg.SubIdx == DefSubReg) {
2041     return ValueTrackerResult(InsertedReg.Reg, InsertedReg.SubReg);
2042   }
2043   // #2 Otherwise, if the sub register we are looking for is not partial
2044   // defined by the inserted element, we can look through the main
2045   // register (v0).
2046   const MachineOperand &MODef = Def->getOperand(DefIdx);
2047   // If the result register (Def) and the base register (v0) do not
2048   // have the same register class or if we have to compose
2049   // subregisters, bail out.
2050   if (MRI.getRegClass(MODef.getReg()) != MRI.getRegClass(BaseReg.Reg) ||
2051       BaseReg.SubReg)
2052     return ValueTrackerResult();
2053 
2054   // Get the TRI and check if the inserted sub-register overlaps with the
2055   // sub-register we are tracking.
2056   const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
2057   if ((TRI->getSubRegIndexLaneMask(DefSubReg) &
2058        TRI->getSubRegIndexLaneMask(InsertedReg.SubIdx))
2059           .any())
2060     return ValueTrackerResult();
2061   // At this point, the value is available in v0 via the same subreg
2062   // we used for Def.
2063   return ValueTrackerResult(BaseReg.Reg, DefSubReg);
2064 }
2065 
2066 ValueTrackerResult ValueTracker::getNextSourceFromExtractSubreg() {
2067   assert((Def->isExtractSubreg() || Def->isExtractSubregLike()) &&
2068          "Invalid definition");
2069   // We are looking at:
2070   // Def = EXTRACT_SUBREG v0, sub0
2071 
2072   // Bail if we have to compose sub registers.
2073   // Indeed, if DefSubReg != 0, we would have to compose it with sub0.
2074   if (DefSubReg)
2075     return ValueTrackerResult();
2076 
2077   RegSubRegPairAndIdx ExtractSubregInputReg;
2078   if (!TII->getExtractSubregInputs(*Def, DefIdx, ExtractSubregInputReg))
2079     return ValueTrackerResult();
2080 
2081   // Bail if we have to compose sub registers.
2082   // Likewise, if v0.subreg != 0, we would have to compose v0.subreg with sub0.
2083   if (ExtractSubregInputReg.SubReg)
2084     return ValueTrackerResult();
2085   // Otherwise, the value is available in the v0.sub0.
2086   return ValueTrackerResult(ExtractSubregInputReg.Reg,
2087                             ExtractSubregInputReg.SubIdx);
2088 }
2089 
2090 ValueTrackerResult ValueTracker::getNextSourceFromSubregToReg() {
2091   assert(Def->isSubregToReg() && "Invalid definition");
2092   // We are looking at:
2093   // Def = SUBREG_TO_REG Imm, v0, sub0
2094 
2095   // Bail if we have to compose sub registers.
2096   // If DefSubReg != sub0, we would have to check that all the bits
2097   // we track are included in sub0 and if yes, we would have to
2098   // determine the right subreg in v0.
2099   if (DefSubReg != Def->getOperand(3).getImm())
2100     return ValueTrackerResult();
2101   // Bail if we have to compose sub registers.
2102   // Likewise, if v0.subreg != 0, we would have to compose it with sub0.
2103   if (Def->getOperand(2).getSubReg())
2104     return ValueTrackerResult();
2105 
2106   return ValueTrackerResult(Def->getOperand(2).getReg(),
2107                             Def->getOperand(3).getImm());
2108 }
2109 
2110 /// Explore each PHI incoming operand and return its sources.
2111 ValueTrackerResult ValueTracker::getNextSourceFromPHI() {
2112   assert(Def->isPHI() && "Invalid definition");
2113   ValueTrackerResult Res;
2114 
2115   // Return all register sources for PHI instructions.
2116   for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2) {
2117     const MachineOperand &MO = Def->getOperand(i);
2118     assert(MO.isReg() && "Invalid PHI instruction");
2119     // We have no code to deal with undef operands. They shouldn't happen in
2120     // normal programs anyway.
2121     if (MO.isUndef())
2122       return ValueTrackerResult();
2123     Res.addSource(MO.getReg(), MO.getSubReg());
2124   }
2125 
2126   return Res;
2127 }
2128 
2129 ValueTrackerResult ValueTracker::getNextSourceImpl() {
2130   assert(Def && "This method needs a valid definition");
2131 
2132   assert(((Def->getOperand(DefIdx).isDef() &&
2133            (DefIdx < Def->getDesc().getNumDefs() ||
2134             Def->getDesc().isVariadic())) ||
2135           Def->getOperand(DefIdx).isImplicit()) &&
2136          "Invalid DefIdx");
2137   if (Def->isCopy())
2138     return getNextSourceFromCopy();
2139   if (Def->isBitcast())
2140     return getNextSourceFromBitcast();
2141   // All the remaining cases involve "complex" instructions.
2142   // Bail if we did not ask for the advanced tracking.
2143   if (DisableAdvCopyOpt)
2144     return ValueTrackerResult();
2145   if (Def->isRegSequence() || Def->isRegSequenceLike())
2146     return getNextSourceFromRegSequence();
2147   if (Def->isInsertSubreg() || Def->isInsertSubregLike())
2148     return getNextSourceFromInsertSubreg();
2149   if (Def->isExtractSubreg() || Def->isExtractSubregLike())
2150     return getNextSourceFromExtractSubreg();
2151   if (Def->isSubregToReg())
2152     return getNextSourceFromSubregToReg();
2153   if (Def->isPHI())
2154     return getNextSourceFromPHI();
2155   return ValueTrackerResult();
2156 }
2157 
2158 ValueTrackerResult ValueTracker::getNextSource() {
2159   // If we reach a point where we cannot move up in the use-def chain,
2160   // there is nothing we can get.
2161   if (!Def)
2162     return ValueTrackerResult();
2163 
2164   ValueTrackerResult Res = getNextSourceImpl();
2165   if (Res.isValid()) {
2166     // Update definition, definition index, and subregister for the
2167     // next call of getNextSource.
2168     // Update the current register.
2169     bool OneRegSrc = Res.getNumSources() == 1;
2170     if (OneRegSrc)
2171       Reg = Res.getSrcReg(0);
2172     // Update the result before moving up in the use-def chain
2173     // with the instruction containing the last found sources.
2174     Res.setInst(Def);
2175 
2176     // If we can still move up in the use-def chain, move to the next
2177     // definition.
2178     if (!Reg.isPhysical() && OneRegSrc) {
2179       MachineRegisterInfo::def_iterator DI = MRI.def_begin(Reg);
2180       if (DI != MRI.def_end()) {
2181         Def = DI->getParent();
2182         DefIdx = DI.getOperandNo();
2183         DefSubReg = Res.getSrcSubReg(0);
2184       } else {
2185         Def = nullptr;
2186       }
2187       return Res;
2188     }
2189   }
2190   // If we end up here, this means we will not be able to find another source
2191   // for the next iteration. Make sure any new call to getNextSource bails out
2192   // early by cutting the use-def chain.
2193   Def = nullptr;
2194   return Res;
2195 }
2196