xref: /freebsd/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp (revision e3f4a63af63bea70bc86b6c790b14aa5ee99fcd0)
1 //===- HexagonBitSimplify.cpp ---------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "BitTracker.h"
10 #include "Hexagon.h"
11 #include "HexagonBitTracker.h"
12 #include "HexagonInstrInfo.h"
13 #include "HexagonRegisterInfo.h"
14 #include "HexagonSubtarget.h"
15 #include "llvm/ADT/BitVector.h"
16 #include "llvm/ADT/SmallVector.h"
17 #include "llvm/ADT/StringRef.h"
18 #include "llvm/CodeGen/MachineBasicBlock.h"
19 #include "llvm/CodeGen/MachineDominators.h"
20 #include "llvm/CodeGen/MachineFunction.h"
21 #include "llvm/CodeGen/MachineFunctionPass.h"
22 #include "llvm/CodeGen/MachineInstr.h"
23 #include "llvm/CodeGen/MachineOperand.h"
24 #include "llvm/CodeGen/MachineRegisterInfo.h"
25 #include "llvm/CodeGen/TargetRegisterInfo.h"
26 #include "llvm/IR/DebugLoc.h"
27 #include "llvm/InitializePasses.h"
28 #include "llvm/MC/MCInstrDesc.h"
29 #include "llvm/Pass.h"
30 #include "llvm/Support/Debug.h"
31 #include "llvm/Support/raw_ostream.h"
32 
33 #define DEBUG_TYPE "hexbit"
34 
35 using namespace llvm;
36 
37 static cl::opt<bool> PreserveTiedOps("hexbit-keep-tied", cl::Hidden,
38   cl::init(true), cl::desc("Preserve subregisters in tied operands"));
39 static cl::opt<bool> GenExtract("hexbit-extract", cl::Hidden,
40   cl::init(true), cl::desc("Generate extract instructions"));
41 static cl::opt<bool> GenBitSplit("hexbit-bitsplit", cl::Hidden,
42   cl::init(true), cl::desc("Generate bitsplit instructions"));
43 
44 static cl::opt<unsigned> MaxExtract("hexbit-max-extract", cl::Hidden,
45   cl::init(std::numeric_limits<unsigned>::max()));
46 static unsigned CountExtract = 0;
47 static cl::opt<unsigned> MaxBitSplit("hexbit-max-bitsplit", cl::Hidden,
48   cl::init(std::numeric_limits<unsigned>::max()));
49 static unsigned CountBitSplit = 0;
50 
51 static cl::opt<unsigned> RegisterSetLimit("hexbit-registerset-limit",
52   cl::Hidden, cl::init(1000));
53 
54 namespace {
55 
56   // Set of virtual registers, based on BitVector.
57   struct RegisterSet {
58     RegisterSet() = default;
59     explicit RegisterSet(unsigned s, bool t = false) : Bits(s, t) {}
60     RegisterSet(const RegisterSet &RS) = default;
61 
62     void clear() {
63       Bits.clear();
64       LRU.clear();
65     }
66 
67     unsigned count() const {
68       return Bits.count();
69     }
70 
71     unsigned find_first() const {
72       int First = Bits.find_first();
73       if (First < 0)
74         return 0;
75       return x2v(First);
76     }
77 
78     unsigned find_next(unsigned Prev) const {
79       int Next = Bits.find_next(v2x(Prev));
80       if (Next < 0)
81         return 0;
82       return x2v(Next);
83     }
84 
85     RegisterSet &insert(unsigned R) {
86       unsigned Idx = v2x(R);
87       ensure(Idx);
88       bool Exists = Bits.test(Idx);
89       Bits.set(Idx);
90       if (!Exists) {
91         LRU.push_back(Idx);
92         if (LRU.size() > RegisterSetLimit) {
93           unsigned T = LRU.front();
94           Bits.reset(T);
95           LRU.pop_front();
96         }
97       }
98       return *this;
99     }
100     RegisterSet &remove(unsigned R) {
101       unsigned Idx = v2x(R);
102       if (Idx < Bits.size()) {
103         bool Exists = Bits.test(Idx);
104         Bits.reset(Idx);
105         if (Exists) {
106           auto F = llvm::find(LRU, Idx);
107           assert(F != LRU.end());
108           LRU.erase(F);
109         }
110       }
111       return *this;
112     }
113 
114     RegisterSet &insert(const RegisterSet &Rs) {
115       for (unsigned R = Rs.find_first(); R; R = Rs.find_next(R))
116         insert(R);
117       return *this;
118     }
119     RegisterSet &remove(const RegisterSet &Rs) {
120       for (unsigned R = Rs.find_first(); R; R = Rs.find_next(R))
121         remove(R);
122       return *this;
123     }
124 
125     bool operator[](unsigned R) const {
126       unsigned Idx = v2x(R);
127       return Idx < Bits.size() ? Bits[Idx] : false;
128     }
129     bool has(unsigned R) const {
130       unsigned Idx = v2x(R);
131       if (Idx >= Bits.size())
132         return false;
133       return Bits.test(Idx);
134     }
135 
136     bool empty() const {
137       return !Bits.any();
138     }
139     bool includes(const RegisterSet &Rs) const {
140       // A.test(B)  <=>  A-B != {}
141       return !Rs.Bits.test(Bits);
142     }
143     bool intersects(const RegisterSet &Rs) const {
144       return Bits.anyCommon(Rs.Bits);
145     }
146 
147   private:
148     BitVector Bits;
149     std::deque<unsigned> LRU;
150 
151     void ensure(unsigned Idx) {
152       if (Bits.size() <= Idx)
153         Bits.resize(std::max(Idx+1, 32U));
154     }
155 
156     static inline unsigned v2x(unsigned v) {
157       return Register(v).virtRegIndex();
158     }
159 
160     static inline unsigned x2v(unsigned x) {
161       return Register::index2VirtReg(x);
162     }
163   };
164 
165   struct PrintRegSet {
166     PrintRegSet(const RegisterSet &S, const TargetRegisterInfo *RI)
167       : RS(S), TRI(RI) {}
168 
169     friend raw_ostream &operator<< (raw_ostream &OS,
170           const PrintRegSet &P);
171 
172   private:
173     const RegisterSet &RS;
174     const TargetRegisterInfo *TRI;
175   };
176 
177   raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P)
178     LLVM_ATTRIBUTE_UNUSED;
179   raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P) {
180     OS << '{';
181     for (unsigned R = P.RS.find_first(); R; R = P.RS.find_next(R))
182       OS << ' ' << printReg(R, P.TRI);
183     OS << " }";
184     return OS;
185   }
186 
187   class Transformation;
188 
189   class HexagonBitSimplify : public MachineFunctionPass {
190   public:
191     static char ID;
192 
193     HexagonBitSimplify() : MachineFunctionPass(ID) {}
194 
195     StringRef getPassName() const override {
196       return "Hexagon bit simplification";
197     }
198 
199     void getAnalysisUsage(AnalysisUsage &AU) const override {
200       AU.addRequired<MachineDominatorTreeWrapperPass>();
201       AU.addPreserved<MachineDominatorTreeWrapperPass>();
202       MachineFunctionPass::getAnalysisUsage(AU);
203     }
204 
205     bool runOnMachineFunction(MachineFunction &MF) override;
206 
207     static void getInstrDefs(const MachineInstr &MI, RegisterSet &Defs);
208     static void getInstrUses(const MachineInstr &MI, RegisterSet &Uses);
209     static bool isEqual(const BitTracker::RegisterCell &RC1, uint16_t B1,
210         const BitTracker::RegisterCell &RC2, uint16_t B2, uint16_t W);
211     static bool isZero(const BitTracker::RegisterCell &RC, uint16_t B,
212         uint16_t W);
213     static bool getConst(const BitTracker::RegisterCell &RC, uint16_t B,
214         uint16_t W, uint64_t &U);
215     static bool replaceReg(Register OldR, Register NewR,
216                            MachineRegisterInfo &MRI);
217     static bool getSubregMask(const BitTracker::RegisterRef &RR,
218         unsigned &Begin, unsigned &Width, MachineRegisterInfo &MRI);
219     static bool replaceRegWithSub(Register OldR, Register NewR, unsigned NewSR,
220                                   MachineRegisterInfo &MRI);
221     static bool replaceSubWithSub(Register OldR, unsigned OldSR, Register NewR,
222                                   unsigned NewSR, MachineRegisterInfo &MRI);
223     static bool parseRegSequence(const MachineInstr &I,
224         BitTracker::RegisterRef &SL, BitTracker::RegisterRef &SH,
225         const MachineRegisterInfo &MRI);
226 
227     static bool getUsedBitsInStore(unsigned Opc, BitVector &Bits,
228         uint16_t Begin);
229     static bool getUsedBits(unsigned Opc, unsigned OpN, BitVector &Bits,
230         uint16_t Begin, const HexagonInstrInfo &HII);
231 
232     static const TargetRegisterClass *getFinalVRegClass(
233         const BitTracker::RegisterRef &RR, MachineRegisterInfo &MRI);
234     static bool isTransparentCopy(const BitTracker::RegisterRef &RD,
235         const BitTracker::RegisterRef &RS, MachineRegisterInfo &MRI);
236 
237   private:
238     MachineDominatorTree *MDT = nullptr;
239 
240     bool visitBlock(MachineBasicBlock &B, Transformation &T, RegisterSet &AVs);
241     static bool hasTiedUse(unsigned Reg, MachineRegisterInfo &MRI,
242         unsigned NewSub = Hexagon::NoSubRegister);
243   };
244 
245   using HBS = HexagonBitSimplify;
246 
247   // The purpose of this class is to provide a common facility to traverse
248   // the function top-down or bottom-up via the dominator tree, and keep
249   // track of the available registers.
250   class Transformation {
251   public:
252     bool TopDown;
253 
254     Transformation(bool TD) : TopDown(TD) {}
255     virtual ~Transformation() = default;
256 
257     virtual bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) = 0;
258   };
259 
260 } // end anonymous namespace
261 
262 char HexagonBitSimplify::ID = 0;
263 
264 INITIALIZE_PASS_BEGIN(HexagonBitSimplify, "hexagon-bit-simplify",
265       "Hexagon bit simplification", false, false)
266 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
267 INITIALIZE_PASS_END(HexagonBitSimplify, "hexagon-bit-simplify",
268       "Hexagon bit simplification", false, false)
269 
270 bool HexagonBitSimplify::visitBlock(MachineBasicBlock &B, Transformation &T,
271       RegisterSet &AVs) {
272   bool Changed = false;
273 
274   if (T.TopDown)
275     Changed = T.processBlock(B, AVs);
276 
277   RegisterSet Defs;
278   for (auto &I : B)
279     getInstrDefs(I, Defs);
280   RegisterSet NewAVs = AVs;
281   NewAVs.insert(Defs);
282 
283   for (auto *DTN : children<MachineDomTreeNode*>(MDT->getNode(&B)))
284     Changed |= visitBlock(*(DTN->getBlock()), T, NewAVs);
285 
286   if (!T.TopDown)
287     Changed |= T.processBlock(B, AVs);
288 
289   return Changed;
290 }
291 
292 //
293 // Utility functions:
294 //
295 void HexagonBitSimplify::getInstrDefs(const MachineInstr &MI,
296       RegisterSet &Defs) {
297   for (auto &Op : MI.operands()) {
298     if (!Op.isReg() || !Op.isDef())
299       continue;
300     Register R = Op.getReg();
301     if (!R.isVirtual())
302       continue;
303     Defs.insert(R);
304   }
305 }
306 
307 void HexagonBitSimplify::getInstrUses(const MachineInstr &MI,
308       RegisterSet &Uses) {
309   for (auto &Op : MI.operands()) {
310     if (!Op.isReg() || !Op.isUse())
311       continue;
312     Register R = Op.getReg();
313     if (!R.isVirtual())
314       continue;
315     Uses.insert(R);
316   }
317 }
318 
319 // Check if all the bits in range [B, E) in both cells are equal.
320 bool HexagonBitSimplify::isEqual(const BitTracker::RegisterCell &RC1,
321       uint16_t B1, const BitTracker::RegisterCell &RC2, uint16_t B2,
322       uint16_t W) {
323   for (uint16_t i = 0; i < W; ++i) {
324     // If RC1[i] is "bottom", it cannot be proven equal to RC2[i].
325     if (RC1[B1+i].Type == BitTracker::BitValue::Ref && RC1[B1+i].RefI.Reg == 0)
326       return false;
327     // Same for RC2[i].
328     if (RC2[B2+i].Type == BitTracker::BitValue::Ref && RC2[B2+i].RefI.Reg == 0)
329       return false;
330     if (RC1[B1+i] != RC2[B2+i])
331       return false;
332   }
333   return true;
334 }
335 
336 bool HexagonBitSimplify::isZero(const BitTracker::RegisterCell &RC,
337       uint16_t B, uint16_t W) {
338   assert(B < RC.width() && B+W <= RC.width());
339   for (uint16_t i = B; i < B+W; ++i)
340     if (!RC[i].is(0))
341       return false;
342   return true;
343 }
344 
345 bool HexagonBitSimplify::getConst(const BitTracker::RegisterCell &RC,
346         uint16_t B, uint16_t W, uint64_t &U) {
347   assert(B < RC.width() && B+W <= RC.width());
348   int64_t T = 0;
349   for (uint16_t i = B+W; i > B; --i) {
350     const BitTracker::BitValue &BV = RC[i-1];
351     T <<= 1;
352     if (BV.is(1))
353       T |= 1;
354     else if (!BV.is(0))
355       return false;
356   }
357   U = T;
358   return true;
359 }
360 
361 bool HexagonBitSimplify::replaceReg(Register OldR, Register NewR,
362                                     MachineRegisterInfo &MRI) {
363   if (!OldR.isVirtual() || !NewR.isVirtual())
364     return false;
365   auto Begin = MRI.use_begin(OldR), End = MRI.use_end();
366   decltype(End) NextI;
367   for (auto I = Begin; I != End; I = NextI) {
368     NextI = std::next(I);
369     I->setReg(NewR);
370   }
371   return Begin != End;
372 }
373 
374 bool HexagonBitSimplify::replaceRegWithSub(Register OldR, Register NewR,
375                                            unsigned NewSR,
376                                            MachineRegisterInfo &MRI) {
377   if (!OldR.isVirtual() || !NewR.isVirtual())
378     return false;
379   if (hasTiedUse(OldR, MRI, NewSR))
380     return false;
381   auto Begin = MRI.use_begin(OldR), End = MRI.use_end();
382   decltype(End) NextI;
383   for (auto I = Begin; I != End; I = NextI) {
384     NextI = std::next(I);
385     I->setReg(NewR);
386     I->setSubReg(NewSR);
387   }
388   return Begin != End;
389 }
390 
391 bool HexagonBitSimplify::replaceSubWithSub(Register OldR, unsigned OldSR,
392                                            Register NewR, unsigned NewSR,
393                                            MachineRegisterInfo &MRI) {
394   if (!OldR.isVirtual() || !NewR.isVirtual())
395     return false;
396   if (OldSR != NewSR && hasTiedUse(OldR, MRI, NewSR))
397     return false;
398   auto Begin = MRI.use_begin(OldR), End = MRI.use_end();
399   decltype(End) NextI;
400   for (auto I = Begin; I != End; I = NextI) {
401     NextI = std::next(I);
402     if (I->getSubReg() != OldSR)
403       continue;
404     I->setReg(NewR);
405     I->setSubReg(NewSR);
406   }
407   return Begin != End;
408 }
409 
410 // For a register ref (pair Reg:Sub), set Begin to the position of the LSB
411 // of Sub in Reg, and set Width to the size of Sub in bits. Return true,
412 // if this succeeded, otherwise return false.
413 bool HexagonBitSimplify::getSubregMask(const BitTracker::RegisterRef &RR,
414       unsigned &Begin, unsigned &Width, MachineRegisterInfo &MRI) {
415   const TargetRegisterClass *RC = MRI.getRegClass(RR.Reg);
416   if (RR.Sub == 0) {
417     Begin = 0;
418     Width = MRI.getTargetRegisterInfo()->getRegSizeInBits(*RC);
419     return true;
420   }
421 
422   Begin = 0;
423 
424   switch (RC->getID()) {
425     case Hexagon::DoubleRegsRegClassID:
426     case Hexagon::HvxWRRegClassID:
427       Width = MRI.getTargetRegisterInfo()->getRegSizeInBits(*RC) / 2;
428       if (RR.Sub == Hexagon::isub_hi || RR.Sub == Hexagon::vsub_hi)
429         Begin = Width;
430       break;
431     default:
432       return false;
433   }
434   return true;
435 }
436 
437 
438 // For a REG_SEQUENCE, set SL to the low subregister and SH to the high
439 // subregister.
440 bool HexagonBitSimplify::parseRegSequence(const MachineInstr &I,
441       BitTracker::RegisterRef &SL, BitTracker::RegisterRef &SH,
442       const MachineRegisterInfo &MRI) {
443   assert(I.getOpcode() == TargetOpcode::REG_SEQUENCE);
444   unsigned Sub1 = I.getOperand(2).getImm(), Sub2 = I.getOperand(4).getImm();
445   auto &DstRC = *MRI.getRegClass(I.getOperand(0).getReg());
446   auto &HRI = static_cast<const HexagonRegisterInfo&>(
447                   *MRI.getTargetRegisterInfo());
448   unsigned SubLo = HRI.getHexagonSubRegIndex(DstRC, Hexagon::ps_sub_lo);
449   unsigned SubHi = HRI.getHexagonSubRegIndex(DstRC, Hexagon::ps_sub_hi);
450   assert((Sub1 == SubLo && Sub2 == SubHi) || (Sub1 == SubHi && Sub2 == SubLo));
451   if (Sub1 == SubLo && Sub2 == SubHi) {
452     SL = I.getOperand(1);
453     SH = I.getOperand(3);
454     return true;
455   }
456   if (Sub1 == SubHi && Sub2 == SubLo) {
457     SH = I.getOperand(1);
458     SL = I.getOperand(3);
459     return true;
460   }
461   return false;
462 }
463 
464 // All stores (except 64-bit stores) take a 32-bit register as the source
465 // of the value to be stored. If the instruction stores into a location
466 // that is shorter than 32 bits, some bits of the source register are not
467 // used. For each store instruction, calculate the set of used bits in
468 // the source register, and set appropriate bits in Bits. Return true if
469 // the bits are calculated, false otherwise.
470 bool HexagonBitSimplify::getUsedBitsInStore(unsigned Opc, BitVector &Bits,
471       uint16_t Begin) {
472   using namespace Hexagon;
473 
474   switch (Opc) {
475     // Store byte
476     case S2_storerb_io:           // memb(Rs32+#s11:0)=Rt32
477     case S2_storerbnew_io:        // memb(Rs32+#s11:0)=Nt8.new
478     case S2_pstorerbt_io:         // if (Pv4) memb(Rs32+#u6:0)=Rt32
479     case S2_pstorerbf_io:         // if (!Pv4) memb(Rs32+#u6:0)=Rt32
480     case S4_pstorerbtnew_io:      // if (Pv4.new) memb(Rs32+#u6:0)=Rt32
481     case S4_pstorerbfnew_io:      // if (!Pv4.new) memb(Rs32+#u6:0)=Rt32
482     case S2_pstorerbnewt_io:      // if (Pv4) memb(Rs32+#u6:0)=Nt8.new
483     case S2_pstorerbnewf_io:      // if (!Pv4) memb(Rs32+#u6:0)=Nt8.new
484     case S4_pstorerbnewtnew_io:   // if (Pv4.new) memb(Rs32+#u6:0)=Nt8.new
485     case S4_pstorerbnewfnew_io:   // if (!Pv4.new) memb(Rs32+#u6:0)=Nt8.new
486     case S2_storerb_pi:           // memb(Rx32++#s4:0)=Rt32
487     case S2_storerbnew_pi:        // memb(Rx32++#s4:0)=Nt8.new
488     case S2_pstorerbt_pi:         // if (Pv4) memb(Rx32++#s4:0)=Rt32
489     case S2_pstorerbf_pi:         // if (!Pv4) memb(Rx32++#s4:0)=Rt32
490     case S2_pstorerbtnew_pi:      // if (Pv4.new) memb(Rx32++#s4:0)=Rt32
491     case S2_pstorerbfnew_pi:      // if (!Pv4.new) memb(Rx32++#s4:0)=Rt32
492     case S2_pstorerbnewt_pi:      // if (Pv4) memb(Rx32++#s4:0)=Nt8.new
493     case S2_pstorerbnewf_pi:      // if (!Pv4) memb(Rx32++#s4:0)=Nt8.new
494     case S2_pstorerbnewtnew_pi:   // if (Pv4.new) memb(Rx32++#s4:0)=Nt8.new
495     case S2_pstorerbnewfnew_pi:   // if (!Pv4.new) memb(Rx32++#s4:0)=Nt8.new
496     case S4_storerb_ap:           // memb(Re32=#U6)=Rt32
497     case S4_storerbnew_ap:        // memb(Re32=#U6)=Nt8.new
498     case S2_storerb_pr:           // memb(Rx32++Mu2)=Rt32
499     case S2_storerbnew_pr:        // memb(Rx32++Mu2)=Nt8.new
500     case S4_storerb_ur:           // memb(Ru32<<#u2+#U6)=Rt32
501     case S4_storerbnew_ur:        // memb(Ru32<<#u2+#U6)=Nt8.new
502     case S2_storerb_pbr:          // memb(Rx32++Mu2:brev)=Rt32
503     case S2_storerbnew_pbr:       // memb(Rx32++Mu2:brev)=Nt8.new
504     case S2_storerb_pci:          // memb(Rx32++#s4:0:circ(Mu2))=Rt32
505     case S2_storerbnew_pci:       // memb(Rx32++#s4:0:circ(Mu2))=Nt8.new
506     case S2_storerb_pcr:          // memb(Rx32++I:circ(Mu2))=Rt32
507     case S2_storerbnew_pcr:       // memb(Rx32++I:circ(Mu2))=Nt8.new
508     case S4_storerb_rr:           // memb(Rs32+Ru32<<#u2)=Rt32
509     case S4_storerbnew_rr:        // memb(Rs32+Ru32<<#u2)=Nt8.new
510     case S4_pstorerbt_rr:         // if (Pv4) memb(Rs32+Ru32<<#u2)=Rt32
511     case S4_pstorerbf_rr:         // if (!Pv4) memb(Rs32+Ru32<<#u2)=Rt32
512     case S4_pstorerbtnew_rr:      // if (Pv4.new) memb(Rs32+Ru32<<#u2)=Rt32
513     case S4_pstorerbfnew_rr:      // if (!Pv4.new) memb(Rs32+Ru32<<#u2)=Rt32
514     case S4_pstorerbnewt_rr:      // if (Pv4) memb(Rs32+Ru32<<#u2)=Nt8.new
515     case S4_pstorerbnewf_rr:      // if (!Pv4) memb(Rs32+Ru32<<#u2)=Nt8.new
516     case S4_pstorerbnewtnew_rr:   // if (Pv4.new) memb(Rs32+Ru32<<#u2)=Nt8.new
517     case S4_pstorerbnewfnew_rr:   // if (!Pv4.new) memb(Rs32+Ru32<<#u2)=Nt8.new
518     case S2_storerbgp:            // memb(gp+#u16:0)=Rt32
519     case S2_storerbnewgp:         // memb(gp+#u16:0)=Nt8.new
520     case S4_pstorerbt_abs:        // if (Pv4) memb(#u6)=Rt32
521     case S4_pstorerbf_abs:        // if (!Pv4) memb(#u6)=Rt32
522     case S4_pstorerbtnew_abs:     // if (Pv4.new) memb(#u6)=Rt32
523     case S4_pstorerbfnew_abs:     // if (!Pv4.new) memb(#u6)=Rt32
524     case S4_pstorerbnewt_abs:     // if (Pv4) memb(#u6)=Nt8.new
525     case S4_pstorerbnewf_abs:     // if (!Pv4) memb(#u6)=Nt8.new
526     case S4_pstorerbnewtnew_abs:  // if (Pv4.new) memb(#u6)=Nt8.new
527     case S4_pstorerbnewfnew_abs:  // if (!Pv4.new) memb(#u6)=Nt8.new
528       Bits.set(Begin, Begin+8);
529       return true;
530 
531     // Store low half
532     case S2_storerh_io:           // memh(Rs32+#s11:1)=Rt32
533     case S2_storerhnew_io:        // memh(Rs32+#s11:1)=Nt8.new
534     case S2_pstorerht_io:         // if (Pv4) memh(Rs32+#u6:1)=Rt32
535     case S2_pstorerhf_io:         // if (!Pv4) memh(Rs32+#u6:1)=Rt32
536     case S4_pstorerhtnew_io:      // if (Pv4.new) memh(Rs32+#u6:1)=Rt32
537     case S4_pstorerhfnew_io:      // if (!Pv4.new) memh(Rs32+#u6:1)=Rt32
538     case S2_pstorerhnewt_io:      // if (Pv4) memh(Rs32+#u6:1)=Nt8.new
539     case S2_pstorerhnewf_io:      // if (!Pv4) memh(Rs32+#u6:1)=Nt8.new
540     case S4_pstorerhnewtnew_io:   // if (Pv4.new) memh(Rs32+#u6:1)=Nt8.new
541     case S4_pstorerhnewfnew_io:   // if (!Pv4.new) memh(Rs32+#u6:1)=Nt8.new
542     case S2_storerh_pi:           // memh(Rx32++#s4:1)=Rt32
543     case S2_storerhnew_pi:        // memh(Rx32++#s4:1)=Nt8.new
544     case S2_pstorerht_pi:         // if (Pv4) memh(Rx32++#s4:1)=Rt32
545     case S2_pstorerhf_pi:         // if (!Pv4) memh(Rx32++#s4:1)=Rt32
546     case S2_pstorerhtnew_pi:      // if (Pv4.new) memh(Rx32++#s4:1)=Rt32
547     case S2_pstorerhfnew_pi:      // if (!Pv4.new) memh(Rx32++#s4:1)=Rt32
548     case S2_pstorerhnewt_pi:      // if (Pv4) memh(Rx32++#s4:1)=Nt8.new
549     case S2_pstorerhnewf_pi:      // if (!Pv4) memh(Rx32++#s4:1)=Nt8.new
550     case S2_pstorerhnewtnew_pi:   // if (Pv4.new) memh(Rx32++#s4:1)=Nt8.new
551     case S2_pstorerhnewfnew_pi:   // if (!Pv4.new) memh(Rx32++#s4:1)=Nt8.new
552     case S4_storerh_ap:           // memh(Re32=#U6)=Rt32
553     case S4_storerhnew_ap:        // memh(Re32=#U6)=Nt8.new
554     case S2_storerh_pr:           // memh(Rx32++Mu2)=Rt32
555     case S2_storerhnew_pr:        // memh(Rx32++Mu2)=Nt8.new
556     case S4_storerh_ur:           // memh(Ru32<<#u2+#U6)=Rt32
557     case S4_storerhnew_ur:        // memh(Ru32<<#u2+#U6)=Nt8.new
558     case S2_storerh_pbr:          // memh(Rx32++Mu2:brev)=Rt32
559     case S2_storerhnew_pbr:       // memh(Rx32++Mu2:brev)=Nt8.new
560     case S2_storerh_pci:          // memh(Rx32++#s4:1:circ(Mu2))=Rt32
561     case S2_storerhnew_pci:       // memh(Rx32++#s4:1:circ(Mu2))=Nt8.new
562     case S2_storerh_pcr:          // memh(Rx32++I:circ(Mu2))=Rt32
563     case S2_storerhnew_pcr:       // memh(Rx32++I:circ(Mu2))=Nt8.new
564     case S4_storerh_rr:           // memh(Rs32+Ru32<<#u2)=Rt32
565     case S4_pstorerht_rr:         // if (Pv4) memh(Rs32+Ru32<<#u2)=Rt32
566     case S4_pstorerhf_rr:         // if (!Pv4) memh(Rs32+Ru32<<#u2)=Rt32
567     case S4_pstorerhtnew_rr:      // if (Pv4.new) memh(Rs32+Ru32<<#u2)=Rt32
568     case S4_pstorerhfnew_rr:      // if (!Pv4.new) memh(Rs32+Ru32<<#u2)=Rt32
569     case S4_storerhnew_rr:        // memh(Rs32+Ru32<<#u2)=Nt8.new
570     case S4_pstorerhnewt_rr:      // if (Pv4) memh(Rs32+Ru32<<#u2)=Nt8.new
571     case S4_pstorerhnewf_rr:      // if (!Pv4) memh(Rs32+Ru32<<#u2)=Nt8.new
572     case S4_pstorerhnewtnew_rr:   // if (Pv4.new) memh(Rs32+Ru32<<#u2)=Nt8.new
573     case S4_pstorerhnewfnew_rr:   // if (!Pv4.new) memh(Rs32+Ru32<<#u2)=Nt8.new
574     case S2_storerhgp:            // memh(gp+#u16:1)=Rt32
575     case S2_storerhnewgp:         // memh(gp+#u16:1)=Nt8.new
576     case S4_pstorerht_abs:        // if (Pv4) memh(#u6)=Rt32
577     case S4_pstorerhf_abs:        // if (!Pv4) memh(#u6)=Rt32
578     case S4_pstorerhtnew_abs:     // if (Pv4.new) memh(#u6)=Rt32
579     case S4_pstorerhfnew_abs:     // if (!Pv4.new) memh(#u6)=Rt32
580     case S4_pstorerhnewt_abs:     // if (Pv4) memh(#u6)=Nt8.new
581     case S4_pstorerhnewf_abs:     // if (!Pv4) memh(#u6)=Nt8.new
582     case S4_pstorerhnewtnew_abs:  // if (Pv4.new) memh(#u6)=Nt8.new
583     case S4_pstorerhnewfnew_abs:  // if (!Pv4.new) memh(#u6)=Nt8.new
584       Bits.set(Begin, Begin+16);
585       return true;
586 
587     // Store high half
588     case S2_storerf_io:           // memh(Rs32+#s11:1)=Rt.H32
589     case S2_pstorerft_io:         // if (Pv4) memh(Rs32+#u6:1)=Rt.H32
590     case S2_pstorerff_io:         // if (!Pv4) memh(Rs32+#u6:1)=Rt.H32
591     case S4_pstorerftnew_io:      // if (Pv4.new) memh(Rs32+#u6:1)=Rt.H32
592     case S4_pstorerffnew_io:      // if (!Pv4.new) memh(Rs32+#u6:1)=Rt.H32
593     case S2_storerf_pi:           // memh(Rx32++#s4:1)=Rt.H32
594     case S2_pstorerft_pi:         // if (Pv4) memh(Rx32++#s4:1)=Rt.H32
595     case S2_pstorerff_pi:         // if (!Pv4) memh(Rx32++#s4:1)=Rt.H32
596     case S2_pstorerftnew_pi:      // if (Pv4.new) memh(Rx32++#s4:1)=Rt.H32
597     case S2_pstorerffnew_pi:      // if (!Pv4.new) memh(Rx32++#s4:1)=Rt.H32
598     case S4_storerf_ap:           // memh(Re32=#U6)=Rt.H32
599     case S2_storerf_pr:           // memh(Rx32++Mu2)=Rt.H32
600     case S4_storerf_ur:           // memh(Ru32<<#u2+#U6)=Rt.H32
601     case S2_storerf_pbr:          // memh(Rx32++Mu2:brev)=Rt.H32
602     case S2_storerf_pci:          // memh(Rx32++#s4:1:circ(Mu2))=Rt.H32
603     case S2_storerf_pcr:          // memh(Rx32++I:circ(Mu2))=Rt.H32
604     case S4_storerf_rr:           // memh(Rs32+Ru32<<#u2)=Rt.H32
605     case S4_pstorerft_rr:         // if (Pv4) memh(Rs32+Ru32<<#u2)=Rt.H32
606     case S4_pstorerff_rr:         // if (!Pv4) memh(Rs32+Ru32<<#u2)=Rt.H32
607     case S4_pstorerftnew_rr:      // if (Pv4.new) memh(Rs32+Ru32<<#u2)=Rt.H32
608     case S4_pstorerffnew_rr:      // if (!Pv4.new) memh(Rs32+Ru32<<#u2)=Rt.H32
609     case S2_storerfgp:            // memh(gp+#u16:1)=Rt.H32
610     case S4_pstorerft_abs:        // if (Pv4) memh(#u6)=Rt.H32
611     case S4_pstorerff_abs:        // if (!Pv4) memh(#u6)=Rt.H32
612     case S4_pstorerftnew_abs:     // if (Pv4.new) memh(#u6)=Rt.H32
613     case S4_pstorerffnew_abs:     // if (!Pv4.new) memh(#u6)=Rt.H32
614       Bits.set(Begin+16, Begin+32);
615       return true;
616   }
617 
618   return false;
619 }
620 
621 // For an instruction with opcode Opc, calculate the set of bits that it
622 // uses in a register in operand OpN. This only calculates the set of used
623 // bits for cases where it does not depend on any operands (as is the case
624 // in shifts, for example). For concrete instructions from a program, the
625 // operand may be a subregister of a larger register, while Bits would
626 // correspond to the larger register in its entirety. Because of that,
627 // the parameter Begin can be used to indicate which bit of Bits should be
628 // considered the LSB of the operand.
629 bool HexagonBitSimplify::getUsedBits(unsigned Opc, unsigned OpN,
630       BitVector &Bits, uint16_t Begin, const HexagonInstrInfo &HII) {
631   using namespace Hexagon;
632 
633   const MCInstrDesc &D = HII.get(Opc);
634   if (D.mayStore()) {
635     if (OpN == D.getNumOperands()-1)
636       return getUsedBitsInStore(Opc, Bits, Begin);
637     return false;
638   }
639 
640   switch (Opc) {
641     // One register source. Used bits: R1[0-7].
642     case A2_sxtb:
643     case A2_zxtb:
644     case A4_cmpbeqi:
645     case A4_cmpbgti:
646     case A4_cmpbgtui:
647       if (OpN == 1) {
648         Bits.set(Begin, Begin+8);
649         return true;
650       }
651       break;
652 
653     // One register source. Used bits: R1[0-15].
654     case A2_aslh:
655     case A2_sxth:
656     case A2_zxth:
657     case A4_cmpheqi:
658     case A4_cmphgti:
659     case A4_cmphgtui:
660       if (OpN == 1) {
661         Bits.set(Begin, Begin+16);
662         return true;
663       }
664       break;
665 
666     // One register source. Used bits: R1[16-31].
667     case A2_asrh:
668       if (OpN == 1) {
669         Bits.set(Begin+16, Begin+32);
670         return true;
671       }
672       break;
673 
674     // Two register sources. Used bits: R1[0-7], R2[0-7].
675     case A4_cmpbeq:
676     case A4_cmpbgt:
677     case A4_cmpbgtu:
678       if (OpN == 1) {
679         Bits.set(Begin, Begin+8);
680         return true;
681       }
682       break;
683 
684     // Two register sources. Used bits: R1[0-15], R2[0-15].
685     case A4_cmpheq:
686     case A4_cmphgt:
687     case A4_cmphgtu:
688     case A2_addh_h16_ll:
689     case A2_addh_h16_sat_ll:
690     case A2_addh_l16_ll:
691     case A2_addh_l16_sat_ll:
692     case A2_combine_ll:
693     case A2_subh_h16_ll:
694     case A2_subh_h16_sat_ll:
695     case A2_subh_l16_ll:
696     case A2_subh_l16_sat_ll:
697     case M2_mpy_acc_ll_s0:
698     case M2_mpy_acc_ll_s1:
699     case M2_mpy_acc_sat_ll_s0:
700     case M2_mpy_acc_sat_ll_s1:
701     case M2_mpy_ll_s0:
702     case M2_mpy_ll_s1:
703     case M2_mpy_nac_ll_s0:
704     case M2_mpy_nac_ll_s1:
705     case M2_mpy_nac_sat_ll_s0:
706     case M2_mpy_nac_sat_ll_s1:
707     case M2_mpy_rnd_ll_s0:
708     case M2_mpy_rnd_ll_s1:
709     case M2_mpy_sat_ll_s0:
710     case M2_mpy_sat_ll_s1:
711     case M2_mpy_sat_rnd_ll_s0:
712     case M2_mpy_sat_rnd_ll_s1:
713     case M2_mpyd_acc_ll_s0:
714     case M2_mpyd_acc_ll_s1:
715     case M2_mpyd_ll_s0:
716     case M2_mpyd_ll_s1:
717     case M2_mpyd_nac_ll_s0:
718     case M2_mpyd_nac_ll_s1:
719     case M2_mpyd_rnd_ll_s0:
720     case M2_mpyd_rnd_ll_s1:
721     case M2_mpyu_acc_ll_s0:
722     case M2_mpyu_acc_ll_s1:
723     case M2_mpyu_ll_s0:
724     case M2_mpyu_ll_s1:
725     case M2_mpyu_nac_ll_s0:
726     case M2_mpyu_nac_ll_s1:
727     case M2_mpyud_acc_ll_s0:
728     case M2_mpyud_acc_ll_s1:
729     case M2_mpyud_ll_s0:
730     case M2_mpyud_ll_s1:
731     case M2_mpyud_nac_ll_s0:
732     case M2_mpyud_nac_ll_s1:
733       if (OpN == 1 || OpN == 2) {
734         Bits.set(Begin, Begin+16);
735         return true;
736       }
737       break;
738 
739     // Two register sources. Used bits: R1[0-15], R2[16-31].
740     case A2_addh_h16_lh:
741     case A2_addh_h16_sat_lh:
742     case A2_combine_lh:
743     case A2_subh_h16_lh:
744     case A2_subh_h16_sat_lh:
745     case M2_mpy_acc_lh_s0:
746     case M2_mpy_acc_lh_s1:
747     case M2_mpy_acc_sat_lh_s0:
748     case M2_mpy_acc_sat_lh_s1:
749     case M2_mpy_lh_s0:
750     case M2_mpy_lh_s1:
751     case M2_mpy_nac_lh_s0:
752     case M2_mpy_nac_lh_s1:
753     case M2_mpy_nac_sat_lh_s0:
754     case M2_mpy_nac_sat_lh_s1:
755     case M2_mpy_rnd_lh_s0:
756     case M2_mpy_rnd_lh_s1:
757     case M2_mpy_sat_lh_s0:
758     case M2_mpy_sat_lh_s1:
759     case M2_mpy_sat_rnd_lh_s0:
760     case M2_mpy_sat_rnd_lh_s1:
761     case M2_mpyd_acc_lh_s0:
762     case M2_mpyd_acc_lh_s1:
763     case M2_mpyd_lh_s0:
764     case M2_mpyd_lh_s1:
765     case M2_mpyd_nac_lh_s0:
766     case M2_mpyd_nac_lh_s1:
767     case M2_mpyd_rnd_lh_s0:
768     case M2_mpyd_rnd_lh_s1:
769     case M2_mpyu_acc_lh_s0:
770     case M2_mpyu_acc_lh_s1:
771     case M2_mpyu_lh_s0:
772     case M2_mpyu_lh_s1:
773     case M2_mpyu_nac_lh_s0:
774     case M2_mpyu_nac_lh_s1:
775     case M2_mpyud_acc_lh_s0:
776     case M2_mpyud_acc_lh_s1:
777     case M2_mpyud_lh_s0:
778     case M2_mpyud_lh_s1:
779     case M2_mpyud_nac_lh_s0:
780     case M2_mpyud_nac_lh_s1:
781     // These four are actually LH.
782     case A2_addh_l16_hl:
783     case A2_addh_l16_sat_hl:
784     case A2_subh_l16_hl:
785     case A2_subh_l16_sat_hl:
786       if (OpN == 1) {
787         Bits.set(Begin, Begin+16);
788         return true;
789       }
790       if (OpN == 2) {
791         Bits.set(Begin+16, Begin+32);
792         return true;
793       }
794       break;
795 
796     // Two register sources, used bits: R1[16-31], R2[0-15].
797     case A2_addh_h16_hl:
798     case A2_addh_h16_sat_hl:
799     case A2_combine_hl:
800     case A2_subh_h16_hl:
801     case A2_subh_h16_sat_hl:
802     case M2_mpy_acc_hl_s0:
803     case M2_mpy_acc_hl_s1:
804     case M2_mpy_acc_sat_hl_s0:
805     case M2_mpy_acc_sat_hl_s1:
806     case M2_mpy_hl_s0:
807     case M2_mpy_hl_s1:
808     case M2_mpy_nac_hl_s0:
809     case M2_mpy_nac_hl_s1:
810     case M2_mpy_nac_sat_hl_s0:
811     case M2_mpy_nac_sat_hl_s1:
812     case M2_mpy_rnd_hl_s0:
813     case M2_mpy_rnd_hl_s1:
814     case M2_mpy_sat_hl_s0:
815     case M2_mpy_sat_hl_s1:
816     case M2_mpy_sat_rnd_hl_s0:
817     case M2_mpy_sat_rnd_hl_s1:
818     case M2_mpyd_acc_hl_s0:
819     case M2_mpyd_acc_hl_s1:
820     case M2_mpyd_hl_s0:
821     case M2_mpyd_hl_s1:
822     case M2_mpyd_nac_hl_s0:
823     case M2_mpyd_nac_hl_s1:
824     case M2_mpyd_rnd_hl_s0:
825     case M2_mpyd_rnd_hl_s1:
826     case M2_mpyu_acc_hl_s0:
827     case M2_mpyu_acc_hl_s1:
828     case M2_mpyu_hl_s0:
829     case M2_mpyu_hl_s1:
830     case M2_mpyu_nac_hl_s0:
831     case M2_mpyu_nac_hl_s1:
832     case M2_mpyud_acc_hl_s0:
833     case M2_mpyud_acc_hl_s1:
834     case M2_mpyud_hl_s0:
835     case M2_mpyud_hl_s1:
836     case M2_mpyud_nac_hl_s0:
837     case M2_mpyud_nac_hl_s1:
838       if (OpN == 1) {
839         Bits.set(Begin+16, Begin+32);
840         return true;
841       }
842       if (OpN == 2) {
843         Bits.set(Begin, Begin+16);
844         return true;
845       }
846       break;
847 
848     // Two register sources, used bits: R1[16-31], R2[16-31].
849     case A2_addh_h16_hh:
850     case A2_addh_h16_sat_hh:
851     case A2_combine_hh:
852     case A2_subh_h16_hh:
853     case A2_subh_h16_sat_hh:
854     case M2_mpy_acc_hh_s0:
855     case M2_mpy_acc_hh_s1:
856     case M2_mpy_acc_sat_hh_s0:
857     case M2_mpy_acc_sat_hh_s1:
858     case M2_mpy_hh_s0:
859     case M2_mpy_hh_s1:
860     case M2_mpy_nac_hh_s0:
861     case M2_mpy_nac_hh_s1:
862     case M2_mpy_nac_sat_hh_s0:
863     case M2_mpy_nac_sat_hh_s1:
864     case M2_mpy_rnd_hh_s0:
865     case M2_mpy_rnd_hh_s1:
866     case M2_mpy_sat_hh_s0:
867     case M2_mpy_sat_hh_s1:
868     case M2_mpy_sat_rnd_hh_s0:
869     case M2_mpy_sat_rnd_hh_s1:
870     case M2_mpyd_acc_hh_s0:
871     case M2_mpyd_acc_hh_s1:
872     case M2_mpyd_hh_s0:
873     case M2_mpyd_hh_s1:
874     case M2_mpyd_nac_hh_s0:
875     case M2_mpyd_nac_hh_s1:
876     case M2_mpyd_rnd_hh_s0:
877     case M2_mpyd_rnd_hh_s1:
878     case M2_mpyu_acc_hh_s0:
879     case M2_mpyu_acc_hh_s1:
880     case M2_mpyu_hh_s0:
881     case M2_mpyu_hh_s1:
882     case M2_mpyu_nac_hh_s0:
883     case M2_mpyu_nac_hh_s1:
884     case M2_mpyud_acc_hh_s0:
885     case M2_mpyud_acc_hh_s1:
886     case M2_mpyud_hh_s0:
887     case M2_mpyud_hh_s1:
888     case M2_mpyud_nac_hh_s0:
889     case M2_mpyud_nac_hh_s1:
890       if (OpN == 1 || OpN == 2) {
891         Bits.set(Begin+16, Begin+32);
892         return true;
893       }
894       break;
895   }
896 
897   return false;
898 }
899 
900 // Calculate the register class that matches Reg:Sub. For example, if
901 // %1 is a double register, then %1:isub_hi would match the "int"
902 // register class.
903 const TargetRegisterClass *HexagonBitSimplify::getFinalVRegClass(
904       const BitTracker::RegisterRef &RR, MachineRegisterInfo &MRI) {
905   if (!RR.Reg.isVirtual())
906     return nullptr;
907   auto *RC = MRI.getRegClass(RR.Reg);
908   if (RR.Sub == 0)
909     return RC;
910   auto &HRI = static_cast<const HexagonRegisterInfo&>(
911                   *MRI.getTargetRegisterInfo());
912 
913   auto VerifySR = [&HRI] (const TargetRegisterClass *RC, unsigned Sub) -> void {
914     (void)HRI;
915     assert(Sub == HRI.getHexagonSubRegIndex(*RC, Hexagon::ps_sub_lo) ||
916            Sub == HRI.getHexagonSubRegIndex(*RC, Hexagon::ps_sub_hi));
917   };
918 
919   switch (RC->getID()) {
920     case Hexagon::DoubleRegsRegClassID:
921       VerifySR(RC, RR.Sub);
922       return &Hexagon::IntRegsRegClass;
923     case Hexagon::HvxWRRegClassID:
924       VerifySR(RC, RR.Sub);
925       return &Hexagon::HvxVRRegClass;
926   }
927   return nullptr;
928 }
929 
930 // Check if RD could be replaced with RS at any possible use of RD.
931 // For example a predicate register cannot be replaced with a integer
932 // register, but a 64-bit register with a subregister can be replaced
933 // with a 32-bit register.
934 bool HexagonBitSimplify::isTransparentCopy(const BitTracker::RegisterRef &RD,
935       const BitTracker::RegisterRef &RS, MachineRegisterInfo &MRI) {
936   if (!RD.Reg.isVirtual() || !RS.Reg.isVirtual())
937     return false;
938   // Return false if one (or both) classes are nullptr.
939   auto *DRC = getFinalVRegClass(RD, MRI);
940   if (!DRC)
941     return false;
942 
943   return DRC == getFinalVRegClass(RS, MRI);
944 }
945 
946 bool HexagonBitSimplify::hasTiedUse(unsigned Reg, MachineRegisterInfo &MRI,
947       unsigned NewSub) {
948   if (!PreserveTiedOps)
949     return false;
950   return llvm::any_of(MRI.use_operands(Reg),
951                       [NewSub] (const MachineOperand &Op) -> bool {
952                         return Op.getSubReg() != NewSub && Op.isTied();
953                       });
954 }
955 
956 namespace {
957 
958   class DeadCodeElimination {
959   public:
960     DeadCodeElimination(MachineFunction &mf, MachineDominatorTree &mdt)
961       : MF(mf), HII(*MF.getSubtarget<HexagonSubtarget>().getInstrInfo()),
962         MDT(mdt), MRI(mf.getRegInfo()) {}
963 
964     bool run() {
965       return runOnNode(MDT.getRootNode());
966     }
967 
968   private:
969     bool isDead(unsigned R) const;
970     bool runOnNode(MachineDomTreeNode *N);
971 
972     MachineFunction &MF;
973     const HexagonInstrInfo &HII;
974     MachineDominatorTree &MDT;
975     MachineRegisterInfo &MRI;
976   };
977 
978 } // end anonymous namespace
979 
980 bool DeadCodeElimination::isDead(unsigned R) const {
981   for (const MachineOperand &MO : MRI.use_operands(R)) {
982     const MachineInstr *UseI = MO.getParent();
983     if (UseI->isDebugInstr())
984       continue;
985     if (UseI->isPHI()) {
986       assert(!UseI->getOperand(0).getSubReg());
987       Register DR = UseI->getOperand(0).getReg();
988       if (DR == R)
989         continue;
990     }
991     return false;
992   }
993   return true;
994 }
995 
996 bool DeadCodeElimination::runOnNode(MachineDomTreeNode *N) {
997   bool Changed = false;
998 
999   for (auto *DTN : children<MachineDomTreeNode*>(N))
1000     Changed |= runOnNode(DTN);
1001 
1002   MachineBasicBlock *B = N->getBlock();
1003   std::vector<MachineInstr*> Instrs;
1004   for (MachineInstr &MI : llvm::reverse(*B))
1005     Instrs.push_back(&MI);
1006 
1007   for (auto *MI : Instrs) {
1008     unsigned Opc = MI->getOpcode();
1009     // Do not touch lifetime markers. This is why the target-independent DCE
1010     // cannot be used.
1011     if (Opc == TargetOpcode::LIFETIME_START ||
1012         Opc == TargetOpcode::LIFETIME_END)
1013       continue;
1014     bool Store = false;
1015     if (MI->isInlineAsm())
1016       continue;
1017     // Delete PHIs if possible.
1018     if (!MI->isPHI() && !MI->isSafeToMove(Store))
1019       continue;
1020 
1021     bool AllDead = true;
1022     SmallVector<unsigned,2> Regs;
1023     for (auto &Op : MI->operands()) {
1024       if (!Op.isReg() || !Op.isDef())
1025         continue;
1026       Register R = Op.getReg();
1027       if (!R.isVirtual() || !isDead(R)) {
1028         AllDead = false;
1029         break;
1030       }
1031       Regs.push_back(R);
1032     }
1033     if (!AllDead)
1034       continue;
1035 
1036     B->erase(MI);
1037     for (unsigned Reg : Regs)
1038       MRI.markUsesInDebugValueAsUndef(Reg);
1039     Changed = true;
1040   }
1041 
1042   return Changed;
1043 }
1044 
1045 namespace {
1046 
1047 // Eliminate redundant instructions
1048 //
1049 // This transformation will identify instructions where the output register
1050 // is the same as one of its input registers. This only works on instructions
1051 // that define a single register (unlike post-increment loads, for example).
1052 // The equality check is actually more detailed: the code calculates which
1053 // bits of the output are used, and only compares these bits with the input
1054 // registers.
1055 // If the output matches an input, the instruction is replaced with COPY.
1056 // The copies will be removed by another transformation.
1057   class RedundantInstrElimination : public Transformation {
1058   public:
1059     RedundantInstrElimination(BitTracker &bt, const HexagonInstrInfo &hii,
1060           const HexagonRegisterInfo &hri, MachineRegisterInfo &mri)
1061         : Transformation(true), HII(hii), HRI(hri), MRI(mri), BT(bt) {}
1062 
1063     bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;
1064 
1065   private:
1066     bool isLossyShiftLeft(const MachineInstr &MI, unsigned OpN,
1067           unsigned &LostB, unsigned &LostE);
1068     bool isLossyShiftRight(const MachineInstr &MI, unsigned OpN,
1069           unsigned &LostB, unsigned &LostE);
1070     bool computeUsedBits(unsigned Reg, BitVector &Bits);
1071     bool computeUsedBits(const MachineInstr &MI, unsigned OpN, BitVector &Bits,
1072           uint16_t Begin);
1073     bool usedBitsEqual(BitTracker::RegisterRef RD, BitTracker::RegisterRef RS);
1074 
1075     const HexagonInstrInfo &HII;
1076     const HexagonRegisterInfo &HRI;
1077     MachineRegisterInfo &MRI;
1078     BitTracker &BT;
1079   };
1080 
1081 } // end anonymous namespace
1082 
1083 // Check if the instruction is a lossy shift left, where the input being
1084 // shifted is the operand OpN of MI. If true, [LostB, LostE) is the range
1085 // of bit indices that are lost.
1086 bool RedundantInstrElimination::isLossyShiftLeft(const MachineInstr &MI,
1087       unsigned OpN, unsigned &LostB, unsigned &LostE) {
1088   using namespace Hexagon;
1089 
1090   unsigned Opc = MI.getOpcode();
1091   unsigned ImN, RegN, Width;
1092   switch (Opc) {
1093     case S2_asl_i_p:
1094       ImN = 2;
1095       RegN = 1;
1096       Width = 64;
1097       break;
1098     case S2_asl_i_p_acc:
1099     case S2_asl_i_p_and:
1100     case S2_asl_i_p_nac:
1101     case S2_asl_i_p_or:
1102     case S2_asl_i_p_xacc:
1103       ImN = 3;
1104       RegN = 2;
1105       Width = 64;
1106       break;
1107     case S2_asl_i_r:
1108       ImN = 2;
1109       RegN = 1;
1110       Width = 32;
1111       break;
1112     case S2_addasl_rrri:
1113     case S4_andi_asl_ri:
1114     case S4_ori_asl_ri:
1115     case S4_addi_asl_ri:
1116     case S4_subi_asl_ri:
1117     case S2_asl_i_r_acc:
1118     case S2_asl_i_r_and:
1119     case S2_asl_i_r_nac:
1120     case S2_asl_i_r_or:
1121     case S2_asl_i_r_sat:
1122     case S2_asl_i_r_xacc:
1123       ImN = 3;
1124       RegN = 2;
1125       Width = 32;
1126       break;
1127     default:
1128       return false;
1129   }
1130 
1131   if (RegN != OpN)
1132     return false;
1133 
1134   assert(MI.getOperand(ImN).isImm());
1135   unsigned S = MI.getOperand(ImN).getImm();
1136   if (S == 0)
1137     return false;
1138   LostB = Width-S;
1139   LostE = Width;
1140   return true;
1141 }
1142 
1143 // Check if the instruction is a lossy shift right, where the input being
1144 // shifted is the operand OpN of MI. If true, [LostB, LostE) is the range
1145 // of bit indices that are lost.
1146 bool RedundantInstrElimination::isLossyShiftRight(const MachineInstr &MI,
1147       unsigned OpN, unsigned &LostB, unsigned &LostE) {
1148   using namespace Hexagon;
1149 
1150   unsigned Opc = MI.getOpcode();
1151   unsigned ImN, RegN;
1152   switch (Opc) {
1153     case S2_asr_i_p:
1154     case S2_lsr_i_p:
1155       ImN = 2;
1156       RegN = 1;
1157       break;
1158     case S2_asr_i_p_acc:
1159     case S2_asr_i_p_and:
1160     case S2_asr_i_p_nac:
1161     case S2_asr_i_p_or:
1162     case S2_lsr_i_p_acc:
1163     case S2_lsr_i_p_and:
1164     case S2_lsr_i_p_nac:
1165     case S2_lsr_i_p_or:
1166     case S2_lsr_i_p_xacc:
1167       ImN = 3;
1168       RegN = 2;
1169       break;
1170     case S2_asr_i_r:
1171     case S2_lsr_i_r:
1172       ImN = 2;
1173       RegN = 1;
1174       break;
1175     case S4_andi_lsr_ri:
1176     case S4_ori_lsr_ri:
1177     case S4_addi_lsr_ri:
1178     case S4_subi_lsr_ri:
1179     case S2_asr_i_r_acc:
1180     case S2_asr_i_r_and:
1181     case S2_asr_i_r_nac:
1182     case S2_asr_i_r_or:
1183     case S2_lsr_i_r_acc:
1184     case S2_lsr_i_r_and:
1185     case S2_lsr_i_r_nac:
1186     case S2_lsr_i_r_or:
1187     case S2_lsr_i_r_xacc:
1188       ImN = 3;
1189       RegN = 2;
1190       break;
1191 
1192     default:
1193       return false;
1194   }
1195 
1196   if (RegN != OpN)
1197     return false;
1198 
1199   assert(MI.getOperand(ImN).isImm());
1200   unsigned S = MI.getOperand(ImN).getImm();
1201   LostB = 0;
1202   LostE = S;
1203   return true;
1204 }
1205 
1206 // Calculate the bit vector that corresponds to the used bits of register Reg.
1207 // The vector Bits has the same size, as the size of Reg in bits. If the cal-
1208 // culation fails (i.e. the used bits are unknown), it returns false. Other-
1209 // wise, it returns true and sets the corresponding bits in Bits.
1210 bool RedundantInstrElimination::computeUsedBits(unsigned Reg, BitVector &Bits) {
1211   BitVector Used(Bits.size());
1212   RegisterSet Visited;
1213   std::vector<unsigned> Pending;
1214   Pending.push_back(Reg);
1215 
1216   for (unsigned i = 0; i < Pending.size(); ++i) {
1217     unsigned R = Pending[i];
1218     if (Visited.has(R))
1219       continue;
1220     Visited.insert(R);
1221     for (auto I = MRI.use_begin(R), E = MRI.use_end(); I != E; ++I) {
1222       BitTracker::RegisterRef UR = *I;
1223       unsigned B, W;
1224       if (!HBS::getSubregMask(UR, B, W, MRI))
1225         return false;
1226       MachineInstr &UseI = *I->getParent();
1227       if (UseI.isPHI() || UseI.isCopy()) {
1228         Register DefR = UseI.getOperand(0).getReg();
1229         if (!DefR.isVirtual())
1230           return false;
1231         Pending.push_back(DefR);
1232       } else {
1233         if (!computeUsedBits(UseI, I.getOperandNo(), Used, B))
1234           return false;
1235       }
1236     }
1237   }
1238   Bits |= Used;
1239   return true;
1240 }
1241 
1242 // Calculate the bits used by instruction MI in a register in operand OpN.
1243 // Return true/false if the calculation succeeds/fails. If is succeeds, set
1244 // used bits in Bits. This function does not reset any bits in Bits, so
1245 // subsequent calls over different instructions will result in the union
1246 // of the used bits in all these instructions.
1247 // The register in question may be used with a sub-register, whereas Bits
1248 // holds the bits for the entire register. To keep track of that, the
1249 // argument Begin indicates where in Bits is the lowest-significant bit
1250 // of the register used in operand OpN. For example, in instruction:
1251 //   %1 = S2_lsr_i_r %2:isub_hi, 10
1252 // the operand 1 is a 32-bit register, which happens to be a subregister
1253 // of the 64-bit register %2, and that subregister starts at position 32.
1254 // In this case Begin=32, since Bits[32] would be the lowest-significant bit
1255 // of %2:isub_hi.
1256 bool RedundantInstrElimination::computeUsedBits(const MachineInstr &MI,
1257       unsigned OpN, BitVector &Bits, uint16_t Begin) {
1258   unsigned Opc = MI.getOpcode();
1259   BitVector T(Bits.size());
1260   bool GotBits = HBS::getUsedBits(Opc, OpN, T, Begin, HII);
1261   // Even if we don't have bits yet, we could still provide some information
1262   // if the instruction is a lossy shift: the lost bits will be marked as
1263   // not used.
1264   unsigned LB, LE;
1265   if (isLossyShiftLeft(MI, OpN, LB, LE) || isLossyShiftRight(MI, OpN, LB, LE)) {
1266     assert(MI.getOperand(OpN).isReg());
1267     BitTracker::RegisterRef RR = MI.getOperand(OpN);
1268     const TargetRegisterClass *RC = HBS::getFinalVRegClass(RR, MRI);
1269     uint16_t Width = HRI.getRegSizeInBits(*RC);
1270 
1271     if (!GotBits)
1272       T.set(Begin, Begin+Width);
1273     assert(LB <= LE && LB < Width && LE <= Width);
1274     T.reset(Begin+LB, Begin+LE);
1275     GotBits = true;
1276   }
1277   if (GotBits)
1278     Bits |= T;
1279   return GotBits;
1280 }
1281 
1282 // Calculates the used bits in RD ("defined register"), and checks if these
1283 // bits in RS ("used register") and RD are identical.
1284 bool RedundantInstrElimination::usedBitsEqual(BitTracker::RegisterRef RD,
1285       BitTracker::RegisterRef RS) {
1286   const BitTracker::RegisterCell &DC = BT.lookup(RD.Reg);
1287   const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg);
1288 
1289   unsigned DB, DW;
1290   if (!HBS::getSubregMask(RD, DB, DW, MRI))
1291     return false;
1292   unsigned SB, SW;
1293   if (!HBS::getSubregMask(RS, SB, SW, MRI))
1294     return false;
1295   if (SW != DW)
1296     return false;
1297 
1298   BitVector Used(DC.width());
1299   if (!computeUsedBits(RD.Reg, Used))
1300     return false;
1301 
1302   for (unsigned i = 0; i != DW; ++i)
1303     if (Used[i+DB] && DC[DB+i] != SC[SB+i])
1304       return false;
1305   return true;
1306 }
1307 
1308 bool RedundantInstrElimination::processBlock(MachineBasicBlock &B,
1309       const RegisterSet&) {
1310   if (!BT.reached(&B))
1311     return false;
1312   bool Changed = false;
1313 
1314   for (auto I = B.begin(), E = B.end(); I != E; ++I) {
1315     MachineInstr *MI = &*I;
1316 
1317     if (MI->getOpcode() == TargetOpcode::COPY)
1318       continue;
1319     if (MI->isPHI() || MI->hasUnmodeledSideEffects() || MI->isInlineAsm())
1320       continue;
1321     unsigned NumD = MI->getDesc().getNumDefs();
1322     if (NumD != 1)
1323       continue;
1324 
1325     BitTracker::RegisterRef RD = MI->getOperand(0);
1326     if (!BT.has(RD.Reg))
1327       continue;
1328     const BitTracker::RegisterCell &DC = BT.lookup(RD.Reg);
1329     auto At = MachineBasicBlock::iterator(MI);
1330 
1331     // Find a source operand that is equal to the result.
1332     for (auto &Op : MI->uses()) {
1333       if (!Op.isReg())
1334         continue;
1335       BitTracker::RegisterRef RS = Op;
1336       if (!BT.has(RS.Reg))
1337         continue;
1338       if (!HBS::isTransparentCopy(RD, RS, MRI))
1339         continue;
1340 
1341       unsigned BN, BW;
1342       if (!HBS::getSubregMask(RS, BN, BW, MRI))
1343         continue;
1344 
1345       const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg);
1346       if (!usedBitsEqual(RD, RS) && !HBS::isEqual(DC, 0, SC, BN, BW))
1347         continue;
1348 
1349       // If found, replace the instruction with a COPY.
1350       const DebugLoc &DL = MI->getDebugLoc();
1351       const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI);
1352       Register NewR = MRI.createVirtualRegister(FRC);
1353       MachineInstr *CopyI =
1354           BuildMI(B, At, DL, HII.get(TargetOpcode::COPY), NewR)
1355             .addReg(RS.Reg, 0, RS.Sub);
1356       HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
1357       // This pass can create copies between registers that don't have the
1358       // exact same values. Updating the tracker has to involve updating
1359       // all dependent cells. Example:
1360       //   %1  = inst %2     ; %1 != %2, but used bits are equal
1361       //
1362       //   %3  = copy %2     ; <- inserted
1363       //   ... = %3          ; <- replaced from %2
1364       // Indirectly, we can create a "copy" between %1 and %2 even
1365       // though their exact values do not match.
1366       BT.visit(*CopyI);
1367       Changed = true;
1368       break;
1369     }
1370   }
1371 
1372   return Changed;
1373 }
1374 
1375 namespace {
1376 
1377 // Recognize instructions that produce constant values known at compile-time.
1378 // Replace them with register definitions that load these constants directly.
1379   class ConstGeneration : public Transformation {
1380   public:
1381     ConstGeneration(BitTracker &bt, const HexagonInstrInfo &hii,
1382         MachineRegisterInfo &mri)
1383       : Transformation(true), HII(hii), MRI(mri), BT(bt) {}
1384 
1385     bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;
1386     static bool isTfrConst(const MachineInstr &MI);
1387 
1388   private:
1389     Register genTfrConst(const TargetRegisterClass *RC, int64_t C,
1390                          MachineBasicBlock &B, MachineBasicBlock::iterator At,
1391                          DebugLoc &DL);
1392 
1393     const HexagonInstrInfo &HII;
1394     MachineRegisterInfo &MRI;
1395     BitTracker &BT;
1396   };
1397 
1398 } // end anonymous namespace
1399 
1400 bool ConstGeneration::isTfrConst(const MachineInstr &MI) {
1401   unsigned Opc = MI.getOpcode();
1402   switch (Opc) {
1403     case Hexagon::A2_combineii:
1404     case Hexagon::A4_combineii:
1405     case Hexagon::A2_tfrsi:
1406     case Hexagon::A2_tfrpi:
1407     case Hexagon::PS_true:
1408     case Hexagon::PS_false:
1409     case Hexagon::CONST32:
1410     case Hexagon::CONST64:
1411       return true;
1412   }
1413   return false;
1414 }
1415 
1416 // Generate a transfer-immediate instruction that is appropriate for the
1417 // register class and the actual value being transferred.
1418 Register ConstGeneration::genTfrConst(const TargetRegisterClass *RC, int64_t C,
1419                                       MachineBasicBlock &B,
1420                                       MachineBasicBlock::iterator At,
1421                                       DebugLoc &DL) {
1422   Register Reg = MRI.createVirtualRegister(RC);
1423   if (RC == &Hexagon::IntRegsRegClass) {
1424     BuildMI(B, At, DL, HII.get(Hexagon::A2_tfrsi), Reg)
1425         .addImm(int32_t(C));
1426     return Reg;
1427   }
1428 
1429   if (RC == &Hexagon::DoubleRegsRegClass) {
1430     if (isInt<8>(C)) {
1431       BuildMI(B, At, DL, HII.get(Hexagon::A2_tfrpi), Reg)
1432           .addImm(C);
1433       return Reg;
1434     }
1435 
1436     unsigned Lo = Lo_32(C), Hi = Hi_32(C);
1437     if (isInt<8>(Lo) || isInt<8>(Hi)) {
1438       unsigned Opc = isInt<8>(Lo) ? Hexagon::A2_combineii
1439                                   : Hexagon::A4_combineii;
1440       BuildMI(B, At, DL, HII.get(Opc), Reg)
1441           .addImm(int32_t(Hi))
1442           .addImm(int32_t(Lo));
1443       return Reg;
1444     }
1445     MachineFunction *MF = B.getParent();
1446     auto &HST = MF->getSubtarget<HexagonSubtarget>();
1447 
1448     // Disable CONST64 for tiny core since it takes a LD resource.
1449     if (!HST.isTinyCore() ||
1450         MF->getFunction().hasOptSize()) {
1451       BuildMI(B, At, DL, HII.get(Hexagon::CONST64), Reg)
1452           .addImm(C);
1453       return Reg;
1454     }
1455   }
1456 
1457   if (RC == &Hexagon::PredRegsRegClass) {
1458     unsigned Opc;
1459     if (C == 0)
1460       Opc = Hexagon::PS_false;
1461     else if ((C & 0xFF) == 0xFF)
1462       Opc = Hexagon::PS_true;
1463     else
1464       return 0;
1465     BuildMI(B, At, DL, HII.get(Opc), Reg);
1466     return Reg;
1467   }
1468 
1469   return 0;
1470 }
1471 
1472 bool ConstGeneration::processBlock(MachineBasicBlock &B, const RegisterSet&) {
1473   if (!BT.reached(&B))
1474     return false;
1475   bool Changed = false;
1476   RegisterSet Defs;
1477 
1478   for (auto I = B.begin(), E = B.end(); I != E; ++I) {
1479     if (isTfrConst(*I))
1480       continue;
1481     Defs.clear();
1482     HBS::getInstrDefs(*I, Defs);
1483     if (Defs.count() != 1)
1484       continue;
1485     Register DR = Defs.find_first();
1486     if (!DR.isVirtual())
1487       continue;
1488     uint64_t U;
1489     const BitTracker::RegisterCell &DRC = BT.lookup(DR);
1490     if (HBS::getConst(DRC, 0, DRC.width(), U)) {
1491       int64_t C = U;
1492       DebugLoc DL = I->getDebugLoc();
1493       auto At = I->isPHI() ? B.getFirstNonPHI() : I;
1494       Register ImmReg = genTfrConst(MRI.getRegClass(DR), C, B, At, DL);
1495       if (ImmReg) {
1496         HBS::replaceReg(DR, ImmReg, MRI);
1497         BT.put(ImmReg, DRC);
1498         Changed = true;
1499       }
1500     }
1501   }
1502   return Changed;
1503 }
1504 
1505 namespace {
1506 
1507 // Identify pairs of available registers which hold identical values.
1508 // In such cases, only one of them needs to be calculated, the other one
1509 // will be defined as a copy of the first.
1510   class CopyGeneration : public Transformation {
1511   public:
1512     CopyGeneration(BitTracker &bt, const HexagonInstrInfo &hii,
1513         const HexagonRegisterInfo &hri, MachineRegisterInfo &mri)
1514       : Transformation(true), HII(hii), HRI(hri), MRI(mri), BT(bt) {}
1515 
1516     bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;
1517 
1518   private:
1519     bool findMatch(const BitTracker::RegisterRef &Inp,
1520         BitTracker::RegisterRef &Out, const RegisterSet &AVs);
1521 
1522     const HexagonInstrInfo &HII;
1523     const HexagonRegisterInfo &HRI;
1524     MachineRegisterInfo &MRI;
1525     BitTracker &BT;
1526     RegisterSet Forbidden;
1527   };
1528 
1529 // Eliminate register copies RD = RS, by replacing the uses of RD with
1530 // with uses of RS.
1531   class CopyPropagation : public Transformation {
1532   public:
1533     CopyPropagation(const HexagonRegisterInfo &hri, MachineRegisterInfo &mri)
1534         : Transformation(false), HRI(hri), MRI(mri) {}
1535 
1536     bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;
1537 
1538     static bool isCopyReg(unsigned Opc, bool NoConv);
1539 
1540   private:
1541     bool propagateRegCopy(MachineInstr &MI);
1542 
1543     const HexagonRegisterInfo &HRI;
1544     MachineRegisterInfo &MRI;
1545   };
1546 
1547 } // end anonymous namespace
1548 
1549 /// Check if there is a register in AVs that is identical to Inp. If so,
1550 /// set Out to the found register. The output may be a pair Reg:Sub.
1551 bool CopyGeneration::findMatch(const BitTracker::RegisterRef &Inp,
1552       BitTracker::RegisterRef &Out, const RegisterSet &AVs) {
1553   if (!BT.has(Inp.Reg))
1554     return false;
1555   const BitTracker::RegisterCell &InpRC = BT.lookup(Inp.Reg);
1556   auto *FRC = HBS::getFinalVRegClass(Inp, MRI);
1557   unsigned B, W;
1558   if (!HBS::getSubregMask(Inp, B, W, MRI))
1559     return false;
1560 
1561   for (Register R = AVs.find_first(); R; R = AVs.find_next(R)) {
1562     if (!BT.has(R) || Forbidden[R])
1563       continue;
1564     const BitTracker::RegisterCell &RC = BT.lookup(R);
1565     unsigned RW = RC.width();
1566     if (W == RW) {
1567       if (FRC != MRI.getRegClass(R))
1568         continue;
1569       if (!HBS::isTransparentCopy(R, Inp, MRI))
1570         continue;
1571       if (!HBS::isEqual(InpRC, B, RC, 0, W))
1572         continue;
1573       Out.Reg = R;
1574       Out.Sub = 0;
1575       return true;
1576     }
1577     // Check if there is a super-register, whose part (with a subregister)
1578     // is equal to the input.
1579     // Only do double registers for now.
1580     if (W*2 != RW)
1581       continue;
1582     if (MRI.getRegClass(R) != &Hexagon::DoubleRegsRegClass)
1583       continue;
1584 
1585     if (HBS::isEqual(InpRC, B, RC, 0, W))
1586       Out.Sub = Hexagon::isub_lo;
1587     else if (HBS::isEqual(InpRC, B, RC, W, W))
1588       Out.Sub = Hexagon::isub_hi;
1589     else
1590       continue;
1591     Out.Reg = R;
1592     if (HBS::isTransparentCopy(Out, Inp, MRI))
1593       return true;
1594   }
1595   return false;
1596 }
1597 
1598 bool CopyGeneration::processBlock(MachineBasicBlock &B,
1599       const RegisterSet &AVs) {
1600   if (!BT.reached(&B))
1601     return false;
1602   RegisterSet AVB(AVs);
1603   bool Changed = false;
1604   RegisterSet Defs;
1605 
1606   for (auto I = B.begin(), E = B.end(); I != E; ++I, AVB.insert(Defs)) {
1607     Defs.clear();
1608     HBS::getInstrDefs(*I, Defs);
1609 
1610     unsigned Opc = I->getOpcode();
1611     if (CopyPropagation::isCopyReg(Opc, false) ||
1612         ConstGeneration::isTfrConst(*I))
1613       continue;
1614 
1615     DebugLoc DL = I->getDebugLoc();
1616     auto At = I->isPHI() ? B.getFirstNonPHI() : I;
1617 
1618     for (Register R = Defs.find_first(); R; R = Defs.find_next(R)) {
1619       BitTracker::RegisterRef MR;
1620       auto *FRC = HBS::getFinalVRegClass(R, MRI);
1621 
1622       if (findMatch(R, MR, AVB)) {
1623         Register NewR = MRI.createVirtualRegister(FRC);
1624         BuildMI(B, At, DL, HII.get(TargetOpcode::COPY), NewR)
1625           .addReg(MR.Reg, 0, MR.Sub);
1626         BT.put(BitTracker::RegisterRef(NewR), BT.get(MR));
1627         HBS::replaceReg(R, NewR, MRI);
1628         Forbidden.insert(R);
1629         continue;
1630       }
1631 
1632       if (FRC == &Hexagon::DoubleRegsRegClass ||
1633           FRC == &Hexagon::HvxWRRegClass) {
1634         // Try to generate REG_SEQUENCE.
1635         unsigned SubLo = HRI.getHexagonSubRegIndex(*FRC, Hexagon::ps_sub_lo);
1636         unsigned SubHi = HRI.getHexagonSubRegIndex(*FRC, Hexagon::ps_sub_hi);
1637         BitTracker::RegisterRef TL = { R, SubLo };
1638         BitTracker::RegisterRef TH = { R, SubHi };
1639         BitTracker::RegisterRef ML, MH;
1640         if (findMatch(TL, ML, AVB) && findMatch(TH, MH, AVB)) {
1641           auto *FRC = HBS::getFinalVRegClass(R, MRI);
1642           Register NewR = MRI.createVirtualRegister(FRC);
1643           BuildMI(B, At, DL, HII.get(TargetOpcode::REG_SEQUENCE), NewR)
1644             .addReg(ML.Reg, 0, ML.Sub)
1645             .addImm(SubLo)
1646             .addReg(MH.Reg, 0, MH.Sub)
1647             .addImm(SubHi);
1648           BT.put(BitTracker::RegisterRef(NewR), BT.get(R));
1649           HBS::replaceReg(R, NewR, MRI);
1650           Forbidden.insert(R);
1651         }
1652       }
1653     }
1654   }
1655 
1656   return Changed;
1657 }
1658 
1659 bool CopyPropagation::isCopyReg(unsigned Opc, bool NoConv) {
1660   switch (Opc) {
1661     case TargetOpcode::COPY:
1662     case TargetOpcode::REG_SEQUENCE:
1663     case Hexagon::A4_combineir:
1664     case Hexagon::A4_combineri:
1665       return true;
1666     case Hexagon::A2_tfr:
1667     case Hexagon::A2_tfrp:
1668     case Hexagon::A2_combinew:
1669     case Hexagon::V6_vcombine:
1670       return NoConv;
1671     default:
1672       break;
1673   }
1674   return false;
1675 }
1676 
1677 bool CopyPropagation::propagateRegCopy(MachineInstr &MI) {
1678   bool Changed = false;
1679   unsigned Opc = MI.getOpcode();
1680   BitTracker::RegisterRef RD = MI.getOperand(0);
1681   assert(MI.getOperand(0).getSubReg() == 0);
1682 
1683   switch (Opc) {
1684     case TargetOpcode::COPY:
1685     case Hexagon::A2_tfr:
1686     case Hexagon::A2_tfrp: {
1687       BitTracker::RegisterRef RS = MI.getOperand(1);
1688       if (!HBS::isTransparentCopy(RD, RS, MRI))
1689         break;
1690       if (RS.Sub != 0)
1691         Changed = HBS::replaceRegWithSub(RD.Reg, RS.Reg, RS.Sub, MRI);
1692       else
1693         Changed = HBS::replaceReg(RD.Reg, RS.Reg, MRI);
1694       break;
1695     }
1696     case TargetOpcode::REG_SEQUENCE: {
1697       BitTracker::RegisterRef SL, SH;
1698       if (HBS::parseRegSequence(MI, SL, SH, MRI)) {
1699         const TargetRegisterClass &RC = *MRI.getRegClass(RD.Reg);
1700         unsigned SubLo = HRI.getHexagonSubRegIndex(RC, Hexagon::ps_sub_lo);
1701         unsigned SubHi = HRI.getHexagonSubRegIndex(RC, Hexagon::ps_sub_hi);
1702         Changed  = HBS::replaceSubWithSub(RD.Reg, SubLo, SL.Reg, SL.Sub, MRI);
1703         Changed |= HBS::replaceSubWithSub(RD.Reg, SubHi, SH.Reg, SH.Sub, MRI);
1704       }
1705       break;
1706     }
1707     case Hexagon::A2_combinew:
1708     case Hexagon::V6_vcombine: {
1709       const TargetRegisterClass &RC = *MRI.getRegClass(RD.Reg);
1710       unsigned SubLo = HRI.getHexagonSubRegIndex(RC, Hexagon::ps_sub_lo);
1711       unsigned SubHi = HRI.getHexagonSubRegIndex(RC, Hexagon::ps_sub_hi);
1712       BitTracker::RegisterRef RH = MI.getOperand(1), RL = MI.getOperand(2);
1713       Changed  = HBS::replaceSubWithSub(RD.Reg, SubLo, RL.Reg, RL.Sub, MRI);
1714       Changed |= HBS::replaceSubWithSub(RD.Reg, SubHi, RH.Reg, RH.Sub, MRI);
1715       break;
1716     }
1717     case Hexagon::A4_combineir:
1718     case Hexagon::A4_combineri: {
1719       unsigned SrcX = (Opc == Hexagon::A4_combineir) ? 2 : 1;
1720       unsigned Sub = (Opc == Hexagon::A4_combineir) ? Hexagon::isub_lo
1721                                                     : Hexagon::isub_hi;
1722       BitTracker::RegisterRef RS = MI.getOperand(SrcX);
1723       Changed = HBS::replaceSubWithSub(RD.Reg, Sub, RS.Reg, RS.Sub, MRI);
1724       break;
1725     }
1726   }
1727   return Changed;
1728 }
1729 
1730 bool CopyPropagation::processBlock(MachineBasicBlock &B, const RegisterSet&) {
1731   std::vector<MachineInstr*> Instrs;
1732   for (MachineInstr &MI : llvm::reverse(B))
1733     Instrs.push_back(&MI);
1734 
1735   bool Changed = false;
1736   for (auto *I : Instrs) {
1737     unsigned Opc = I->getOpcode();
1738     if (!CopyPropagation::isCopyReg(Opc, true))
1739       continue;
1740     Changed |= propagateRegCopy(*I);
1741   }
1742 
1743   return Changed;
1744 }
1745 
1746 namespace {
1747 
1748 // Recognize patterns that can be simplified and replace them with the
1749 // simpler forms.
1750 // This is by no means complete
1751   class BitSimplification : public Transformation {
1752   public:
1753     BitSimplification(BitTracker &bt, const MachineDominatorTree &mdt,
1754         const HexagonInstrInfo &hii, const HexagonRegisterInfo &hri,
1755         MachineRegisterInfo &mri, MachineFunction &mf)
1756       : Transformation(true), MDT(mdt), HII(hii), HRI(hri), MRI(mri),
1757         MF(mf), BT(bt) {}
1758 
1759     bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;
1760 
1761   private:
1762     struct RegHalf : public BitTracker::RegisterRef {
1763       bool Low;  // Low/High halfword.
1764     };
1765 
1766     bool matchHalf(unsigned SelfR, const BitTracker::RegisterCell &RC,
1767           unsigned B, RegHalf &RH);
1768     bool validateReg(BitTracker::RegisterRef R, unsigned Opc, unsigned OpNum);
1769 
1770     bool matchPackhl(unsigned SelfR, const BitTracker::RegisterCell &RC,
1771           BitTracker::RegisterRef &Rs, BitTracker::RegisterRef &Rt);
1772     unsigned getCombineOpcode(bool HLow, bool LLow);
1773 
1774     bool genStoreUpperHalf(MachineInstr *MI);
1775     bool genStoreImmediate(MachineInstr *MI);
1776     bool genPackhl(MachineInstr *MI, BitTracker::RegisterRef RD,
1777           const BitTracker::RegisterCell &RC);
1778     bool genExtractHalf(MachineInstr *MI, BitTracker::RegisterRef RD,
1779           const BitTracker::RegisterCell &RC);
1780     bool genCombineHalf(MachineInstr *MI, BitTracker::RegisterRef RD,
1781           const BitTracker::RegisterCell &RC);
1782     bool genExtractLow(MachineInstr *MI, BitTracker::RegisterRef RD,
1783           const BitTracker::RegisterCell &RC);
1784     bool genBitSplit(MachineInstr *MI, BitTracker::RegisterRef RD,
1785           const BitTracker::RegisterCell &RC, const RegisterSet &AVs);
1786     bool simplifyTstbit(MachineInstr *MI, BitTracker::RegisterRef RD,
1787           const BitTracker::RegisterCell &RC);
1788     bool simplifyExtractLow(MachineInstr *MI, BitTracker::RegisterRef RD,
1789           const BitTracker::RegisterCell &RC, const RegisterSet &AVs);
1790     bool simplifyRCmp0(MachineInstr *MI, BitTracker::RegisterRef RD);
1791 
1792     // Cache of created instructions to avoid creating duplicates.
1793     // XXX Currently only used by genBitSplit.
1794     std::vector<MachineInstr*> NewMIs;
1795 
1796     const MachineDominatorTree &MDT;
1797     const HexagonInstrInfo &HII;
1798     const HexagonRegisterInfo &HRI;
1799     MachineRegisterInfo &MRI;
1800     MachineFunction &MF;
1801     BitTracker &BT;
1802   };
1803 
1804 } // end anonymous namespace
1805 
1806 // Check if the bits [B..B+16) in register cell RC form a valid halfword,
1807 // i.e. [0..16), [16..32), etc. of some register. If so, return true and
1808 // set the information about the found register in RH.
1809 bool BitSimplification::matchHalf(unsigned SelfR,
1810       const BitTracker::RegisterCell &RC, unsigned B, RegHalf &RH) {
1811   // XXX This could be searching in the set of available registers, in case
1812   // the match is not exact.
1813 
1814   // Match 16-bit chunks, where the RC[B..B+15] references exactly one
1815   // register and all the bits B..B+15 match between RC and the register.
1816   // This is meant to match "v1[0-15]", where v1 = { [0]:0 [1-15]:v1... },
1817   // and RC = { [0]:0 [1-15]:v1[1-15]... }.
1818   bool Low = false;
1819   unsigned I = B;
1820   while (I < B+16 && RC[I].num())
1821     I++;
1822   if (I == B+16)
1823     return false;
1824 
1825   Register Reg = RC[I].RefI.Reg;
1826   unsigned P = RC[I].RefI.Pos;    // The RefI.Pos will be advanced by I-B.
1827   if (P < I-B)
1828     return false;
1829   unsigned Pos = P - (I-B);
1830 
1831   if (Reg == 0 || Reg == SelfR)    // Don't match "self".
1832     return false;
1833   if (!Reg.isVirtual())
1834     return false;
1835   if (!BT.has(Reg))
1836     return false;
1837 
1838   const BitTracker::RegisterCell &SC = BT.lookup(Reg);
1839   if (Pos+16 > SC.width())
1840     return false;
1841 
1842   for (unsigned i = 0; i < 16; ++i) {
1843     const BitTracker::BitValue &RV = RC[i+B];
1844     if (RV.Type == BitTracker::BitValue::Ref) {
1845       if (RV.RefI.Reg != Reg)
1846         return false;
1847       if (RV.RefI.Pos != i+Pos)
1848         return false;
1849       continue;
1850     }
1851     if (RC[i+B] != SC[i+Pos])
1852       return false;
1853   }
1854 
1855   unsigned Sub = 0;
1856   switch (Pos) {
1857     case 0:
1858       Sub = Hexagon::isub_lo;
1859       Low = true;
1860       break;
1861     case 16:
1862       Sub = Hexagon::isub_lo;
1863       Low = false;
1864       break;
1865     case 32:
1866       Sub = Hexagon::isub_hi;
1867       Low = true;
1868       break;
1869     case 48:
1870       Sub = Hexagon::isub_hi;
1871       Low = false;
1872       break;
1873     default:
1874       return false;
1875   }
1876 
1877   RH.Reg = Reg;
1878   RH.Sub = Sub;
1879   RH.Low = Low;
1880   // If the subregister is not valid with the register, set it to 0.
1881   if (!HBS::getFinalVRegClass(RH, MRI))
1882     RH.Sub = 0;
1883 
1884   return true;
1885 }
1886 
1887 bool BitSimplification::validateReg(BitTracker::RegisterRef R, unsigned Opc,
1888       unsigned OpNum) {
1889   auto *OpRC = HII.getRegClass(HII.get(Opc), OpNum, &HRI, MF);
1890   auto *RRC = HBS::getFinalVRegClass(R, MRI);
1891   return OpRC->hasSubClassEq(RRC);
1892 }
1893 
1894 // Check if RC matches the pattern of a S2_packhl. If so, return true and
1895 // set the inputs Rs and Rt.
1896 bool BitSimplification::matchPackhl(unsigned SelfR,
1897       const BitTracker::RegisterCell &RC, BitTracker::RegisterRef &Rs,
1898       BitTracker::RegisterRef &Rt) {
1899   RegHalf L1, H1, L2, H2;
1900 
1901   if (!matchHalf(SelfR, RC, 0, L2)  || !matchHalf(SelfR, RC, 16, L1))
1902     return false;
1903   if (!matchHalf(SelfR, RC, 32, H2) || !matchHalf(SelfR, RC, 48, H1))
1904     return false;
1905 
1906   // Rs = H1.L1, Rt = H2.L2
1907   if (H1.Reg != L1.Reg || H1.Sub != L1.Sub || H1.Low || !L1.Low)
1908     return false;
1909   if (H2.Reg != L2.Reg || H2.Sub != L2.Sub || H2.Low || !L2.Low)
1910     return false;
1911 
1912   Rs = H1;
1913   Rt = H2;
1914   return true;
1915 }
1916 
1917 unsigned BitSimplification::getCombineOpcode(bool HLow, bool LLow) {
1918   return HLow ? LLow ? Hexagon::A2_combine_ll
1919                      : Hexagon::A2_combine_lh
1920               : LLow ? Hexagon::A2_combine_hl
1921                      : Hexagon::A2_combine_hh;
1922 }
1923 
1924 // If MI stores the upper halfword of a register (potentially obtained via
1925 // shifts or extracts), replace it with a storerf instruction. This could
1926 // cause the "extraction" code to become dead.
1927 bool BitSimplification::genStoreUpperHalf(MachineInstr *MI) {
1928   unsigned Opc = MI->getOpcode();
1929   if (Opc != Hexagon::S2_storerh_io)
1930     return false;
1931 
1932   MachineOperand &ValOp = MI->getOperand(2);
1933   BitTracker::RegisterRef RS = ValOp;
1934   if (!BT.has(RS.Reg))
1935     return false;
1936   const BitTracker::RegisterCell &RC = BT.lookup(RS.Reg);
1937   RegHalf H;
1938   unsigned B = (RS.Sub == Hexagon::isub_hi) ? 32 : 0;
1939   if (!matchHalf(0, RC, B, H))
1940     return false;
1941   if (H.Low)
1942     return false;
1943   MI->setDesc(HII.get(Hexagon::S2_storerf_io));
1944   ValOp.setReg(H.Reg);
1945   ValOp.setSubReg(H.Sub);
1946   return true;
1947 }
1948 
1949 // If MI stores a value known at compile-time, and the value is within a range
1950 // that avoids using constant-extenders, replace it with a store-immediate.
1951 bool BitSimplification::genStoreImmediate(MachineInstr *MI) {
1952   unsigned Opc = MI->getOpcode();
1953   unsigned Align = 0;
1954   switch (Opc) {
1955     case Hexagon::S2_storeri_io:
1956       Align++;
1957       [[fallthrough]];
1958     case Hexagon::S2_storerh_io:
1959       Align++;
1960       [[fallthrough]];
1961     case Hexagon::S2_storerb_io:
1962       break;
1963     default:
1964       return false;
1965   }
1966 
1967   // Avoid stores to frame-indices (due to an unknown offset).
1968   if (!MI->getOperand(0).isReg())
1969     return false;
1970   MachineOperand &OffOp = MI->getOperand(1);
1971   if (!OffOp.isImm())
1972     return false;
1973 
1974   int64_t Off = OffOp.getImm();
1975   // Offset is u6:a. Sadly, there is no isShiftedUInt(n,x).
1976   if (!isUIntN(6+Align, Off) || (Off & ((1<<Align)-1)))
1977     return false;
1978   // Source register:
1979   BitTracker::RegisterRef RS = MI->getOperand(2);
1980   if (!BT.has(RS.Reg))
1981     return false;
1982   const BitTracker::RegisterCell &RC = BT.lookup(RS.Reg);
1983   uint64_t U;
1984   if (!HBS::getConst(RC, 0, RC.width(), U))
1985     return false;
1986 
1987   // Only consider 8-bit values to avoid constant-extenders.
1988   int V;
1989   switch (Opc) {
1990     case Hexagon::S2_storerb_io:
1991       V = int8_t(U);
1992       break;
1993     case Hexagon::S2_storerh_io:
1994       V = int16_t(U);
1995       break;
1996     case Hexagon::S2_storeri_io:
1997       V = int32_t(U);
1998       break;
1999     default:
2000       // Opc is already checked above to be one of the three store instructions.
2001       // This silences a -Wuninitialized false positive on GCC 5.4.
2002       llvm_unreachable("Unexpected store opcode");
2003   }
2004   if (!isInt<8>(V))
2005     return false;
2006 
2007   MI->removeOperand(2);
2008   switch (Opc) {
2009     case Hexagon::S2_storerb_io:
2010       MI->setDesc(HII.get(Hexagon::S4_storeirb_io));
2011       break;
2012     case Hexagon::S2_storerh_io:
2013       MI->setDesc(HII.get(Hexagon::S4_storeirh_io));
2014       break;
2015     case Hexagon::S2_storeri_io:
2016       MI->setDesc(HII.get(Hexagon::S4_storeiri_io));
2017       break;
2018   }
2019   MI->addOperand(MachineOperand::CreateImm(V));
2020   return true;
2021 }
2022 
2023 // If MI is equivalent o S2_packhl, generate the S2_packhl. MI could be the
2024 // last instruction in a sequence that results in something equivalent to
2025 // the pack-halfwords. The intent is to cause the entire sequence to become
2026 // dead.
2027 bool BitSimplification::genPackhl(MachineInstr *MI,
2028       BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {
2029   unsigned Opc = MI->getOpcode();
2030   if (Opc == Hexagon::S2_packhl)
2031     return false;
2032   BitTracker::RegisterRef Rs, Rt;
2033   if (!matchPackhl(RD.Reg, RC, Rs, Rt))
2034     return false;
2035   if (!validateReg(Rs, Hexagon::S2_packhl, 1) ||
2036       !validateReg(Rt, Hexagon::S2_packhl, 2))
2037     return false;
2038 
2039   MachineBasicBlock &B = *MI->getParent();
2040   Register NewR = MRI.createVirtualRegister(&Hexagon::DoubleRegsRegClass);
2041   DebugLoc DL = MI->getDebugLoc();
2042   auto At = MI->isPHI() ? B.getFirstNonPHI()
2043                         : MachineBasicBlock::iterator(MI);
2044   BuildMI(B, At, DL, HII.get(Hexagon::S2_packhl), NewR)
2045       .addReg(Rs.Reg, 0, Rs.Sub)
2046       .addReg(Rt.Reg, 0, Rt.Sub);
2047   HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
2048   BT.put(BitTracker::RegisterRef(NewR), RC);
2049   return true;
2050 }
2051 
2052 // If MI produces halfword of the input in the low half of the output,
2053 // replace it with zero-extend or extractu.
2054 bool BitSimplification::genExtractHalf(MachineInstr *MI,
2055       BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {
2056   RegHalf L;
2057   // Check for halfword in low 16 bits, zeros elsewhere.
2058   if (!matchHalf(RD.Reg, RC, 0, L) || !HBS::isZero(RC, 16, 16))
2059     return false;
2060 
2061   unsigned Opc = MI->getOpcode();
2062   MachineBasicBlock &B = *MI->getParent();
2063   DebugLoc DL = MI->getDebugLoc();
2064 
2065   // Prefer zxth, since zxth can go in any slot, while extractu only in
2066   // slots 2 and 3.
2067   unsigned NewR = 0;
2068   auto At = MI->isPHI() ? B.getFirstNonPHI()
2069                         : MachineBasicBlock::iterator(MI);
2070   if (L.Low && Opc != Hexagon::A2_zxth) {
2071     if (validateReg(L, Hexagon::A2_zxth, 1)) {
2072       NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
2073       BuildMI(B, At, DL, HII.get(Hexagon::A2_zxth), NewR)
2074           .addReg(L.Reg, 0, L.Sub);
2075     }
2076   } else if (!L.Low && Opc != Hexagon::S2_lsr_i_r) {
2077     if (validateReg(L, Hexagon::S2_lsr_i_r, 1)) {
2078       NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
2079       BuildMI(B, MI, DL, HII.get(Hexagon::S2_lsr_i_r), NewR)
2080           .addReg(L.Reg, 0, L.Sub)
2081           .addImm(16);
2082     }
2083   }
2084   if (NewR == 0)
2085     return false;
2086   HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
2087   BT.put(BitTracker::RegisterRef(NewR), RC);
2088   return true;
2089 }
2090 
2091 // If MI is equivalent to a combine(.L/.H, .L/.H) replace with with the
2092 // combine.
2093 bool BitSimplification::genCombineHalf(MachineInstr *MI,
2094       BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {
2095   RegHalf L, H;
2096   // Check for combine h/l
2097   if (!matchHalf(RD.Reg, RC, 0, L) || !matchHalf(RD.Reg, RC, 16, H))
2098     return false;
2099   // Do nothing if this is just a reg copy.
2100   if (L.Reg == H.Reg && L.Sub == H.Sub && !H.Low && L.Low)
2101     return false;
2102 
2103   unsigned Opc = MI->getOpcode();
2104   unsigned COpc = getCombineOpcode(H.Low, L.Low);
2105   if (COpc == Opc)
2106     return false;
2107   if (!validateReg(H, COpc, 1) || !validateReg(L, COpc, 2))
2108     return false;
2109 
2110   MachineBasicBlock &B = *MI->getParent();
2111   DebugLoc DL = MI->getDebugLoc();
2112   Register NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
2113   auto At = MI->isPHI() ? B.getFirstNonPHI()
2114                         : MachineBasicBlock::iterator(MI);
2115   BuildMI(B, At, DL, HII.get(COpc), NewR)
2116       .addReg(H.Reg, 0, H.Sub)
2117       .addReg(L.Reg, 0, L.Sub);
2118   HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
2119   BT.put(BitTracker::RegisterRef(NewR), RC);
2120   return true;
2121 }
2122 
2123 // If MI resets high bits of a register and keeps the lower ones, replace it
2124 // with zero-extend byte/half, and-immediate, or extractu, as appropriate.
2125 bool BitSimplification::genExtractLow(MachineInstr *MI,
2126       BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {
2127   unsigned Opc = MI->getOpcode();
2128   switch (Opc) {
2129     case Hexagon::A2_zxtb:
2130     case Hexagon::A2_zxth:
2131     case Hexagon::S2_extractu:
2132       return false;
2133   }
2134   if (Opc == Hexagon::A2_andir && MI->getOperand(2).isImm()) {
2135     int32_t Imm = MI->getOperand(2).getImm();
2136     if (isInt<10>(Imm))
2137       return false;
2138   }
2139 
2140   if (MI->hasUnmodeledSideEffects() || MI->isInlineAsm())
2141     return false;
2142   unsigned W = RC.width();
2143   while (W > 0 && RC[W-1].is(0))
2144     W--;
2145   if (W == 0 || W == RC.width())
2146     return false;
2147   unsigned NewOpc = (W == 8)  ? Hexagon::A2_zxtb
2148                   : (W == 16) ? Hexagon::A2_zxth
2149                   : (W < 10)  ? Hexagon::A2_andir
2150                   : Hexagon::S2_extractu;
2151   MachineBasicBlock &B = *MI->getParent();
2152   DebugLoc DL = MI->getDebugLoc();
2153 
2154   for (auto &Op : MI->uses()) {
2155     if (!Op.isReg())
2156       continue;
2157     BitTracker::RegisterRef RS = Op;
2158     if (!BT.has(RS.Reg))
2159       continue;
2160     const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg);
2161     unsigned BN, BW;
2162     if (!HBS::getSubregMask(RS, BN, BW, MRI))
2163       continue;
2164     if (BW < W || !HBS::isEqual(RC, 0, SC, BN, W))
2165       continue;
2166     if (!validateReg(RS, NewOpc, 1))
2167       continue;
2168 
2169     Register NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
2170     auto At = MI->isPHI() ? B.getFirstNonPHI()
2171                           : MachineBasicBlock::iterator(MI);
2172     auto MIB = BuildMI(B, At, DL, HII.get(NewOpc), NewR)
2173                   .addReg(RS.Reg, 0, RS.Sub);
2174     if (NewOpc == Hexagon::A2_andir)
2175       MIB.addImm((1 << W) - 1);
2176     else if (NewOpc == Hexagon::S2_extractu)
2177       MIB.addImm(W).addImm(0);
2178     HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
2179     BT.put(BitTracker::RegisterRef(NewR), RC);
2180     return true;
2181   }
2182   return false;
2183 }
2184 
2185 bool BitSimplification::genBitSplit(MachineInstr *MI,
2186       BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC,
2187       const RegisterSet &AVs) {
2188   if (!GenBitSplit)
2189     return false;
2190   if (MaxBitSplit.getNumOccurrences()) {
2191     if (CountBitSplit >= MaxBitSplit)
2192       return false;
2193   }
2194 
2195   unsigned Opc = MI->getOpcode();
2196   switch (Opc) {
2197     case Hexagon::A4_bitsplit:
2198     case Hexagon::A4_bitspliti:
2199       return false;
2200   }
2201 
2202   unsigned W = RC.width();
2203   if (W != 32)
2204     return false;
2205 
2206   auto ctlz = [] (const BitTracker::RegisterCell &C) -> unsigned {
2207     unsigned Z = C.width();
2208     while (Z > 0 && C[Z-1].is(0))
2209       --Z;
2210     return C.width() - Z;
2211   };
2212 
2213   // Count the number of leading zeros in the target RC.
2214   unsigned Z = ctlz(RC);
2215   if (Z == 0 || Z == W)
2216     return false;
2217 
2218   // A simplistic analysis: assume the source register (the one being split)
2219   // is fully unknown, and that all its bits are self-references.
2220   const BitTracker::BitValue &B0 = RC[0];
2221   if (B0.Type != BitTracker::BitValue::Ref)
2222     return false;
2223 
2224   unsigned SrcR = B0.RefI.Reg;
2225   unsigned SrcSR = 0;
2226   unsigned Pos = B0.RefI.Pos;
2227 
2228   // All the non-zero bits should be consecutive bits from the same register.
2229   for (unsigned i = 1; i < W-Z; ++i) {
2230     const BitTracker::BitValue &V = RC[i];
2231     if (V.Type != BitTracker::BitValue::Ref)
2232       return false;
2233     if (V.RefI.Reg != SrcR || V.RefI.Pos != Pos+i)
2234       return false;
2235   }
2236 
2237   // Now, find the other bitfield among AVs.
2238   for (unsigned S = AVs.find_first(); S; S = AVs.find_next(S)) {
2239     // The number of leading zeros here should be the number of trailing
2240     // non-zeros in RC.
2241     unsigned SRC = MRI.getRegClass(S)->getID();
2242     if (SRC != Hexagon::IntRegsRegClassID &&
2243         SRC != Hexagon::DoubleRegsRegClassID)
2244       continue;
2245     if (!BT.has(S))
2246       continue;
2247     const BitTracker::RegisterCell &SC = BT.lookup(S);
2248     if (SC.width() != W || ctlz(SC) != W-Z)
2249       continue;
2250     // The Z lower bits should now match SrcR.
2251     const BitTracker::BitValue &S0 = SC[0];
2252     if (S0.Type != BitTracker::BitValue::Ref || S0.RefI.Reg != SrcR)
2253       continue;
2254     unsigned P = S0.RefI.Pos;
2255 
2256     if (Pos <= P && (Pos + W-Z) != P)
2257       continue;
2258     if (P < Pos && (P + Z) != Pos)
2259       continue;
2260     // The starting bitfield position must be at a subregister boundary.
2261     if (std::min(P, Pos) != 0 && std::min(P, Pos) != 32)
2262       continue;
2263 
2264     unsigned I;
2265     for (I = 1; I < Z; ++I) {
2266       const BitTracker::BitValue &V = SC[I];
2267       if (V.Type != BitTracker::BitValue::Ref)
2268         break;
2269       if (V.RefI.Reg != SrcR || V.RefI.Pos != P+I)
2270         break;
2271     }
2272     if (I != Z)
2273       continue;
2274 
2275     // Generate bitsplit where S is defined.
2276     if (MaxBitSplit.getNumOccurrences())
2277       CountBitSplit++;
2278     MachineInstr *DefS = MRI.getVRegDef(S);
2279     assert(DefS != nullptr);
2280     DebugLoc DL = DefS->getDebugLoc();
2281     MachineBasicBlock &B = *DefS->getParent();
2282     auto At = DefS->isPHI() ? B.getFirstNonPHI()
2283                             : MachineBasicBlock::iterator(DefS);
2284     if (MRI.getRegClass(SrcR)->getID() == Hexagon::DoubleRegsRegClassID)
2285       SrcSR = (std::min(Pos, P) == 32) ? Hexagon::isub_hi : Hexagon::isub_lo;
2286     if (!validateReg({SrcR,SrcSR}, Hexagon::A4_bitspliti, 1))
2287       continue;
2288     unsigned ImmOp = Pos <= P ? W-Z : Z;
2289 
2290     // Find an existing bitsplit instruction if one already exists.
2291     unsigned NewR = 0;
2292     for (MachineInstr *In : NewMIs) {
2293       if (In->getOpcode() != Hexagon::A4_bitspliti)
2294         continue;
2295       MachineOperand &Op1 = In->getOperand(1);
2296       if (Op1.getReg() != SrcR || Op1.getSubReg() != SrcSR)
2297         continue;
2298       if (In->getOperand(2).getImm() != ImmOp)
2299         continue;
2300       // Check if the target register is available here.
2301       MachineOperand &Op0 = In->getOperand(0);
2302       MachineInstr *DefI = MRI.getVRegDef(Op0.getReg());
2303       assert(DefI != nullptr);
2304       if (!MDT.dominates(DefI, &*At))
2305         continue;
2306 
2307       // Found one that can be reused.
2308       assert(Op0.getSubReg() == 0);
2309       NewR = Op0.getReg();
2310       break;
2311     }
2312     if (!NewR) {
2313       NewR = MRI.createVirtualRegister(&Hexagon::DoubleRegsRegClass);
2314       auto NewBS = BuildMI(B, At, DL, HII.get(Hexagon::A4_bitspliti), NewR)
2315                       .addReg(SrcR, 0, SrcSR)
2316                       .addImm(ImmOp);
2317       NewMIs.push_back(NewBS);
2318     }
2319     if (Pos <= P) {
2320       HBS::replaceRegWithSub(RD.Reg, NewR, Hexagon::isub_lo, MRI);
2321       HBS::replaceRegWithSub(S,      NewR, Hexagon::isub_hi, MRI);
2322     } else {
2323       HBS::replaceRegWithSub(S,      NewR, Hexagon::isub_lo, MRI);
2324       HBS::replaceRegWithSub(RD.Reg, NewR, Hexagon::isub_hi, MRI);
2325     }
2326     return true;
2327   }
2328 
2329   return false;
2330 }
2331 
2332 // Check for tstbit simplification opportunity, where the bit being checked
2333 // can be tracked back to another register. For example:
2334 //   %2 = S2_lsr_i_r  %1, 5
2335 //   %3 = S2_tstbit_i %2, 0
2336 // =>
2337 //   %3 = S2_tstbit_i %1, 5
2338 bool BitSimplification::simplifyTstbit(MachineInstr *MI,
2339       BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {
2340   unsigned Opc = MI->getOpcode();
2341   if (Opc != Hexagon::S2_tstbit_i)
2342     return false;
2343 
2344   unsigned BN = MI->getOperand(2).getImm();
2345   BitTracker::RegisterRef RS = MI->getOperand(1);
2346   unsigned F, W;
2347   DebugLoc DL = MI->getDebugLoc();
2348   if (!BT.has(RS.Reg) || !HBS::getSubregMask(RS, F, W, MRI))
2349     return false;
2350   MachineBasicBlock &B = *MI->getParent();
2351   auto At = MI->isPHI() ? B.getFirstNonPHI()
2352                         : MachineBasicBlock::iterator(MI);
2353 
2354   const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg);
2355   const BitTracker::BitValue &V = SC[F+BN];
2356   if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg != RS.Reg) {
2357     const TargetRegisterClass *TC = MRI.getRegClass(V.RefI.Reg);
2358     // Need to map V.RefI.Reg to a 32-bit register, i.e. if it is
2359     // a double register, need to use a subregister and adjust bit
2360     // number.
2361     unsigned P = std::numeric_limits<unsigned>::max();
2362     BitTracker::RegisterRef RR(V.RefI.Reg, 0);
2363     if (TC == &Hexagon::DoubleRegsRegClass) {
2364       P = V.RefI.Pos;
2365       RR.Sub = Hexagon::isub_lo;
2366       if (P >= 32) {
2367         P -= 32;
2368         RR.Sub = Hexagon::isub_hi;
2369       }
2370     } else if (TC == &Hexagon::IntRegsRegClass) {
2371       P = V.RefI.Pos;
2372     }
2373     if (P != std::numeric_limits<unsigned>::max()) {
2374       Register NewR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass);
2375       BuildMI(B, At, DL, HII.get(Hexagon::S2_tstbit_i), NewR)
2376           .addReg(RR.Reg, 0, RR.Sub)
2377           .addImm(P);
2378       HBS::replaceReg(RD.Reg, NewR, MRI);
2379       BT.put(NewR, RC);
2380       return true;
2381     }
2382   } else if (V.is(0) || V.is(1)) {
2383     Register NewR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass);
2384     unsigned NewOpc = V.is(0) ? Hexagon::PS_false : Hexagon::PS_true;
2385     BuildMI(B, At, DL, HII.get(NewOpc), NewR);
2386     HBS::replaceReg(RD.Reg, NewR, MRI);
2387     return true;
2388   }
2389 
2390   return false;
2391 }
2392 
2393 // Detect whether RD is a bitfield extract (sign- or zero-extended) of
2394 // some register from the AVs set. Create a new corresponding instruction
2395 // at the location of MI. The intent is to recognize situations where
2396 // a sequence of instructions performs an operation that is equivalent to
2397 // an extract operation, such as a shift left followed by a shift right.
2398 bool BitSimplification::simplifyExtractLow(MachineInstr *MI,
2399       BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC,
2400       const RegisterSet &AVs) {
2401   if (!GenExtract)
2402     return false;
2403   if (MaxExtract.getNumOccurrences()) {
2404     if (CountExtract >= MaxExtract)
2405       return false;
2406     CountExtract++;
2407   }
2408 
2409   unsigned W = RC.width();
2410   unsigned RW = W;
2411   unsigned Len;
2412   bool Signed;
2413 
2414   // The code is mostly class-independent, except for the part that generates
2415   // the extract instruction, and establishes the source register (in case it
2416   // needs to use a subregister).
2417   const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI);
2418   if (FRC != &Hexagon::IntRegsRegClass && FRC != &Hexagon::DoubleRegsRegClass)
2419     return false;
2420   assert(RD.Sub == 0);
2421 
2422   // Observation:
2423   // If the cell has a form of 00..0xx..x with k zeros and n remaining
2424   // bits, this could be an extractu of the n bits, but it could also be
2425   // an extractu of a longer field which happens to have 0s in the top
2426   // bit positions.
2427   // The same logic applies to sign-extended fields.
2428   //
2429   // Do not check for the extended extracts, since it would expand the
2430   // search space quite a bit. The search may be expensive as it is.
2431 
2432   const BitTracker::BitValue &TopV = RC[W-1];
2433 
2434   // Eliminate candidates that have self-referential bits, since they
2435   // cannot be extracts from other registers. Also, skip registers that
2436   // have compile-time constant values.
2437   bool IsConst = true;
2438   for (unsigned I = 0; I != W; ++I) {
2439     const BitTracker::BitValue &V = RC[I];
2440     if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg == RD.Reg)
2441       return false;
2442     IsConst = IsConst && (V.is(0) || V.is(1));
2443   }
2444   if (IsConst)
2445     return false;
2446 
2447   if (TopV.is(0) || TopV.is(1)) {
2448     bool S = TopV.is(1);
2449     for (--W; W > 0 && RC[W-1].is(S); --W)
2450       ;
2451     Len = W;
2452     Signed = S;
2453     // The sign bit must be a part of the field being extended.
2454     if (Signed)
2455       ++Len;
2456   } else {
2457     // This could still be a sign-extended extract.
2458     assert(TopV.Type == BitTracker::BitValue::Ref);
2459     if (TopV.RefI.Reg == RD.Reg || TopV.RefI.Pos == W-1)
2460       return false;
2461     for (--W; W > 0 && RC[W-1] == TopV; --W)
2462       ;
2463     // The top bits of RC are copies of TopV. One occurrence of TopV will
2464     // be a part of the field.
2465     Len = W + 1;
2466     Signed = true;
2467   }
2468 
2469   // This would be just a copy. It should be handled elsewhere.
2470   if (Len == RW)
2471     return false;
2472 
2473   LLVM_DEBUG({
2474     dbgs() << __func__ << " on reg: " << printReg(RD.Reg, &HRI, RD.Sub)
2475            << ", MI: " << *MI;
2476     dbgs() << "Cell: " << RC << '\n';
2477     dbgs() << "Expected bitfield size: " << Len << " bits, "
2478            << (Signed ? "sign" : "zero") << "-extended\n";
2479   });
2480 
2481   bool Changed = false;
2482 
2483   for (unsigned R = AVs.find_first(); R != 0; R = AVs.find_next(R)) {
2484     if (!BT.has(R))
2485       continue;
2486     const BitTracker::RegisterCell &SC = BT.lookup(R);
2487     unsigned SW = SC.width();
2488 
2489     // The source can be longer than the destination, as long as its size is
2490     // a multiple of the size of the destination. Also, we would need to be
2491     // able to refer to the subregister in the source that would be of the
2492     // same size as the destination, but only check the sizes here.
2493     if (SW < RW || (SW % RW) != 0)
2494       continue;
2495 
2496     // The field can start at any offset in SC as long as it contains Len
2497     // bits and does not cross subregister boundary (if the source register
2498     // is longer than the destination).
2499     unsigned Off = 0;
2500     while (Off <= SW-Len) {
2501       unsigned OE = (Off+Len)/RW;
2502       if (OE != Off/RW) {
2503         // The assumption here is that if the source (R) is longer than the
2504         // destination, then the destination is a sequence of words of
2505         // size RW, and each such word in R can be accessed via a subregister.
2506         //
2507         // If the beginning and the end of the field cross the subregister
2508         // boundary, advance to the next subregister.
2509         Off = OE*RW;
2510         continue;
2511       }
2512       if (HBS::isEqual(RC, 0, SC, Off, Len))
2513         break;
2514       ++Off;
2515     }
2516 
2517     if (Off > SW-Len)
2518       continue;
2519 
2520     // Found match.
2521     unsigned ExtOpc = 0;
2522     if (Off == 0) {
2523       if (Len == 8)
2524         ExtOpc = Signed ? Hexagon::A2_sxtb : Hexagon::A2_zxtb;
2525       else if (Len == 16)
2526         ExtOpc = Signed ? Hexagon::A2_sxth : Hexagon::A2_zxth;
2527       else if (Len < 10 && !Signed)
2528         ExtOpc = Hexagon::A2_andir;
2529     }
2530     if (ExtOpc == 0) {
2531       ExtOpc =
2532           Signed ? (RW == 32 ? Hexagon::S4_extract  : Hexagon::S4_extractp)
2533                  : (RW == 32 ? Hexagon::S2_extractu : Hexagon::S2_extractup);
2534     }
2535     unsigned SR = 0;
2536     // This only recognizes isub_lo and isub_hi.
2537     if (RW != SW && RW*2 != SW)
2538       continue;
2539     if (RW != SW)
2540       SR = (Off/RW == 0) ? Hexagon::isub_lo : Hexagon::isub_hi;
2541     Off = Off % RW;
2542 
2543     if (!validateReg({R,SR}, ExtOpc, 1))
2544       continue;
2545 
2546     // Don't generate the same instruction as the one being optimized.
2547     if (MI->getOpcode() == ExtOpc) {
2548       // All possible ExtOpc's have the source in operand(1).
2549       const MachineOperand &SrcOp = MI->getOperand(1);
2550       if (SrcOp.getReg() == R)
2551         continue;
2552     }
2553 
2554     DebugLoc DL = MI->getDebugLoc();
2555     MachineBasicBlock &B = *MI->getParent();
2556     Register NewR = MRI.createVirtualRegister(FRC);
2557     auto At = MI->isPHI() ? B.getFirstNonPHI()
2558                           : MachineBasicBlock::iterator(MI);
2559     auto MIB = BuildMI(B, At, DL, HII.get(ExtOpc), NewR)
2560                   .addReg(R, 0, SR);
2561     switch (ExtOpc) {
2562       case Hexagon::A2_sxtb:
2563       case Hexagon::A2_zxtb:
2564       case Hexagon::A2_sxth:
2565       case Hexagon::A2_zxth:
2566         break;
2567       case Hexagon::A2_andir:
2568         MIB.addImm((1u << Len) - 1);
2569         break;
2570       case Hexagon::S4_extract:
2571       case Hexagon::S2_extractu:
2572       case Hexagon::S4_extractp:
2573       case Hexagon::S2_extractup:
2574         MIB.addImm(Len)
2575            .addImm(Off);
2576         break;
2577       default:
2578         llvm_unreachable("Unexpected opcode");
2579     }
2580 
2581     HBS::replaceReg(RD.Reg, NewR, MRI);
2582     BT.put(BitTracker::RegisterRef(NewR), RC);
2583     Changed = true;
2584     break;
2585   }
2586 
2587   return Changed;
2588 }
2589 
2590 bool BitSimplification::simplifyRCmp0(MachineInstr *MI,
2591       BitTracker::RegisterRef RD) {
2592   unsigned Opc = MI->getOpcode();
2593   if (Opc != Hexagon::A4_rcmpeqi && Opc != Hexagon::A4_rcmpneqi)
2594     return false;
2595   MachineOperand &CmpOp = MI->getOperand(2);
2596   if (!CmpOp.isImm() || CmpOp.getImm() != 0)
2597     return false;
2598 
2599   const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI);
2600   if (FRC != &Hexagon::IntRegsRegClass && FRC != &Hexagon::DoubleRegsRegClass)
2601     return false;
2602   assert(RD.Sub == 0);
2603 
2604   MachineBasicBlock &B = *MI->getParent();
2605   const DebugLoc &DL = MI->getDebugLoc();
2606   auto At = MI->isPHI() ? B.getFirstNonPHI()
2607                         : MachineBasicBlock::iterator(MI);
2608   bool KnownZ = true;
2609   bool KnownNZ = false;
2610 
2611   BitTracker::RegisterRef SR = MI->getOperand(1);
2612   if (!BT.has(SR.Reg))
2613     return false;
2614   const BitTracker::RegisterCell &SC = BT.lookup(SR.Reg);
2615   unsigned F, W;
2616   if (!HBS::getSubregMask(SR, F, W, MRI))
2617     return false;
2618 
2619   for (uint16_t I = F; I != F+W; ++I) {
2620     const BitTracker::BitValue &V = SC[I];
2621     if (!V.is(0))
2622       KnownZ = false;
2623     if (V.is(1))
2624       KnownNZ = true;
2625   }
2626 
2627   auto ReplaceWithConst = [&](int C) {
2628     Register NewR = MRI.createVirtualRegister(FRC);
2629     BuildMI(B, At, DL, HII.get(Hexagon::A2_tfrsi), NewR)
2630       .addImm(C);
2631     HBS::replaceReg(RD.Reg, NewR, MRI);
2632     BitTracker::RegisterCell NewRC(W);
2633     for (uint16_t I = 0; I != W; ++I) {
2634       NewRC[I] = BitTracker::BitValue(C & 1);
2635       C = unsigned(C) >> 1;
2636     }
2637     BT.put(BitTracker::RegisterRef(NewR), NewRC);
2638     return true;
2639   };
2640 
2641   auto IsNonZero = [] (const MachineOperand &Op) {
2642     if (Op.isGlobal() || Op.isBlockAddress())
2643       return true;
2644     if (Op.isImm())
2645       return Op.getImm() != 0;
2646     if (Op.isCImm())
2647       return !Op.getCImm()->isZero();
2648     if (Op.isFPImm())
2649       return !Op.getFPImm()->isZero();
2650     return false;
2651   };
2652 
2653   auto IsZero = [] (const MachineOperand &Op) {
2654     if (Op.isGlobal() || Op.isBlockAddress())
2655       return false;
2656     if (Op.isImm())
2657       return Op.getImm() == 0;
2658     if (Op.isCImm())
2659       return Op.getCImm()->isZero();
2660     if (Op.isFPImm())
2661       return Op.getFPImm()->isZero();
2662     return false;
2663   };
2664 
2665   // If the source register is known to be 0 or non-0, the comparison can
2666   // be folded to a load of a constant.
2667   if (KnownZ || KnownNZ) {
2668     assert(KnownZ != KnownNZ && "Register cannot be both 0 and non-0");
2669     return ReplaceWithConst(KnownZ == (Opc == Hexagon::A4_rcmpeqi));
2670   }
2671 
2672   // Special case: if the compare comes from a C2_muxii, then we know the
2673   // two possible constants that can be the source value.
2674   MachineInstr *InpDef = MRI.getVRegDef(SR.Reg);
2675   if (!InpDef)
2676     return false;
2677   if (SR.Sub == 0 && InpDef->getOpcode() == Hexagon::C2_muxii) {
2678     MachineOperand &Src1 = InpDef->getOperand(2);
2679     MachineOperand &Src2 = InpDef->getOperand(3);
2680     // Check if both are non-zero.
2681     bool KnownNZ1 = IsNonZero(Src1), KnownNZ2 = IsNonZero(Src2);
2682     if (KnownNZ1 && KnownNZ2)
2683       return ReplaceWithConst(Opc == Hexagon::A4_rcmpneqi);
2684     // Check if both are zero.
2685     bool KnownZ1 = IsZero(Src1), KnownZ2 = IsZero(Src2);
2686     if (KnownZ1 && KnownZ2)
2687       return ReplaceWithConst(Opc == Hexagon::A4_rcmpeqi);
2688 
2689     // If for both operands we know that they are either 0 or non-0,
2690     // replace the comparison with a C2_muxii, using the same predicate
2691     // register, but with operands substituted with 0/1 accordingly.
2692     if ((KnownZ1 || KnownNZ1) && (KnownZ2 || KnownNZ2)) {
2693       Register NewR = MRI.createVirtualRegister(FRC);
2694       BuildMI(B, At, DL, HII.get(Hexagon::C2_muxii), NewR)
2695         .addReg(InpDef->getOperand(1).getReg())
2696         .addImm(KnownZ1 == (Opc == Hexagon::A4_rcmpeqi))
2697         .addImm(KnownZ2 == (Opc == Hexagon::A4_rcmpeqi));
2698       HBS::replaceReg(RD.Reg, NewR, MRI);
2699       // Create a new cell with only the least significant bit unknown.
2700       BitTracker::RegisterCell NewRC(W);
2701       NewRC[0] = BitTracker::BitValue::self();
2702       NewRC.fill(1, W, BitTracker::BitValue::Zero);
2703       BT.put(BitTracker::RegisterRef(NewR), NewRC);
2704       return true;
2705     }
2706   }
2707 
2708   return false;
2709 }
2710 
2711 bool BitSimplification::processBlock(MachineBasicBlock &B,
2712       const RegisterSet &AVs) {
2713   if (!BT.reached(&B))
2714     return false;
2715   bool Changed = false;
2716   RegisterSet AVB = AVs;
2717   RegisterSet Defs;
2718 
2719   for (auto I = B.begin(), E = B.end(); I != E; ++I, AVB.insert(Defs)) {
2720     MachineInstr *MI = &*I;
2721     Defs.clear();
2722     HBS::getInstrDefs(*MI, Defs);
2723 
2724     unsigned Opc = MI->getOpcode();
2725     if (Opc == TargetOpcode::COPY || Opc == TargetOpcode::REG_SEQUENCE)
2726       continue;
2727 
2728     if (MI->mayStore()) {
2729       bool T = genStoreUpperHalf(MI);
2730       T = T || genStoreImmediate(MI);
2731       Changed |= T;
2732       continue;
2733     }
2734 
2735     if (Defs.count() != 1)
2736       continue;
2737     const MachineOperand &Op0 = MI->getOperand(0);
2738     if (!Op0.isReg() || !Op0.isDef())
2739       continue;
2740     BitTracker::RegisterRef RD = Op0;
2741     if (!BT.has(RD.Reg))
2742       continue;
2743     const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI);
2744     const BitTracker::RegisterCell &RC = BT.lookup(RD.Reg);
2745 
2746     if (FRC->getID() == Hexagon::DoubleRegsRegClassID) {
2747       bool T = genPackhl(MI, RD, RC);
2748       T = T || simplifyExtractLow(MI, RD, RC, AVB);
2749       Changed |= T;
2750       continue;
2751     }
2752 
2753     if (FRC->getID() == Hexagon::IntRegsRegClassID) {
2754       bool T = genBitSplit(MI, RD, RC, AVB);
2755       T = T || simplifyExtractLow(MI, RD, RC, AVB);
2756       T = T || genExtractHalf(MI, RD, RC);
2757       T = T || genCombineHalf(MI, RD, RC);
2758       T = T || genExtractLow(MI, RD, RC);
2759       T = T || simplifyRCmp0(MI, RD);
2760       Changed |= T;
2761       continue;
2762     }
2763 
2764     if (FRC->getID() == Hexagon::PredRegsRegClassID) {
2765       bool T = simplifyTstbit(MI, RD, RC);
2766       Changed |= T;
2767       continue;
2768     }
2769   }
2770   return Changed;
2771 }
2772 
2773 bool HexagonBitSimplify::runOnMachineFunction(MachineFunction &MF) {
2774   if (skipFunction(MF.getFunction()))
2775     return false;
2776 
2777   auto &HST = MF.getSubtarget<HexagonSubtarget>();
2778   auto &HRI = *HST.getRegisterInfo();
2779   auto &HII = *HST.getInstrInfo();
2780 
2781   MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
2782   MachineRegisterInfo &MRI = MF.getRegInfo();
2783   bool Changed;
2784 
2785   Changed = DeadCodeElimination(MF, *MDT).run();
2786 
2787   const HexagonEvaluator HE(HRI, MRI, HII, MF);
2788   BitTracker BT(HE, MF);
2789   LLVM_DEBUG(BT.trace(true));
2790   BT.run();
2791 
2792   MachineBasicBlock &Entry = MF.front();
2793 
2794   RegisterSet AIG;  // Available registers for IG.
2795   ConstGeneration ImmG(BT, HII, MRI);
2796   Changed |= visitBlock(Entry, ImmG, AIG);
2797 
2798   RegisterSet ARE;  // Available registers for RIE.
2799   RedundantInstrElimination RIE(BT, HII, HRI, MRI);
2800   bool Ried = visitBlock(Entry, RIE, ARE);
2801   if (Ried) {
2802     Changed = true;
2803     BT.run();
2804   }
2805 
2806   RegisterSet ACG;  // Available registers for CG.
2807   CopyGeneration CopyG(BT, HII, HRI, MRI);
2808   Changed |= visitBlock(Entry, CopyG, ACG);
2809 
2810   RegisterSet ACP;  // Available registers for CP.
2811   CopyPropagation CopyP(HRI, MRI);
2812   Changed |= visitBlock(Entry, CopyP, ACP);
2813 
2814   Changed = DeadCodeElimination(MF, *MDT).run() || Changed;
2815 
2816   BT.run();
2817   RegisterSet ABS;  // Available registers for BS.
2818   BitSimplification BitS(BT, *MDT, HII, HRI, MRI, MF);
2819   Changed |= visitBlock(Entry, BitS, ABS);
2820 
2821   Changed = DeadCodeElimination(MF, *MDT).run() || Changed;
2822 
2823   if (Changed) {
2824     for (auto &B : MF)
2825       for (auto &I : B)
2826         I.clearKillInfo();
2827     DeadCodeElimination(MF, *MDT).run();
2828   }
2829   return Changed;
2830 }
2831 
2832 // Recognize loops where the code at the end of the loop matches the code
2833 // before the entry of the loop, and the matching code is such that is can
2834 // be simplified. This pass relies on the bit simplification above and only
2835 // prepares code in a way that can be handled by the bit simplification.
2836 //
2837 // This is the motivating testcase (and explanation):
2838 //
2839 // {
2840 //   loop0(.LBB0_2, r1)      // %for.body.preheader
2841 //   r5:4 = memd(r0++#8)
2842 // }
2843 // {
2844 //   r3 = lsr(r4, #16)
2845 //   r7:6 = combine(r5, r5)
2846 // }
2847 // {
2848 //   r3 = insert(r5, #16, #16)
2849 //   r7:6 = vlsrw(r7:6, #16)
2850 // }
2851 // .LBB0_2:
2852 // {
2853 //   memh(r2+#4) = r5
2854 //   memh(r2+#6) = r6            # R6 is really R5.H
2855 // }
2856 // {
2857 //   r2 = add(r2, #8)
2858 //   memh(r2+#0) = r4
2859 //   memh(r2+#2) = r3            # R3 is really R4.H
2860 // }
2861 // {
2862 //   r5:4 = memd(r0++#8)
2863 // }
2864 // {                             # "Shuffling" code that sets up R3 and R6
2865 //   r3 = lsr(r4, #16)           # so that their halves can be stored in the
2866 //   r7:6 = combine(r5, r5)      # next iteration. This could be folded into
2867 // }                             # the stores if the code was at the beginning
2868 // {                             # of the loop iteration. Since the same code
2869 //   r3 = insert(r5, #16, #16)   # precedes the loop, it can actually be moved
2870 //   r7:6 = vlsrw(r7:6, #16)     # there.
2871 // }:endloop0
2872 //
2873 //
2874 // The outcome:
2875 //
2876 // {
2877 //   loop0(.LBB0_2, r1)
2878 //   r5:4 = memd(r0++#8)
2879 // }
2880 // .LBB0_2:
2881 // {
2882 //   memh(r2+#4) = r5
2883 //   memh(r2+#6) = r5.h
2884 // }
2885 // {
2886 //   r2 = add(r2, #8)
2887 //   memh(r2+#0) = r4
2888 //   memh(r2+#2) = r4.h
2889 // }
2890 // {
2891 //   r5:4 = memd(r0++#8)
2892 // }:endloop0
2893 
2894 namespace {
2895 
2896   class HexagonLoopRescheduling : public MachineFunctionPass {
2897   public:
2898     static char ID;
2899 
2900     HexagonLoopRescheduling() : MachineFunctionPass(ID) {}
2901 
2902     bool runOnMachineFunction(MachineFunction &MF) override;
2903 
2904   private:
2905     const HexagonInstrInfo *HII = nullptr;
2906     const HexagonRegisterInfo *HRI = nullptr;
2907     MachineRegisterInfo *MRI = nullptr;
2908     BitTracker *BTP = nullptr;
2909 
2910     struct LoopCand {
2911       LoopCand(MachineBasicBlock *lb, MachineBasicBlock *pb,
2912             MachineBasicBlock *eb) : LB(lb), PB(pb), EB(eb) {}
2913 
2914       MachineBasicBlock *LB, *PB, *EB;
2915     };
2916     using InstrList = std::vector<MachineInstr *>;
2917     struct InstrGroup {
2918       BitTracker::RegisterRef Inp, Out;
2919       InstrList Ins;
2920     };
2921     struct PhiInfo {
2922       PhiInfo(MachineInstr &P, MachineBasicBlock &B);
2923 
2924       unsigned DefR;
2925       BitTracker::RegisterRef LR, PR; // Loop Register, Preheader Register
2926       MachineBasicBlock *LB, *PB;     // Loop Block, Preheader Block
2927     };
2928 
2929     static unsigned getDefReg(const MachineInstr *MI);
2930     bool isConst(unsigned Reg) const;
2931     bool isBitShuffle(const MachineInstr *MI, unsigned DefR) const;
2932     bool isStoreInput(const MachineInstr *MI, unsigned DefR) const;
2933     bool isShuffleOf(unsigned OutR, unsigned InpR) const;
2934     bool isSameShuffle(unsigned OutR1, unsigned InpR1, unsigned OutR2,
2935         unsigned &InpR2) const;
2936     void moveGroup(InstrGroup &G, MachineBasicBlock &LB, MachineBasicBlock &PB,
2937         MachineBasicBlock::iterator At, unsigned OldPhiR, unsigned NewPredR);
2938     bool processLoop(LoopCand &C);
2939   };
2940 
2941 } // end anonymous namespace
2942 
2943 char HexagonLoopRescheduling::ID = 0;
2944 
2945 INITIALIZE_PASS(HexagonLoopRescheduling, "hexagon-loop-resched-pass",
2946                 "Hexagon Loop Rescheduling", false, false)
2947 
2948 HexagonLoopRescheduling::PhiInfo::PhiInfo(MachineInstr &P,
2949       MachineBasicBlock &B) {
2950   DefR = HexagonLoopRescheduling::getDefReg(&P);
2951   LB = &B;
2952   PB = nullptr;
2953   for (unsigned i = 1, n = P.getNumOperands(); i < n; i += 2) {
2954     const MachineOperand &OpB = P.getOperand(i+1);
2955     if (OpB.getMBB() == &B) {
2956       LR = P.getOperand(i);
2957       continue;
2958     }
2959     PB = OpB.getMBB();
2960     PR = P.getOperand(i);
2961   }
2962 }
2963 
2964 unsigned HexagonLoopRescheduling::getDefReg(const MachineInstr *MI) {
2965   RegisterSet Defs;
2966   HBS::getInstrDefs(*MI, Defs);
2967   if (Defs.count() != 1)
2968     return 0;
2969   return Defs.find_first();
2970 }
2971 
2972 bool HexagonLoopRescheduling::isConst(unsigned Reg) const {
2973   if (!BTP->has(Reg))
2974     return false;
2975   const BitTracker::RegisterCell &RC = BTP->lookup(Reg);
2976   for (unsigned i = 0, w = RC.width(); i < w; ++i) {
2977     const BitTracker::BitValue &V = RC[i];
2978     if (!V.is(0) && !V.is(1))
2979       return false;
2980   }
2981   return true;
2982 }
2983 
2984 bool HexagonLoopRescheduling::isBitShuffle(const MachineInstr *MI,
2985       unsigned DefR) const {
2986   unsigned Opc = MI->getOpcode();
2987   switch (Opc) {
2988     case TargetOpcode::COPY:
2989     case Hexagon::S2_lsr_i_r:
2990     case Hexagon::S2_asr_i_r:
2991     case Hexagon::S2_asl_i_r:
2992     case Hexagon::S2_lsr_i_p:
2993     case Hexagon::S2_asr_i_p:
2994     case Hexagon::S2_asl_i_p:
2995     case Hexagon::S2_insert:
2996     case Hexagon::A2_or:
2997     case Hexagon::A2_orp:
2998     case Hexagon::A2_and:
2999     case Hexagon::A2_andp:
3000     case Hexagon::A2_combinew:
3001     case Hexagon::A4_combineri:
3002     case Hexagon::A4_combineir:
3003     case Hexagon::A2_combineii:
3004     case Hexagon::A4_combineii:
3005     case Hexagon::A2_combine_ll:
3006     case Hexagon::A2_combine_lh:
3007     case Hexagon::A2_combine_hl:
3008     case Hexagon::A2_combine_hh:
3009       return true;
3010   }
3011   return false;
3012 }
3013 
3014 bool HexagonLoopRescheduling::isStoreInput(const MachineInstr *MI,
3015       unsigned InpR) const {
3016   for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) {
3017     const MachineOperand &Op = MI->getOperand(i);
3018     if (!Op.isReg())
3019       continue;
3020     if (Op.getReg() == InpR)
3021       return i == n-1;
3022   }
3023   return false;
3024 }
3025 
3026 bool HexagonLoopRescheduling::isShuffleOf(unsigned OutR, unsigned InpR) const {
3027   if (!BTP->has(OutR) || !BTP->has(InpR))
3028     return false;
3029   const BitTracker::RegisterCell &OutC = BTP->lookup(OutR);
3030   for (unsigned i = 0, w = OutC.width(); i < w; ++i) {
3031     const BitTracker::BitValue &V = OutC[i];
3032     if (V.Type != BitTracker::BitValue::Ref)
3033       continue;
3034     if (V.RefI.Reg != InpR)
3035       return false;
3036   }
3037   return true;
3038 }
3039 
3040 bool HexagonLoopRescheduling::isSameShuffle(unsigned OutR1, unsigned InpR1,
3041       unsigned OutR2, unsigned &InpR2) const {
3042   if (!BTP->has(OutR1) || !BTP->has(InpR1) || !BTP->has(OutR2))
3043     return false;
3044   const BitTracker::RegisterCell &OutC1 = BTP->lookup(OutR1);
3045   const BitTracker::RegisterCell &OutC2 = BTP->lookup(OutR2);
3046   unsigned W = OutC1.width();
3047   unsigned MatchR = 0;
3048   if (W != OutC2.width())
3049     return false;
3050   for (unsigned i = 0; i < W; ++i) {
3051     const BitTracker::BitValue &V1 = OutC1[i], &V2 = OutC2[i];
3052     if (V1.Type != V2.Type || V1.Type == BitTracker::BitValue::One)
3053       return false;
3054     if (V1.Type != BitTracker::BitValue::Ref)
3055       continue;
3056     if (V1.RefI.Pos != V2.RefI.Pos)
3057       return false;
3058     if (V1.RefI.Reg != InpR1)
3059       return false;
3060     if (V2.RefI.Reg == 0 || V2.RefI.Reg == OutR2)
3061       return false;
3062     if (!MatchR)
3063       MatchR = V2.RefI.Reg;
3064     else if (V2.RefI.Reg != MatchR)
3065       return false;
3066   }
3067   InpR2 = MatchR;
3068   return true;
3069 }
3070 
3071 void HexagonLoopRescheduling::moveGroup(InstrGroup &G, MachineBasicBlock &LB,
3072       MachineBasicBlock &PB, MachineBasicBlock::iterator At, unsigned OldPhiR,
3073       unsigned NewPredR) {
3074   DenseMap<unsigned,unsigned> RegMap;
3075 
3076   const TargetRegisterClass *PhiRC = MRI->getRegClass(NewPredR);
3077   Register PhiR = MRI->createVirtualRegister(PhiRC);
3078   BuildMI(LB, At, At->getDebugLoc(), HII->get(TargetOpcode::PHI), PhiR)
3079     .addReg(NewPredR)
3080     .addMBB(&PB)
3081     .addReg(G.Inp.Reg)
3082     .addMBB(&LB);
3083   RegMap.insert(std::make_pair(G.Inp.Reg, PhiR));
3084 
3085   for (const MachineInstr *SI : llvm::reverse(G.Ins)) {
3086     unsigned DR = getDefReg(SI);
3087     const TargetRegisterClass *RC = MRI->getRegClass(DR);
3088     Register NewDR = MRI->createVirtualRegister(RC);
3089     DebugLoc DL = SI->getDebugLoc();
3090 
3091     auto MIB = BuildMI(LB, At, DL, HII->get(SI->getOpcode()), NewDR);
3092     for (const MachineOperand &Op : SI->operands()) {
3093       if (!Op.isReg()) {
3094         MIB.add(Op);
3095         continue;
3096       }
3097       if (!Op.isUse())
3098         continue;
3099       unsigned UseR = RegMap[Op.getReg()];
3100       MIB.addReg(UseR, 0, Op.getSubReg());
3101     }
3102     RegMap.insert(std::make_pair(DR, NewDR));
3103   }
3104 
3105   HBS::replaceReg(OldPhiR, RegMap[G.Out.Reg], *MRI);
3106 }
3107 
3108 bool HexagonLoopRescheduling::processLoop(LoopCand &C) {
3109   LLVM_DEBUG(dbgs() << "Processing loop in " << printMBBReference(*C.LB)
3110                     << "\n");
3111   std::vector<PhiInfo> Phis;
3112   for (auto &I : *C.LB) {
3113     if (!I.isPHI())
3114       break;
3115     unsigned PR = getDefReg(&I);
3116     if (isConst(PR))
3117       continue;
3118     bool BadUse = false, GoodUse = false;
3119     for (const MachineOperand &MO : MRI->use_operands(PR)) {
3120       const MachineInstr *UseI = MO.getParent();
3121       if (UseI->getParent() != C.LB) {
3122         BadUse = true;
3123         break;
3124       }
3125       if (isBitShuffle(UseI, PR) || isStoreInput(UseI, PR))
3126         GoodUse = true;
3127     }
3128     if (BadUse || !GoodUse)
3129       continue;
3130 
3131     Phis.push_back(PhiInfo(I, *C.LB));
3132   }
3133 
3134   LLVM_DEBUG({
3135     dbgs() << "Phis: {";
3136     for (auto &I : Phis) {
3137       dbgs() << ' ' << printReg(I.DefR, HRI) << "=phi("
3138              << printReg(I.PR.Reg, HRI, I.PR.Sub) << ":b" << I.PB->getNumber()
3139              << ',' << printReg(I.LR.Reg, HRI, I.LR.Sub) << ":b"
3140              << I.LB->getNumber() << ')';
3141     }
3142     dbgs() << " }\n";
3143   });
3144 
3145   if (Phis.empty())
3146     return false;
3147 
3148   bool Changed = false;
3149   InstrList ShufIns;
3150 
3151   // Go backwards in the block: for each bit shuffling instruction, check
3152   // if that instruction could potentially be moved to the front of the loop:
3153   // the output of the loop cannot be used in a non-shuffling instruction
3154   // in this loop.
3155   for (MachineInstr &MI : llvm::reverse(*C.LB)) {
3156     if (MI.isTerminator())
3157       continue;
3158     if (MI.isPHI())
3159       break;
3160 
3161     RegisterSet Defs;
3162     HBS::getInstrDefs(MI, Defs);
3163     if (Defs.count() != 1)
3164       continue;
3165     Register DefR = Defs.find_first();
3166     if (!DefR.isVirtual())
3167       continue;
3168     if (!isBitShuffle(&MI, DefR))
3169       continue;
3170 
3171     bool BadUse = false;
3172     for (auto UI = MRI->use_begin(DefR), UE = MRI->use_end(); UI != UE; ++UI) {
3173       MachineInstr *UseI = UI->getParent();
3174       if (UseI->getParent() == C.LB) {
3175         if (UseI->isPHI()) {
3176           // If the use is in a phi node in this loop, then it should be
3177           // the value corresponding to the back edge.
3178           unsigned Idx = UI.getOperandNo();
3179           if (UseI->getOperand(Idx+1).getMBB() != C.LB)
3180             BadUse = true;
3181         } else {
3182           if (!llvm::is_contained(ShufIns, UseI))
3183             BadUse = true;
3184         }
3185       } else {
3186         // There is a use outside of the loop, but there is no epilog block
3187         // suitable for a copy-out.
3188         if (C.EB == nullptr)
3189           BadUse = true;
3190       }
3191       if (BadUse)
3192         break;
3193     }
3194 
3195     if (BadUse)
3196       continue;
3197     ShufIns.push_back(&MI);
3198   }
3199 
3200   // Partition the list of shuffling instructions into instruction groups,
3201   // where each group has to be moved as a whole (i.e. a group is a chain of
3202   // dependent instructions). A group produces a single live output register,
3203   // which is meant to be the input of the loop phi node (although this is
3204   // not checked here yet). It also uses a single register as its input,
3205   // which is some value produced in the loop body. After moving the group
3206   // to the beginning of the loop, that input register would need to be
3207   // the loop-carried register (through a phi node) instead of the (currently
3208   // loop-carried) output register.
3209   using InstrGroupList = std::vector<InstrGroup>;
3210   InstrGroupList Groups;
3211 
3212   for (unsigned i = 0, n = ShufIns.size(); i < n; ++i) {
3213     MachineInstr *SI = ShufIns[i];
3214     if (SI == nullptr)
3215       continue;
3216 
3217     InstrGroup G;
3218     G.Ins.push_back(SI);
3219     G.Out.Reg = getDefReg(SI);
3220     RegisterSet Inputs;
3221     HBS::getInstrUses(*SI, Inputs);
3222 
3223     for (unsigned j = i+1; j < n; ++j) {
3224       MachineInstr *MI = ShufIns[j];
3225       if (MI == nullptr)
3226         continue;
3227       RegisterSet Defs;
3228       HBS::getInstrDefs(*MI, Defs);
3229       // If this instruction does not define any pending inputs, skip it.
3230       if (!Defs.intersects(Inputs))
3231         continue;
3232       // Otherwise, add it to the current group and remove the inputs that
3233       // are defined by MI.
3234       G.Ins.push_back(MI);
3235       Inputs.remove(Defs);
3236       // Then add all registers used by MI.
3237       HBS::getInstrUses(*MI, Inputs);
3238       ShufIns[j] = nullptr;
3239     }
3240 
3241     // Only add a group if it requires at most one register.
3242     if (Inputs.count() > 1)
3243       continue;
3244     auto LoopInpEq = [G] (const PhiInfo &P) -> bool {
3245       return G.Out.Reg == P.LR.Reg;
3246     };
3247     if (llvm::none_of(Phis, LoopInpEq))
3248       continue;
3249 
3250     G.Inp.Reg = Inputs.find_first();
3251     Groups.push_back(G);
3252   }
3253 
3254   LLVM_DEBUG({
3255     for (unsigned i = 0, n = Groups.size(); i < n; ++i) {
3256       InstrGroup &G = Groups[i];
3257       dbgs() << "Group[" << i << "] inp: "
3258              << printReg(G.Inp.Reg, HRI, G.Inp.Sub)
3259              << "  out: " << printReg(G.Out.Reg, HRI, G.Out.Sub) << "\n";
3260       for (const MachineInstr *MI : G.Ins)
3261         dbgs() << "  " << MI;
3262     }
3263   });
3264 
3265   for (InstrGroup &G : Groups) {
3266     if (!isShuffleOf(G.Out.Reg, G.Inp.Reg))
3267       continue;
3268     auto LoopInpEq = [G] (const PhiInfo &P) -> bool {
3269       return G.Out.Reg == P.LR.Reg;
3270     };
3271     auto F = llvm::find_if(Phis, LoopInpEq);
3272     if (F == Phis.end())
3273       continue;
3274     unsigned PrehR = 0;
3275     if (!isSameShuffle(G.Out.Reg, G.Inp.Reg, F->PR.Reg, PrehR)) {
3276       const MachineInstr *DefPrehR = MRI->getVRegDef(F->PR.Reg);
3277       unsigned Opc = DefPrehR->getOpcode();
3278       if (Opc != Hexagon::A2_tfrsi && Opc != Hexagon::A2_tfrpi)
3279         continue;
3280       if (!DefPrehR->getOperand(1).isImm())
3281         continue;
3282       if (DefPrehR->getOperand(1).getImm() != 0)
3283         continue;
3284       const TargetRegisterClass *RC = MRI->getRegClass(G.Inp.Reg);
3285       if (RC != MRI->getRegClass(F->PR.Reg)) {
3286         PrehR = MRI->createVirtualRegister(RC);
3287         unsigned TfrI = (RC == &Hexagon::IntRegsRegClass) ? Hexagon::A2_tfrsi
3288                                                           : Hexagon::A2_tfrpi;
3289         auto T = C.PB->getFirstTerminator();
3290         DebugLoc DL = (T != C.PB->end()) ? T->getDebugLoc() : DebugLoc();
3291         BuildMI(*C.PB, T, DL, HII->get(TfrI), PrehR)
3292           .addImm(0);
3293       } else {
3294         PrehR = F->PR.Reg;
3295       }
3296     }
3297     // isSameShuffle could match with PrehR being of a wider class than
3298     // G.Inp.Reg, for example if G shuffles the low 32 bits of its input,
3299     // it would match for the input being a 32-bit register, and PrehR
3300     // being a 64-bit register (where the low 32 bits match). This could
3301     // be handled, but for now skip these cases.
3302     if (MRI->getRegClass(PrehR) != MRI->getRegClass(G.Inp.Reg))
3303       continue;
3304     moveGroup(G, *F->LB, *F->PB, F->LB->getFirstNonPHI(), F->DefR, PrehR);
3305     Changed = true;
3306   }
3307 
3308   return Changed;
3309 }
3310 
3311 bool HexagonLoopRescheduling::runOnMachineFunction(MachineFunction &MF) {
3312   if (skipFunction(MF.getFunction()))
3313     return false;
3314 
3315   auto &HST = MF.getSubtarget<HexagonSubtarget>();
3316   HII = HST.getInstrInfo();
3317   HRI = HST.getRegisterInfo();
3318   MRI = &MF.getRegInfo();
3319   const HexagonEvaluator HE(*HRI, *MRI, *HII, MF);
3320   BitTracker BT(HE, MF);
3321   LLVM_DEBUG(BT.trace(true));
3322   BT.run();
3323   BTP = &BT;
3324 
3325   std::vector<LoopCand> Cand;
3326 
3327   for (auto &B : MF) {
3328     if (B.pred_size() != 2 || B.succ_size() != 2)
3329       continue;
3330     MachineBasicBlock *PB = nullptr;
3331     bool IsLoop = false;
3332     for (MachineBasicBlock *Pred : B.predecessors()) {
3333       if (Pred != &B)
3334         PB = Pred;
3335       else
3336         IsLoop = true;
3337     }
3338     if (!IsLoop)
3339       continue;
3340 
3341     MachineBasicBlock *EB = nullptr;
3342     for (MachineBasicBlock *Succ : B.successors()) {
3343       if (Succ == &B)
3344         continue;
3345       // Set EP to the epilog block, if it has only 1 predecessor (i.e. the
3346       // edge from B to EP is non-critical.
3347       if (Succ->pred_size() == 1)
3348         EB = Succ;
3349       break;
3350     }
3351 
3352     Cand.push_back(LoopCand(&B, PB, EB));
3353   }
3354 
3355   bool Changed = false;
3356   for (auto &C : Cand)
3357     Changed |= processLoop(C);
3358 
3359   return Changed;
3360 }
3361 
3362 //===----------------------------------------------------------------------===//
3363 //                         Public Constructor Functions
3364 //===----------------------------------------------------------------------===//
3365 
3366 FunctionPass *llvm::createHexagonLoopRescheduling() {
3367   return new HexagonLoopRescheduling();
3368 }
3369 
3370 FunctionPass *llvm::createHexagonBitSimplify() {
3371   return new HexagonBitSimplify();
3372 }
3373