xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp (revision 43e29d03f416d7dda52112a29600a7c82ee1a91e)
1 //===- AArch64MIPeepholeOpt.cpp - AArch64 MI peephole optimization pass ---===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass performs below peephole optimizations on MIR level.
10 //
11 // 1. MOVi32imm + ANDWrr ==> ANDWri + ANDWri
12 //    MOVi64imm + ANDXrr ==> ANDXri + ANDXri
13 //
14 // 2. MOVi32imm + ADDWrr ==> ADDWri + ADDWri
15 //    MOVi64imm + ADDXrr ==> ADDXri + ADDXri
16 //
17 // 3. MOVi32imm + SUBWrr ==> SUBWri + SUBWri
18 //    MOVi64imm + SUBXrr ==> SUBXri + SUBXri
19 //
20 //    The mov pseudo instruction could be expanded to multiple mov instructions
21 //    later. In this case, we could try to split the constant operand of mov
22 //    instruction into two immediates which can be directly encoded into
23 //    *Wri/*Xri instructions. It makes two AND/ADD/SUB instructions instead of
24 //    multiple `mov` + `and/add/sub` instructions.
25 //
26 // 4. Remove redundant ORRWrs which is generated by zero-extend.
27 //
28 //    %3:gpr32 = ORRWrs $wzr, %2, 0
29 //    %4:gpr64 = SUBREG_TO_REG 0, %3, %subreg.sub_32
30 //
31 //    If AArch64's 32-bit form of instruction defines the source operand of
32 //    ORRWrs, we can remove the ORRWrs because the upper 32 bits of the source
33 //    operand are set to zero.
34 //
35 // 5. %reg = INSERT_SUBREG %reg(tied-def 0), %subreg, subidx
36 //     ==> %reg:subidx =  SUBREG_TO_REG 0, %subreg, subidx
37 //
38 //===----------------------------------------------------------------------===//
39 
40 #include "AArch64ExpandImm.h"
41 #include "AArch64InstrInfo.h"
42 #include "MCTargetDesc/AArch64AddressingModes.h"
43 #include "llvm/CodeGen/MachineDominators.h"
44 #include "llvm/CodeGen/MachineLoopInfo.h"
45 
46 using namespace llvm;
47 
48 #define DEBUG_TYPE "aarch64-mi-peephole-opt"
49 
50 namespace {
51 
52 struct AArch64MIPeepholeOpt : public MachineFunctionPass {
53   static char ID;
54 
55   AArch64MIPeepholeOpt() : MachineFunctionPass(ID) {
56     initializeAArch64MIPeepholeOptPass(*PassRegistry::getPassRegistry());
57   }
58 
59   const AArch64InstrInfo *TII;
60   const AArch64RegisterInfo *TRI;
61   MachineLoopInfo *MLI;
62   MachineRegisterInfo *MRI;
63 
64   using OpcodePair = std::pair<unsigned, unsigned>;
65   template <typename T>
66   using SplitAndOpcFunc =
67       std::function<std::optional<OpcodePair>(T, unsigned, T &, T &)>;
68   using BuildMIFunc =
69       std::function<void(MachineInstr &, OpcodePair, unsigned, unsigned,
70                          Register, Register, Register)>;
71 
72   /// For instructions where an immediate operand could be split into two
73   /// separate immediate instructions, use the splitTwoPartImm two handle the
74   /// optimization.
75   ///
76   /// To implement, the following function types must be passed to
77   /// splitTwoPartImm. A SplitAndOpcFunc must be implemented that determines if
78   /// splitting the immediate is valid and returns the associated new opcode. A
79   /// BuildMIFunc must be implemented to build the two immediate instructions.
80   ///
81   /// Example Pattern (where IMM would require 2+ MOV instructions):
82   ///     %dst = <Instr>rr %src IMM [...]
83   /// becomes:
84   ///     %tmp = <Instr>ri %src (encode half IMM) [...]
85   ///     %dst = <Instr>ri %tmp (encode half IMM) [...]
86   template <typename T>
87   bool splitTwoPartImm(MachineInstr &MI,
88                        SplitAndOpcFunc<T> SplitAndOpc, BuildMIFunc BuildInstr);
89 
90   bool checkMovImmInstr(MachineInstr &MI, MachineInstr *&MovMI,
91                         MachineInstr *&SubregToRegMI);
92 
93   template <typename T>
94   bool visitADDSUB(unsigned PosOpc, unsigned NegOpc, MachineInstr &MI);
95   template <typename T>
96   bool visitADDSSUBS(OpcodePair PosOpcs, OpcodePair NegOpcs, MachineInstr &MI);
97 
98   template <typename T>
99   bool visitAND(unsigned Opc, MachineInstr &MI);
100   bool visitORR(MachineInstr &MI);
101   bool visitINSERT(MachineInstr &MI);
102   bool runOnMachineFunction(MachineFunction &MF) override;
103 
104   StringRef getPassName() const override {
105     return "AArch64 MI Peephole Optimization pass";
106   }
107 
108   void getAnalysisUsage(AnalysisUsage &AU) const override {
109     AU.setPreservesCFG();
110     AU.addRequired<MachineLoopInfo>();
111     MachineFunctionPass::getAnalysisUsage(AU);
112   }
113 };
114 
115 char AArch64MIPeepholeOpt::ID = 0;
116 
117 } // end anonymous namespace
118 
119 INITIALIZE_PASS(AArch64MIPeepholeOpt, "aarch64-mi-peephole-opt",
120                 "AArch64 MI Peephole Optimization", false, false)
121 
122 template <typename T>
123 static bool splitBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc, T &Imm2Enc) {
124   T UImm = static_cast<T>(Imm);
125   if (AArch64_AM::isLogicalImmediate(UImm, RegSize))
126     return false;
127 
128   // If this immediate can be handled by one instruction, do not split it.
129   SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
130   AArch64_IMM::expandMOVImm(UImm, RegSize, Insn);
131   if (Insn.size() == 1)
132     return false;
133 
134   // The bitmask immediate consists of consecutive ones.  Let's say there is
135   // constant 0b00000000001000000000010000000000 which does not consist of
136   // consecutive ones. We can split it in to two bitmask immediate like
137   // 0b00000000001111111111110000000000 and 0b11111111111000000000011111111111.
138   // If we do AND with these two bitmask immediate, we can see original one.
139   unsigned LowestBitSet = countTrailingZeros(UImm);
140   unsigned HighestBitSet = Log2_64(UImm);
141 
142   // Create a mask which is filled with one from the position of lowest bit set
143   // to the position of highest bit set.
144   T NewImm1 = (static_cast<T>(2) << HighestBitSet) -
145               (static_cast<T>(1) << LowestBitSet);
146   // Create a mask which is filled with one outside the position of lowest bit
147   // set and the position of highest bit set.
148   T NewImm2 = UImm | ~NewImm1;
149 
150   // If the split value is not valid bitmask immediate, do not split this
151   // constant.
152   if (!AArch64_AM::isLogicalImmediate(NewImm2, RegSize))
153     return false;
154 
155   Imm1Enc = AArch64_AM::encodeLogicalImmediate(NewImm1, RegSize);
156   Imm2Enc = AArch64_AM::encodeLogicalImmediate(NewImm2, RegSize);
157   return true;
158 }
159 
160 template <typename T>
161 bool AArch64MIPeepholeOpt::visitAND(
162     unsigned Opc, MachineInstr &MI) {
163   // Try below transformation.
164   //
165   // MOVi32imm + ANDWrr ==> ANDWri + ANDWri
166   // MOVi64imm + ANDXrr ==> ANDXri + ANDXri
167   //
168   // The mov pseudo instruction could be expanded to multiple mov instructions
169   // later. Let's try to split the constant operand of mov instruction into two
170   // bitmask immediates. It makes only two AND instructions intead of multiple
171   // mov + and instructions.
172 
173   return splitTwoPartImm<T>(
174       MI,
175       [Opc](T Imm, unsigned RegSize, T &Imm0,
176             T &Imm1) -> std::optional<OpcodePair> {
177         if (splitBitmaskImm(Imm, RegSize, Imm0, Imm1))
178           return std::make_pair(Opc, Opc);
179         return std::nullopt;
180       },
181       [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
182                    unsigned Imm1, Register SrcReg, Register NewTmpReg,
183                    Register NewDstReg) {
184         DebugLoc DL = MI.getDebugLoc();
185         MachineBasicBlock *MBB = MI.getParent();
186         BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg)
187             .addReg(SrcReg)
188             .addImm(Imm0);
189         BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg)
190             .addReg(NewTmpReg)
191             .addImm(Imm1);
192       });
193 }
194 
195 bool AArch64MIPeepholeOpt::visitORR(MachineInstr &MI) {
196   // Check this ORR comes from below zero-extend pattern.
197   //
198   // def : Pat<(i64 (zext GPR32:$src)),
199   //           (SUBREG_TO_REG (i32 0), (ORRWrs WZR, GPR32:$src, 0), sub_32)>;
200   if (MI.getOperand(3).getImm() != 0)
201     return false;
202 
203   if (MI.getOperand(1).getReg() != AArch64::WZR)
204     return false;
205 
206   MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
207   if (!SrcMI)
208     return false;
209 
210   // From https://developer.arm.com/documentation/dui0801/b/BABBGCAC
211   //
212   // When you use the 32-bit form of an instruction, the upper 32 bits of the
213   // source registers are ignored and the upper 32 bits of the destination
214   // register are set to zero.
215   //
216   // If AArch64's 32-bit form of instruction defines the source operand of
217   // zero-extend, we do not need the zero-extend. Let's check the MI's opcode is
218   // real AArch64 instruction and if it is not, do not process the opcode
219   // conservatively.
220   if (SrcMI->getOpcode() == TargetOpcode::COPY &&
221       SrcMI->getOperand(1).getReg().isVirtual()) {
222     const TargetRegisterClass *RC =
223         MRI->getRegClass(SrcMI->getOperand(1).getReg());
224 
225     // A COPY from an FPR will become a FMOVSWr, so do so now so that we know
226     // that the upper bits are zero.
227     if (RC != &AArch64::FPR32RegClass &&
228         ((RC != &AArch64::FPR64RegClass && RC != &AArch64::FPR128RegClass) ||
229          SrcMI->getOperand(1).getSubReg() != AArch64::ssub))
230       return false;
231     Register CpySrc = SrcMI->getOperand(1).getReg();
232     if (SrcMI->getOperand(1).getSubReg() == AArch64::ssub) {
233       CpySrc = MRI->createVirtualRegister(&AArch64::FPR32RegClass);
234       BuildMI(*SrcMI->getParent(), SrcMI, SrcMI->getDebugLoc(),
235               TII->get(TargetOpcode::COPY), CpySrc)
236           .add(SrcMI->getOperand(1));
237     }
238     BuildMI(*SrcMI->getParent(), SrcMI, SrcMI->getDebugLoc(),
239             TII->get(AArch64::FMOVSWr), SrcMI->getOperand(0).getReg())
240         .addReg(CpySrc);
241     SrcMI->eraseFromParent();
242   }
243   else if (SrcMI->getOpcode() <= TargetOpcode::GENERIC_OP_END)
244     return false;
245 
246   Register DefReg = MI.getOperand(0).getReg();
247   Register SrcReg = MI.getOperand(2).getReg();
248   MRI->replaceRegWith(DefReg, SrcReg);
249   MRI->clearKillFlags(SrcReg);
250   LLVM_DEBUG(dbgs() << "Removed: " << MI << "\n");
251   MI.eraseFromParent();
252 
253   return true;
254 }
255 
256 bool AArch64MIPeepholeOpt::visitINSERT(MachineInstr &MI) {
257   // Check this INSERT_SUBREG comes from below zero-extend pattern.
258   //
259   // From %reg = INSERT_SUBREG %reg(tied-def 0), %subreg, subidx
260   // To   %reg:subidx =  SUBREG_TO_REG 0, %subreg, subidx
261   //
262   // We're assuming the first operand to INSERT_SUBREG is irrelevant because a
263   // COPY would destroy the upper part of the register anyway
264   if (!MI.isRegTiedToDefOperand(1))
265     return false;
266 
267   Register DstReg = MI.getOperand(0).getReg();
268   const TargetRegisterClass *RC = MRI->getRegClass(DstReg);
269   MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
270   if (!SrcMI)
271     return false;
272 
273   // From https://developer.arm.com/documentation/dui0801/b/BABBGCAC
274   //
275   // When you use the 32-bit form of an instruction, the upper 32 bits of the
276   // source registers are ignored and the upper 32 bits of the destination
277   // register are set to zero.
278   //
279   // If AArch64's 32-bit form of instruction defines the source operand of
280   // zero-extend, we do not need the zero-extend. Let's check the MI's opcode is
281   // real AArch64 instruction and if it is not, do not process the opcode
282   // conservatively.
283   if ((SrcMI->getOpcode() <= TargetOpcode::GENERIC_OP_END) ||
284       !AArch64::GPR64allRegClass.hasSubClassEq(RC))
285     return false;
286 
287   // Build a SUBREG_TO_REG instruction
288   MachineInstr *SubregMI =
289       BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
290               TII->get(TargetOpcode::SUBREG_TO_REG), DstReg)
291           .addImm(0)
292           .add(MI.getOperand(2))
293           .add(MI.getOperand(3));
294   LLVM_DEBUG(dbgs() << MI << "  replace by:\n: " << *SubregMI << "\n");
295   (void)SubregMI;
296   MI.eraseFromParent();
297 
298   return true;
299 }
300 
301 template <typename T>
302 static bool splitAddSubImm(T Imm, unsigned RegSize, T &Imm0, T &Imm1) {
303   // The immediate must be in the form of ((imm0 << 12) + imm1), in which both
304   // imm0 and imm1 are non-zero 12-bit unsigned int.
305   if ((Imm & 0xfff000) == 0 || (Imm & 0xfff) == 0 ||
306       (Imm & ~static_cast<T>(0xffffff)) != 0)
307     return false;
308 
309   // The immediate can not be composed via a single instruction.
310   SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
311   AArch64_IMM::expandMOVImm(Imm, RegSize, Insn);
312   if (Insn.size() == 1)
313     return false;
314 
315   // Split Imm into (Imm0 << 12) + Imm1;
316   Imm0 = (Imm >> 12) & 0xfff;
317   Imm1 = Imm & 0xfff;
318   return true;
319 }
320 
321 template <typename T>
322 bool AArch64MIPeepholeOpt::visitADDSUB(
323     unsigned PosOpc, unsigned NegOpc, MachineInstr &MI) {
324   // Try below transformation.
325   //
326   // MOVi32imm + ADDWrr ==> ADDWri + ADDWri
327   // MOVi64imm + ADDXrr ==> ADDXri + ADDXri
328   //
329   // MOVi32imm + SUBWrr ==> SUBWri + SUBWri
330   // MOVi64imm + SUBXrr ==> SUBXri + SUBXri
331   //
332   // The mov pseudo instruction could be expanded to multiple mov instructions
333   // later. Let's try to split the constant operand of mov instruction into two
334   // legal add/sub immediates. It makes only two ADD/SUB instructions intead of
335   // multiple `mov` + `and/sub` instructions.
336 
337   return splitTwoPartImm<T>(
338       MI,
339       [PosOpc, NegOpc](T Imm, unsigned RegSize, T &Imm0,
340                        T &Imm1) -> std::optional<OpcodePair> {
341         if (splitAddSubImm(Imm, RegSize, Imm0, Imm1))
342           return std::make_pair(PosOpc, PosOpc);
343         if (splitAddSubImm(-Imm, RegSize, Imm0, Imm1))
344           return std::make_pair(NegOpc, NegOpc);
345         return std::nullopt;
346       },
347       [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
348                    unsigned Imm1, Register SrcReg, Register NewTmpReg,
349                    Register NewDstReg) {
350         DebugLoc DL = MI.getDebugLoc();
351         MachineBasicBlock *MBB = MI.getParent();
352         BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg)
353             .addReg(SrcReg)
354             .addImm(Imm0)
355             .addImm(12);
356         BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg)
357             .addReg(NewTmpReg)
358             .addImm(Imm1)
359             .addImm(0);
360       });
361 }
362 
363 template <typename T>
364 bool AArch64MIPeepholeOpt::visitADDSSUBS(
365     OpcodePair PosOpcs, OpcodePair NegOpcs, MachineInstr &MI) {
366   // Try the same transformation as ADDSUB but with additional requirement
367   // that the condition code usages are only for Equal and Not Equal
368   return splitTwoPartImm<T>(
369       MI,
370       [PosOpcs, NegOpcs, &MI, &TRI = TRI,
371        &MRI = MRI](T Imm, unsigned RegSize, T &Imm0,
372                    T &Imm1) -> std::optional<OpcodePair> {
373         OpcodePair OP;
374         if (splitAddSubImm(Imm, RegSize, Imm0, Imm1))
375           OP = PosOpcs;
376         else if (splitAddSubImm(-Imm, RegSize, Imm0, Imm1))
377           OP = NegOpcs;
378         else
379           return std::nullopt;
380         // Check conditional uses last since it is expensive for scanning
381         // proceeding instructions
382         MachineInstr &SrcMI = *MRI->getUniqueVRegDef(MI.getOperand(1).getReg());
383         std::optional<UsedNZCV> NZCVUsed = examineCFlagsUse(SrcMI, MI, *TRI);
384         if (!NZCVUsed || NZCVUsed->C || NZCVUsed->V)
385           return std::nullopt;
386         return OP;
387       },
388       [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
389                    unsigned Imm1, Register SrcReg, Register NewTmpReg,
390                    Register NewDstReg) {
391         DebugLoc DL = MI.getDebugLoc();
392         MachineBasicBlock *MBB = MI.getParent();
393         BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg)
394             .addReg(SrcReg)
395             .addImm(Imm0)
396             .addImm(12);
397         BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg)
398             .addReg(NewTmpReg)
399             .addImm(Imm1)
400             .addImm(0);
401       });
402 }
403 
404 // Checks if the corresponding MOV immediate instruction is applicable for
405 // this peephole optimization.
406 bool AArch64MIPeepholeOpt::checkMovImmInstr(MachineInstr &MI,
407                                             MachineInstr *&MovMI,
408                                             MachineInstr *&SubregToRegMI) {
409   // Check whether current MBB is in loop and the AND is loop invariant.
410   MachineBasicBlock *MBB = MI.getParent();
411   MachineLoop *L = MLI->getLoopFor(MBB);
412   if (L && !L->isLoopInvariant(MI))
413     return false;
414 
415   // Check whether current MI's operand is MOV with immediate.
416   MovMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
417   if (!MovMI)
418     return false;
419 
420   // If it is SUBREG_TO_REG, check its operand.
421   SubregToRegMI = nullptr;
422   if (MovMI->getOpcode() == TargetOpcode::SUBREG_TO_REG) {
423     SubregToRegMI = MovMI;
424     MovMI = MRI->getUniqueVRegDef(MovMI->getOperand(2).getReg());
425     if (!MovMI)
426       return false;
427   }
428 
429   if (MovMI->getOpcode() != AArch64::MOVi32imm &&
430       MovMI->getOpcode() != AArch64::MOVi64imm)
431     return false;
432 
433   // If the MOV has multiple uses, do not split the immediate because it causes
434   // more instructions.
435   if (!MRI->hasOneUse(MovMI->getOperand(0).getReg()))
436     return false;
437   if (SubregToRegMI && !MRI->hasOneUse(SubregToRegMI->getOperand(0).getReg()))
438     return false;
439 
440   // It is OK to perform this peephole optimization.
441   return true;
442 }
443 
444 template <typename T>
445 bool AArch64MIPeepholeOpt::splitTwoPartImm(
446     MachineInstr &MI,
447     SplitAndOpcFunc<T> SplitAndOpc, BuildMIFunc BuildInstr) {
448   unsigned RegSize = sizeof(T) * 8;
449   assert((RegSize == 32 || RegSize == 64) &&
450          "Invalid RegSize for legal immediate peephole optimization");
451 
452   // Perform several essential checks against current MI.
453   MachineInstr *MovMI, *SubregToRegMI;
454   if (!checkMovImmInstr(MI, MovMI, SubregToRegMI))
455     return false;
456 
457   // Split the immediate to Imm0 and Imm1, and calculate the Opcode.
458   T Imm = static_cast<T>(MovMI->getOperand(1).getImm()), Imm0, Imm1;
459   // For the 32 bit form of instruction, the upper 32 bits of the destination
460   // register are set to zero. If there is SUBREG_TO_REG, set the upper 32 bits
461   // of Imm to zero. This is essential if the Immediate value was a negative
462   // number since it was sign extended when we assign to the 64-bit Imm.
463   if (SubregToRegMI)
464     Imm &= 0xFFFFFFFF;
465   OpcodePair Opcode;
466   if (auto R = SplitAndOpc(Imm, RegSize, Imm0, Imm1))
467     Opcode = *R;
468   else
469     return false;
470 
471   // Create new MIs using the first and second opcodes. Opcodes might differ for
472   // flag setting operations that should only set flags on second instruction.
473   // NewTmpReg = Opcode.first SrcReg Imm0
474   // NewDstReg = Opcode.second NewTmpReg Imm1
475 
476   // Determine register classes for destinations and register operands
477   MachineFunction *MF = MI.getMF();
478   const TargetRegisterClass *FirstInstrDstRC =
479       TII->getRegClass(TII->get(Opcode.first), 0, TRI, *MF);
480   const TargetRegisterClass *FirstInstrOperandRC =
481       TII->getRegClass(TII->get(Opcode.first), 1, TRI, *MF);
482   const TargetRegisterClass *SecondInstrDstRC =
483       (Opcode.first == Opcode.second)
484           ? FirstInstrDstRC
485           : TII->getRegClass(TII->get(Opcode.second), 0, TRI, *MF);
486   const TargetRegisterClass *SecondInstrOperandRC =
487       (Opcode.first == Opcode.second)
488           ? FirstInstrOperandRC
489           : TII->getRegClass(TII->get(Opcode.second), 1, TRI, *MF);
490 
491   // Get old registers destinations and new register destinations
492   Register DstReg = MI.getOperand(0).getReg();
493   Register SrcReg = MI.getOperand(1).getReg();
494   Register NewTmpReg = MRI->createVirtualRegister(FirstInstrDstRC);
495   // In the situation that DstReg is not Virtual (likely WZR or XZR), we want to
496   // reuse that same destination register.
497   Register NewDstReg = DstReg.isVirtual()
498                            ? MRI->createVirtualRegister(SecondInstrDstRC)
499                            : DstReg;
500 
501   // Constrain registers based on their new uses
502   MRI->constrainRegClass(SrcReg, FirstInstrOperandRC);
503   MRI->constrainRegClass(NewTmpReg, SecondInstrOperandRC);
504   if (DstReg != NewDstReg)
505     MRI->constrainRegClass(NewDstReg, MRI->getRegClass(DstReg));
506 
507   // Call the delegating operation to build the instruction
508   BuildInstr(MI, Opcode, Imm0, Imm1, SrcReg, NewTmpReg, NewDstReg);
509 
510   // replaceRegWith changes MI's definition register. Keep it for SSA form until
511   // deleting MI. Only if we made a new destination register.
512   if (DstReg != NewDstReg) {
513     MRI->replaceRegWith(DstReg, NewDstReg);
514     MI.getOperand(0).setReg(DstReg);
515   }
516 
517   // Record the MIs need to be removed.
518   MI.eraseFromParent();
519   if (SubregToRegMI)
520     SubregToRegMI->eraseFromParent();
521   MovMI->eraseFromParent();
522 
523   return true;
524 }
525 
526 bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
527   if (skipFunction(MF.getFunction()))
528     return false;
529 
530   TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
531   TRI = static_cast<const AArch64RegisterInfo *>(
532       MF.getSubtarget().getRegisterInfo());
533   MLI = &getAnalysis<MachineLoopInfo>();
534   MRI = &MF.getRegInfo();
535 
536   assert(MRI->isSSA() && "Expected to be run on SSA form!");
537 
538   bool Changed = false;
539 
540   for (MachineBasicBlock &MBB : MF) {
541     for (MachineInstr &MI : make_early_inc_range(MBB)) {
542       switch (MI.getOpcode()) {
543       default:
544         break;
545       case AArch64::INSERT_SUBREG:
546         Changed = visitINSERT(MI);
547         break;
548       case AArch64::ANDWrr:
549         Changed = visitAND<uint32_t>(AArch64::ANDWri, MI);
550         break;
551       case AArch64::ANDXrr:
552         Changed = visitAND<uint64_t>(AArch64::ANDXri, MI);
553         break;
554       case AArch64::ORRWrs:
555         Changed = visitORR(MI);
556         break;
557       case AArch64::ADDWrr:
558         Changed = visitADDSUB<uint32_t>(AArch64::ADDWri, AArch64::SUBWri, MI);
559         break;
560       case AArch64::SUBWrr:
561         Changed = visitADDSUB<uint32_t>(AArch64::SUBWri, AArch64::ADDWri, MI);
562         break;
563       case AArch64::ADDXrr:
564         Changed = visitADDSUB<uint64_t>(AArch64::ADDXri, AArch64::SUBXri, MI);
565         break;
566       case AArch64::SUBXrr:
567         Changed = visitADDSUB<uint64_t>(AArch64::SUBXri, AArch64::ADDXri, MI);
568         break;
569       case AArch64::ADDSWrr:
570         Changed = visitADDSSUBS<uint32_t>({AArch64::ADDWri, AArch64::ADDSWri},
571                                           {AArch64::SUBWri, AArch64::SUBSWri},
572                                           MI);
573         break;
574       case AArch64::SUBSWrr:
575         Changed = visitADDSSUBS<uint32_t>({AArch64::SUBWri, AArch64::SUBSWri},
576                                           {AArch64::ADDWri, AArch64::ADDSWri},
577                                           MI);
578         break;
579       case AArch64::ADDSXrr:
580         Changed = visitADDSSUBS<uint64_t>({AArch64::ADDXri, AArch64::ADDSXri},
581                                           {AArch64::SUBXri, AArch64::SUBSXri},
582                                           MI);
583         break;
584       case AArch64::SUBSXrr:
585         Changed = visitADDSSUBS<uint64_t>({AArch64::SUBXri, AArch64::SUBSXri},
586                                           {AArch64::ADDXri, AArch64::ADDSXri},
587                                           MI);
588         break;
589       }
590     }
591   }
592 
593   return Changed;
594 }
595 
596 FunctionPass *llvm::createAArch64MIPeepholeOptPass() {
597   return new AArch64MIPeepholeOpt();
598 }
599