xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp (revision 3dd5524264095ed8612c28908e13f80668eff2f9)
1 //===- AArch64MIPeepholeOpt.cpp - AArch64 MI peephole optimization pass ---===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass performs below peephole optimizations on MIR level.
10 //
11 // 1. MOVi32imm + ANDWrr ==> ANDWri + ANDWri
12 //    MOVi64imm + ANDXrr ==> ANDXri + ANDXri
13 //
// 2. MOVi32imm + ADDWrr ==> ADDWri + ADDWri
//    MOVi64imm + ADDXrr ==> ADDXri + ADDXri
16 //
// 3. MOVi32imm + SUBWrr ==> SUBWri + SUBWri
18 //    MOVi64imm + SUBXrr ==> SUBXri + SUBXri
19 //
20 //    The mov pseudo instruction could be expanded to multiple mov instructions
21 //    later. In this case, we could try to split the constant  operand of mov
22 //    instruction into two immediates which can be directly encoded into
23 //    *Wri/*Xri instructions. It makes two AND/ADD/SUB instructions instead of
24 //    multiple `mov` + `and/add/sub` instructions.
25 //
26 // 4. Remove redundant ORRWrs which is generated by zero-extend.
27 //
28 //    %3:gpr32 = ORRWrs $wzr, %2, 0
29 //    %4:gpr64 = SUBREG_TO_REG 0, %3, %subreg.sub_32
30 //
31 //    If AArch64's 32-bit form of instruction defines the source operand of
32 //    ORRWrs, we can remove the ORRWrs because the upper 32 bits of the source
33 //    operand are set to zero.
34 //
35 //===----------------------------------------------------------------------===//
36 
37 #include "AArch64ExpandImm.h"
38 #include "AArch64InstrInfo.h"
39 #include "MCTargetDesc/AArch64AddressingModes.h"
40 #include "llvm/ADT/Optional.h"
41 #include "llvm/ADT/SetVector.h"
42 #include "llvm/CodeGen/MachineDominators.h"
43 #include "llvm/CodeGen/MachineLoopInfo.h"
44 
45 using namespace llvm;
46 
47 #define DEBUG_TYPE "aarch64-mi-peephole-opt"
48 
49 namespace {
50 
51 struct AArch64MIPeepholeOpt : public MachineFunctionPass {
52   static char ID;
53 
54   AArch64MIPeepholeOpt() : MachineFunctionPass(ID) {
55     initializeAArch64MIPeepholeOptPass(*PassRegistry::getPassRegistry());
56   }
57 
58   const AArch64InstrInfo *TII;
59   const AArch64RegisterInfo *TRI;
60   MachineLoopInfo *MLI;
61   MachineRegisterInfo *MRI;
62 
63   using OpcodePair = std::pair<unsigned, unsigned>;
64   template <typename T>
65   using SplitAndOpcFunc =
66       std::function<Optional<OpcodePair>(T, unsigned, T &, T &)>;
67   using BuildMIFunc =
68       std::function<void(MachineInstr &, OpcodePair, unsigned, unsigned,
69                          Register, Register, Register)>;
70 
71   /// For instructions where an immediate operand could be split into two
72   /// separate immediate instructions, use the splitTwoPartImm two handle the
73   /// optimization.
74   ///
75   /// To implement, the following function types must be passed to
76   /// splitTwoPartImm. A SplitAndOpcFunc must be implemented that determines if
77   /// splitting the immediate is valid and returns the associated new opcode. A
78   /// BuildMIFunc must be implemented to build the two immediate instructions.
79   ///
80   /// Example Pattern (where IMM would require 2+ MOV instructions):
81   ///     %dst = <Instr>rr %src IMM [...]
82   /// becomes:
83   ///     %tmp = <Instr>ri %src (encode half IMM) [...]
84   ///     %dst = <Instr>ri %tmp (encode half IMM) [...]
85   template <typename T>
86   bool splitTwoPartImm(MachineInstr &MI,
87                        SplitAndOpcFunc<T> SplitAndOpc, BuildMIFunc BuildInstr);
88 
89   bool checkMovImmInstr(MachineInstr &MI, MachineInstr *&MovMI,
90                         MachineInstr *&SubregToRegMI);
91 
92   template <typename T>
93   bool visitADDSUB(unsigned PosOpc, unsigned NegOpc, MachineInstr &MI);
94   template <typename T>
95   bool visitADDSSUBS(OpcodePair PosOpcs, OpcodePair NegOpcs, MachineInstr &MI);
96 
97   template <typename T>
98   bool visitAND(unsigned Opc, MachineInstr &MI);
99   bool visitORR(MachineInstr &MI);
100   bool runOnMachineFunction(MachineFunction &MF) override;
101 
102   StringRef getPassName() const override {
103     return "AArch64 MI Peephole Optimization pass";
104   }
105 
106   void getAnalysisUsage(AnalysisUsage &AU) const override {
107     AU.setPreservesCFG();
108     AU.addRequired<MachineLoopInfo>();
109     MachineFunctionPass::getAnalysisUsage(AU);
110   }
111 };
112 
113 char AArch64MIPeepholeOpt::ID = 0;
114 
115 } // end anonymous namespace
116 
117 INITIALIZE_PASS(AArch64MIPeepholeOpt, "aarch64-mi-peephole-opt",
118                 "AArch64 MI Peephole Optimization", false, false)
119 
120 template <typename T>
121 static bool splitBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc, T &Imm2Enc) {
122   T UImm = static_cast<T>(Imm);
123   if (AArch64_AM::isLogicalImmediate(UImm, RegSize))
124     return false;
125 
126   // If this immediate can be handled by one instruction, do not split it.
127   SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
128   AArch64_IMM::expandMOVImm(UImm, RegSize, Insn);
129   if (Insn.size() == 1)
130     return false;
131 
132   // The bitmask immediate consists of consecutive ones.  Let's say there is
133   // constant 0b00000000001000000000010000000000 which does not consist of
134   // consecutive ones. We can split it in to two bitmask immediate like
135   // 0b00000000001111111111110000000000 and 0b11111111111000000000011111111111.
136   // If we do AND with these two bitmask immediate, we can see original one.
137   unsigned LowestBitSet = countTrailingZeros(UImm);
138   unsigned HighestBitSet = Log2_64(UImm);
139 
140   // Create a mask which is filled with one from the position of lowest bit set
141   // to the position of highest bit set.
142   T NewImm1 = (static_cast<T>(2) << HighestBitSet) -
143               (static_cast<T>(1) << LowestBitSet);
144   // Create a mask which is filled with one outside the position of lowest bit
145   // set and the position of highest bit set.
146   T NewImm2 = UImm | ~NewImm1;
147 
148   // If the split value is not valid bitmask immediate, do not split this
149   // constant.
150   if (!AArch64_AM::isLogicalImmediate(NewImm2, RegSize))
151     return false;
152 
153   Imm1Enc = AArch64_AM::encodeLogicalImmediate(NewImm1, RegSize);
154   Imm2Enc = AArch64_AM::encodeLogicalImmediate(NewImm2, RegSize);
155   return true;
156 }
157 
158 template <typename T>
159 bool AArch64MIPeepholeOpt::visitAND(
160     unsigned Opc, MachineInstr &MI) {
161   // Try below transformation.
162   //
163   // MOVi32imm + ANDWrr ==> ANDWri + ANDWri
164   // MOVi64imm + ANDXrr ==> ANDXri + ANDXri
165   //
166   // The mov pseudo instruction could be expanded to multiple mov instructions
167   // later. Let's try to split the constant operand of mov instruction into two
168   // bitmask immediates. It makes only two AND instructions intead of multiple
169   // mov + and instructions.
170 
171   return splitTwoPartImm<T>(
172       MI,
173       [Opc](T Imm, unsigned RegSize, T &Imm0, T &Imm1) -> Optional<OpcodePair> {
174         if (splitBitmaskImm(Imm, RegSize, Imm0, Imm1))
175           return std::make_pair(Opc, Opc);
176         return None;
177       },
178       [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
179                    unsigned Imm1, Register SrcReg, Register NewTmpReg,
180                    Register NewDstReg) {
181         DebugLoc DL = MI.getDebugLoc();
182         MachineBasicBlock *MBB = MI.getParent();
183         BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg)
184             .addReg(SrcReg)
185             .addImm(Imm0);
186         BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg)
187             .addReg(NewTmpReg)
188             .addImm(Imm1);
189       });
190 }
191 
192 bool AArch64MIPeepholeOpt::visitORR(MachineInstr &MI) {
193   // Check this ORR comes from below zero-extend pattern.
194   //
195   // def : Pat<(i64 (zext GPR32:$src)),
196   //           (SUBREG_TO_REG (i32 0), (ORRWrs WZR, GPR32:$src, 0), sub_32)>;
197   if (MI.getOperand(3).getImm() != 0)
198     return false;
199 
200   if (MI.getOperand(1).getReg() != AArch64::WZR)
201     return false;
202 
203   MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
204   if (!SrcMI)
205     return false;
206 
207   // From https://developer.arm.com/documentation/dui0801/b/BABBGCAC
208   //
209   // When you use the 32-bit form of an instruction, the upper 32 bits of the
210   // source registers are ignored and the upper 32 bits of the destination
211   // register are set to zero.
212   //
213   // If AArch64's 32-bit form of instruction defines the source operand of
214   // zero-extend, we do not need the zero-extend. Let's check the MI's opcode is
215   // real AArch64 instruction and if it is not, do not process the opcode
216   // conservatively.
217   if (SrcMI->getOpcode() == TargetOpcode::COPY &&
218       SrcMI->getOperand(1).getReg().isVirtual()) {
219     const TargetRegisterClass *RC =
220         MRI->getRegClass(SrcMI->getOperand(1).getReg());
221 
222     // A COPY from an FPR will become a FMOVSWr, so do so now so that we know
223     // that the upper bits are zero.
224     if (RC != &AArch64::FPR32RegClass &&
225         ((RC != &AArch64::FPR64RegClass && RC != &AArch64::FPR128RegClass) ||
226          SrcMI->getOperand(1).getSubReg() != AArch64::ssub))
227       return false;
228     Register CpySrc = SrcMI->getOperand(1).getReg();
229     if (SrcMI->getOperand(1).getSubReg() == AArch64::ssub) {
230       CpySrc = MRI->createVirtualRegister(&AArch64::FPR32RegClass);
231       BuildMI(*SrcMI->getParent(), SrcMI, SrcMI->getDebugLoc(),
232               TII->get(TargetOpcode::COPY), CpySrc)
233           .add(SrcMI->getOperand(1));
234     }
235     BuildMI(*SrcMI->getParent(), SrcMI, SrcMI->getDebugLoc(),
236             TII->get(AArch64::FMOVSWr), SrcMI->getOperand(0).getReg())
237         .addReg(CpySrc);
238     SrcMI->eraseFromParent();
239   }
240   else if (SrcMI->getOpcode() <= TargetOpcode::GENERIC_OP_END)
241     return false;
242 
243   Register DefReg = MI.getOperand(0).getReg();
244   Register SrcReg = MI.getOperand(2).getReg();
245   MRI->replaceRegWith(DefReg, SrcReg);
246   MRI->clearKillFlags(SrcReg);
247   LLVM_DEBUG(dbgs() << "Removed: " << MI << "\n");
248   MI.eraseFromParent();
249 
250   return true;
251 }
252 
253 template <typename T>
254 static bool splitAddSubImm(T Imm, unsigned RegSize, T &Imm0, T &Imm1) {
255   // The immediate must be in the form of ((imm0 << 12) + imm1), in which both
256   // imm0 and imm1 are non-zero 12-bit unsigned int.
257   if ((Imm & 0xfff000) == 0 || (Imm & 0xfff) == 0 ||
258       (Imm & ~static_cast<T>(0xffffff)) != 0)
259     return false;
260 
261   // The immediate can not be composed via a single instruction.
262   SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
263   AArch64_IMM::expandMOVImm(Imm, RegSize, Insn);
264   if (Insn.size() == 1)
265     return false;
266 
267   // Split Imm into (Imm0 << 12) + Imm1;
268   Imm0 = (Imm >> 12) & 0xfff;
269   Imm1 = Imm & 0xfff;
270   return true;
271 }
272 
273 template <typename T>
274 bool AArch64MIPeepholeOpt::visitADDSUB(
275     unsigned PosOpc, unsigned NegOpc, MachineInstr &MI) {
276   // Try below transformation.
277   //
278   // MOVi32imm + ADDWrr ==> ADDWri + ADDWri
279   // MOVi64imm + ADDXrr ==> ADDXri + ADDXri
280   //
281   // MOVi32imm + SUBWrr ==> SUBWri + SUBWri
282   // MOVi64imm + SUBXrr ==> SUBXri + SUBXri
283   //
284   // The mov pseudo instruction could be expanded to multiple mov instructions
285   // later. Let's try to split the constant operand of mov instruction into two
286   // legal add/sub immediates. It makes only two ADD/SUB instructions intead of
287   // multiple `mov` + `and/sub` instructions.
288 
289   return splitTwoPartImm<T>(
290       MI,
291       [PosOpc, NegOpc](T Imm, unsigned RegSize, T &Imm0,
292                        T &Imm1) -> Optional<OpcodePair> {
293         if (splitAddSubImm(Imm, RegSize, Imm0, Imm1))
294           return std::make_pair(PosOpc, PosOpc);
295         if (splitAddSubImm(-Imm, RegSize, Imm0, Imm1))
296           return std::make_pair(NegOpc, NegOpc);
297         return None;
298       },
299       [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
300                    unsigned Imm1, Register SrcReg, Register NewTmpReg,
301                    Register NewDstReg) {
302         DebugLoc DL = MI.getDebugLoc();
303         MachineBasicBlock *MBB = MI.getParent();
304         BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg)
305             .addReg(SrcReg)
306             .addImm(Imm0)
307             .addImm(12);
308         BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg)
309             .addReg(NewTmpReg)
310             .addImm(Imm1)
311             .addImm(0);
312       });
313 }
314 
315 template <typename T>
316 bool AArch64MIPeepholeOpt::visitADDSSUBS(
317     OpcodePair PosOpcs, OpcodePair NegOpcs, MachineInstr &MI) {
318   // Try the same transformation as ADDSUB but with additional requirement
319   // that the condition code usages are only for Equal and Not Equal
320   return splitTwoPartImm<T>(
321       MI,
322       [PosOpcs, NegOpcs, &MI, &TRI = TRI, &MRI = MRI](
323           T Imm, unsigned RegSize, T &Imm0, T &Imm1) -> Optional<OpcodePair> {
324         OpcodePair OP;
325         if (splitAddSubImm(Imm, RegSize, Imm0, Imm1))
326           OP = PosOpcs;
327         else if (splitAddSubImm(-Imm, RegSize, Imm0, Imm1))
328           OP = NegOpcs;
329         else
330           return None;
331         // Check conditional uses last since it is expensive for scanning
332         // proceeding instructions
333         MachineInstr &SrcMI = *MRI->getUniqueVRegDef(MI.getOperand(1).getReg());
334         Optional<UsedNZCV> NZCVUsed = examineCFlagsUse(SrcMI, MI, *TRI);
335         if (!NZCVUsed || NZCVUsed->C || NZCVUsed->V)
336           return None;
337         return OP;
338       },
339       [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
340                    unsigned Imm1, Register SrcReg, Register NewTmpReg,
341                    Register NewDstReg) {
342         DebugLoc DL = MI.getDebugLoc();
343         MachineBasicBlock *MBB = MI.getParent();
344         BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg)
345             .addReg(SrcReg)
346             .addImm(Imm0)
347             .addImm(12);
348         BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg)
349             .addReg(NewTmpReg)
350             .addImm(Imm1)
351             .addImm(0);
352       });
353 }
354 
355 // Checks if the corresponding MOV immediate instruction is applicable for
356 // this peephole optimization.
357 bool AArch64MIPeepholeOpt::checkMovImmInstr(MachineInstr &MI,
358                                             MachineInstr *&MovMI,
359                                             MachineInstr *&SubregToRegMI) {
360   // Check whether current MBB is in loop and the AND is loop invariant.
361   MachineBasicBlock *MBB = MI.getParent();
362   MachineLoop *L = MLI->getLoopFor(MBB);
363   if (L && !L->isLoopInvariant(MI))
364     return false;
365 
366   // Check whether current MI's operand is MOV with immediate.
367   MovMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
368   if (!MovMI)
369     return false;
370 
371   // If it is SUBREG_TO_REG, check its operand.
372   SubregToRegMI = nullptr;
373   if (MovMI->getOpcode() == TargetOpcode::SUBREG_TO_REG) {
374     SubregToRegMI = MovMI;
375     MovMI = MRI->getUniqueVRegDef(MovMI->getOperand(2).getReg());
376     if (!MovMI)
377       return false;
378   }
379 
380   if (MovMI->getOpcode() != AArch64::MOVi32imm &&
381       MovMI->getOpcode() != AArch64::MOVi64imm)
382     return false;
383 
384   // If the MOV has multiple uses, do not split the immediate because it causes
385   // more instructions.
386   if (!MRI->hasOneUse(MovMI->getOperand(0).getReg()))
387     return false;
388   if (SubregToRegMI && !MRI->hasOneUse(SubregToRegMI->getOperand(0).getReg()))
389     return false;
390 
391   // It is OK to perform this peephole optimization.
392   return true;
393 }
394 
395 template <typename T>
396 bool AArch64MIPeepholeOpt::splitTwoPartImm(
397     MachineInstr &MI,
398     SplitAndOpcFunc<T> SplitAndOpc, BuildMIFunc BuildInstr) {
399   unsigned RegSize = sizeof(T) * 8;
400   assert((RegSize == 32 || RegSize == 64) &&
401          "Invalid RegSize for legal immediate peephole optimization");
402 
403   // Perform several essential checks against current MI.
404   MachineInstr *MovMI, *SubregToRegMI;
405   if (!checkMovImmInstr(MI, MovMI, SubregToRegMI))
406     return false;
407 
408   // Split the immediate to Imm0 and Imm1, and calculate the Opcode.
409   T Imm = static_cast<T>(MovMI->getOperand(1).getImm()), Imm0, Imm1;
410   // For the 32 bit form of instruction, the upper 32 bits of the destination
411   // register are set to zero. If there is SUBREG_TO_REG, set the upper 32 bits
412   // of Imm to zero. This is essential if the Immediate value was a negative
413   // number since it was sign extended when we assign to the 64-bit Imm.
414   if (SubregToRegMI)
415     Imm &= 0xFFFFFFFF;
416   OpcodePair Opcode;
417   if (auto R = SplitAndOpc(Imm, RegSize, Imm0, Imm1))
418     Opcode = *R;
419   else
420     return false;
421 
422   // Create new MIs using the first and second opcodes. Opcodes might differ for
423   // flag setting operations that should only set flags on second instruction.
424   // NewTmpReg = Opcode.first SrcReg Imm0
425   // NewDstReg = Opcode.second NewTmpReg Imm1
426 
427   // Determine register classes for destinations and register operands
428   MachineFunction *MF = MI.getMF();
429   const TargetRegisterClass *FirstInstrDstRC =
430       TII->getRegClass(TII->get(Opcode.first), 0, TRI, *MF);
431   const TargetRegisterClass *FirstInstrOperandRC =
432       TII->getRegClass(TII->get(Opcode.first), 1, TRI, *MF);
433   const TargetRegisterClass *SecondInstrDstRC =
434       (Opcode.first == Opcode.second)
435           ? FirstInstrDstRC
436           : TII->getRegClass(TII->get(Opcode.second), 0, TRI, *MF);
437   const TargetRegisterClass *SecondInstrOperandRC =
438       (Opcode.first == Opcode.second)
439           ? FirstInstrOperandRC
440           : TII->getRegClass(TII->get(Opcode.second), 1, TRI, *MF);
441 
442   // Get old registers destinations and new register destinations
443   Register DstReg = MI.getOperand(0).getReg();
444   Register SrcReg = MI.getOperand(1).getReg();
445   Register NewTmpReg = MRI->createVirtualRegister(FirstInstrDstRC);
446   // In the situation that DstReg is not Virtual (likely WZR or XZR), we want to
447   // reuse that same destination register.
448   Register NewDstReg = DstReg.isVirtual()
449                            ? MRI->createVirtualRegister(SecondInstrDstRC)
450                            : DstReg;
451 
452   // Constrain registers based on their new uses
453   MRI->constrainRegClass(SrcReg, FirstInstrOperandRC);
454   MRI->constrainRegClass(NewTmpReg, SecondInstrOperandRC);
455   if (DstReg != NewDstReg)
456     MRI->constrainRegClass(NewDstReg, MRI->getRegClass(DstReg));
457 
458   // Call the delegating operation to build the instruction
459   BuildInstr(MI, Opcode, Imm0, Imm1, SrcReg, NewTmpReg, NewDstReg);
460 
461   // replaceRegWith changes MI's definition register. Keep it for SSA form until
462   // deleting MI. Only if we made a new destination register.
463   if (DstReg != NewDstReg) {
464     MRI->replaceRegWith(DstReg, NewDstReg);
465     MI.getOperand(0).setReg(DstReg);
466   }
467 
468   // Record the MIs need to be removed.
469   MI.eraseFromParent();
470   if (SubregToRegMI)
471     SubregToRegMI->eraseFromParent();
472   MovMI->eraseFromParent();
473 
474   return true;
475 }
476 
477 bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
478   if (skipFunction(MF.getFunction()))
479     return false;
480 
481   TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
482   TRI = static_cast<const AArch64RegisterInfo *>(
483       MF.getSubtarget().getRegisterInfo());
484   MLI = &getAnalysis<MachineLoopInfo>();
485   MRI = &MF.getRegInfo();
486 
487   assert(MRI->isSSA() && "Expected to be run on SSA form!");
488 
489   bool Changed = false;
490 
491   for (MachineBasicBlock &MBB : MF) {
492     for (MachineInstr &MI : make_early_inc_range(MBB)) {
493       switch (MI.getOpcode()) {
494       default:
495         break;
496       case AArch64::ANDWrr:
497         Changed = visitAND<uint32_t>(AArch64::ANDWri, MI);
498         break;
499       case AArch64::ANDXrr:
500         Changed = visitAND<uint64_t>(AArch64::ANDXri, MI);
501         break;
502       case AArch64::ORRWrs:
503         Changed = visitORR(MI);
504         break;
505       case AArch64::ADDWrr:
506         Changed = visitADDSUB<uint32_t>(AArch64::ADDWri, AArch64::SUBWri, MI);
507         break;
508       case AArch64::SUBWrr:
509         Changed = visitADDSUB<uint32_t>(AArch64::SUBWri, AArch64::ADDWri, MI);
510         break;
511       case AArch64::ADDXrr:
512         Changed = visitADDSUB<uint64_t>(AArch64::ADDXri, AArch64::SUBXri, MI);
513         break;
514       case AArch64::SUBXrr:
515         Changed = visitADDSUB<uint64_t>(AArch64::SUBXri, AArch64::ADDXri, MI);
516         break;
517       case AArch64::ADDSWrr:
518         Changed = visitADDSSUBS<uint32_t>({AArch64::ADDWri, AArch64::ADDSWri},
519                                           {AArch64::SUBWri, AArch64::SUBSWri},
520                                           MI);
521         break;
522       case AArch64::SUBSWrr:
523         Changed = visitADDSSUBS<uint32_t>({AArch64::SUBWri, AArch64::SUBSWri},
524                                           {AArch64::ADDWri, AArch64::ADDSWri},
525                                           MI);
526         break;
527       case AArch64::ADDSXrr:
528         Changed = visitADDSSUBS<uint64_t>({AArch64::ADDXri, AArch64::ADDSXri},
529                                           {AArch64::SUBXri, AArch64::SUBSXri},
530                                           MI);
531         break;
532       case AArch64::SUBSXrr:
533         Changed = visitADDSSUBS<uint64_t>({AArch64::SUBXri, AArch64::SUBSXri},
534                                           {AArch64::ADDXri, AArch64::ADDSXri},
535                                           MI);
536         break;
537       }
538     }
539   }
540 
541   return Changed;
542 }
543 
544 FunctionPass *llvm::createAArch64MIPeepholeOptPass() {
545   return new AArch64MIPeepholeOpt();
546 }
547