//===- AArch64MIPeepholeOpt.cpp - AArch64 MI peephole optimization pass ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass performs the peephole optimizations below at the MIR level.
//
// 1. MOVi32imm + ANDWrr ==> ANDWri + ANDWri
//    MOVi64imm + ANDXrr ==> ANDXri + ANDXri
//
// 2. MOVi32imm + ADDWrr ==> ADDWri + ADDWri
//    MOVi64imm + ADDXrr ==> ADDXri + ADDXri
//
// 3. MOVi32imm + SUBWrr ==> SUBWri + SUBWri
//    MOVi64imm + SUBXrr ==> SUBXri + SUBXri
//
//    The mov pseudo instruction could be expanded to multiple mov instructions
//    later. In this case, we could try to split the constant operand of the
//    mov instruction into two immediates which can be directly encoded into
//    *Wri/*Xri instructions. That makes two AND/ADD/SUB instructions instead
//    of multiple `mov` + `and/add/sub` instructions.
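//
//    As an illustrative sketch (register numbers are arbitrary), a constant
//    such as 0x123456, which would otherwise need a MOVZ + MOVK pair, can be
//    folded directly into the add:
//
//      mov  w8, #0x123456          ; expands to movz + movk
//      add  w0, w1, w8
//    ==>
//      add  w0, w1, #0x123, lsl #12
//      add  w0, w0, #0x456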
//
// 4. Remove the redundant ORRWrs generated by zero-extension.
//
//    %3:gpr32 = ORRWrs $wzr, %2, 0
//    %4:gpr64 = SUBREG_TO_REG 0, %3, %subreg.sub_32
//
//    If the 32-bit form of an AArch64 instruction defines the source operand
//    of the ORRWrs, we can remove the ORRWrs because the upper 32 bits of the
//    source operand are already set to zero.
//
// 5. %reg = INSERT_SUBREG %reg(tied-def 0), %subreg, subidx
//     ==> %reg:subidx = SUBREG_TO_REG 0, %subreg, subidx
//
// 6. %intermediate:gpr32 = COPY %src:fpr128
//    %dst:fpr128 = INSvi32gpr %dst_vec:fpr128, dst_index, %intermediate:gpr32
//     ==> %dst:fpr128 = INSvi32lane %dst_vec:fpr128, dst_index, %src:fpr128, 0
//
//    In cases where a source FPR is copied to a GPR in order to be copied
//    to a destination FPR, we can directly copy the values between the FPRs,
//    eliminating the use of the integer unit. When we match a pattern of
//    INSvi[X]gpr that is preceded by a chain of COPY instructions from an FPR
//    source, we use INSvi[X]lane to replace the COPY & INSvi[X]gpr
//    instructions.
//
// 7. If an MI implicitly sets the high 64 bits to zero, remove the `mov 0`
//    for the high 64 bits. For example,
//
//   %1:fpr64 = nofpexcept FCVTNv4i16 %0:fpr128, implicit $fpcr
//   %2:fpr64 = MOVID 0
//   %4:fpr128 = IMPLICIT_DEF
//   %3:fpr128 = INSERT_SUBREG %4:fpr128(tied-def 0), killed %2:fpr64, %subreg.dsub
//   %6:fpr128 = IMPLICIT_DEF
//   %5:fpr128 = INSERT_SUBREG %6:fpr128(tied-def 0), killed %1:fpr64, %subreg.dsub
//   %7:fpr128 = INSvi64lane %5:fpr128(tied-def 0), 1, killed %3:fpr128, 0
//   ==>
//   %1:fpr64 = nofpexcept FCVTNv4i16 %0:fpr128, implicit $fpcr
//   %6:fpr128 = IMPLICIT_DEF
//   %7:fpr128 = INSERT_SUBREG %6:fpr128(tied-def 0), killed %1:fpr64, %subreg.dsub
//
//===----------------------------------------------------------------------===//

#include "AArch64ExpandImm.h"
#include "AArch64InstrInfo.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineLoopInfo.h"

using namespace llvm;

#define DEBUG_TYPE "aarch64-mi-peephole-opt"

namespace {

struct AArch64MIPeepholeOpt : public MachineFunctionPass {
  static char ID;

  AArch64MIPeepholeOpt() : MachineFunctionPass(ID) {
    initializeAArch64MIPeepholeOptPass(*PassRegistry::getPassRegistry());
  }

  const AArch64InstrInfo *TII;
  const AArch64RegisterInfo *TRI;
  MachineLoopInfo *MLI;
  MachineRegisterInfo *MRI;

  using OpcodePair = std::pair<unsigned, unsigned>;
  template <typename T>
  using SplitAndOpcFunc =
      std::function<std::optional<OpcodePair>(T, unsigned, T &, T &)>;
  using BuildMIFunc =
      std::function<void(MachineInstr &, OpcodePair, unsigned, unsigned,
                         Register, Register, Register)>;

  /// For instructions where an immediate operand could be split into two
  /// separate immediate instructions, use splitTwoPartImm to handle the
  /// optimization.
  ///
  /// To implement, the following function types must be passed to
  /// splitTwoPartImm. A SplitAndOpcFunc must be implemented that determines if
  /// splitting the immediate is valid and returns the associated new opcode. A
  /// BuildMIFunc must be implemented to build the two immediate instructions.
  ///
  /// Example Pattern (where IMM would require 2+ MOV instructions):
  ///     %dst = <Instr>rr %src IMM [...]
  /// becomes:
  ///     %tmp = <Instr>ri %src (encode half IMM) [...]
  ///     %dst = <Instr>ri %tmp (encode half IMM) [...]
  template <typename T>
  bool splitTwoPartImm(MachineInstr &MI,
                       SplitAndOpcFunc<T> SplitAndOpc, BuildMIFunc BuildInstr);

  bool checkMovImmInstr(MachineInstr &MI, MachineInstr *&MovMI,
                        MachineInstr *&SubregToRegMI);

  template <typename T>
  bool visitADDSUB(unsigned PosOpc, unsigned NegOpc, MachineInstr &MI);
  template <typename T>
  bool visitADDSSUBS(OpcodePair PosOpcs, OpcodePair NegOpcs, MachineInstr &MI);

  template <typename T>
  bool visitAND(unsigned Opc, MachineInstr &MI);
  bool visitORR(MachineInstr &MI);
  bool visitINSERT(MachineInstr &MI);
  bool visitINSviGPR(MachineInstr &MI, unsigned Opc);
  bool visitINSvi64lane(MachineInstr &MI);
  bool visitFMOVDr(MachineInstr &MI);
  bool visitCopy(MachineInstr &MI);
  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override {
    return "AArch64 MI Peephole Optimization pass";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addRequired<MachineLoopInfoWrapperPass>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

char AArch64MIPeepholeOpt::ID = 0;

} // end anonymous namespace

INITIALIZE_PASS(AArch64MIPeepholeOpt, "aarch64-mi-peephole-opt",
                "AArch64 MI Peephole Optimization", false, false)

template <typename T>
static bool splitBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc, T &Imm2Enc) {
  T UImm = static_cast<T>(Imm);
  if (AArch64_AM::isLogicalImmediate(UImm, RegSize))
    return false;

  // If this immediate can be handled by one instruction, do not split it.
  SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
  AArch64_IMM::expandMOVImm(UImm, RegSize, Insn);
  if (Insn.size() == 1)
    return false;

  // A bitmask immediate consists of consecutive ones. Let's say there is a
  // constant 0b00000000001000000000010000000000 which does not consist of
  // consecutive ones. We can split it into two bitmask immediates like
  // 0b00000000001111111111110000000000 and 0b11111111111000000000011111111111.
  // ANDing these two bitmask immediates gives back the original constant.
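  // As a concrete sketch of the arithmetic (the constant above is 0x200400):
  // the two masks are 0x3ffc00 and 0xffe007ff, both valid logical immediates,
  // and 0x3ffc00 & 0xffe007ff == 0x200400. So, for example,
  // `and w0, w1, #0x200400` (which needs a mov + and) becomes
  // `and w0, w1, #0x3ffc00` followed by `and w0, w0, #0xffe007ff`.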
  unsigned LowestBitSet = llvm::countr_zero(UImm);
  unsigned HighestBitSet = Log2_64(UImm);

  // Create a mask which is filled with one from the position of lowest bit set
  // to the position of highest bit set.
  T NewImm1 = (static_cast<T>(2) << HighestBitSet) -
              (static_cast<T>(1) << LowestBitSet);
  // Create a mask which is filled with one outside the position of lowest bit
  // set and the position of highest bit set.
  T NewImm2 = UImm | ~NewImm1;

  // If the split value is not a valid bitmask immediate, do not split this
  // constant.
  if (!AArch64_AM::isLogicalImmediate(NewImm2, RegSize))
    return false;

  Imm1Enc = AArch64_AM::encodeLogicalImmediate(NewImm1, RegSize);
  Imm2Enc = AArch64_AM::encodeLogicalImmediate(NewImm2, RegSize);
  return true;
}

template <typename T>
bool AArch64MIPeepholeOpt::visitAND(
    unsigned Opc, MachineInstr &MI) {
  // Try the transformation below.
  //
  // MOVi32imm + ANDWrr ==> ANDWri + ANDWri
  // MOVi64imm + ANDXrr ==> ANDXri + ANDXri
  //
  // The mov pseudo instruction could be expanded to multiple mov instructions
  // later. Let's try to split the constant operand of the mov instruction into
  // two bitmask immediates. That makes only two AND instructions instead of
  // multiple mov + and instructions.

  return splitTwoPartImm<T>(
      MI,
      [Opc](T Imm, unsigned RegSize, T &Imm0,
            T &Imm1) -> std::optional<OpcodePair> {
        if (splitBitmaskImm(Imm, RegSize, Imm0, Imm1))
          return std::make_pair(Opc, Opc);
        return std::nullopt;
      },
      [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
                   unsigned Imm1, Register SrcReg, Register NewTmpReg,
                   Register NewDstReg) {
        DebugLoc DL = MI.getDebugLoc();
        MachineBasicBlock *MBB = MI.getParent();
        BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg)
            .addReg(SrcReg)
            .addImm(Imm0);
        BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg)
            .addReg(NewTmpReg)
            .addImm(Imm1);
      });
}

bool AArch64MIPeepholeOpt::visitORR(MachineInstr &MI) {
  // Check whether this ORR comes from the zero-extend pattern below.
  //
  // def : Pat<(i64 (zext GPR32:$src)),
  //           (SUBREG_TO_REG (i32 0), (ORRWrs WZR, GPR32:$src, 0), sub_32)>;
  if (MI.getOperand(3).getImm() != 0)
    return false;

  if (MI.getOperand(1).getReg() != AArch64::WZR)
    return false;

  MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
  if (!SrcMI)
    return false;

  // From https://developer.arm.com/documentation/dui0801/b/BABBGCAC
  //
  // When you use the 32-bit form of an instruction, the upper 32 bits of the
  // source registers are ignored and the upper 32 bits of the destination
  // register are set to zero.
  //
  // If the 32-bit form of an AArch64 instruction defines the source operand of
  // the zero-extend, we do not need the zero-extend. Check that the source
  // instruction's opcode is a real AArch64 instruction; if it is not, be
  // conservative and do not transform it.
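  //
  // For illustration (virtual register numbers are arbitrary):
  //   %2:gpr32 = ADDWrr %0, %1               ; real 32-bit AArch64 def
  //   %3:gpr32 = ORRWrs $wzr, %2, 0          ; redundant zero-extend copy
  //   %4:gpr64 = SUBREG_TO_REG 0, %3, %subreg.sub_32
  // becomes the same sequence with uses of %3 rewritten to %2 and the ORRWrs
  // erased.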
  if (SrcMI->getOpcode() == TargetOpcode::COPY &&
      SrcMI->getOperand(1).getReg().isVirtual()) {
    const TargetRegisterClass *RC =
        MRI->getRegClass(SrcMI->getOperand(1).getReg());

    // A COPY from an FPR will become a FMOVSWr, so do so now so that we know
    // that the upper bits are zero.
    if (RC != &AArch64::FPR32RegClass &&
        ((RC != &AArch64::FPR64RegClass && RC != &AArch64::FPR128RegClass) ||
         SrcMI->getOperand(1).getSubReg() != AArch64::ssub))
      return false;
    Register CpySrc = SrcMI->getOperand(1).getReg();
    if (SrcMI->getOperand(1).getSubReg() == AArch64::ssub) {
      CpySrc = MRI->createVirtualRegister(&AArch64::FPR32RegClass);
      BuildMI(*SrcMI->getParent(), SrcMI, SrcMI->getDebugLoc(),
              TII->get(TargetOpcode::COPY), CpySrc)
          .add(SrcMI->getOperand(1));
    }
    BuildMI(*SrcMI->getParent(), SrcMI, SrcMI->getDebugLoc(),
            TII->get(AArch64::FMOVSWr), SrcMI->getOperand(0).getReg())
        .addReg(CpySrc);
    SrcMI->eraseFromParent();
  } else if (SrcMI->getOpcode() <= TargetOpcode::GENERIC_OP_END)
    return false;

  Register DefReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(2).getReg();
  MRI->replaceRegWith(DefReg, SrcReg);
  MRI->clearKillFlags(SrcReg);
  LLVM_DEBUG(dbgs() << "Removed: " << MI << "\n");
  MI.eraseFromParent();

  return true;
}

bool AArch64MIPeepholeOpt::visitINSERT(MachineInstr &MI) {
  // Check whether this INSERT_SUBREG comes from the zero-extend pattern below.
  //
  // From %reg = INSERT_SUBREG %reg(tied-def 0), %subreg, subidx
  // To   %reg:subidx = SUBREG_TO_REG 0, %subreg, subidx
  //
  // We're assuming the first operand to INSERT_SUBREG is irrelevant because a
  // COPY would destroy the upper part of the register anyway.
  if (!MI.isRegTiedToDefOperand(1))
    return false;

  Register DstReg = MI.getOperand(0).getReg();
  const TargetRegisterClass *RC = MRI->getRegClass(DstReg);
  MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
  if (!SrcMI)
    return false;

  // From https://developer.arm.com/documentation/dui0801/b/BABBGCAC
  //
  // When you use the 32-bit form of an instruction, the upper 32 bits of the
  // source registers are ignored and the upper 32 bits of the destination
  // register are set to zero.
  //
  // If the 32-bit form of an AArch64 instruction defines the source operand of
  // the zero-extend, we do not need the zero-extend. Check that the source
  // instruction's opcode is a real AArch64 instruction; if it is not, be
  // conservative and do not transform it.
  if ((SrcMI->getOpcode() <= TargetOpcode::GENERIC_OP_END) ||
      !AArch64::GPR64allRegClass.hasSubClassEq(RC))
    return false;

  // Build a SUBREG_TO_REG instruction.
  MachineInstr *SubregMI =
      BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
              TII->get(TargetOpcode::SUBREG_TO_REG), DstReg)
          .addImm(0)
          .add(MI.getOperand(2))
          .add(MI.getOperand(3));
  LLVM_DEBUG(dbgs() << MI << "  replace by:\n: " << *SubregMI << "\n");
  (void)SubregMI;
  MI.eraseFromParent();

  return true;
}

template <typename T>
static bool splitAddSubImm(T Imm, unsigned RegSize, T &Imm0, T &Imm1) {
  // The immediate must be in the form of ((imm0 << 12) + imm1), in which both
  // imm0 and imm1 are non-zero 12-bit unsigned integers.
  if ((Imm & 0xfff000) == 0 || (Imm & 0xfff) == 0 ||
      (Imm & ~static_cast<T>(0xffffff)) != 0)
    return false;

  // If the immediate can be materialized by a single mov instruction, do not
  // split it.
  SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
  AArch64_IMM::expandMOVImm(Imm, RegSize, Insn);
  if (Insn.size() == 1)
    return false;

  // Split Imm into (Imm0 << 12) + Imm1.
  Imm0 = (Imm >> 12) & 0xfff;
  Imm1 = Imm & 0xfff;
  return true;
}

template <typename T>
bool AArch64MIPeepholeOpt::visitADDSUB(
    unsigned PosOpc, unsigned NegOpc, MachineInstr &MI) {
  // Try the transformation below.
  //
  // ADDWrr X, MOVi32imm ==> ADDWri + ADDWri
  // ADDXrr X, MOVi64imm ==> ADDXri + ADDXri
  //
  // SUBWrr X, MOVi32imm ==> SUBWri + SUBWri
  // SUBXrr X, MOVi64imm ==> SUBXri + SUBXri
  //
  // The mov pseudo instruction could be expanded to multiple mov instructions
  // later. Let's try to split the constant operand of the mov instruction into
  // two legal add/sub immediates. That makes only two ADD/SUB instructions
  // instead of multiple `mov` + `add/sub` instructions.
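  //
  // As an illustrative sketch (register numbers are arbitrary), adding the
  // constant -0x123456 falls back to the NegOpc form:
  //
  //   mov  w8, #-0x123456
  //   add  w0, w1, w8
  // ==>
  //   sub  w0, w1, #0x123, lsl #12
  //   sub  w0, w0, #0x456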

  // We can sometimes have ADDWrr WZR, MOVi32imm that has not been constant
  // folded. Make sure that we don't generate invalid instructions that use XZR
  // in those cases.
  if (MI.getOperand(1).getReg() == AArch64::XZR ||
      MI.getOperand(1).getReg() == AArch64::WZR)
    return false;

  return splitTwoPartImm<T>(
      MI,
      [PosOpc, NegOpc](T Imm, unsigned RegSize, T &Imm0,
                       T &Imm1) -> std::optional<OpcodePair> {
        if (splitAddSubImm(Imm, RegSize, Imm0, Imm1))
          return std::make_pair(PosOpc, PosOpc);
        if (splitAddSubImm(-Imm, RegSize, Imm0, Imm1))
          return std::make_pair(NegOpc, NegOpc);
        return std::nullopt;
      },
      [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
                   unsigned Imm1, Register SrcReg, Register NewTmpReg,
                   Register NewDstReg) {
        DebugLoc DL = MI.getDebugLoc();
        MachineBasicBlock *MBB = MI.getParent();
        BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg)
            .addReg(SrcReg)
            .addImm(Imm0)
            .addImm(12);
        BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg)
            .addReg(NewTmpReg)
            .addImm(Imm1)
            .addImm(0);
      });
}

template <typename T>
bool AArch64MIPeepholeOpt::visitADDSSUBS(
    OpcodePair PosOpcs, OpcodePair NegOpcs, MachineInstr &MI) {
  // Try the same transformation as ADDSUB, but with the additional requirement
  // that the condition code usages are only for Equal and Not Equal.
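  //
  // Note: splitting the flag-setting instruction into two still computes the
  // same final value, so the N and Z flags are preserved, while the C and V
  // flags can differ from the single-instruction form. The lambda below
  // therefore bails out if any flag user reads C or V.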

  if (MI.getOperand(1).getReg() == AArch64::XZR ||
      MI.getOperand(1).getReg() == AArch64::WZR)
    return false;

  return splitTwoPartImm<T>(
      MI,
      [PosOpcs, NegOpcs, &MI, &TRI = TRI,
       &MRI = MRI](T Imm, unsigned RegSize, T &Imm0,
                   T &Imm1) -> std::optional<OpcodePair> {
        OpcodePair OP;
        if (splitAddSubImm(Imm, RegSize, Imm0, Imm1))
          OP = PosOpcs;
        else if (splitAddSubImm(-Imm, RegSize, Imm0, Imm1))
          OP = NegOpcs;
        else
          return std::nullopt;
        // Check conditional uses last since scanning the following
        // instructions is expensive.
        MachineInstr &SrcMI = *MRI->getUniqueVRegDef(MI.getOperand(1).getReg());
        std::optional<UsedNZCV> NZCVUsed = examineCFlagsUse(SrcMI, MI, *TRI);
        if (!NZCVUsed || NZCVUsed->C || NZCVUsed->V)
          return std::nullopt;
        return OP;
      },
      [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
                   unsigned Imm1, Register SrcReg, Register NewTmpReg,
                   Register NewDstReg) {
        DebugLoc DL = MI.getDebugLoc();
        MachineBasicBlock *MBB = MI.getParent();
        BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg)
            .addReg(SrcReg)
            .addImm(Imm0)
            .addImm(12);
        BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg)
            .addReg(NewTmpReg)
            .addImm(Imm1)
            .addImm(0);
      });
}

// Checks if the corresponding MOV immediate instruction is applicable for
// this peephole optimization.
bool AArch64MIPeepholeOpt::checkMovImmInstr(MachineInstr &MI,
                                            MachineInstr *&MovMI,
                                            MachineInstr *&SubregToRegMI) {
  // Check whether the current MBB is in a loop and the MI is loop invariant.
  MachineBasicBlock *MBB = MI.getParent();
  MachineLoop *L = MLI->getLoopFor(MBB);
  if (L && !L->isLoopInvariant(MI))
    return false;

  // Check whether the current MI's operand is a MOV with immediate.
  MovMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
  if (!MovMI)
    return false;

  // If it is SUBREG_TO_REG, check its operand.
  SubregToRegMI = nullptr;
  if (MovMI->getOpcode() == TargetOpcode::SUBREG_TO_REG) {
    SubregToRegMI = MovMI;
    MovMI = MRI->getUniqueVRegDef(MovMI->getOperand(2).getReg());
    if (!MovMI)
      return false;
  }

  if (MovMI->getOpcode() != AArch64::MOVi32imm &&
      MovMI->getOpcode() != AArch64::MOVi64imm)
    return false;

  // If the MOV has multiple uses, do not split the immediate because it causes
  // more instructions.
  if (!MRI->hasOneUse(MovMI->getOperand(0).getReg()))
    return false;
  if (SubregToRegMI && !MRI->hasOneUse(SubregToRegMI->getOperand(0).getReg()))
    return false;

  // It is OK to perform this peephole optimization.
  return true;
}

template <typename T>
bool AArch64MIPeepholeOpt::splitTwoPartImm(
    MachineInstr &MI,
    SplitAndOpcFunc<T> SplitAndOpc, BuildMIFunc BuildInstr) {
  unsigned RegSize = sizeof(T) * 8;
  assert((RegSize == 32 || RegSize == 64) &&
         "Invalid RegSize for legal immediate peephole optimization");

  // Perform several essential checks against the current MI.
  MachineInstr *MovMI, *SubregToRegMI;
  if (!checkMovImmInstr(MI, MovMI, SubregToRegMI))
    return false;

  // Split the immediate into Imm0 and Imm1, and calculate the Opcode.
  T Imm = static_cast<T>(MovMI->getOperand(1).getImm()), Imm0, Imm1;
  // For the 32-bit form of the instruction, the upper 32 bits of the
  // destination register are set to zero. If there is a SUBREG_TO_REG, set the
  // upper 32 bits of Imm to zero. This is essential if the immediate value was
  // a negative number, since it was sign extended when we assigned it to the
  // 64-bit Imm.
  if (SubregToRegMI)
    Imm &= 0xFFFFFFFF;
  OpcodePair Opcode;
  if (auto R = SplitAndOpc(Imm, RegSize, Imm0, Imm1))
    Opcode = *R;
  else
    return false;

  // Create new MIs using the first and second opcodes. The opcodes might
  // differ for flag-setting operations that should only set flags on the
  // second instruction.
  // NewTmpReg = Opcode.first SrcReg Imm0
  // NewDstReg = Opcode.second NewTmpReg Imm1

  // Determine register classes for destinations and register operands.
  MachineFunction *MF = MI.getMF();
  const TargetRegisterClass *FirstInstrDstRC =
      TII->getRegClass(TII->get(Opcode.first), 0, TRI, *MF);
  const TargetRegisterClass *FirstInstrOperandRC =
      TII->getRegClass(TII->get(Opcode.first), 1, TRI, *MF);
  const TargetRegisterClass *SecondInstrDstRC =
      (Opcode.first == Opcode.second)
          ? FirstInstrDstRC
          : TII->getRegClass(TII->get(Opcode.second), 0, TRI, *MF);
  const TargetRegisterClass *SecondInstrOperandRC =
      (Opcode.first == Opcode.second)
          ? FirstInstrOperandRC
          : TII->getRegClass(TII->get(Opcode.second), 1, TRI, *MF);

  // Get the old destination register and create the new destination registers.
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  Register NewTmpReg = MRI->createVirtualRegister(FirstInstrDstRC);
  // In the situation that DstReg is not virtual (likely WZR or XZR), we want
  // to reuse that same destination register.
  Register NewDstReg = DstReg.isVirtual()
                           ? MRI->createVirtualRegister(SecondInstrDstRC)
                           : DstReg;

  // Constrain the registers based on their new uses.
  MRI->constrainRegClass(SrcReg, FirstInstrOperandRC);
  MRI->constrainRegClass(NewTmpReg, SecondInstrOperandRC);
  if (DstReg != NewDstReg)
    MRI->constrainRegClass(NewDstReg, MRI->getRegClass(DstReg));

  // Call the delegating operation to build the instructions.
  BuildInstr(MI, Opcode, Imm0, Imm1, SrcReg, NewTmpReg, NewDstReg);

  // replaceRegWith changes MI's definition register. Keep it for SSA form
  // until deleting MI. Only do this if we made a new destination register.
  if (DstReg != NewDstReg) {
    MRI->replaceRegWith(DstReg, NewDstReg);
    MI.getOperand(0).setReg(DstReg);
  }

  // Remove the now-dead instructions.
  MI.eraseFromParent();
  if (SubregToRegMI)
    SubregToRegMI->eraseFromParent();
  MovMI->eraseFromParent();

  return true;
}

bool AArch64MIPeepholeOpt::visitINSviGPR(MachineInstr &MI, unsigned Opc) {
  // Check if this INSvi[X]gpr comes from a COPY of a source FPR128.
  //
  // From
  //  %intermediate1:gpr64 = COPY %src:fpr128
  //  %intermediate2:gpr32 = COPY %intermediate1:gpr64
  //  %dst:fpr128 = INSvi[X]gpr %dst_vec:fpr128, dst_index, %intermediate2:gpr32
  // To
  //  %dst:fpr128 = INSvi[X]lane %dst_vec:fpr128, dst_index, %src:fpr128,
  //  src_index
  // where src_index = 0, X = [8|16|32|64]

  MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(3).getReg());

  // For a chain of COPY instructions, find the initial source register
  // and check if it's an FPR128.
  while (true) {
    if (!SrcMI || SrcMI->getOpcode() != TargetOpcode::COPY)
      return false;

    if (!SrcMI->getOperand(1).getReg().isVirtual())
      return false;

    if (MRI->getRegClass(SrcMI->getOperand(1).getReg()) ==
        &AArch64::FPR128RegClass) {
      break;
    }
    SrcMI = MRI->getUniqueVRegDef(SrcMI->getOperand(1).getReg());
  }

  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = SrcMI->getOperand(1).getReg();
  MachineInstr *INSvilaneMI =
      BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(Opc), DstReg)
          .add(MI.getOperand(1))
          .add(MI.getOperand(2))
          .addUse(SrcReg, getRegState(SrcMI->getOperand(1)))
          .addImm(0);

  LLVM_DEBUG(dbgs() << MI << "  replace by:\n: " << *INSvilaneMI << "\n");
  (void)INSvilaneMI;
  MI.eraseFromParent();
  return true;
}

// All instructions that set an FPR64 will implicitly zero the top bits of the
// register.
static bool is64bitDefwithZeroHigh64bit(MachineInstr *MI,
                                        MachineRegisterInfo *MRI) {
  if (!MI->getOperand(0).isReg() || !MI->getOperand(0).isDef())
    return false;
  const TargetRegisterClass *RC = MRI->getRegClass(MI->getOperand(0).getReg());
  if (RC != &AArch64::FPR64RegClass)
    return false;
  return MI->getOpcode() > TargetOpcode::GENERIC_OP_END;
}

bool AArch64MIPeepholeOpt::visitINSvi64lane(MachineInstr &MI) {
  // Check that the MI defining the low 64 bits implicitly sets the high
  // 64 bits to zero. We are expecting the case below.
  //
  //  %1:fpr64 = nofpexcept FCVTNv4i16 %0:fpr128, implicit $fpcr
  //  %6:fpr128 = IMPLICIT_DEF
  //  %5:fpr128 = INSERT_SUBREG %6:fpr128(tied-def 0), killed %1:fpr64, %subreg.dsub
  //  %7:fpr128 = INSvi64lane %5:fpr128(tied-def 0), 1, killed %3:fpr128, 0
  MachineInstr *Low64MI = MRI->getUniqueVRegDef(MI.getOperand(1).getReg());
  if (Low64MI->getOpcode() != AArch64::INSERT_SUBREG)
    return false;
  Low64MI = MRI->getUniqueVRegDef(Low64MI->getOperand(2).getReg());
  if (!Low64MI || !is64bitDefwithZeroHigh64bit(Low64MI, MRI))
    return false;

  // Check that there is a `mov 0` MI for the high 64 bits.
  // We are expecting the cases below.
  //
  //  %2:fpr64 = MOVID 0
  //  %4:fpr128 = IMPLICIT_DEF
  //  %3:fpr128 = INSERT_SUBREG %4:fpr128(tied-def 0), killed %2:fpr64, %subreg.dsub
  //  %7:fpr128 = INSvi64lane %5:fpr128(tied-def 0), 1, killed %3:fpr128, 0
  // or
  //  %5:fpr128 = MOVIv2d_ns 0
  //  %6:fpr64 = COPY %5.dsub:fpr128
  //  %8:fpr128 = IMPLICIT_DEF
  //  %7:fpr128 = INSERT_SUBREG %8:fpr128(tied-def 0), killed %6:fpr64, %subreg.dsub
  //  %11:fpr128 = INSvi64lane %9:fpr128(tied-def 0), 1, killed %7:fpr128, 0
  MachineInstr *High64MI = MRI->getUniqueVRegDef(MI.getOperand(3).getReg());
  if (!High64MI || High64MI->getOpcode() != AArch64::INSERT_SUBREG)
    return false;
  High64MI = MRI->getUniqueVRegDef(High64MI->getOperand(2).getReg());
  if (High64MI && High64MI->getOpcode() == TargetOpcode::COPY)
    High64MI = MRI->getUniqueVRegDef(High64MI->getOperand(1).getReg());
  if (!High64MI || (High64MI->getOpcode() != AArch64::MOVID &&
                    High64MI->getOpcode() != AArch64::MOVIv2d_ns))
    return false;
  if (High64MI->getOperand(1).getImm() != 0)
    return false;

  // Remove the MIs for the high 64 bits.
  Register OldDef = MI.getOperand(0).getReg();
  Register NewDef = MI.getOperand(1).getReg();
  MRI->constrainRegClass(NewDef, MRI->getRegClass(OldDef));
  MRI->replaceRegWith(OldDef, NewDef);
  MI.eraseFromParent();

  return true;
}

bool AArch64MIPeepholeOpt::visitFMOVDr(MachineInstr &MI) {
  // An FMOVDr sets the high 64 bits to zero implicitly, similar to ORR for
  // GPRs.
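  //
  // For illustration (virtual register numbers are arbitrary):
  //  %1:fpr64 = nofpexcept FCVTNv4i16 %0:fpr128, implicit $fpcr ; high bits zero
  //  %2:fpr64 = FMOVDr %1:fpr64                                 ; redundant
  // All uses of %2 are rewritten to use %1 and the FMOVDr is erased.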
  MachineInstr *Low64MI = MRI->getUniqueVRegDef(MI.getOperand(1).getReg());
  if (!Low64MI || !is64bitDefwithZeroHigh64bit(Low64MI, MRI))
    return false;

  // Remove the redundant FMOVDr: its input already has the high 64 bits
  // cleared.
  Register OldDef = MI.getOperand(0).getReg();
  Register NewDef = MI.getOperand(1).getReg();
  LLVM_DEBUG(dbgs() << "Removing: " << MI << "\n");
  MRI->clearKillFlags(OldDef);
  MRI->clearKillFlags(NewDef);
  MRI->constrainRegClass(NewDef, MRI->getRegClass(OldDef));
  MRI->replaceRegWith(OldDef, NewDef);
  MI.eraseFromParent();

  return true;
}

// Across a basic block we might have an i32 extract from a value that only
// operates on the upper bits (for example a sxtw). We can replace the COPY
// with a new version skipping the sxtw.
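//
// For illustration (virtual register numbers are arbitrary):
//   %1:gpr64 = SBFMXri %0:gpr64, 0, 31     ; sxtw, only changes bits 63..32
//   %2:gpr32 = COPY %1.sub_32
// ==>
//   %2:gpr32 = COPY %0.sub_32
// and the now-dead SBFMXri is erased.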
bool AArch64MIPeepholeOpt::visitCopy(MachineInstr &MI) {
  Register InputReg = MI.getOperand(1).getReg();
  if (MI.getOperand(1).getSubReg() != AArch64::sub_32 ||
      !MRI->hasOneNonDBGUse(InputReg))
    return false;

  MachineInstr *SrcMI = MRI->getUniqueVRegDef(InputReg);
  SmallPtrSet<MachineInstr *, 4> DeadInstrs;
  DeadInstrs.insert(SrcMI);
  while (SrcMI && SrcMI->isFullCopy() &&
         MRI->hasOneNonDBGUse(SrcMI->getOperand(1).getReg())) {
    SrcMI = MRI->getUniqueVRegDef(SrcMI->getOperand(1).getReg());
    DeadInstrs.insert(SrcMI);
  }

  if (!SrcMI || SrcMI->getOpcode() != AArch64::SBFMXri ||
      SrcMI->getOperand(2).getImm() != 0 || SrcMI->getOperand(3).getImm() != 31)
    return false;

  Register SrcReg = SrcMI->getOperand(1).getReg();
  MRI->constrainRegClass(SrcReg, MRI->getRegClass(InputReg));
  LLVM_DEBUG(dbgs() << "Optimizing: " << MI);
  MI.getOperand(1).setReg(SrcReg);
  LLVM_DEBUG(dbgs() << "        to: " << MI);
  for (auto *DeadMI : DeadInstrs) {
    LLVM_DEBUG(dbgs() << "  Removing: " << *DeadMI);
    DeadMI->eraseFromParent();
  }
  return true;
}

bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
  TRI = static_cast<const AArch64RegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
  MRI = &MF.getRegInfo();

  assert(MRI->isSSA() && "Expected to be run on SSA form!");

  bool Changed = false;

  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : make_early_inc_range(MBB)) {
      switch (MI.getOpcode()) {
      default:
        break;
      case AArch64::INSERT_SUBREG:
        Changed |= visitINSERT(MI);
        break;
      case AArch64::ANDWrr:
        Changed |= visitAND<uint32_t>(AArch64::ANDWri, MI);
        break;
      case AArch64::ANDXrr:
        Changed |= visitAND<uint64_t>(AArch64::ANDXri, MI);
        break;
      case AArch64::ORRWrs:
        Changed |= visitORR(MI);
        break;
      case AArch64::ADDWrr:
        Changed |= visitADDSUB<uint32_t>(AArch64::ADDWri, AArch64::SUBWri, MI);
        break;
      case AArch64::SUBWrr:
        Changed |= visitADDSUB<uint32_t>(AArch64::SUBWri, AArch64::ADDWri, MI);
        break;
      case AArch64::ADDXrr:
        Changed |= visitADDSUB<uint64_t>(AArch64::ADDXri, AArch64::SUBXri, MI);
        break;
      case AArch64::SUBXrr:
        Changed |= visitADDSUB<uint64_t>(AArch64::SUBXri, AArch64::ADDXri, MI);
        break;
      case AArch64::ADDSWrr:
        Changed |=
            visitADDSSUBS<uint32_t>({AArch64::ADDWri, AArch64::ADDSWri},
                                    {AArch64::SUBWri, AArch64::SUBSWri}, MI);
        break;
      case AArch64::SUBSWrr:
        Changed |=
            visitADDSSUBS<uint32_t>({AArch64::SUBWri, AArch64::SUBSWri},
                                    {AArch64::ADDWri, AArch64::ADDSWri}, MI);
        break;
      case AArch64::ADDSXrr:
        Changed |=
            visitADDSSUBS<uint64_t>({AArch64::ADDXri, AArch64::ADDSXri},
                                    {AArch64::SUBXri, AArch64::SUBSXri}, MI);
        break;
      case AArch64::SUBSXrr:
        Changed |=
            visitADDSSUBS<uint64_t>({AArch64::SUBXri, AArch64::SUBSXri},
                                    {AArch64::ADDXri, AArch64::ADDSXri}, MI);
        break;
      case AArch64::INSvi64gpr:
        Changed |= visitINSviGPR(MI, AArch64::INSvi64lane);
        break;
      case AArch64::INSvi32gpr:
        Changed |= visitINSviGPR(MI, AArch64::INSvi32lane);
        break;
      case AArch64::INSvi16gpr:
        Changed |= visitINSviGPR(MI, AArch64::INSvi16lane);
        break;
      case AArch64::INSvi8gpr:
        Changed |= visitINSviGPR(MI, AArch64::INSvi8lane);
        break;
      case AArch64::INSvi64lane:
        Changed |= visitINSvi64lane(MI);
        break;
      case AArch64::FMOVDr:
        Changed |= visitFMOVDr(MI);
        break;
      case AArch64::COPY:
        Changed |= visitCopy(MI);
        break;
      }
    }
  }

  return Changed;
}

FunctionPass *llvm::createAArch64MIPeepholeOptPass() {
  return new AArch64MIPeepholeOpt();
}