xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp (revision bdd1243df58e60e85101c09001d9812a789b6bc4)
1349cc55cSDimitry Andric //===- AArch64MIPeepholeOpt.cpp - AArch64 MI peephole optimization pass ---===//
2349cc55cSDimitry Andric //
3349cc55cSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4349cc55cSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5349cc55cSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6349cc55cSDimitry Andric //
7349cc55cSDimitry Andric //===----------------------------------------------------------------------===//
8349cc55cSDimitry Andric //
9349cc55cSDimitry Andric // This pass performs below peephole optimizations on MIR level.
10349cc55cSDimitry Andric //
11349cc55cSDimitry Andric // 1. MOVi32imm + ANDWrr ==> ANDWri + ANDWri
12349cc55cSDimitry Andric //    MOVi64imm + ANDXrr ==> ANDXri + ANDXri
13349cc55cSDimitry Andric //
1404eeddc0SDimitry Andric // 2. MOVi32imm + ADDWrr ==> ADDWRi + ADDWRi
1504eeddc0SDimitry Andric //    MOVi64imm + ADDXrr ==> ANDXri + ANDXri
1604eeddc0SDimitry Andric //
1704eeddc0SDimitry Andric // 3. MOVi32imm + SUBWrr ==> SUBWRi + SUBWRi
1804eeddc0SDimitry Andric //    MOVi64imm + SUBXrr ==> SUBXri + SUBXri
1904eeddc0SDimitry Andric //
20349cc55cSDimitry Andric //    The mov pseudo instruction could be expanded to multiple mov instructions
21349cc55cSDimitry Andric //    later. In this case, we could try to split the constant  operand of mov
2204eeddc0SDimitry Andric //    instruction into two immediates which can be directly encoded into
2304eeddc0SDimitry Andric //    *Wri/*Xri instructions. It makes two AND/ADD/SUB instructions instead of
2404eeddc0SDimitry Andric //    multiple `mov` + `and/add/sub` instructions.
25349cc55cSDimitry Andric //
2604eeddc0SDimitry Andric // 4. Remove redundant ORRWrs which is generated by zero-extend.
27349cc55cSDimitry Andric //
28349cc55cSDimitry Andric //    %3:gpr32 = ORRWrs $wzr, %2, 0
29349cc55cSDimitry Andric //    %4:gpr64 = SUBREG_TO_REG 0, %3, %subreg.sub_32
30349cc55cSDimitry Andric //
31349cc55cSDimitry Andric //    If AArch64's 32-bit form of instruction defines the source operand of
32349cc55cSDimitry Andric //    ORRWrs, we can remove the ORRWrs because the upper 32 bits of the source
33349cc55cSDimitry Andric //    operand are set to zero.
34349cc55cSDimitry Andric //
35*bdd1243dSDimitry Andric // 5. %reg = INSERT_SUBREG %reg(tied-def 0), %subreg, subidx
36*bdd1243dSDimitry Andric //     ==> %reg:subidx =  SUBREG_TO_REG 0, %subreg, subidx
37*bdd1243dSDimitry Andric //
38349cc55cSDimitry Andric //===----------------------------------------------------------------------===//
39349cc55cSDimitry Andric 
40349cc55cSDimitry Andric #include "AArch64ExpandImm.h"
41349cc55cSDimitry Andric #include "AArch64InstrInfo.h"
42349cc55cSDimitry Andric #include "MCTargetDesc/AArch64AddressingModes.h"
43349cc55cSDimitry Andric #include "llvm/CodeGen/MachineDominators.h"
44349cc55cSDimitry Andric #include "llvm/CodeGen/MachineLoopInfo.h"
45349cc55cSDimitry Andric 
46349cc55cSDimitry Andric using namespace llvm;
47349cc55cSDimitry Andric 
48349cc55cSDimitry Andric #define DEBUG_TYPE "aarch64-mi-peephole-opt"
49349cc55cSDimitry Andric 
50349cc55cSDimitry Andric namespace {
51349cc55cSDimitry Andric 
52349cc55cSDimitry Andric struct AArch64MIPeepholeOpt : public MachineFunctionPass {
53349cc55cSDimitry Andric   static char ID;
54349cc55cSDimitry Andric 
55349cc55cSDimitry Andric   AArch64MIPeepholeOpt() : MachineFunctionPass(ID) {
56349cc55cSDimitry Andric     initializeAArch64MIPeepholeOptPass(*PassRegistry::getPassRegistry());
57349cc55cSDimitry Andric   }
58349cc55cSDimitry Andric 
59349cc55cSDimitry Andric   const AArch64InstrInfo *TII;
6004eeddc0SDimitry Andric   const AArch64RegisterInfo *TRI;
61349cc55cSDimitry Andric   MachineLoopInfo *MLI;
62349cc55cSDimitry Andric   MachineRegisterInfo *MRI;
63349cc55cSDimitry Andric 
6481ad6265SDimitry Andric   using OpcodePair = std::pair<unsigned, unsigned>;
65349cc55cSDimitry Andric   template <typename T>
6604eeddc0SDimitry Andric   using SplitAndOpcFunc =
67*bdd1243dSDimitry Andric       std::function<std::optional<OpcodePair>(T, unsigned, T &, T &)>;
6804eeddc0SDimitry Andric   using BuildMIFunc =
6981ad6265SDimitry Andric       std::function<void(MachineInstr &, OpcodePair, unsigned, unsigned,
7081ad6265SDimitry Andric                          Register, Register, Register)>;
7104eeddc0SDimitry Andric 
7204eeddc0SDimitry Andric   /// For instructions where an immediate operand could be split into two
7304eeddc0SDimitry Andric   /// separate immediate instructions, use the splitTwoPartImm two handle the
7404eeddc0SDimitry Andric   /// optimization.
7504eeddc0SDimitry Andric   ///
7604eeddc0SDimitry Andric   /// To implement, the following function types must be passed to
7704eeddc0SDimitry Andric   /// splitTwoPartImm. A SplitAndOpcFunc must be implemented that determines if
7804eeddc0SDimitry Andric   /// splitting the immediate is valid and returns the associated new opcode. A
7904eeddc0SDimitry Andric   /// BuildMIFunc must be implemented to build the two immediate instructions.
8004eeddc0SDimitry Andric   ///
8104eeddc0SDimitry Andric   /// Example Pattern (where IMM would require 2+ MOV instructions):
8204eeddc0SDimitry Andric   ///     %dst = <Instr>rr %src IMM [...]
8304eeddc0SDimitry Andric   /// becomes:
8404eeddc0SDimitry Andric   ///     %tmp = <Instr>ri %src (encode half IMM) [...]
8504eeddc0SDimitry Andric   ///     %dst = <Instr>ri %tmp (encode half IMM) [...]
8604eeddc0SDimitry Andric   template <typename T>
8704eeddc0SDimitry Andric   bool splitTwoPartImm(MachineInstr &MI,
8804eeddc0SDimitry Andric                        SplitAndOpcFunc<T> SplitAndOpc, BuildMIFunc BuildInstr);
8904eeddc0SDimitry Andric 
9004eeddc0SDimitry Andric   bool checkMovImmInstr(MachineInstr &MI, MachineInstr *&MovMI,
9104eeddc0SDimitry Andric                         MachineInstr *&SubregToRegMI);
9204eeddc0SDimitry Andric 
9304eeddc0SDimitry Andric   template <typename T>
9481ad6265SDimitry Andric   bool visitADDSUB(unsigned PosOpc, unsigned NegOpc, MachineInstr &MI);
9504eeddc0SDimitry Andric   template <typename T>
9681ad6265SDimitry Andric   bool visitADDSSUBS(OpcodePair PosOpcs, OpcodePair NegOpcs, MachineInstr &MI);
9781ad6265SDimitry Andric 
9881ad6265SDimitry Andric   template <typename T>
9981ad6265SDimitry Andric   bool visitAND(unsigned Opc, MachineInstr &MI);
10081ad6265SDimitry Andric   bool visitORR(MachineInstr &MI);
101*bdd1243dSDimitry Andric   bool visitINSERT(MachineInstr &MI);
102349cc55cSDimitry Andric   bool runOnMachineFunction(MachineFunction &MF) override;
103349cc55cSDimitry Andric 
104349cc55cSDimitry Andric   StringRef getPassName() const override {
105349cc55cSDimitry Andric     return "AArch64 MI Peephole Optimization pass";
106349cc55cSDimitry Andric   }
107349cc55cSDimitry Andric 
108349cc55cSDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override {
109349cc55cSDimitry Andric     AU.setPreservesCFG();
110349cc55cSDimitry Andric     AU.addRequired<MachineLoopInfo>();
111349cc55cSDimitry Andric     MachineFunctionPass::getAnalysisUsage(AU);
112349cc55cSDimitry Andric   }
113349cc55cSDimitry Andric };
114349cc55cSDimitry Andric 
115349cc55cSDimitry Andric char AArch64MIPeepholeOpt::ID = 0;
116349cc55cSDimitry Andric 
117349cc55cSDimitry Andric } // end anonymous namespace
118349cc55cSDimitry Andric 
119349cc55cSDimitry Andric INITIALIZE_PASS(AArch64MIPeepholeOpt, "aarch64-mi-peephole-opt",
120349cc55cSDimitry Andric                 "AArch64 MI Peephole Optimization", false, false)
121349cc55cSDimitry Andric 
122349cc55cSDimitry Andric template <typename T>
123349cc55cSDimitry Andric static bool splitBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc, T &Imm2Enc) {
124349cc55cSDimitry Andric   T UImm = static_cast<T>(Imm);
125349cc55cSDimitry Andric   if (AArch64_AM::isLogicalImmediate(UImm, RegSize))
126349cc55cSDimitry Andric     return false;
127349cc55cSDimitry Andric 
128349cc55cSDimitry Andric   // If this immediate can be handled by one instruction, do not split it.
129349cc55cSDimitry Andric   SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
130349cc55cSDimitry Andric   AArch64_IMM::expandMOVImm(UImm, RegSize, Insn);
131349cc55cSDimitry Andric   if (Insn.size() == 1)
132349cc55cSDimitry Andric     return false;
133349cc55cSDimitry Andric 
134349cc55cSDimitry Andric   // The bitmask immediate consists of consecutive ones.  Let's say there is
135349cc55cSDimitry Andric   // constant 0b00000000001000000000010000000000 which does not consist of
136349cc55cSDimitry Andric   // consecutive ones. We can split it in to two bitmask immediate like
137349cc55cSDimitry Andric   // 0b00000000001111111111110000000000 and 0b11111111111000000000011111111111.
138349cc55cSDimitry Andric   // If we do AND with these two bitmask immediate, we can see original one.
139349cc55cSDimitry Andric   unsigned LowestBitSet = countTrailingZeros(UImm);
140349cc55cSDimitry Andric   unsigned HighestBitSet = Log2_64(UImm);
141349cc55cSDimitry Andric 
142349cc55cSDimitry Andric   // Create a mask which is filled with one from the position of lowest bit set
143349cc55cSDimitry Andric   // to the position of highest bit set.
144349cc55cSDimitry Andric   T NewImm1 = (static_cast<T>(2) << HighestBitSet) -
145349cc55cSDimitry Andric               (static_cast<T>(1) << LowestBitSet);
146349cc55cSDimitry Andric   // Create a mask which is filled with one outside the position of lowest bit
147349cc55cSDimitry Andric   // set and the position of highest bit set.
148349cc55cSDimitry Andric   T NewImm2 = UImm | ~NewImm1;
149349cc55cSDimitry Andric 
150349cc55cSDimitry Andric   // If the split value is not valid bitmask immediate, do not split this
151349cc55cSDimitry Andric   // constant.
152349cc55cSDimitry Andric   if (!AArch64_AM::isLogicalImmediate(NewImm2, RegSize))
153349cc55cSDimitry Andric     return false;
154349cc55cSDimitry Andric 
155349cc55cSDimitry Andric   Imm1Enc = AArch64_AM::encodeLogicalImmediate(NewImm1, RegSize);
156349cc55cSDimitry Andric   Imm2Enc = AArch64_AM::encodeLogicalImmediate(NewImm2, RegSize);
157349cc55cSDimitry Andric   return true;
158349cc55cSDimitry Andric }
159349cc55cSDimitry Andric 
160349cc55cSDimitry Andric template <typename T>
161349cc55cSDimitry Andric bool AArch64MIPeepholeOpt::visitAND(
16281ad6265SDimitry Andric     unsigned Opc, MachineInstr &MI) {
163349cc55cSDimitry Andric   // Try below transformation.
164349cc55cSDimitry Andric   //
165349cc55cSDimitry Andric   // MOVi32imm + ANDWrr ==> ANDWri + ANDWri
166349cc55cSDimitry Andric   // MOVi64imm + ANDXrr ==> ANDXri + ANDXri
167349cc55cSDimitry Andric   //
168349cc55cSDimitry Andric   // The mov pseudo instruction could be expanded to multiple mov instructions
169349cc55cSDimitry Andric   // later. Let's try to split the constant operand of mov instruction into two
170349cc55cSDimitry Andric   // bitmask immediates. It makes only two AND instructions intead of multiple
171349cc55cSDimitry Andric   // mov + and instructions.
172349cc55cSDimitry Andric 
17304eeddc0SDimitry Andric   return splitTwoPartImm<T>(
17481ad6265SDimitry Andric       MI,
175*bdd1243dSDimitry Andric       [Opc](T Imm, unsigned RegSize, T &Imm0,
176*bdd1243dSDimitry Andric             T &Imm1) -> std::optional<OpcodePair> {
17704eeddc0SDimitry Andric         if (splitBitmaskImm(Imm, RegSize, Imm0, Imm1))
17881ad6265SDimitry Andric           return std::make_pair(Opc, Opc);
179*bdd1243dSDimitry Andric         return std::nullopt;
18004eeddc0SDimitry Andric       },
18181ad6265SDimitry Andric       [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
18204eeddc0SDimitry Andric                    unsigned Imm1, Register SrcReg, Register NewTmpReg,
18304eeddc0SDimitry Andric                    Register NewDstReg) {
184349cc55cSDimitry Andric         DebugLoc DL = MI.getDebugLoc();
18504eeddc0SDimitry Andric         MachineBasicBlock *MBB = MI.getParent();
18681ad6265SDimitry Andric         BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg)
187349cc55cSDimitry Andric             .addReg(SrcReg)
18804eeddc0SDimitry Andric             .addImm(Imm0);
18981ad6265SDimitry Andric         BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg)
190349cc55cSDimitry Andric             .addReg(NewTmpReg)
19104eeddc0SDimitry Andric             .addImm(Imm1);
19204eeddc0SDimitry Andric       });
193349cc55cSDimitry Andric }
194349cc55cSDimitry Andric 
19581ad6265SDimitry Andric bool AArch64MIPeepholeOpt::visitORR(MachineInstr &MI) {
196349cc55cSDimitry Andric   // Check this ORR comes from below zero-extend pattern.
197349cc55cSDimitry Andric   //
198349cc55cSDimitry Andric   // def : Pat<(i64 (zext GPR32:$src)),
199349cc55cSDimitry Andric   //           (SUBREG_TO_REG (i32 0), (ORRWrs WZR, GPR32:$src, 0), sub_32)>;
200349cc55cSDimitry Andric   if (MI.getOperand(3).getImm() != 0)
201349cc55cSDimitry Andric     return false;
202349cc55cSDimitry Andric 
203349cc55cSDimitry Andric   if (MI.getOperand(1).getReg() != AArch64::WZR)
204349cc55cSDimitry Andric     return false;
205349cc55cSDimitry Andric 
206349cc55cSDimitry Andric   MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
207349cc55cSDimitry Andric   if (!SrcMI)
208349cc55cSDimitry Andric     return false;
209349cc55cSDimitry Andric 
210349cc55cSDimitry Andric   // From https://developer.arm.com/documentation/dui0801/b/BABBGCAC
211349cc55cSDimitry Andric   //
212349cc55cSDimitry Andric   // When you use the 32-bit form of an instruction, the upper 32 bits of the
213349cc55cSDimitry Andric   // source registers are ignored and the upper 32 bits of the destination
214349cc55cSDimitry Andric   // register are set to zero.
215349cc55cSDimitry Andric   //
216349cc55cSDimitry Andric   // If AArch64's 32-bit form of instruction defines the source operand of
217349cc55cSDimitry Andric   // zero-extend, we do not need the zero-extend. Let's check the MI's opcode is
218349cc55cSDimitry Andric   // real AArch64 instruction and if it is not, do not process the opcode
219349cc55cSDimitry Andric   // conservatively.
22081ad6265SDimitry Andric   if (SrcMI->getOpcode() == TargetOpcode::COPY &&
22181ad6265SDimitry Andric       SrcMI->getOperand(1).getReg().isVirtual()) {
22281ad6265SDimitry Andric     const TargetRegisterClass *RC =
22381ad6265SDimitry Andric         MRI->getRegClass(SrcMI->getOperand(1).getReg());
22481ad6265SDimitry Andric 
22581ad6265SDimitry Andric     // A COPY from an FPR will become a FMOVSWr, so do so now so that we know
22681ad6265SDimitry Andric     // that the upper bits are zero.
22781ad6265SDimitry Andric     if (RC != &AArch64::FPR32RegClass &&
22881ad6265SDimitry Andric         ((RC != &AArch64::FPR64RegClass && RC != &AArch64::FPR128RegClass) ||
22981ad6265SDimitry Andric          SrcMI->getOperand(1).getSubReg() != AArch64::ssub))
23081ad6265SDimitry Andric       return false;
23181ad6265SDimitry Andric     Register CpySrc = SrcMI->getOperand(1).getReg();
23281ad6265SDimitry Andric     if (SrcMI->getOperand(1).getSubReg() == AArch64::ssub) {
23381ad6265SDimitry Andric       CpySrc = MRI->createVirtualRegister(&AArch64::FPR32RegClass);
23481ad6265SDimitry Andric       BuildMI(*SrcMI->getParent(), SrcMI, SrcMI->getDebugLoc(),
23581ad6265SDimitry Andric               TII->get(TargetOpcode::COPY), CpySrc)
23681ad6265SDimitry Andric           .add(SrcMI->getOperand(1));
23781ad6265SDimitry Andric     }
23881ad6265SDimitry Andric     BuildMI(*SrcMI->getParent(), SrcMI, SrcMI->getDebugLoc(),
23981ad6265SDimitry Andric             TII->get(AArch64::FMOVSWr), SrcMI->getOperand(0).getReg())
24081ad6265SDimitry Andric         .addReg(CpySrc);
24181ad6265SDimitry Andric     SrcMI->eraseFromParent();
24281ad6265SDimitry Andric   }
24381ad6265SDimitry Andric   else if (SrcMI->getOpcode() <= TargetOpcode::GENERIC_OP_END)
244349cc55cSDimitry Andric     return false;
245349cc55cSDimitry Andric 
246349cc55cSDimitry Andric   Register DefReg = MI.getOperand(0).getReg();
247349cc55cSDimitry Andric   Register SrcReg = MI.getOperand(2).getReg();
248349cc55cSDimitry Andric   MRI->replaceRegWith(DefReg, SrcReg);
249349cc55cSDimitry Andric   MRI->clearKillFlags(SrcReg);
25004eeddc0SDimitry Andric   LLVM_DEBUG(dbgs() << "Removed: " << MI << "\n");
25181ad6265SDimitry Andric   MI.eraseFromParent();
25204eeddc0SDimitry Andric 
25304eeddc0SDimitry Andric   return true;
25404eeddc0SDimitry Andric }
25504eeddc0SDimitry Andric 
256*bdd1243dSDimitry Andric bool AArch64MIPeepholeOpt::visitINSERT(MachineInstr &MI) {
257*bdd1243dSDimitry Andric   // Check this INSERT_SUBREG comes from below zero-extend pattern.
258*bdd1243dSDimitry Andric   //
259*bdd1243dSDimitry Andric   // From %reg = INSERT_SUBREG %reg(tied-def 0), %subreg, subidx
260*bdd1243dSDimitry Andric   // To   %reg:subidx =  SUBREG_TO_REG 0, %subreg, subidx
261*bdd1243dSDimitry Andric   //
262*bdd1243dSDimitry Andric   // We're assuming the first operand to INSERT_SUBREG is irrelevant because a
263*bdd1243dSDimitry Andric   // COPY would destroy the upper part of the register anyway
264*bdd1243dSDimitry Andric   if (!MI.isRegTiedToDefOperand(1))
265*bdd1243dSDimitry Andric     return false;
266*bdd1243dSDimitry Andric 
267*bdd1243dSDimitry Andric   Register DstReg = MI.getOperand(0).getReg();
268*bdd1243dSDimitry Andric   const TargetRegisterClass *RC = MRI->getRegClass(DstReg);
269*bdd1243dSDimitry Andric   MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
270*bdd1243dSDimitry Andric   if (!SrcMI)
271*bdd1243dSDimitry Andric     return false;
272*bdd1243dSDimitry Andric 
273*bdd1243dSDimitry Andric   // From https://developer.arm.com/documentation/dui0801/b/BABBGCAC
274*bdd1243dSDimitry Andric   //
275*bdd1243dSDimitry Andric   // When you use the 32-bit form of an instruction, the upper 32 bits of the
276*bdd1243dSDimitry Andric   // source registers are ignored and the upper 32 bits of the destination
277*bdd1243dSDimitry Andric   // register are set to zero.
278*bdd1243dSDimitry Andric   //
279*bdd1243dSDimitry Andric   // If AArch64's 32-bit form of instruction defines the source operand of
280*bdd1243dSDimitry Andric   // zero-extend, we do not need the zero-extend. Let's check the MI's opcode is
281*bdd1243dSDimitry Andric   // real AArch64 instruction and if it is not, do not process the opcode
282*bdd1243dSDimitry Andric   // conservatively.
283*bdd1243dSDimitry Andric   if ((SrcMI->getOpcode() <= TargetOpcode::GENERIC_OP_END) ||
284*bdd1243dSDimitry Andric       !AArch64::GPR64allRegClass.hasSubClassEq(RC))
285*bdd1243dSDimitry Andric     return false;
286*bdd1243dSDimitry Andric 
287*bdd1243dSDimitry Andric   // Build a SUBREG_TO_REG instruction
288*bdd1243dSDimitry Andric   MachineInstr *SubregMI =
289*bdd1243dSDimitry Andric       BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
290*bdd1243dSDimitry Andric               TII->get(TargetOpcode::SUBREG_TO_REG), DstReg)
291*bdd1243dSDimitry Andric           .addImm(0)
292*bdd1243dSDimitry Andric           .add(MI.getOperand(2))
293*bdd1243dSDimitry Andric           .add(MI.getOperand(3));
294*bdd1243dSDimitry Andric   LLVM_DEBUG(dbgs() << MI << "  replace by:\n: " << *SubregMI << "\n");
295*bdd1243dSDimitry Andric   (void)SubregMI;
296*bdd1243dSDimitry Andric   MI.eraseFromParent();
297*bdd1243dSDimitry Andric 
298*bdd1243dSDimitry Andric   return true;
299*bdd1243dSDimitry Andric }
300*bdd1243dSDimitry Andric 
30104eeddc0SDimitry Andric template <typename T>
30204eeddc0SDimitry Andric static bool splitAddSubImm(T Imm, unsigned RegSize, T &Imm0, T &Imm1) {
30304eeddc0SDimitry Andric   // The immediate must be in the form of ((imm0 << 12) + imm1), in which both
30404eeddc0SDimitry Andric   // imm0 and imm1 are non-zero 12-bit unsigned int.
30504eeddc0SDimitry Andric   if ((Imm & 0xfff000) == 0 || (Imm & 0xfff) == 0 ||
30604eeddc0SDimitry Andric       (Imm & ~static_cast<T>(0xffffff)) != 0)
30704eeddc0SDimitry Andric     return false;
30804eeddc0SDimitry Andric 
30904eeddc0SDimitry Andric   // The immediate can not be composed via a single instruction.
31004eeddc0SDimitry Andric   SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
31104eeddc0SDimitry Andric   AArch64_IMM::expandMOVImm(Imm, RegSize, Insn);
31204eeddc0SDimitry Andric   if (Insn.size() == 1)
31304eeddc0SDimitry Andric     return false;
31404eeddc0SDimitry Andric 
31504eeddc0SDimitry Andric   // Split Imm into (Imm0 << 12) + Imm1;
31604eeddc0SDimitry Andric   Imm0 = (Imm >> 12) & 0xfff;
31704eeddc0SDimitry Andric   Imm1 = Imm & 0xfff;
31804eeddc0SDimitry Andric   return true;
31904eeddc0SDimitry Andric }
32004eeddc0SDimitry Andric 
32104eeddc0SDimitry Andric template <typename T>
32204eeddc0SDimitry Andric bool AArch64MIPeepholeOpt::visitADDSUB(
32381ad6265SDimitry Andric     unsigned PosOpc, unsigned NegOpc, MachineInstr &MI) {
32404eeddc0SDimitry Andric   // Try below transformation.
32504eeddc0SDimitry Andric   //
32604eeddc0SDimitry Andric   // MOVi32imm + ADDWrr ==> ADDWri + ADDWri
32704eeddc0SDimitry Andric   // MOVi64imm + ADDXrr ==> ADDXri + ADDXri
32804eeddc0SDimitry Andric   //
32904eeddc0SDimitry Andric   // MOVi32imm + SUBWrr ==> SUBWri + SUBWri
33004eeddc0SDimitry Andric   // MOVi64imm + SUBXrr ==> SUBXri + SUBXri
33104eeddc0SDimitry Andric   //
33204eeddc0SDimitry Andric   // The mov pseudo instruction could be expanded to multiple mov instructions
33304eeddc0SDimitry Andric   // later. Let's try to split the constant operand of mov instruction into two
33404eeddc0SDimitry Andric   // legal add/sub immediates. It makes only two ADD/SUB instructions intead of
33504eeddc0SDimitry Andric   // multiple `mov` + `and/sub` instructions.
33604eeddc0SDimitry Andric 
33704eeddc0SDimitry Andric   return splitTwoPartImm<T>(
33881ad6265SDimitry Andric       MI,
33904eeddc0SDimitry Andric       [PosOpc, NegOpc](T Imm, unsigned RegSize, T &Imm0,
340*bdd1243dSDimitry Andric                        T &Imm1) -> std::optional<OpcodePair> {
34104eeddc0SDimitry Andric         if (splitAddSubImm(Imm, RegSize, Imm0, Imm1))
34281ad6265SDimitry Andric           return std::make_pair(PosOpc, PosOpc);
34304eeddc0SDimitry Andric         if (splitAddSubImm(-Imm, RegSize, Imm0, Imm1))
34481ad6265SDimitry Andric           return std::make_pair(NegOpc, NegOpc);
345*bdd1243dSDimitry Andric         return std::nullopt;
34604eeddc0SDimitry Andric       },
34781ad6265SDimitry Andric       [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
34804eeddc0SDimitry Andric                    unsigned Imm1, Register SrcReg, Register NewTmpReg,
34904eeddc0SDimitry Andric                    Register NewDstReg) {
35004eeddc0SDimitry Andric         DebugLoc DL = MI.getDebugLoc();
35104eeddc0SDimitry Andric         MachineBasicBlock *MBB = MI.getParent();
35281ad6265SDimitry Andric         BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg)
35304eeddc0SDimitry Andric             .addReg(SrcReg)
35404eeddc0SDimitry Andric             .addImm(Imm0)
35504eeddc0SDimitry Andric             .addImm(12);
35681ad6265SDimitry Andric         BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg)
35781ad6265SDimitry Andric             .addReg(NewTmpReg)
35881ad6265SDimitry Andric             .addImm(Imm1)
35981ad6265SDimitry Andric             .addImm(0);
36081ad6265SDimitry Andric       });
36181ad6265SDimitry Andric }
36281ad6265SDimitry Andric 
36381ad6265SDimitry Andric template <typename T>
36481ad6265SDimitry Andric bool AArch64MIPeepholeOpt::visitADDSSUBS(
36581ad6265SDimitry Andric     OpcodePair PosOpcs, OpcodePair NegOpcs, MachineInstr &MI) {
36681ad6265SDimitry Andric   // Try the same transformation as ADDSUB but with additional requirement
36781ad6265SDimitry Andric   // that the condition code usages are only for Equal and Not Equal
36881ad6265SDimitry Andric   return splitTwoPartImm<T>(
36981ad6265SDimitry Andric       MI,
370*bdd1243dSDimitry Andric       [PosOpcs, NegOpcs, &MI, &TRI = TRI,
371*bdd1243dSDimitry Andric        &MRI = MRI](T Imm, unsigned RegSize, T &Imm0,
372*bdd1243dSDimitry Andric                    T &Imm1) -> std::optional<OpcodePair> {
37381ad6265SDimitry Andric         OpcodePair OP;
37481ad6265SDimitry Andric         if (splitAddSubImm(Imm, RegSize, Imm0, Imm1))
37581ad6265SDimitry Andric           OP = PosOpcs;
37681ad6265SDimitry Andric         else if (splitAddSubImm(-Imm, RegSize, Imm0, Imm1))
37781ad6265SDimitry Andric           OP = NegOpcs;
37881ad6265SDimitry Andric         else
379*bdd1243dSDimitry Andric           return std::nullopt;
38081ad6265SDimitry Andric         // Check conditional uses last since it is expensive for scanning
38181ad6265SDimitry Andric         // proceeding instructions
38281ad6265SDimitry Andric         MachineInstr &SrcMI = *MRI->getUniqueVRegDef(MI.getOperand(1).getReg());
383*bdd1243dSDimitry Andric         std::optional<UsedNZCV> NZCVUsed = examineCFlagsUse(SrcMI, MI, *TRI);
38481ad6265SDimitry Andric         if (!NZCVUsed || NZCVUsed->C || NZCVUsed->V)
385*bdd1243dSDimitry Andric           return std::nullopt;
38681ad6265SDimitry Andric         return OP;
38781ad6265SDimitry Andric       },
38881ad6265SDimitry Andric       [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
38981ad6265SDimitry Andric                    unsigned Imm1, Register SrcReg, Register NewTmpReg,
39081ad6265SDimitry Andric                    Register NewDstReg) {
39181ad6265SDimitry Andric         DebugLoc DL = MI.getDebugLoc();
39281ad6265SDimitry Andric         MachineBasicBlock *MBB = MI.getParent();
39381ad6265SDimitry Andric         BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg)
39481ad6265SDimitry Andric             .addReg(SrcReg)
39581ad6265SDimitry Andric             .addImm(Imm0)
39681ad6265SDimitry Andric             .addImm(12);
39781ad6265SDimitry Andric         BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg)
39804eeddc0SDimitry Andric             .addReg(NewTmpReg)
39904eeddc0SDimitry Andric             .addImm(Imm1)
40004eeddc0SDimitry Andric             .addImm(0);
40104eeddc0SDimitry Andric       });
40204eeddc0SDimitry Andric }
40304eeddc0SDimitry Andric 
40404eeddc0SDimitry Andric // Checks if the corresponding MOV immediate instruction is applicable for
40504eeddc0SDimitry Andric // this peephole optimization.
40604eeddc0SDimitry Andric bool AArch64MIPeepholeOpt::checkMovImmInstr(MachineInstr &MI,
40704eeddc0SDimitry Andric                                             MachineInstr *&MovMI,
40804eeddc0SDimitry Andric                                             MachineInstr *&SubregToRegMI) {
40904eeddc0SDimitry Andric   // Check whether current MBB is in loop and the AND is loop invariant.
41004eeddc0SDimitry Andric   MachineBasicBlock *MBB = MI.getParent();
41104eeddc0SDimitry Andric   MachineLoop *L = MLI->getLoopFor(MBB);
41204eeddc0SDimitry Andric   if (L && !L->isLoopInvariant(MI))
41304eeddc0SDimitry Andric     return false;
41404eeddc0SDimitry Andric 
41504eeddc0SDimitry Andric   // Check whether current MI's operand is MOV with immediate.
41604eeddc0SDimitry Andric   MovMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
41704eeddc0SDimitry Andric   if (!MovMI)
41804eeddc0SDimitry Andric     return false;
41904eeddc0SDimitry Andric 
42004eeddc0SDimitry Andric   // If it is SUBREG_TO_REG, check its operand.
42104eeddc0SDimitry Andric   SubregToRegMI = nullptr;
42204eeddc0SDimitry Andric   if (MovMI->getOpcode() == TargetOpcode::SUBREG_TO_REG) {
42304eeddc0SDimitry Andric     SubregToRegMI = MovMI;
42404eeddc0SDimitry Andric     MovMI = MRI->getUniqueVRegDef(MovMI->getOperand(2).getReg());
42504eeddc0SDimitry Andric     if (!MovMI)
42604eeddc0SDimitry Andric       return false;
42704eeddc0SDimitry Andric   }
42804eeddc0SDimitry Andric 
42904eeddc0SDimitry Andric   if (MovMI->getOpcode() != AArch64::MOVi32imm &&
43004eeddc0SDimitry Andric       MovMI->getOpcode() != AArch64::MOVi64imm)
43104eeddc0SDimitry Andric     return false;
43204eeddc0SDimitry Andric 
43304eeddc0SDimitry Andric   // If the MOV has multiple uses, do not split the immediate because it causes
43404eeddc0SDimitry Andric   // more instructions.
43504eeddc0SDimitry Andric   if (!MRI->hasOneUse(MovMI->getOperand(0).getReg()))
43604eeddc0SDimitry Andric     return false;
43704eeddc0SDimitry Andric   if (SubregToRegMI && !MRI->hasOneUse(SubregToRegMI->getOperand(0).getReg()))
43804eeddc0SDimitry Andric     return false;
43904eeddc0SDimitry Andric 
44004eeddc0SDimitry Andric   // It is OK to perform this peephole optimization.
44104eeddc0SDimitry Andric   return true;
44204eeddc0SDimitry Andric }
44304eeddc0SDimitry Andric 
44404eeddc0SDimitry Andric template <typename T>
44504eeddc0SDimitry Andric bool AArch64MIPeepholeOpt::splitTwoPartImm(
44681ad6265SDimitry Andric     MachineInstr &MI,
44704eeddc0SDimitry Andric     SplitAndOpcFunc<T> SplitAndOpc, BuildMIFunc BuildInstr) {
44804eeddc0SDimitry Andric   unsigned RegSize = sizeof(T) * 8;
44904eeddc0SDimitry Andric   assert((RegSize == 32 || RegSize == 64) &&
45004eeddc0SDimitry Andric          "Invalid RegSize for legal immediate peephole optimization");
45104eeddc0SDimitry Andric 
45204eeddc0SDimitry Andric   // Perform several essential checks against current MI.
45304eeddc0SDimitry Andric   MachineInstr *MovMI, *SubregToRegMI;
45404eeddc0SDimitry Andric   if (!checkMovImmInstr(MI, MovMI, SubregToRegMI))
45504eeddc0SDimitry Andric     return false;
45604eeddc0SDimitry Andric 
45704eeddc0SDimitry Andric   // Split the immediate to Imm0 and Imm1, and calculate the Opcode.
45804eeddc0SDimitry Andric   T Imm = static_cast<T>(MovMI->getOperand(1).getImm()), Imm0, Imm1;
45904eeddc0SDimitry Andric   // For the 32 bit form of instruction, the upper 32 bits of the destination
46004eeddc0SDimitry Andric   // register are set to zero. If there is SUBREG_TO_REG, set the upper 32 bits
46104eeddc0SDimitry Andric   // of Imm to zero. This is essential if the Immediate value was a negative
46204eeddc0SDimitry Andric   // number since it was sign extended when we assign to the 64-bit Imm.
46304eeddc0SDimitry Andric   if (SubregToRegMI)
46404eeddc0SDimitry Andric     Imm &= 0xFFFFFFFF;
46581ad6265SDimitry Andric   OpcodePair Opcode;
46604eeddc0SDimitry Andric   if (auto R = SplitAndOpc(Imm, RegSize, Imm0, Imm1))
46781ad6265SDimitry Andric     Opcode = *R;
46804eeddc0SDimitry Andric   else
46904eeddc0SDimitry Andric     return false;
47004eeddc0SDimitry Andric 
47181ad6265SDimitry Andric   // Create new MIs using the first and second opcodes. Opcodes might differ for
47281ad6265SDimitry Andric   // flag setting operations that should only set flags on second instruction.
47381ad6265SDimitry Andric   // NewTmpReg = Opcode.first SrcReg Imm0
47481ad6265SDimitry Andric   // NewDstReg = Opcode.second NewTmpReg Imm1
47581ad6265SDimitry Andric 
47681ad6265SDimitry Andric   // Determine register classes for destinations and register operands
47704eeddc0SDimitry Andric   MachineFunction *MF = MI.getMF();
47881ad6265SDimitry Andric   const TargetRegisterClass *FirstInstrDstRC =
47981ad6265SDimitry Andric       TII->getRegClass(TII->get(Opcode.first), 0, TRI, *MF);
48081ad6265SDimitry Andric   const TargetRegisterClass *FirstInstrOperandRC =
48181ad6265SDimitry Andric       TII->getRegClass(TII->get(Opcode.first), 1, TRI, *MF);
48281ad6265SDimitry Andric   const TargetRegisterClass *SecondInstrDstRC =
48381ad6265SDimitry Andric       (Opcode.first == Opcode.second)
48481ad6265SDimitry Andric           ? FirstInstrDstRC
48581ad6265SDimitry Andric           : TII->getRegClass(TII->get(Opcode.second), 0, TRI, *MF);
48681ad6265SDimitry Andric   const TargetRegisterClass *SecondInstrOperandRC =
48781ad6265SDimitry Andric       (Opcode.first == Opcode.second)
48881ad6265SDimitry Andric           ? FirstInstrOperandRC
48981ad6265SDimitry Andric           : TII->getRegClass(TII->get(Opcode.second), 1, TRI, *MF);
49081ad6265SDimitry Andric 
49181ad6265SDimitry Andric   // Get old registers destinations and new register destinations
49204eeddc0SDimitry Andric   Register DstReg = MI.getOperand(0).getReg();
49304eeddc0SDimitry Andric   Register SrcReg = MI.getOperand(1).getReg();
49481ad6265SDimitry Andric   Register NewTmpReg = MRI->createVirtualRegister(FirstInstrDstRC);
49581ad6265SDimitry Andric   // In the situation that DstReg is not Virtual (likely WZR or XZR), we want to
49681ad6265SDimitry Andric   // reuse that same destination register.
49781ad6265SDimitry Andric   Register NewDstReg = DstReg.isVirtual()
49881ad6265SDimitry Andric                            ? MRI->createVirtualRegister(SecondInstrDstRC)
49981ad6265SDimitry Andric                            : DstReg;
50004eeddc0SDimitry Andric 
50181ad6265SDimitry Andric   // Constrain registers based on their new uses
50281ad6265SDimitry Andric   MRI->constrainRegClass(SrcReg, FirstInstrOperandRC);
50381ad6265SDimitry Andric   MRI->constrainRegClass(NewTmpReg, SecondInstrOperandRC);
50481ad6265SDimitry Andric   if (DstReg != NewDstReg)
50504eeddc0SDimitry Andric     MRI->constrainRegClass(NewDstReg, MRI->getRegClass(DstReg));
50604eeddc0SDimitry Andric 
50781ad6265SDimitry Andric   // Call the delegating operation to build the instruction
50804eeddc0SDimitry Andric   BuildInstr(MI, Opcode, Imm0, Imm1, SrcReg, NewTmpReg, NewDstReg);
50904eeddc0SDimitry Andric 
51004eeddc0SDimitry Andric   // replaceRegWith changes MI's definition register. Keep it for SSA form until
51181ad6265SDimitry Andric   // deleting MI. Only if we made a new destination register.
51281ad6265SDimitry Andric   if (DstReg != NewDstReg) {
51381ad6265SDimitry Andric     MRI->replaceRegWith(DstReg, NewDstReg);
51404eeddc0SDimitry Andric     MI.getOperand(0).setReg(DstReg);
51581ad6265SDimitry Andric   }
51604eeddc0SDimitry Andric 
51704eeddc0SDimitry Andric   // Record the MIs need to be removed.
51881ad6265SDimitry Andric   MI.eraseFromParent();
51904eeddc0SDimitry Andric   if (SubregToRegMI)
52081ad6265SDimitry Andric     SubregToRegMI->eraseFromParent();
52181ad6265SDimitry Andric   MovMI->eraseFromParent();
522349cc55cSDimitry Andric 
523349cc55cSDimitry Andric   return true;
524349cc55cSDimitry Andric }
525349cc55cSDimitry Andric 
526349cc55cSDimitry Andric bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
527349cc55cSDimitry Andric   if (skipFunction(MF.getFunction()))
528349cc55cSDimitry Andric     return false;
529349cc55cSDimitry Andric 
530349cc55cSDimitry Andric   TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
53104eeddc0SDimitry Andric   TRI = static_cast<const AArch64RegisterInfo *>(
53204eeddc0SDimitry Andric       MF.getSubtarget().getRegisterInfo());
533349cc55cSDimitry Andric   MLI = &getAnalysis<MachineLoopInfo>();
534349cc55cSDimitry Andric   MRI = &MF.getRegInfo();
535349cc55cSDimitry Andric 
53604eeddc0SDimitry Andric   assert(MRI->isSSA() && "Expected to be run on SSA form!");
537349cc55cSDimitry Andric 
538349cc55cSDimitry Andric   bool Changed = false;
539349cc55cSDimitry Andric 
540349cc55cSDimitry Andric   for (MachineBasicBlock &MBB : MF) {
54181ad6265SDimitry Andric     for (MachineInstr &MI : make_early_inc_range(MBB)) {
542349cc55cSDimitry Andric       switch (MI.getOpcode()) {
543349cc55cSDimitry Andric       default:
544349cc55cSDimitry Andric         break;
545*bdd1243dSDimitry Andric       case AArch64::INSERT_SUBREG:
546*bdd1243dSDimitry Andric         Changed = visitINSERT(MI);
547*bdd1243dSDimitry Andric         break;
548349cc55cSDimitry Andric       case AArch64::ANDWrr:
54981ad6265SDimitry Andric         Changed = visitAND<uint32_t>(AArch64::ANDWri, MI);
550349cc55cSDimitry Andric         break;
551349cc55cSDimitry Andric       case AArch64::ANDXrr:
55281ad6265SDimitry Andric         Changed = visitAND<uint64_t>(AArch64::ANDXri, MI);
553349cc55cSDimitry Andric         break;
554349cc55cSDimitry Andric       case AArch64::ORRWrs:
55581ad6265SDimitry Andric         Changed = visitORR(MI);
55604eeddc0SDimitry Andric         break;
55704eeddc0SDimitry Andric       case AArch64::ADDWrr:
55881ad6265SDimitry Andric         Changed = visitADDSUB<uint32_t>(AArch64::ADDWri, AArch64::SUBWri, MI);
55904eeddc0SDimitry Andric         break;
56004eeddc0SDimitry Andric       case AArch64::SUBWrr:
56181ad6265SDimitry Andric         Changed = visitADDSUB<uint32_t>(AArch64::SUBWri, AArch64::ADDWri, MI);
56204eeddc0SDimitry Andric         break;
56304eeddc0SDimitry Andric       case AArch64::ADDXrr:
56481ad6265SDimitry Andric         Changed = visitADDSUB<uint64_t>(AArch64::ADDXri, AArch64::SUBXri, MI);
56504eeddc0SDimitry Andric         break;
56604eeddc0SDimitry Andric       case AArch64::SUBXrr:
56781ad6265SDimitry Andric         Changed = visitADDSUB<uint64_t>(AArch64::SUBXri, AArch64::ADDXri, MI);
56881ad6265SDimitry Andric         break;
56981ad6265SDimitry Andric       case AArch64::ADDSWrr:
57081ad6265SDimitry Andric         Changed = visitADDSSUBS<uint32_t>({AArch64::ADDWri, AArch64::ADDSWri},
57181ad6265SDimitry Andric                                           {AArch64::SUBWri, AArch64::SUBSWri},
57281ad6265SDimitry Andric                                           MI);
57381ad6265SDimitry Andric         break;
57481ad6265SDimitry Andric       case AArch64::SUBSWrr:
57581ad6265SDimitry Andric         Changed = visitADDSSUBS<uint32_t>({AArch64::SUBWri, AArch64::SUBSWri},
57681ad6265SDimitry Andric                                           {AArch64::ADDWri, AArch64::ADDSWri},
57781ad6265SDimitry Andric                                           MI);
57881ad6265SDimitry Andric         break;
57981ad6265SDimitry Andric       case AArch64::ADDSXrr:
58081ad6265SDimitry Andric         Changed = visitADDSSUBS<uint64_t>({AArch64::ADDXri, AArch64::ADDSXri},
58181ad6265SDimitry Andric                                           {AArch64::SUBXri, AArch64::SUBSXri},
58281ad6265SDimitry Andric                                           MI);
58381ad6265SDimitry Andric         break;
58481ad6265SDimitry Andric       case AArch64::SUBSXrr:
58581ad6265SDimitry Andric         Changed = visitADDSSUBS<uint64_t>({AArch64::SUBXri, AArch64::SUBSXri},
58681ad6265SDimitry Andric                                           {AArch64::ADDXri, AArch64::ADDSXri},
58781ad6265SDimitry Andric                                           MI);
58804eeddc0SDimitry Andric         break;
589349cc55cSDimitry Andric       }
590349cc55cSDimitry Andric     }
591349cc55cSDimitry Andric   }
592349cc55cSDimitry Andric 
593349cc55cSDimitry Andric   return Changed;
594349cc55cSDimitry Andric }
595349cc55cSDimitry Andric 
596349cc55cSDimitry Andric FunctionPass *llvm::createAArch64MIPeepholeOptPass() {
597349cc55cSDimitry Andric   return new AArch64MIPeepholeOpt();
598349cc55cSDimitry Andric }
599