xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp (revision 5e801ac66d24704442eba426ed13c3effb8a34e7)
//===- AArch64MIPeepholeOpt.cpp - AArch64 MI peephole optimization pass ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass performs the peephole optimizations below at the MIR level.
//
// 1. MOVi32imm + ANDWrr ==> ANDWri + ANDWri
//    MOVi64imm + ANDXrr ==> ANDXri + ANDXri
//
//    The mov pseudo instruction may later be expanded into multiple mov
//    instructions. In that case, we can try to split the constant operand of
//    the mov instruction into two bitmask immediates, which yields two AND
//    instructions instead of multiple `mov` + `and` instructions.
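//
//    For example, for the constant 0x00200400 (an illustrative sketch; the
//    virtual register numbers and the rendering of the encoded logical
//    immediates are hypothetical):
//
//      %1:gpr32 = MOVi32imm 2098176
//      %2:gpr32 = ANDWrr %0, %1
//    ==>
//      %3:gpr32 = ANDWri %0, <encoding of 0x003ffc00>
//      %2:gpr32 = ANDWri %3, <encoding of 0xffe007ff>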
//
// 2. Remove redundant ORRWrs instructions generated by zero-extension.
//
//    %3:gpr32 = ORRWrs $wzr, %2, 0
//    %4:gpr64 = SUBREG_TO_REG 0, %3, %subreg.sub_32
//
//    If the source operand of the ORRWrs is defined by the 32-bit form of an
//    AArch64 instruction, the ORRWrs can be removed because that instruction
//    already sets the upper 32 bits of the source operand to zero.
//
//===----------------------------------------------------------------------===//

#include "AArch64ExpandImm.h"
#include "AArch64InstrInfo.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineLoopInfo.h"

using namespace llvm;

#define DEBUG_TYPE "aarch64-mi-peephole-opt"

namespace {

struct AArch64MIPeepholeOpt : public MachineFunctionPass {
  static char ID;

  AArch64MIPeepholeOpt() : MachineFunctionPass(ID) {
    initializeAArch64MIPeepholeOptPass(*PassRegistry::getPassRegistry());
  }

  const AArch64InstrInfo *TII;
  MachineLoopInfo *MLI;
  MachineRegisterInfo *MRI;

  template <typename T>
  bool visitAND(MachineInstr &MI,
                SmallSetVector<MachineInstr *, 8> &ToBeRemoved);
  bool visitORR(MachineInstr &MI,
                SmallSetVector<MachineInstr *, 8> &ToBeRemoved);
  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override {
    return "AArch64 MI Peephole Optimization pass";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addRequired<MachineLoopInfo>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

char AArch64MIPeepholeOpt::ID = 0;

} // end anonymous namespace

INITIALIZE_PASS(AArch64MIPeepholeOpt, "aarch64-mi-peephole-opt",
                "AArch64 MI Peephole Optimization", false, false)

template <typename T>
static bool splitBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc, T &Imm2Enc) {
  T UImm = static_cast<T>(Imm);
  if (AArch64_AM::isLogicalImmediate(UImm, RegSize))
    return false;

  // If this immediate can be handled by one instruction, do not split it.
  SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
  AArch64_IMM::expandMOVImm(UImm, RegSize, Insn);
  if (Insn.size() == 1)
    return false;

  // A bitmask immediate consists of a run of consecutive ones, possibly
  // rotated and repeated. Take, for example, the constant
  // 0b00000000001000000000010000000000, whose set bits are not consecutive, so
  // it is not a valid bitmask immediate. It can be split into the two bitmask
  // immediates 0b00000000001111111111110000000000 and
  // 0b11111111111000000000011111111111; ANDing these two masks together yields
  // the original constant.
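  // As a worked example (values chosen for illustration), for
  // UImm = 0x00200400 (bits 10 and 21 set) the code below computes
  //   NewImm1 = 0x003ffc00   (bits 10..21 set)
  //   NewImm2 = 0xffe007ff   (UImm | ~NewImm1)
  // and NewImm1 & NewImm2 == UImm, so "x AND UImm" becomes
  // "(x AND NewImm1) AND NewImm2".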
  unsigned LowestBitSet = countTrailingZeros(UImm);
  unsigned HighestBitSet = Log2_64(UImm);

  // Create a mask that is filled with ones from the position of the lowest set
  // bit to the position of the highest set bit.
  T NewImm1 = (static_cast<T>(2) << HighestBitSet) -
              (static_cast<T>(1) << LowestBitSet);
  // Create a mask that keeps the original bits inside that range and is filled
  // with ones outside of it.
  T NewImm2 = UImm | ~NewImm1;

  // If the split value is not a valid bitmask immediate, do not split this
  // constant.
  if (!AArch64_AM::isLogicalImmediate(NewImm2, RegSize))
    return false;

  Imm1Enc = AArch64_AM::encodeLogicalImmediate(NewImm1, RegSize);
  Imm2Enc = AArch64_AM::encodeLogicalImmediate(NewImm2, RegSize);
  return true;
}

template <typename T>
bool AArch64MIPeepholeOpt::visitAND(
    MachineInstr &MI, SmallSetVector<MachineInstr *, 8> &ToBeRemoved) {
  // Try the transformation below.
  //
  // MOVi32imm + ANDWrr ==> ANDWri + ANDWri
  // MOVi64imm + ANDXrr ==> ANDXri + ANDXri
  //
  // The mov pseudo instruction may later be expanded into multiple mov
  // instructions. Try to split the constant operand of the mov instruction
  // into two bitmask immediates, which yields only two AND instructions
  // instead of multiple mov + and instructions.

  unsigned RegSize = sizeof(T) * 8;
  assert((RegSize == 32 || RegSize == 64) &&
         "Invalid RegSize for AND bitmask peephole optimization");

  // Check whether the AND's basic block is inside a loop and, if it is,
  // whether the AND is loop invariant.
  MachineBasicBlock *MBB = MI.getParent();
  MachineLoop *L = MLI->getLoopFor(MBB);
  if (L && !L->isLoopInvariant(MI))
    return false;

  // Check whether the AND's source operand is defined by a MOV with an
  // immediate.
  MachineInstr *MovMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
  if (!MovMI)
    return false;

  MachineInstr *SubregToRegMI = nullptr;
  // If it is a SUBREG_TO_REG, look through it to its source operand.
  if (MovMI->getOpcode() == TargetOpcode::SUBREG_TO_REG) {
    SubregToRegMI = MovMI;
    MovMI = MRI->getUniqueVRegDef(MovMI->getOperand(2).getReg());
    if (!MovMI)
      return false;
  }

  if (MovMI->getOpcode() != AArch64::MOVi32imm &&
      MovMI->getOpcode() != AArch64::MOVi64imm)
    return false;

  // If the MOV has multiple uses, do not split the immediate, because doing so
  // would create more instructions.
  if (!MRI->hasOneUse(MovMI->getOperand(0).getReg()))
    return false;

  if (SubregToRegMI && !MRI->hasOneUse(SubregToRegMI->getOperand(0).getReg()))
    return false;

  // Split the bitmask immediate into two.
  T UImm = static_cast<T>(MovMI->getOperand(1).getImm());
  // For the 32-bit form of an instruction, the upper 32 bits of the
  // destination register are set to zero. If there is a SUBREG_TO_REG, set the
  // upper 32 bits of UImm to zero.
  if (SubregToRegMI)
    UImm &= 0xFFFFFFFF;
  T Imm1Enc;
  T Imm2Enc;
  if (!splitBitmaskImm(UImm, RegSize, Imm1Enc, Imm2Enc))
    return false;

  // Create new AND MIs.
  DebugLoc DL = MI.getDebugLoc();
  const TargetRegisterClass *ANDImmRC =
      (RegSize == 32) ? &AArch64::GPR32spRegClass : &AArch64::GPR64spRegClass;
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  Register NewTmpReg = MRI->createVirtualRegister(ANDImmRC);
  Register NewDstReg = MRI->createVirtualRegister(ANDImmRC);
  unsigned Opcode = (RegSize == 32) ? AArch64::ANDWri : AArch64::ANDXri;

  MRI->constrainRegClass(NewTmpReg, MRI->getRegClass(SrcReg));
  BuildMI(*MBB, MI, DL, TII->get(Opcode), NewTmpReg)
      .addReg(SrcReg)
      .addImm(Imm1Enc);

  MRI->constrainRegClass(NewDstReg, MRI->getRegClass(DstReg));
  BuildMI(*MBB, MI, DL, TII->get(Opcode), NewDstReg)
      .addReg(NewTmpReg)
      .addImm(Imm2Enc);

  MRI->replaceRegWith(DstReg, NewDstReg);
  // replaceRegWith changes MI's definition register as well. Restore it so the
  // code stays in SSA form until MI is deleted.
  MI.getOperand(0).setReg(DstReg);

  ToBeRemoved.insert(&MI);
  if (SubregToRegMI)
    ToBeRemoved.insert(SubregToRegMI);
  ToBeRemoved.insert(MovMI);

  return true;
}

bool AArch64MIPeepholeOpt::visitORR(
    MachineInstr &MI, SmallSetVector<MachineInstr *, 8> &ToBeRemoved) {
  // Check whether this ORR comes from the zero-extend pattern below.
  //
  // def : Pat<(i64 (zext GPR32:$src)),
  //           (SUBREG_TO_REG (i32 0), (ORRWrs WZR, GPR32:$src, 0), sub_32)>;
  if (MI.getOperand(3).getImm() != 0)
    return false;

  if (MI.getOperand(1).getReg() != AArch64::WZR)
    return false;

  MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
  if (!SrcMI)
    return false;

  // From https://developer.arm.com/documentation/dui0801/b/BABBGCAC
  //
  // When you use the 32-bit form of an instruction, the upper 32 bits of the
  // source registers are ignored and the upper 32 bits of the destination
  // register are set to zero.
  //
  // If the source operand of the zero-extend is defined by the 32-bit form of
  // an AArch64 instruction, the zero-extend is not needed. Check that the
  // defining instruction is a real AArch64 instruction; if it is not, be
  // conservative and do not touch it.
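  //
  // For example (an illustrative sketch; the virtual registers and the ADDWrr
  // producer are hypothetical):
  //
  //   %1:gpr32 = ADDWrr %a, %b
  //   %2:gpr32 = ORRWrs $wzr, %1, 0
  //   %3:gpr64 = SUBREG_TO_REG 0, %2, %subreg.sub_32
  // ==>
  //   %3:gpr64 = SUBREG_TO_REG 0, %1, %subreg.sub_32
  //
  // because the 32-bit ADDWrr already zero-extends its result into the full
  // 64-bit register, making the ORRWrs copy redundant.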
  if (SrcMI->getOpcode() <= TargetOpcode::GENERIC_OP_END)
    return false;

  Register DefReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(2).getReg();
  MRI->replaceRegWith(DefReg, SrcReg);
  MRI->clearKillFlags(SrcReg);
  // replaceRegWith changes MI's definition register as well. Restore it so the
  // code stays in SSA form until MI is deleted.
  MI.getOperand(0).setReg(DefReg);
  ToBeRemoved.insert(&MI);

  LLVM_DEBUG({ dbgs() << "Removed: " << MI << "\n"; });

  return true;
}

bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
  MLI = &getAnalysis<MachineLoopInfo>();
  MRI = &MF.getRegInfo();

  if (!MRI->isSSA())
    return false;

  bool Changed = false;
  SmallSetVector<MachineInstr *, 8> ToBeRemoved;

  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : MBB) {
      switch (MI.getOpcode()) {
      default:
        break;
      case AArch64::ANDWrr:
        Changed |= visitAND<uint32_t>(MI, ToBeRemoved);
        break;
      case AArch64::ANDXrr:
        Changed |= visitAND<uint64_t>(MI, ToBeRemoved);
        break;
      case AArch64::ORRWrs:
        Changed |= visitORR(MI, ToBeRemoved);
        break;
      }
    }
  }

  for (MachineInstr *MI : ToBeRemoved)
    MI->eraseFromParent();

  return Changed;
}

FunctionPass *llvm::createAArch64MIPeepholeOptPass() {
  return new AArch64MIPeepholeOpt();
}