1 //===- AArch64MIPeepholeOpt.cpp - AArch64 MI peephole optimization pass ---===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass performs below peephole optimizations on MIR level. 10 // 11 // 1. MOVi32imm + ANDWrr ==> ANDWri + ANDWri 12 // MOVi64imm + ANDXrr ==> ANDXri + ANDXri 13 // 14 // The mov pseudo instruction could be expanded to multiple mov instructions 15 // later. In this case, we could try to split the constant operand of mov 16 // instruction into two bitmask immediates. It makes two AND instructions 17 // intead of multiple `mov` + `and` instructions. 18 // 19 // 2. Remove redundant ORRWrs which is generated by zero-extend. 20 // 21 // %3:gpr32 = ORRWrs $wzr, %2, 0 22 // %4:gpr64 = SUBREG_TO_REG 0, %3, %subreg.sub_32 23 // 24 // If AArch64's 32-bit form of instruction defines the source operand of 25 // ORRWrs, we can remove the ORRWrs because the upper 32 bits of the source 26 // operand are set to zero. 27 // 28 //===----------------------------------------------------------------------===// 29 30 #include "AArch64ExpandImm.h" 31 #include "AArch64InstrInfo.h" 32 #include "MCTargetDesc/AArch64AddressingModes.h" 33 #include "llvm/ADT/SetVector.h" 34 #include "llvm/CodeGen/MachineDominators.h" 35 #include "llvm/CodeGen/MachineLoopInfo.h" 36 37 using namespace llvm; 38 39 #define DEBUG_TYPE "aarch64-mi-peephole-opt" 40 41 namespace { 42 43 struct AArch64MIPeepholeOpt : public MachineFunctionPass { 44 static char ID; 45 46 AArch64MIPeepholeOpt() : MachineFunctionPass(ID) { 47 initializeAArch64MIPeepholeOptPass(*PassRegistry::getPassRegistry()); 48 } 49 50 const AArch64InstrInfo *TII; 51 MachineLoopInfo *MLI; 52 MachineRegisterInfo *MRI; 53 54 template <typename T> 55 bool visitAND(MachineInstr &MI, 56 SmallSetVector<MachineInstr *, 8> &ToBeRemoved); 57 bool visitORR(MachineInstr &MI, 58 SmallSetVector<MachineInstr *, 8> &ToBeRemoved); 59 bool runOnMachineFunction(MachineFunction &MF) override; 60 61 StringRef getPassName() const override { 62 return "AArch64 MI Peephole Optimization pass"; 63 } 64 65 void getAnalysisUsage(AnalysisUsage &AU) const override { 66 AU.setPreservesCFG(); 67 AU.addRequired<MachineLoopInfo>(); 68 MachineFunctionPass::getAnalysisUsage(AU); 69 } 70 }; 71 72 char AArch64MIPeepholeOpt::ID = 0; 73 74 } // end anonymous namespace 75 76 INITIALIZE_PASS(AArch64MIPeepholeOpt, "aarch64-mi-peephole-opt", 77 "AArch64 MI Peephole Optimization", false, false) 78 79 template <typename T> 80 static bool splitBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc, T &Imm2Enc) { 81 T UImm = static_cast<T>(Imm); 82 if (AArch64_AM::isLogicalImmediate(UImm, RegSize)) 83 return false; 84 85 // If this immediate can be handled by one instruction, do not split it. 86 SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn; 87 AArch64_IMM::expandMOVImm(UImm, RegSize, Insn); 88 if (Insn.size() == 1) 89 return false; 90 91 // The bitmask immediate consists of consecutive ones. Let's say there is 92 // constant 0b00000000001000000000010000000000 which does not consist of 93 // consecutive ones. We can split it in to two bitmask immediate like 94 // 0b00000000001111111111110000000000 and 0b11111111111000000000011111111111. 95 // If we do AND with these two bitmask immediate, we can see original one. 96 unsigned LowestBitSet = countTrailingZeros(UImm); 97 unsigned HighestBitSet = Log2_64(UImm); 98 99 // Create a mask which is filled with one from the position of lowest bit set 100 // to the position of highest bit set. 101 T NewImm1 = (static_cast<T>(2) << HighestBitSet) - 102 (static_cast<T>(1) << LowestBitSet); 103 // Create a mask which is filled with one outside the position of lowest bit 104 // set and the position of highest bit set. 105 T NewImm2 = UImm | ~NewImm1; 106 107 // If the split value is not valid bitmask immediate, do not split this 108 // constant. 109 if (!AArch64_AM::isLogicalImmediate(NewImm2, RegSize)) 110 return false; 111 112 Imm1Enc = AArch64_AM::encodeLogicalImmediate(NewImm1, RegSize); 113 Imm2Enc = AArch64_AM::encodeLogicalImmediate(NewImm2, RegSize); 114 return true; 115 } 116 117 template <typename T> 118 bool AArch64MIPeepholeOpt::visitAND( 119 MachineInstr &MI, SmallSetVector<MachineInstr *, 8> &ToBeRemoved) { 120 // Try below transformation. 121 // 122 // MOVi32imm + ANDWrr ==> ANDWri + ANDWri 123 // MOVi64imm + ANDXrr ==> ANDXri + ANDXri 124 // 125 // The mov pseudo instruction could be expanded to multiple mov instructions 126 // later. Let's try to split the constant operand of mov instruction into two 127 // bitmask immediates. It makes only two AND instructions intead of multiple 128 // mov + and instructions. 129 130 unsigned RegSize = sizeof(T) * 8; 131 assert((RegSize == 32 || RegSize == 64) && 132 "Invalid RegSize for AND bitmask peephole optimization"); 133 134 // Check whether AND's MBB is in loop and the AND is loop invariant. 135 MachineBasicBlock *MBB = MI.getParent(); 136 MachineLoop *L = MLI->getLoopFor(MBB); 137 if (L && !L->isLoopInvariant(MI)) 138 return false; 139 140 // Check whether AND's operand is MOV with immediate. 141 MachineInstr *MovMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg()); 142 if (!MovMI) 143 return false; 144 145 MachineInstr *SubregToRegMI = nullptr; 146 // If it is SUBREG_TO_REG, check its operand. 147 if (MovMI->getOpcode() == TargetOpcode::SUBREG_TO_REG) { 148 SubregToRegMI = MovMI; 149 MovMI = MRI->getUniqueVRegDef(MovMI->getOperand(2).getReg()); 150 if (!MovMI) 151 return false; 152 } 153 154 if (MovMI->getOpcode() != AArch64::MOVi32imm && 155 MovMI->getOpcode() != AArch64::MOVi64imm) 156 return false; 157 158 // If the MOV has multiple uses, do not split the immediate because it causes 159 // more instructions. 160 if (!MRI->hasOneUse(MovMI->getOperand(0).getReg())) 161 return false; 162 163 if (SubregToRegMI && !MRI->hasOneUse(SubregToRegMI->getOperand(0).getReg())) 164 return false; 165 166 // Split the bitmask immediate into two. 167 T UImm = static_cast<T>(MovMI->getOperand(1).getImm()); 168 // For the 32 bit form of instruction, the upper 32 bits of the destination 169 // register are set to zero. If there is SUBREG_TO_REG, set the upper 32 bits 170 // of UImm to zero. 171 if (SubregToRegMI) 172 UImm &= 0xFFFFFFFF; 173 T Imm1Enc; 174 T Imm2Enc; 175 if (!splitBitmaskImm(UImm, RegSize, Imm1Enc, Imm2Enc)) 176 return false; 177 178 // Create new AND MIs. 179 DebugLoc DL = MI.getDebugLoc(); 180 const TargetRegisterClass *ANDImmRC = 181 (RegSize == 32) ? &AArch64::GPR32spRegClass : &AArch64::GPR64spRegClass; 182 Register DstReg = MI.getOperand(0).getReg(); 183 Register SrcReg = MI.getOperand(1).getReg(); 184 Register NewTmpReg = MRI->createVirtualRegister(ANDImmRC); 185 Register NewDstReg = MRI->createVirtualRegister(ANDImmRC); 186 unsigned Opcode = (RegSize == 32) ? AArch64::ANDWri : AArch64::ANDXri; 187 188 MRI->constrainRegClass(NewTmpReg, MRI->getRegClass(SrcReg)); 189 BuildMI(*MBB, MI, DL, TII->get(Opcode), NewTmpReg) 190 .addReg(SrcReg) 191 .addImm(Imm1Enc); 192 193 MRI->constrainRegClass(NewDstReg, MRI->getRegClass(DstReg)); 194 BuildMI(*MBB, MI, DL, TII->get(Opcode), NewDstReg) 195 .addReg(NewTmpReg) 196 .addImm(Imm2Enc); 197 198 MRI->replaceRegWith(DstReg, NewDstReg); 199 // replaceRegWith changes MI's definition register. Keep it for SSA form until 200 // deleting MI. 201 MI.getOperand(0).setReg(DstReg); 202 203 ToBeRemoved.insert(&MI); 204 if (SubregToRegMI) 205 ToBeRemoved.insert(SubregToRegMI); 206 ToBeRemoved.insert(MovMI); 207 208 return true; 209 } 210 211 bool AArch64MIPeepholeOpt::visitORR( 212 MachineInstr &MI, SmallSetVector<MachineInstr *, 8> &ToBeRemoved) { 213 // Check this ORR comes from below zero-extend pattern. 214 // 215 // def : Pat<(i64 (zext GPR32:$src)), 216 // (SUBREG_TO_REG (i32 0), (ORRWrs WZR, GPR32:$src, 0), sub_32)>; 217 if (MI.getOperand(3).getImm() != 0) 218 return false; 219 220 if (MI.getOperand(1).getReg() != AArch64::WZR) 221 return false; 222 223 MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg()); 224 if (!SrcMI) 225 return false; 226 227 // From https://developer.arm.com/documentation/dui0801/b/BABBGCAC 228 // 229 // When you use the 32-bit form of an instruction, the upper 32 bits of the 230 // source registers are ignored and the upper 32 bits of the destination 231 // register are set to zero. 232 // 233 // If AArch64's 32-bit form of instruction defines the source operand of 234 // zero-extend, we do not need the zero-extend. Let's check the MI's opcode is 235 // real AArch64 instruction and if it is not, do not process the opcode 236 // conservatively. 237 if (SrcMI->getOpcode() <= TargetOpcode::GENERIC_OP_END) 238 return false; 239 240 Register DefReg = MI.getOperand(0).getReg(); 241 Register SrcReg = MI.getOperand(2).getReg(); 242 MRI->replaceRegWith(DefReg, SrcReg); 243 MRI->clearKillFlags(SrcReg); 244 // replaceRegWith changes MI's definition register. Keep it for SSA form until 245 // deleting MI. 246 MI.getOperand(0).setReg(DefReg); 247 ToBeRemoved.insert(&MI); 248 249 LLVM_DEBUG({ dbgs() << "Removed: " << MI << "\n"; }); 250 251 return true; 252 } 253 254 bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) { 255 if (skipFunction(MF.getFunction())) 256 return false; 257 258 TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo()); 259 MLI = &getAnalysis<MachineLoopInfo>(); 260 MRI = &MF.getRegInfo(); 261 262 if (!MRI->isSSA()) 263 return false; 264 265 bool Changed = false; 266 SmallSetVector<MachineInstr *, 8> ToBeRemoved; 267 268 for (MachineBasicBlock &MBB : MF) { 269 for (MachineInstr &MI : MBB) { 270 switch (MI.getOpcode()) { 271 default: 272 break; 273 case AArch64::ANDWrr: 274 Changed = visitAND<uint32_t>(MI, ToBeRemoved); 275 break; 276 case AArch64::ANDXrr: 277 Changed = visitAND<uint64_t>(MI, ToBeRemoved); 278 break; 279 case AArch64::ORRWrs: 280 Changed = visitORR(MI, ToBeRemoved); 281 } 282 } 283 } 284 285 for (MachineInstr *MI : ToBeRemoved) 286 MI->eraseFromParent(); 287 288 return Changed; 289 } 290 291 FunctionPass *llvm::createAArch64MIPeepholeOptPass() { 292 return new AArch64MIPeepholeOpt(); 293 } 294