//===- AArch64MIPeepholeOpt.cpp - AArch64 MI peephole optimization pass ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass performs the following peephole optimizations at the MIR level.
//
// 1. MOVi32imm + ANDWrr ==> ANDWri + ANDWri
//    MOVi64imm + ANDXrr ==> ANDXri + ANDXri
//
// 2. MOVi32imm + ADDWrr ==> ADDWri + ADDWri
//    MOVi64imm + ADDXrr ==> ADDXri + ADDXri
//
// 3. MOVi32imm + SUBWrr ==> SUBWri + SUBWri
//    MOVi64imm + SUBXrr ==> SUBXri + SUBXri
//
//    The mov pseudo instruction could be expanded to multiple mov instructions
//    later. In this case, we could try to split the constant operand of the
//    mov instruction into two immediates which can be directly encoded into
//    *Wri/*Xri instructions. This makes two AND/ADD/SUB instructions instead
//    of multiple `mov` + `and/add/sub` instructions.
//
// 4. Remove the redundant ORRWrs which is generated by a zero-extend.
//
//    %3:gpr32 = ORRWrs $wzr, %2, 0
//    %4:gpr64 = SUBREG_TO_REG 0, %3, %subreg.sub_32
//
//    If the source operand of the ORRWrs is defined by the 32-bit form of an
//    AArch64 instruction, we can remove the ORRWrs because that instruction
//    already sets the upper 32 bits of the source operand to zero.
//
// 5. %reg = INSERT_SUBREG %reg(tied-def 0), %subreg, subidx
//      ==> %reg:subidx = SUBREG_TO_REG 0, %subreg, subidx
//
//===----------------------------------------------------------------------===//

#include "AArch64ExpandImm.h"
#include "AArch64InstrInfo.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineLoopInfo.h"

using namespace llvm;

#define DEBUG_TYPE "aarch64-mi-peephole-opt"

namespace {

struct AArch64MIPeepholeOpt : public MachineFunctionPass {
  static char ID;

  AArch64MIPeepholeOpt() : MachineFunctionPass(ID) {
    initializeAArch64MIPeepholeOptPass(*PassRegistry::getPassRegistry());
  }

  const AArch64InstrInfo *TII;
  const AArch64RegisterInfo *TRI;
  MachineLoopInfo *MLI;
  MachineRegisterInfo *MRI;

  using OpcodePair = std::pair<unsigned, unsigned>;
  template <typename T>
  using SplitAndOpcFunc =
      std::function<std::optional<OpcodePair>(T, unsigned, T &, T &)>;
  using BuildMIFunc =
      std::function<void(MachineInstr &, OpcodePair, unsigned, unsigned,
                         Register, Register, Register)>;

  /// For instructions where an immediate operand could be split into two
  /// separate immediate instructions, use splitTwoPartImm to handle the
  /// optimization.
  ///
  /// To use it, the following callbacks must be passed to splitTwoPartImm. A
  /// SplitAndOpcFunc must be implemented that determines whether splitting the
  /// immediate is valid and returns the associated new opcodes. A BuildMIFunc
  /// must be implemented to build the two immediate instructions.
  ///
  /// Example Pattern (where IMM would require 2+ MOV instructions):
  ///  %dst = <Instr>rr %src IMM [...]
  /// becomes:
  ///  %tmp = <Instr>ri %src (encode half IMM) [...]
  ///  %dst = <Instr>ri %tmp (encode half IMM) [...]
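  ///
  /// As a concrete (purely illustrative) instance, an ADDWrr whose immediate
  /// operand is 0x123456 becomes:
  ///  %tmp = ADDWri %src, 0x123, lsl #12
  ///  %dst = ADDWri %tmp, 0x456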
  template <typename T>
  bool splitTwoPartImm(MachineInstr &MI,
                       SplitAndOpcFunc<T> SplitAndOpc, BuildMIFunc BuildInstr);

  bool checkMovImmInstr(MachineInstr &MI, MachineInstr *&MovMI,
                        MachineInstr *&SubregToRegMI);

  template <typename T>
  bool visitADDSUB(unsigned PosOpc, unsigned NegOpc, MachineInstr &MI);
  template <typename T>
  bool visitADDSSUBS(OpcodePair PosOpcs, OpcodePair NegOpcs, MachineInstr &MI);

  template <typename T>
  bool visitAND(unsigned Opc, MachineInstr &MI);
  bool visitORR(MachineInstr &MI);
  bool visitINSERT(MachineInstr &MI);
  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override {
    return "AArch64 MI Peephole Optimization pass";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addRequired<MachineLoopInfo>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

char AArch64MIPeepholeOpt::ID = 0;

} // end anonymous namespace

INITIALIZE_PASS(AArch64MIPeepholeOpt, "aarch64-mi-peephole-opt",
                "AArch64 MI Peephole Optimization", false, false)

template <typename T>
static bool splitBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc, T &Imm2Enc) {
  T UImm = static_cast<T>(Imm);
  // If this immediate is already a valid bitmask immediate, do not split it.
  if (AArch64_AM::isLogicalImmediate(UImm, RegSize))
    return false;

  // If this immediate can be handled by one instruction, do not split it.
  SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
  AArch64_IMM::expandMOVImm(UImm, RegSize, Insn);
  if (Insn.size() == 1)
    return false;

  // A bitmask immediate consists of consecutive ones. Let's say there is a
  // constant 0b00000000001000000000010000000000 which does not consist of
  // consecutive ones. We can split it into two bitmask immediates like
  // 0b00000000001111111111110000000000 and 0b11111111111000000000011111111111.
  // ANDing these two bitmask immediates reproduces the original constant.
  unsigned LowestBitSet = countTrailingZeros(UImm);
  unsigned HighestBitSet = Log2_64(UImm);

  // Create a mask that is filled with ones from the position of the lowest set
  // bit to the position of the highest set bit.
  T NewImm1 = (static_cast<T>(2) << HighestBitSet) -
              (static_cast<T>(1) << LowestBitSet);
  // Create a mask that is filled with ones outside the range from the lowest
  // set bit to the highest set bit.
  T NewImm2 = UImm | ~NewImm1;

  // If the split value is not a valid bitmask immediate, do not split this
  // constant.
  if (!AArch64_AM::isLogicalImmediate(NewImm2, RegSize))
    return false;

  Imm1Enc = AArch64_AM::encodeLogicalImmediate(NewImm1, RegSize);
  Imm2Enc = AArch64_AM::encodeLogicalImmediate(NewImm2, RegSize);
  return true;
}

template <typename T>
bool AArch64MIPeepholeOpt::visitAND(unsigned Opc, MachineInstr &MI) {
  // Try the below transformation.
  //
  // MOVi32imm + ANDWrr ==> ANDWri + ANDWri
  // MOVi64imm + ANDXrr ==> ANDXri + ANDXri
  //
  // The mov pseudo instruction could be expanded to multiple mov instructions
  // later. Let's try to split the constant operand of the mov instruction into
  // two bitmask immediates. It makes only two AND instructions instead of
  // multiple mov + and instructions.
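  //
  // As a concrete (illustrative) instance, 0x00200400 (the constant from the
  // comment in splitBitmaskImm above) is not a logical immediate and needs
  // MOVZ + MOVK to materialize, but it splits into the two logical immediates
  // 0x003FFC00 and 0xFFE007FF, where 0x003FFC00 & 0xFFE007FF == 0x00200400.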

  return splitTwoPartImm<T>(
      MI,
      [Opc](T Imm, unsigned RegSize, T &Imm0,
            T &Imm1) -> std::optional<OpcodePair> {
        if (splitBitmaskImm(Imm, RegSize, Imm0, Imm1))
          return std::make_pair(Opc, Opc);
        return std::nullopt;
      },
      [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
                   unsigned Imm1, Register SrcReg, Register NewTmpReg,
                   Register NewDstReg) {
        DebugLoc DL = MI.getDebugLoc();
        MachineBasicBlock *MBB = MI.getParent();
        BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg)
            .addReg(SrcReg)
            .addImm(Imm0);
        BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg)
            .addReg(NewTmpReg)
            .addImm(Imm1);
      });
}

bool AArch64MIPeepholeOpt::visitORR(MachineInstr &MI) {
  // Check whether this ORR comes from the zero-extend pattern below.
  //
  // def : Pat<(i64 (zext GPR32:$src)),
  //           (SUBREG_TO_REG (i32 0), (ORRWrs WZR, GPR32:$src, 0), sub_32)>;
  if (MI.getOperand(3).getImm() != 0)
    return false;

  if (MI.getOperand(1).getReg() != AArch64::WZR)
    return false;

  MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
  if (!SrcMI)
    return false;

  // From https://developer.arm.com/documentation/dui0801/b/BABBGCAC
  //
  // When you use the 32-bit form of an instruction, the upper 32 bits of the
  // source registers are ignored and the upper 32 bits of the destination
  // register are set to zero.
  //
  // If the 32-bit form of an AArch64 instruction defines the source operand of
  // the zero-extend, we do not need the zero-extend. Check that SrcMI's opcode
  // is a real AArch64 instruction; if it is not, conservatively do not process
  // it.
  if (SrcMI->getOpcode() == TargetOpcode::COPY &&
      SrcMI->getOperand(1).getReg().isVirtual()) {
    const TargetRegisterClass *RC =
        MRI->getRegClass(SrcMI->getOperand(1).getReg());

    // A COPY from an FPR will become an FMOVSWr, so do so now so that we know
    // that the upper bits are zero.
    if (RC != &AArch64::FPR32RegClass &&
        ((RC != &AArch64::FPR64RegClass && RC != &AArch64::FPR128RegClass) ||
         SrcMI->getOperand(1).getSubReg() != AArch64::ssub))
      return false;
    Register CpySrc = SrcMI->getOperand(1).getReg();
    if (SrcMI->getOperand(1).getSubReg() == AArch64::ssub) {
      CpySrc = MRI->createVirtualRegister(&AArch64::FPR32RegClass);
      BuildMI(*SrcMI->getParent(), SrcMI, SrcMI->getDebugLoc(),
              TII->get(TargetOpcode::COPY), CpySrc)
          .add(SrcMI->getOperand(1));
    }
    BuildMI(*SrcMI->getParent(), SrcMI, SrcMI->getDebugLoc(),
            TII->get(AArch64::FMOVSWr), SrcMI->getOperand(0).getReg())
        .addReg(CpySrc);
    SrcMI->eraseFromParent();
  } else if (SrcMI->getOpcode() <= TargetOpcode::GENERIC_OP_END)
    return false;

  Register DefReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(2).getReg();
  MRI->replaceRegWith(DefReg, SrcReg);
  MRI->clearKillFlags(SrcReg);
  LLVM_DEBUG(dbgs() << "Removed: " << MI << "\n");
  MI.eraseFromParent();

  return true;
}
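
// For example (an illustrative sketch, not taken from a test), visitORR
// rewrites
//   %2:gpr32 = ADDWrr %0, %1
//   %3:gpr32 = ORRWrs $wzr, %2, 0
//   %4:gpr64 = SUBREG_TO_REG 0, %3, %subreg.sub_32
// into
//   %2:gpr32 = ADDWrr %0, %1
//   %4:gpr64 = SUBREG_TO_REG 0, %2, %subreg.sub_32
// because the 32-bit ADDWrr already zeroes the upper 32 bits of %2.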

bool AArch64MIPeepholeOpt::visitINSERT(MachineInstr &MI) {
  // Check whether this INSERT_SUBREG comes from the zero-extend pattern below.
  //
  //   From %reg = INSERT_SUBREG %reg(tied-def 0), %subreg, subidx
  //   To   %reg:subidx = SUBREG_TO_REG 0, %subreg, subidx
  //
  // We're assuming the first operand to INSERT_SUBREG is irrelevant because a
  // COPY would destroy the upper part of the register anyway.
  if (!MI.isRegTiedToDefOperand(1))
    return false;

  Register DstReg = MI.getOperand(0).getReg();
  const TargetRegisterClass *RC = MRI->getRegClass(DstReg);
  MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
  if (!SrcMI)
    return false;

  // From https://developer.arm.com/documentation/dui0801/b/BABBGCAC
  //
  // When you use the 32-bit form of an instruction, the upper 32 bits of the
  // source registers are ignored and the upper 32 bits of the destination
  // register are set to zero.
  //
  // If the 32-bit form of an AArch64 instruction defines the source operand of
  // the zero-extend, we do not need the zero-extend. Check that SrcMI's opcode
  // is a real AArch64 instruction; if it is not, conservatively do not process
  // it.
  if ((SrcMI->getOpcode() <= TargetOpcode::GENERIC_OP_END) ||
      !AArch64::GPR64allRegClass.hasSubClassEq(RC))
    return false;

  // Build a SUBREG_TO_REG instruction.
  MachineInstr *SubregMI =
      BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
              TII->get(TargetOpcode::SUBREG_TO_REG), DstReg)
          .addImm(0)
          .add(MI.getOperand(2))
          .add(MI.getOperand(3));
  LLVM_DEBUG(dbgs() << MI << " replaced by:\n" << *SubregMI << "\n");
  (void)SubregMI;
  MI.eraseFromParent();

  return true;
}

template <typename T>
static bool splitAddSubImm(T Imm, unsigned RegSize, T &Imm0, T &Imm1) {
  // The immediate must be in the form of ((imm0 << 12) + imm1), in which both
  // imm0 and imm1 are non-zero 12-bit unsigned integers.
  if ((Imm & 0xfff000) == 0 || (Imm & 0xfff) == 0 ||
      (Imm & ~static_cast<T>(0xffffff)) != 0)
    return false;

  // The immediate cannot be composed via a single instruction.
  SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
  AArch64_IMM::expandMOVImm(Imm, RegSize, Insn);
  if (Insn.size() == 1)
    return false;

  // Split Imm into (Imm0 << 12) + Imm1.
  Imm0 = (Imm >> 12) & 0xfff;
  Imm1 = Imm & 0xfff;
  return true;
}

template <typename T>
bool AArch64MIPeepholeOpt::visitADDSUB(unsigned PosOpc, unsigned NegOpc,
                                       MachineInstr &MI) {
  // Try the below transformation.
  //
  // MOVi32imm + ADDWrr ==> ADDWri + ADDWri
  // MOVi64imm + ADDXrr ==> ADDXri + ADDXri
  //
  // MOVi32imm + SUBWrr ==> SUBWri + SUBWri
  // MOVi64imm + SUBXrr ==> SUBXri + SUBXri
  //
  // The mov pseudo instruction could be expanded to multiple mov instructions
  // later. Let's try to split the constant operand of the mov instruction into
  // two legal add/sub immediates. It makes only two ADD/SUB instructions
  // instead of multiple `mov` + `add/sub` instructions.
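  //
  // For example (illustrative), the constant 0x123456 is neither a single-MOV
  // immediate nor a legal add/sub immediate, but splitAddSubImm yields
  // Imm0 = 0x123 and Imm1 = 0x456, so two instructions suffice:
  //   %tmp = ADDWri %src, 0x123, lsl #12
  //   %dst = ADDWri %tmp, 0x456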

  return splitTwoPartImm<T>(
      MI,
      [PosOpc, NegOpc](T Imm, unsigned RegSize, T &Imm0,
                       T &Imm1) -> std::optional<OpcodePair> {
        if (splitAddSubImm(Imm, RegSize, Imm0, Imm1))
          return std::make_pair(PosOpc, PosOpc);
        if (splitAddSubImm(-Imm, RegSize, Imm0, Imm1))
          return std::make_pair(NegOpc, NegOpc);
        return std::nullopt;
      },
      [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
                   unsigned Imm1, Register SrcReg, Register NewTmpReg,
                   Register NewDstReg) {
        DebugLoc DL = MI.getDebugLoc();
        MachineBasicBlock *MBB = MI.getParent();
        BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg)
            .addReg(SrcReg)
            .addImm(Imm0)
            .addImm(12);
        BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg)
            .addReg(NewTmpReg)
            .addImm(Imm1)
            .addImm(0);
      });
}

template <typename T>
bool AArch64MIPeepholeOpt::visitADDSSUBS(OpcodePair PosOpcs,
                                         OpcodePair NegOpcs,
                                         MachineInstr &MI) {
  // Try the same transformation as ADDSUB, but with the additional requirement
  // that the condition code is used only for Equal and Not Equal.
  return splitTwoPartImm<T>(
      MI,
      [PosOpcs, NegOpcs, &MI, &TRI = TRI,
       &MRI = MRI](T Imm, unsigned RegSize, T &Imm0,
                   T &Imm1) -> std::optional<OpcodePair> {
        OpcodePair OP;
        if (splitAddSubImm(Imm, RegSize, Imm0, Imm1))
          OP = PosOpcs;
        else if (splitAddSubImm(-Imm, RegSize, Imm0, Imm1))
          OP = NegOpcs;
        else
          return std::nullopt;
        // Check the condition code uses last, since scanning the subsequent
        // instructions is expensive.
        MachineInstr &SrcMI = *MRI->getUniqueVRegDef(MI.getOperand(1).getReg());
        std::optional<UsedNZCV> NZCVUsed = examineCFlagsUse(SrcMI, MI, *TRI);
        if (!NZCVUsed || NZCVUsed->C || NZCVUsed->V)
          return std::nullopt;
        return OP;
      },
      [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
                   unsigned Imm1, Register SrcReg, Register NewTmpReg,
                   Register NewDstReg) {
        DebugLoc DL = MI.getDebugLoc();
        MachineBasicBlock *MBB = MI.getParent();
        BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg)
            .addReg(SrcReg)
            .addImm(Imm0)
            .addImm(12);
        BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg)
            .addReg(NewTmpReg)
            .addImm(Imm1)
            .addImm(0);
      });
}

// Checks whether the corresponding MOV immediate instruction is applicable for
// this peephole optimization.
bool AArch64MIPeepholeOpt::checkMovImmInstr(MachineInstr &MI,
                                            MachineInstr *&MovMI,
                                            MachineInstr *&SubregToRegMI) {
  // Check whether the current MBB is in a loop and whether MI is
  // loop-invariant.
  MachineBasicBlock *MBB = MI.getParent();
  MachineLoop *L = MLI->getLoopFor(MBB);
  if (L && !L->isLoopInvariant(MI))
    return false;

  // Check whether the current MI's operand is a MOV with an immediate.
  MovMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
  if (!MovMI)
    return false;

  // If it is a SUBREG_TO_REG, check its operand.
  SubregToRegMI = nullptr;
  if (MovMI->getOpcode() == TargetOpcode::SUBREG_TO_REG) {
    SubregToRegMI = MovMI;
    MovMI = MRI->getUniqueVRegDef(MovMI->getOperand(2).getReg());
    if (!MovMI)
      return false;
  }

  if (MovMI->getOpcode() != AArch64::MOVi32imm &&
      MovMI->getOpcode() != AArch64::MOVi64imm)
    return false;

  // If the MOV has multiple uses, do not split the immediate because it causes
  // more instructions.
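  // (Splitting for one user would not remove the MOV, which is still needed by
  // its other users, so the total instruction count would only grow.)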
  if (!MRI->hasOneUse(MovMI->getOperand(0).getReg()))
    return false;
  if (SubregToRegMI && !MRI->hasOneUse(SubregToRegMI->getOperand(0).getReg()))
    return false;

  // It is OK to perform this peephole optimization.
  return true;
}

template <typename T>
bool AArch64MIPeepholeOpt::splitTwoPartImm(
    MachineInstr &MI,
    SplitAndOpcFunc<T> SplitAndOpc, BuildMIFunc BuildInstr) {
  unsigned RegSize = sizeof(T) * 8;
  assert((RegSize == 32 || RegSize == 64) &&
         "Invalid RegSize for legal immediate peephole optimization");

  // Perform several essential checks against the current MI.
  MachineInstr *MovMI, *SubregToRegMI;
  if (!checkMovImmInstr(MI, MovMI, SubregToRegMI))
    return false;

  // Split the immediate into Imm0 and Imm1, and calculate the Opcode.
  T Imm = static_cast<T>(MovMI->getOperand(1).getImm()), Imm0, Imm1;
  // For the 32-bit form of an instruction, the upper 32 bits of the
  // destination register are set to zero. If there is a SUBREG_TO_REG, set the
  // upper 32 bits of Imm to zero. This is essential if the immediate value was
  // a negative number, since it was sign-extended when we assigned it to the
  // 64-bit Imm.
  if (SubregToRegMI)
    Imm &= 0xFFFFFFFF;
  OpcodePair Opcode;
  if (auto R = SplitAndOpc(Imm, RegSize, Imm0, Imm1))
    Opcode = *R;
  else
    return false;

  // Create new MIs using the first and second opcodes. The opcodes might
  // differ for flag-setting operations that should only set flags on the
  // second instruction.
  // NewTmpReg = Opcode.first SrcReg Imm0
  // NewDstReg = Opcode.second NewTmpReg Imm1

  // Determine the register classes for the destinations and register operands.
  MachineFunction *MF = MI.getMF();
  const TargetRegisterClass *FirstInstrDstRC =
      TII->getRegClass(TII->get(Opcode.first), 0, TRI, *MF);
  const TargetRegisterClass *FirstInstrOperandRC =
      TII->getRegClass(TII->get(Opcode.first), 1, TRI, *MF);
  const TargetRegisterClass *SecondInstrDstRC =
      (Opcode.first == Opcode.second)
          ? FirstInstrDstRC
          : TII->getRegClass(TII->get(Opcode.second), 0, TRI, *MF);
  const TargetRegisterClass *SecondInstrOperandRC =
      (Opcode.first == Opcode.second)
          ? FirstInstrOperandRC
          : TII->getRegClass(TII->get(Opcode.second), 1, TRI, *MF);

  // Get the old destination register and create the new ones.
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  Register NewTmpReg = MRI->createVirtualRegister(FirstInstrDstRC);
  // In the situation that DstReg is not virtual (likely WZR or XZR), we want
  // to reuse that same destination register.
  Register NewDstReg = DstReg.isVirtual()
                           ? MRI->createVirtualRegister(SecondInstrDstRC)
                           : DstReg;

  // Constrain the registers based on their new uses.
  MRI->constrainRegClass(SrcReg, FirstInstrOperandRC);
  MRI->constrainRegClass(NewTmpReg, SecondInstrOperandRC);
  if (DstReg != NewDstReg)
    MRI->constrainRegClass(NewDstReg, MRI->getRegClass(DstReg));

  // Call the delegating operation to build the instructions.
  BuildInstr(MI, Opcode, Imm0, Imm1, SrcReg, NewTmpReg, NewDstReg);

  // replaceRegWith changes MI's definition register. Keep it for SSA form
  // until MI is deleted. Do this only if we made a new destination register.
  if (DstReg != NewDstReg) {
    MRI->replaceRegWith(DstReg, NewDstReg);
    MI.getOperand(0).setReg(DstReg);
  }

  // Remove the now-dead instructions.
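  // (MI is erased first so that the MOV, and the optional SUBREG_TO_REG, have
  // no remaining users when they are removed.)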
  MI.eraseFromParent();
  if (SubregToRegMI)
    SubregToRegMI->eraseFromParent();
  MovMI->eraseFromParent();

  return true;
}

bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
  TRI = static_cast<const AArch64RegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  MLI = &getAnalysis<MachineLoopInfo>();
  MRI = &MF.getRegInfo();

  assert(MRI->isSSA() && "Expected to be run on SSA form!");

  bool Changed = false;

  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : make_early_inc_range(MBB)) {
      switch (MI.getOpcode()) {
      default:
        break;
      case AArch64::INSERT_SUBREG:
        Changed |= visitINSERT(MI);
        break;
      case AArch64::ANDWrr:
        Changed |= visitAND<uint32_t>(AArch64::ANDWri, MI);
        break;
      case AArch64::ANDXrr:
        Changed |= visitAND<uint64_t>(AArch64::ANDXri, MI);
        break;
      case AArch64::ORRWrs:
        Changed |= visitORR(MI);
        break;
      case AArch64::ADDWrr:
        Changed |= visitADDSUB<uint32_t>(AArch64::ADDWri, AArch64::SUBWri, MI);
        break;
      case AArch64::SUBWrr:
        Changed |= visitADDSUB<uint32_t>(AArch64::SUBWri, AArch64::ADDWri, MI);
        break;
      case AArch64::ADDXrr:
        Changed |= visitADDSUB<uint64_t>(AArch64::ADDXri, AArch64::SUBXri, MI);
        break;
      case AArch64::SUBXrr:
        Changed |= visitADDSUB<uint64_t>(AArch64::SUBXri, AArch64::ADDXri, MI);
        break;
      case AArch64::ADDSWrr:
        Changed |= visitADDSSUBS<uint32_t>({AArch64::ADDWri, AArch64::ADDSWri},
                                           {AArch64::SUBWri, AArch64::SUBSWri},
                                           MI);
        break;
      case AArch64::SUBSWrr:
        Changed |= visitADDSSUBS<uint32_t>({AArch64::SUBWri, AArch64::SUBSWri},
                                           {AArch64::ADDWri, AArch64::ADDSWri},
                                           MI);
        break;
      case AArch64::ADDSXrr:
        Changed |= visitADDSSUBS<uint64_t>({AArch64::ADDXri, AArch64::ADDSXri},
                                           {AArch64::SUBXri, AArch64::SUBSXri},
                                           MI);
        break;
      case AArch64::SUBSXrr:
        Changed |= visitADDSSUBS<uint64_t>({AArch64::SUBXri, AArch64::SUBSXri},
                                           {AArch64::ADDXri, AArch64::ADDSXri},
                                           MI);
        break;
      }
    }
  }

  return Changed;
}

FunctionPass *llvm::createAArch64MIPeepholeOptPass() {
  return new AArch64MIPeepholeOpt();
}
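
// Note: this pass requires SSA-form MIR (see the assert in
// runOnMachineFunction above), so in the AArch64 backend it is expected to be
// scheduled during the machine-SSA optimization phase of the pipeline.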