1349cc55cSDimitry Andric //===- AArch64MIPeepholeOpt.cpp - AArch64 MI peephole optimization pass ---===// 2349cc55cSDimitry Andric // 3349cc55cSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4349cc55cSDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5349cc55cSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6349cc55cSDimitry Andric // 7349cc55cSDimitry Andric //===----------------------------------------------------------------------===// 8349cc55cSDimitry Andric // 9349cc55cSDimitry Andric // This pass performs below peephole optimizations on MIR level. 10349cc55cSDimitry Andric // 11349cc55cSDimitry Andric // 1. MOVi32imm + ANDWrr ==> ANDWri + ANDWri 12349cc55cSDimitry Andric // MOVi64imm + ANDXrr ==> ANDXri + ANDXri 13349cc55cSDimitry Andric // 1404eeddc0SDimitry Andric // 2. MOVi32imm + ADDWrr ==> ADDWRi + ADDWRi 1504eeddc0SDimitry Andric // MOVi64imm + ADDXrr ==> ANDXri + ANDXri 1604eeddc0SDimitry Andric // 1704eeddc0SDimitry Andric // 3. MOVi32imm + SUBWrr ==> SUBWRi + SUBWRi 1804eeddc0SDimitry Andric // MOVi64imm + SUBXrr ==> SUBXri + SUBXri 1904eeddc0SDimitry Andric // 20349cc55cSDimitry Andric // The mov pseudo instruction could be expanded to multiple mov instructions 21349cc55cSDimitry Andric // later. In this case, we could try to split the constant operand of mov 2204eeddc0SDimitry Andric // instruction into two immediates which can be directly encoded into 2304eeddc0SDimitry Andric // *Wri/*Xri instructions. It makes two AND/ADD/SUB instructions instead of 2404eeddc0SDimitry Andric // multiple `mov` + `and/add/sub` instructions. 25349cc55cSDimitry Andric // 2604eeddc0SDimitry Andric // 4. Remove redundant ORRWrs which is generated by zero-extend. 27349cc55cSDimitry Andric // 28349cc55cSDimitry Andric // %3:gpr32 = ORRWrs $wzr, %2, 0 29349cc55cSDimitry Andric // %4:gpr64 = SUBREG_TO_REG 0, %3, %subreg.sub_32 30349cc55cSDimitry Andric // 31349cc55cSDimitry Andric // If AArch64's 32-bit form of instruction defines the source operand of 32349cc55cSDimitry Andric // ORRWrs, we can remove the ORRWrs because the upper 32 bits of the source 33349cc55cSDimitry Andric // operand are set to zero. 34349cc55cSDimitry Andric // 35*bdd1243dSDimitry Andric // 5. %reg = INSERT_SUBREG %reg(tied-def 0), %subreg, subidx 36*bdd1243dSDimitry Andric // ==> %reg:subidx = SUBREG_TO_REG 0, %subreg, subidx 37*bdd1243dSDimitry Andric // 38349cc55cSDimitry Andric //===----------------------------------------------------------------------===// 39349cc55cSDimitry Andric 40349cc55cSDimitry Andric #include "AArch64ExpandImm.h" 41349cc55cSDimitry Andric #include "AArch64InstrInfo.h" 42349cc55cSDimitry Andric #include "MCTargetDesc/AArch64AddressingModes.h" 43349cc55cSDimitry Andric #include "llvm/CodeGen/MachineDominators.h" 44349cc55cSDimitry Andric #include "llvm/CodeGen/MachineLoopInfo.h" 45349cc55cSDimitry Andric 46349cc55cSDimitry Andric using namespace llvm; 47349cc55cSDimitry Andric 48349cc55cSDimitry Andric #define DEBUG_TYPE "aarch64-mi-peephole-opt" 49349cc55cSDimitry Andric 50349cc55cSDimitry Andric namespace { 51349cc55cSDimitry Andric 52349cc55cSDimitry Andric struct AArch64MIPeepholeOpt : public MachineFunctionPass { 53349cc55cSDimitry Andric static char ID; 54349cc55cSDimitry Andric 55349cc55cSDimitry Andric AArch64MIPeepholeOpt() : MachineFunctionPass(ID) { 56349cc55cSDimitry Andric initializeAArch64MIPeepholeOptPass(*PassRegistry::getPassRegistry()); 57349cc55cSDimitry Andric } 58349cc55cSDimitry Andric 59349cc55cSDimitry Andric const AArch64InstrInfo *TII; 6004eeddc0SDimitry Andric const AArch64RegisterInfo *TRI; 61349cc55cSDimitry Andric MachineLoopInfo *MLI; 62349cc55cSDimitry Andric MachineRegisterInfo *MRI; 63349cc55cSDimitry Andric 6481ad6265SDimitry Andric using OpcodePair = std::pair<unsigned, unsigned>; 65349cc55cSDimitry Andric template <typename T> 6604eeddc0SDimitry Andric using SplitAndOpcFunc = 67*bdd1243dSDimitry Andric std::function<std::optional<OpcodePair>(T, unsigned, T &, T &)>; 6804eeddc0SDimitry Andric using BuildMIFunc = 6981ad6265SDimitry Andric std::function<void(MachineInstr &, OpcodePair, unsigned, unsigned, 7081ad6265SDimitry Andric Register, Register, Register)>; 7104eeddc0SDimitry Andric 7204eeddc0SDimitry Andric /// For instructions where an immediate operand could be split into two 7304eeddc0SDimitry Andric /// separate immediate instructions, use the splitTwoPartImm two handle the 7404eeddc0SDimitry Andric /// optimization. 7504eeddc0SDimitry Andric /// 7604eeddc0SDimitry Andric /// To implement, the following function types must be passed to 7704eeddc0SDimitry Andric /// splitTwoPartImm. A SplitAndOpcFunc must be implemented that determines if 7804eeddc0SDimitry Andric /// splitting the immediate is valid and returns the associated new opcode. A 7904eeddc0SDimitry Andric /// BuildMIFunc must be implemented to build the two immediate instructions. 8004eeddc0SDimitry Andric /// 8104eeddc0SDimitry Andric /// Example Pattern (where IMM would require 2+ MOV instructions): 8204eeddc0SDimitry Andric /// %dst = <Instr>rr %src IMM [...] 8304eeddc0SDimitry Andric /// becomes: 8404eeddc0SDimitry Andric /// %tmp = <Instr>ri %src (encode half IMM) [...] 8504eeddc0SDimitry Andric /// %dst = <Instr>ri %tmp (encode half IMM) [...] 8604eeddc0SDimitry Andric template <typename T> 8704eeddc0SDimitry Andric bool splitTwoPartImm(MachineInstr &MI, 8804eeddc0SDimitry Andric SplitAndOpcFunc<T> SplitAndOpc, BuildMIFunc BuildInstr); 8904eeddc0SDimitry Andric 9004eeddc0SDimitry Andric bool checkMovImmInstr(MachineInstr &MI, MachineInstr *&MovMI, 9104eeddc0SDimitry Andric MachineInstr *&SubregToRegMI); 9204eeddc0SDimitry Andric 9304eeddc0SDimitry Andric template <typename T> 9481ad6265SDimitry Andric bool visitADDSUB(unsigned PosOpc, unsigned NegOpc, MachineInstr &MI); 9504eeddc0SDimitry Andric template <typename T> 9681ad6265SDimitry Andric bool visitADDSSUBS(OpcodePair PosOpcs, OpcodePair NegOpcs, MachineInstr &MI); 9781ad6265SDimitry Andric 9881ad6265SDimitry Andric template <typename T> 9981ad6265SDimitry Andric bool visitAND(unsigned Opc, MachineInstr &MI); 10081ad6265SDimitry Andric bool visitORR(MachineInstr &MI); 101*bdd1243dSDimitry Andric bool visitINSERT(MachineInstr &MI); 102349cc55cSDimitry Andric bool runOnMachineFunction(MachineFunction &MF) override; 103349cc55cSDimitry Andric 104349cc55cSDimitry Andric StringRef getPassName() const override { 105349cc55cSDimitry Andric return "AArch64 MI Peephole Optimization pass"; 106349cc55cSDimitry Andric } 107349cc55cSDimitry Andric 108349cc55cSDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override { 109349cc55cSDimitry Andric AU.setPreservesCFG(); 110349cc55cSDimitry Andric AU.addRequired<MachineLoopInfo>(); 111349cc55cSDimitry Andric MachineFunctionPass::getAnalysisUsage(AU); 112349cc55cSDimitry Andric } 113349cc55cSDimitry Andric }; 114349cc55cSDimitry Andric 115349cc55cSDimitry Andric char AArch64MIPeepholeOpt::ID = 0; 116349cc55cSDimitry Andric 117349cc55cSDimitry Andric } // end anonymous namespace 118349cc55cSDimitry Andric 119349cc55cSDimitry Andric INITIALIZE_PASS(AArch64MIPeepholeOpt, "aarch64-mi-peephole-opt", 120349cc55cSDimitry Andric "AArch64 MI Peephole Optimization", false, false) 121349cc55cSDimitry Andric 122349cc55cSDimitry Andric template <typename T> 123349cc55cSDimitry Andric static bool splitBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc, T &Imm2Enc) { 124349cc55cSDimitry Andric T UImm = static_cast<T>(Imm); 125349cc55cSDimitry Andric if (AArch64_AM::isLogicalImmediate(UImm, RegSize)) 126349cc55cSDimitry Andric return false; 127349cc55cSDimitry Andric 128349cc55cSDimitry Andric // If this immediate can be handled by one instruction, do not split it. 129349cc55cSDimitry Andric SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn; 130349cc55cSDimitry Andric AArch64_IMM::expandMOVImm(UImm, RegSize, Insn); 131349cc55cSDimitry Andric if (Insn.size() == 1) 132349cc55cSDimitry Andric return false; 133349cc55cSDimitry Andric 134349cc55cSDimitry Andric // The bitmask immediate consists of consecutive ones. Let's say there is 135349cc55cSDimitry Andric // constant 0b00000000001000000000010000000000 which does not consist of 136349cc55cSDimitry Andric // consecutive ones. We can split it in to two bitmask immediate like 137349cc55cSDimitry Andric // 0b00000000001111111111110000000000 and 0b11111111111000000000011111111111. 138349cc55cSDimitry Andric // If we do AND with these two bitmask immediate, we can see original one. 139349cc55cSDimitry Andric unsigned LowestBitSet = countTrailingZeros(UImm); 140349cc55cSDimitry Andric unsigned HighestBitSet = Log2_64(UImm); 141349cc55cSDimitry Andric 142349cc55cSDimitry Andric // Create a mask which is filled with one from the position of lowest bit set 143349cc55cSDimitry Andric // to the position of highest bit set. 144349cc55cSDimitry Andric T NewImm1 = (static_cast<T>(2) << HighestBitSet) - 145349cc55cSDimitry Andric (static_cast<T>(1) << LowestBitSet); 146349cc55cSDimitry Andric // Create a mask which is filled with one outside the position of lowest bit 147349cc55cSDimitry Andric // set and the position of highest bit set. 148349cc55cSDimitry Andric T NewImm2 = UImm | ~NewImm1; 149349cc55cSDimitry Andric 150349cc55cSDimitry Andric // If the split value is not valid bitmask immediate, do not split this 151349cc55cSDimitry Andric // constant. 152349cc55cSDimitry Andric if (!AArch64_AM::isLogicalImmediate(NewImm2, RegSize)) 153349cc55cSDimitry Andric return false; 154349cc55cSDimitry Andric 155349cc55cSDimitry Andric Imm1Enc = AArch64_AM::encodeLogicalImmediate(NewImm1, RegSize); 156349cc55cSDimitry Andric Imm2Enc = AArch64_AM::encodeLogicalImmediate(NewImm2, RegSize); 157349cc55cSDimitry Andric return true; 158349cc55cSDimitry Andric } 159349cc55cSDimitry Andric 160349cc55cSDimitry Andric template <typename T> 161349cc55cSDimitry Andric bool AArch64MIPeepholeOpt::visitAND( 16281ad6265SDimitry Andric unsigned Opc, MachineInstr &MI) { 163349cc55cSDimitry Andric // Try below transformation. 164349cc55cSDimitry Andric // 165349cc55cSDimitry Andric // MOVi32imm + ANDWrr ==> ANDWri + ANDWri 166349cc55cSDimitry Andric // MOVi64imm + ANDXrr ==> ANDXri + ANDXri 167349cc55cSDimitry Andric // 168349cc55cSDimitry Andric // The mov pseudo instruction could be expanded to multiple mov instructions 169349cc55cSDimitry Andric // later. Let's try to split the constant operand of mov instruction into two 170349cc55cSDimitry Andric // bitmask immediates. It makes only two AND instructions intead of multiple 171349cc55cSDimitry Andric // mov + and instructions. 172349cc55cSDimitry Andric 17304eeddc0SDimitry Andric return splitTwoPartImm<T>( 17481ad6265SDimitry Andric MI, 175*bdd1243dSDimitry Andric [Opc](T Imm, unsigned RegSize, T &Imm0, 176*bdd1243dSDimitry Andric T &Imm1) -> std::optional<OpcodePair> { 17704eeddc0SDimitry Andric if (splitBitmaskImm(Imm, RegSize, Imm0, Imm1)) 17881ad6265SDimitry Andric return std::make_pair(Opc, Opc); 179*bdd1243dSDimitry Andric return std::nullopt; 18004eeddc0SDimitry Andric }, 18181ad6265SDimitry Andric [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0, 18204eeddc0SDimitry Andric unsigned Imm1, Register SrcReg, Register NewTmpReg, 18304eeddc0SDimitry Andric Register NewDstReg) { 184349cc55cSDimitry Andric DebugLoc DL = MI.getDebugLoc(); 18504eeddc0SDimitry Andric MachineBasicBlock *MBB = MI.getParent(); 18681ad6265SDimitry Andric BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg) 187349cc55cSDimitry Andric .addReg(SrcReg) 18804eeddc0SDimitry Andric .addImm(Imm0); 18981ad6265SDimitry Andric BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg) 190349cc55cSDimitry Andric .addReg(NewTmpReg) 19104eeddc0SDimitry Andric .addImm(Imm1); 19204eeddc0SDimitry Andric }); 193349cc55cSDimitry Andric } 194349cc55cSDimitry Andric 19581ad6265SDimitry Andric bool AArch64MIPeepholeOpt::visitORR(MachineInstr &MI) { 196349cc55cSDimitry Andric // Check this ORR comes from below zero-extend pattern. 197349cc55cSDimitry Andric // 198349cc55cSDimitry Andric // def : Pat<(i64 (zext GPR32:$src)), 199349cc55cSDimitry Andric // (SUBREG_TO_REG (i32 0), (ORRWrs WZR, GPR32:$src, 0), sub_32)>; 200349cc55cSDimitry Andric if (MI.getOperand(3).getImm() != 0) 201349cc55cSDimitry Andric return false; 202349cc55cSDimitry Andric 203349cc55cSDimitry Andric if (MI.getOperand(1).getReg() != AArch64::WZR) 204349cc55cSDimitry Andric return false; 205349cc55cSDimitry Andric 206349cc55cSDimitry Andric MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg()); 207349cc55cSDimitry Andric if (!SrcMI) 208349cc55cSDimitry Andric return false; 209349cc55cSDimitry Andric 210349cc55cSDimitry Andric // From https://developer.arm.com/documentation/dui0801/b/BABBGCAC 211349cc55cSDimitry Andric // 212349cc55cSDimitry Andric // When you use the 32-bit form of an instruction, the upper 32 bits of the 213349cc55cSDimitry Andric // source registers are ignored and the upper 32 bits of the destination 214349cc55cSDimitry Andric // register are set to zero. 215349cc55cSDimitry Andric // 216349cc55cSDimitry Andric // If AArch64's 32-bit form of instruction defines the source operand of 217349cc55cSDimitry Andric // zero-extend, we do not need the zero-extend. Let's check the MI's opcode is 218349cc55cSDimitry Andric // real AArch64 instruction and if it is not, do not process the opcode 219349cc55cSDimitry Andric // conservatively. 22081ad6265SDimitry Andric if (SrcMI->getOpcode() == TargetOpcode::COPY && 22181ad6265SDimitry Andric SrcMI->getOperand(1).getReg().isVirtual()) { 22281ad6265SDimitry Andric const TargetRegisterClass *RC = 22381ad6265SDimitry Andric MRI->getRegClass(SrcMI->getOperand(1).getReg()); 22481ad6265SDimitry Andric 22581ad6265SDimitry Andric // A COPY from an FPR will become a FMOVSWr, so do so now so that we know 22681ad6265SDimitry Andric // that the upper bits are zero. 22781ad6265SDimitry Andric if (RC != &AArch64::FPR32RegClass && 22881ad6265SDimitry Andric ((RC != &AArch64::FPR64RegClass && RC != &AArch64::FPR128RegClass) || 22981ad6265SDimitry Andric SrcMI->getOperand(1).getSubReg() != AArch64::ssub)) 23081ad6265SDimitry Andric return false; 23181ad6265SDimitry Andric Register CpySrc = SrcMI->getOperand(1).getReg(); 23281ad6265SDimitry Andric if (SrcMI->getOperand(1).getSubReg() == AArch64::ssub) { 23381ad6265SDimitry Andric CpySrc = MRI->createVirtualRegister(&AArch64::FPR32RegClass); 23481ad6265SDimitry Andric BuildMI(*SrcMI->getParent(), SrcMI, SrcMI->getDebugLoc(), 23581ad6265SDimitry Andric TII->get(TargetOpcode::COPY), CpySrc) 23681ad6265SDimitry Andric .add(SrcMI->getOperand(1)); 23781ad6265SDimitry Andric } 23881ad6265SDimitry Andric BuildMI(*SrcMI->getParent(), SrcMI, SrcMI->getDebugLoc(), 23981ad6265SDimitry Andric TII->get(AArch64::FMOVSWr), SrcMI->getOperand(0).getReg()) 24081ad6265SDimitry Andric .addReg(CpySrc); 24181ad6265SDimitry Andric SrcMI->eraseFromParent(); 24281ad6265SDimitry Andric } 24381ad6265SDimitry Andric else if (SrcMI->getOpcode() <= TargetOpcode::GENERIC_OP_END) 244349cc55cSDimitry Andric return false; 245349cc55cSDimitry Andric 246349cc55cSDimitry Andric Register DefReg = MI.getOperand(0).getReg(); 247349cc55cSDimitry Andric Register SrcReg = MI.getOperand(2).getReg(); 248349cc55cSDimitry Andric MRI->replaceRegWith(DefReg, SrcReg); 249349cc55cSDimitry Andric MRI->clearKillFlags(SrcReg); 25004eeddc0SDimitry Andric LLVM_DEBUG(dbgs() << "Removed: " << MI << "\n"); 25181ad6265SDimitry Andric MI.eraseFromParent(); 25204eeddc0SDimitry Andric 25304eeddc0SDimitry Andric return true; 25404eeddc0SDimitry Andric } 25504eeddc0SDimitry Andric 256*bdd1243dSDimitry Andric bool AArch64MIPeepholeOpt::visitINSERT(MachineInstr &MI) { 257*bdd1243dSDimitry Andric // Check this INSERT_SUBREG comes from below zero-extend pattern. 258*bdd1243dSDimitry Andric // 259*bdd1243dSDimitry Andric // From %reg = INSERT_SUBREG %reg(tied-def 0), %subreg, subidx 260*bdd1243dSDimitry Andric // To %reg:subidx = SUBREG_TO_REG 0, %subreg, subidx 261*bdd1243dSDimitry Andric // 262*bdd1243dSDimitry Andric // We're assuming the first operand to INSERT_SUBREG is irrelevant because a 263*bdd1243dSDimitry Andric // COPY would destroy the upper part of the register anyway 264*bdd1243dSDimitry Andric if (!MI.isRegTiedToDefOperand(1)) 265*bdd1243dSDimitry Andric return false; 266*bdd1243dSDimitry Andric 267*bdd1243dSDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 268*bdd1243dSDimitry Andric const TargetRegisterClass *RC = MRI->getRegClass(DstReg); 269*bdd1243dSDimitry Andric MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg()); 270*bdd1243dSDimitry Andric if (!SrcMI) 271*bdd1243dSDimitry Andric return false; 272*bdd1243dSDimitry Andric 273*bdd1243dSDimitry Andric // From https://developer.arm.com/documentation/dui0801/b/BABBGCAC 274*bdd1243dSDimitry Andric // 275*bdd1243dSDimitry Andric // When you use the 32-bit form of an instruction, the upper 32 bits of the 276*bdd1243dSDimitry Andric // source registers are ignored and the upper 32 bits of the destination 277*bdd1243dSDimitry Andric // register are set to zero. 278*bdd1243dSDimitry Andric // 279*bdd1243dSDimitry Andric // If AArch64's 32-bit form of instruction defines the source operand of 280*bdd1243dSDimitry Andric // zero-extend, we do not need the zero-extend. Let's check the MI's opcode is 281*bdd1243dSDimitry Andric // real AArch64 instruction and if it is not, do not process the opcode 282*bdd1243dSDimitry Andric // conservatively. 283*bdd1243dSDimitry Andric if ((SrcMI->getOpcode() <= TargetOpcode::GENERIC_OP_END) || 284*bdd1243dSDimitry Andric !AArch64::GPR64allRegClass.hasSubClassEq(RC)) 285*bdd1243dSDimitry Andric return false; 286*bdd1243dSDimitry Andric 287*bdd1243dSDimitry Andric // Build a SUBREG_TO_REG instruction 288*bdd1243dSDimitry Andric MachineInstr *SubregMI = 289*bdd1243dSDimitry Andric BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), 290*bdd1243dSDimitry Andric TII->get(TargetOpcode::SUBREG_TO_REG), DstReg) 291*bdd1243dSDimitry Andric .addImm(0) 292*bdd1243dSDimitry Andric .add(MI.getOperand(2)) 293*bdd1243dSDimitry Andric .add(MI.getOperand(3)); 294*bdd1243dSDimitry Andric LLVM_DEBUG(dbgs() << MI << " replace by:\n: " << *SubregMI << "\n"); 295*bdd1243dSDimitry Andric (void)SubregMI; 296*bdd1243dSDimitry Andric MI.eraseFromParent(); 297*bdd1243dSDimitry Andric 298*bdd1243dSDimitry Andric return true; 299*bdd1243dSDimitry Andric } 300*bdd1243dSDimitry Andric 30104eeddc0SDimitry Andric template <typename T> 30204eeddc0SDimitry Andric static bool splitAddSubImm(T Imm, unsigned RegSize, T &Imm0, T &Imm1) { 30304eeddc0SDimitry Andric // The immediate must be in the form of ((imm0 << 12) + imm1), in which both 30404eeddc0SDimitry Andric // imm0 and imm1 are non-zero 12-bit unsigned int. 30504eeddc0SDimitry Andric if ((Imm & 0xfff000) == 0 || (Imm & 0xfff) == 0 || 30604eeddc0SDimitry Andric (Imm & ~static_cast<T>(0xffffff)) != 0) 30704eeddc0SDimitry Andric return false; 30804eeddc0SDimitry Andric 30904eeddc0SDimitry Andric // The immediate can not be composed via a single instruction. 31004eeddc0SDimitry Andric SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn; 31104eeddc0SDimitry Andric AArch64_IMM::expandMOVImm(Imm, RegSize, Insn); 31204eeddc0SDimitry Andric if (Insn.size() == 1) 31304eeddc0SDimitry Andric return false; 31404eeddc0SDimitry Andric 31504eeddc0SDimitry Andric // Split Imm into (Imm0 << 12) + Imm1; 31604eeddc0SDimitry Andric Imm0 = (Imm >> 12) & 0xfff; 31704eeddc0SDimitry Andric Imm1 = Imm & 0xfff; 31804eeddc0SDimitry Andric return true; 31904eeddc0SDimitry Andric } 32004eeddc0SDimitry Andric 32104eeddc0SDimitry Andric template <typename T> 32204eeddc0SDimitry Andric bool AArch64MIPeepholeOpt::visitADDSUB( 32381ad6265SDimitry Andric unsigned PosOpc, unsigned NegOpc, MachineInstr &MI) { 32404eeddc0SDimitry Andric // Try below transformation. 32504eeddc0SDimitry Andric // 32604eeddc0SDimitry Andric // MOVi32imm + ADDWrr ==> ADDWri + ADDWri 32704eeddc0SDimitry Andric // MOVi64imm + ADDXrr ==> ADDXri + ADDXri 32804eeddc0SDimitry Andric // 32904eeddc0SDimitry Andric // MOVi32imm + SUBWrr ==> SUBWri + SUBWri 33004eeddc0SDimitry Andric // MOVi64imm + SUBXrr ==> SUBXri + SUBXri 33104eeddc0SDimitry Andric // 33204eeddc0SDimitry Andric // The mov pseudo instruction could be expanded to multiple mov instructions 33304eeddc0SDimitry Andric // later. Let's try to split the constant operand of mov instruction into two 33404eeddc0SDimitry Andric // legal add/sub immediates. It makes only two ADD/SUB instructions intead of 33504eeddc0SDimitry Andric // multiple `mov` + `and/sub` instructions. 33604eeddc0SDimitry Andric 33704eeddc0SDimitry Andric return splitTwoPartImm<T>( 33881ad6265SDimitry Andric MI, 33904eeddc0SDimitry Andric [PosOpc, NegOpc](T Imm, unsigned RegSize, T &Imm0, 340*bdd1243dSDimitry Andric T &Imm1) -> std::optional<OpcodePair> { 34104eeddc0SDimitry Andric if (splitAddSubImm(Imm, RegSize, Imm0, Imm1)) 34281ad6265SDimitry Andric return std::make_pair(PosOpc, PosOpc); 34304eeddc0SDimitry Andric if (splitAddSubImm(-Imm, RegSize, Imm0, Imm1)) 34481ad6265SDimitry Andric return std::make_pair(NegOpc, NegOpc); 345*bdd1243dSDimitry Andric return std::nullopt; 34604eeddc0SDimitry Andric }, 34781ad6265SDimitry Andric [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0, 34804eeddc0SDimitry Andric unsigned Imm1, Register SrcReg, Register NewTmpReg, 34904eeddc0SDimitry Andric Register NewDstReg) { 35004eeddc0SDimitry Andric DebugLoc DL = MI.getDebugLoc(); 35104eeddc0SDimitry Andric MachineBasicBlock *MBB = MI.getParent(); 35281ad6265SDimitry Andric BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg) 35304eeddc0SDimitry Andric .addReg(SrcReg) 35404eeddc0SDimitry Andric .addImm(Imm0) 35504eeddc0SDimitry Andric .addImm(12); 35681ad6265SDimitry Andric BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg) 35781ad6265SDimitry Andric .addReg(NewTmpReg) 35881ad6265SDimitry Andric .addImm(Imm1) 35981ad6265SDimitry Andric .addImm(0); 36081ad6265SDimitry Andric }); 36181ad6265SDimitry Andric } 36281ad6265SDimitry Andric 36381ad6265SDimitry Andric template <typename T> 36481ad6265SDimitry Andric bool AArch64MIPeepholeOpt::visitADDSSUBS( 36581ad6265SDimitry Andric OpcodePair PosOpcs, OpcodePair NegOpcs, MachineInstr &MI) { 36681ad6265SDimitry Andric // Try the same transformation as ADDSUB but with additional requirement 36781ad6265SDimitry Andric // that the condition code usages are only for Equal and Not Equal 36881ad6265SDimitry Andric return splitTwoPartImm<T>( 36981ad6265SDimitry Andric MI, 370*bdd1243dSDimitry Andric [PosOpcs, NegOpcs, &MI, &TRI = TRI, 371*bdd1243dSDimitry Andric &MRI = MRI](T Imm, unsigned RegSize, T &Imm0, 372*bdd1243dSDimitry Andric T &Imm1) -> std::optional<OpcodePair> { 37381ad6265SDimitry Andric OpcodePair OP; 37481ad6265SDimitry Andric if (splitAddSubImm(Imm, RegSize, Imm0, Imm1)) 37581ad6265SDimitry Andric OP = PosOpcs; 37681ad6265SDimitry Andric else if (splitAddSubImm(-Imm, RegSize, Imm0, Imm1)) 37781ad6265SDimitry Andric OP = NegOpcs; 37881ad6265SDimitry Andric else 379*bdd1243dSDimitry Andric return std::nullopt; 38081ad6265SDimitry Andric // Check conditional uses last since it is expensive for scanning 38181ad6265SDimitry Andric // proceeding instructions 38281ad6265SDimitry Andric MachineInstr &SrcMI = *MRI->getUniqueVRegDef(MI.getOperand(1).getReg()); 383*bdd1243dSDimitry Andric std::optional<UsedNZCV> NZCVUsed = examineCFlagsUse(SrcMI, MI, *TRI); 38481ad6265SDimitry Andric if (!NZCVUsed || NZCVUsed->C || NZCVUsed->V) 385*bdd1243dSDimitry Andric return std::nullopt; 38681ad6265SDimitry Andric return OP; 38781ad6265SDimitry Andric }, 38881ad6265SDimitry Andric [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0, 38981ad6265SDimitry Andric unsigned Imm1, Register SrcReg, Register NewTmpReg, 39081ad6265SDimitry Andric Register NewDstReg) { 39181ad6265SDimitry Andric DebugLoc DL = MI.getDebugLoc(); 39281ad6265SDimitry Andric MachineBasicBlock *MBB = MI.getParent(); 39381ad6265SDimitry Andric BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg) 39481ad6265SDimitry Andric .addReg(SrcReg) 39581ad6265SDimitry Andric .addImm(Imm0) 39681ad6265SDimitry Andric .addImm(12); 39781ad6265SDimitry Andric BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg) 39804eeddc0SDimitry Andric .addReg(NewTmpReg) 39904eeddc0SDimitry Andric .addImm(Imm1) 40004eeddc0SDimitry Andric .addImm(0); 40104eeddc0SDimitry Andric }); 40204eeddc0SDimitry Andric } 40304eeddc0SDimitry Andric 40404eeddc0SDimitry Andric // Checks if the corresponding MOV immediate instruction is applicable for 40504eeddc0SDimitry Andric // this peephole optimization. 40604eeddc0SDimitry Andric bool AArch64MIPeepholeOpt::checkMovImmInstr(MachineInstr &MI, 40704eeddc0SDimitry Andric MachineInstr *&MovMI, 40804eeddc0SDimitry Andric MachineInstr *&SubregToRegMI) { 40904eeddc0SDimitry Andric // Check whether current MBB is in loop and the AND is loop invariant. 41004eeddc0SDimitry Andric MachineBasicBlock *MBB = MI.getParent(); 41104eeddc0SDimitry Andric MachineLoop *L = MLI->getLoopFor(MBB); 41204eeddc0SDimitry Andric if (L && !L->isLoopInvariant(MI)) 41304eeddc0SDimitry Andric return false; 41404eeddc0SDimitry Andric 41504eeddc0SDimitry Andric // Check whether current MI's operand is MOV with immediate. 41604eeddc0SDimitry Andric MovMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg()); 41704eeddc0SDimitry Andric if (!MovMI) 41804eeddc0SDimitry Andric return false; 41904eeddc0SDimitry Andric 42004eeddc0SDimitry Andric // If it is SUBREG_TO_REG, check its operand. 42104eeddc0SDimitry Andric SubregToRegMI = nullptr; 42204eeddc0SDimitry Andric if (MovMI->getOpcode() == TargetOpcode::SUBREG_TO_REG) { 42304eeddc0SDimitry Andric SubregToRegMI = MovMI; 42404eeddc0SDimitry Andric MovMI = MRI->getUniqueVRegDef(MovMI->getOperand(2).getReg()); 42504eeddc0SDimitry Andric if (!MovMI) 42604eeddc0SDimitry Andric return false; 42704eeddc0SDimitry Andric } 42804eeddc0SDimitry Andric 42904eeddc0SDimitry Andric if (MovMI->getOpcode() != AArch64::MOVi32imm && 43004eeddc0SDimitry Andric MovMI->getOpcode() != AArch64::MOVi64imm) 43104eeddc0SDimitry Andric return false; 43204eeddc0SDimitry Andric 43304eeddc0SDimitry Andric // If the MOV has multiple uses, do not split the immediate because it causes 43404eeddc0SDimitry Andric // more instructions. 43504eeddc0SDimitry Andric if (!MRI->hasOneUse(MovMI->getOperand(0).getReg())) 43604eeddc0SDimitry Andric return false; 43704eeddc0SDimitry Andric if (SubregToRegMI && !MRI->hasOneUse(SubregToRegMI->getOperand(0).getReg())) 43804eeddc0SDimitry Andric return false; 43904eeddc0SDimitry Andric 44004eeddc0SDimitry Andric // It is OK to perform this peephole optimization. 44104eeddc0SDimitry Andric return true; 44204eeddc0SDimitry Andric } 44304eeddc0SDimitry Andric 44404eeddc0SDimitry Andric template <typename T> 44504eeddc0SDimitry Andric bool AArch64MIPeepholeOpt::splitTwoPartImm( 44681ad6265SDimitry Andric MachineInstr &MI, 44704eeddc0SDimitry Andric SplitAndOpcFunc<T> SplitAndOpc, BuildMIFunc BuildInstr) { 44804eeddc0SDimitry Andric unsigned RegSize = sizeof(T) * 8; 44904eeddc0SDimitry Andric assert((RegSize == 32 || RegSize == 64) && 45004eeddc0SDimitry Andric "Invalid RegSize for legal immediate peephole optimization"); 45104eeddc0SDimitry Andric 45204eeddc0SDimitry Andric // Perform several essential checks against current MI. 45304eeddc0SDimitry Andric MachineInstr *MovMI, *SubregToRegMI; 45404eeddc0SDimitry Andric if (!checkMovImmInstr(MI, MovMI, SubregToRegMI)) 45504eeddc0SDimitry Andric return false; 45604eeddc0SDimitry Andric 45704eeddc0SDimitry Andric // Split the immediate to Imm0 and Imm1, and calculate the Opcode. 45804eeddc0SDimitry Andric T Imm = static_cast<T>(MovMI->getOperand(1).getImm()), Imm0, Imm1; 45904eeddc0SDimitry Andric // For the 32 bit form of instruction, the upper 32 bits of the destination 46004eeddc0SDimitry Andric // register are set to zero. If there is SUBREG_TO_REG, set the upper 32 bits 46104eeddc0SDimitry Andric // of Imm to zero. This is essential if the Immediate value was a negative 46204eeddc0SDimitry Andric // number since it was sign extended when we assign to the 64-bit Imm. 46304eeddc0SDimitry Andric if (SubregToRegMI) 46404eeddc0SDimitry Andric Imm &= 0xFFFFFFFF; 46581ad6265SDimitry Andric OpcodePair Opcode; 46604eeddc0SDimitry Andric if (auto R = SplitAndOpc(Imm, RegSize, Imm0, Imm1)) 46781ad6265SDimitry Andric Opcode = *R; 46804eeddc0SDimitry Andric else 46904eeddc0SDimitry Andric return false; 47004eeddc0SDimitry Andric 47181ad6265SDimitry Andric // Create new MIs using the first and second opcodes. Opcodes might differ for 47281ad6265SDimitry Andric // flag setting operations that should only set flags on second instruction. 47381ad6265SDimitry Andric // NewTmpReg = Opcode.first SrcReg Imm0 47481ad6265SDimitry Andric // NewDstReg = Opcode.second NewTmpReg Imm1 47581ad6265SDimitry Andric 47681ad6265SDimitry Andric // Determine register classes for destinations and register operands 47704eeddc0SDimitry Andric MachineFunction *MF = MI.getMF(); 47881ad6265SDimitry Andric const TargetRegisterClass *FirstInstrDstRC = 47981ad6265SDimitry Andric TII->getRegClass(TII->get(Opcode.first), 0, TRI, *MF); 48081ad6265SDimitry Andric const TargetRegisterClass *FirstInstrOperandRC = 48181ad6265SDimitry Andric TII->getRegClass(TII->get(Opcode.first), 1, TRI, *MF); 48281ad6265SDimitry Andric const TargetRegisterClass *SecondInstrDstRC = 48381ad6265SDimitry Andric (Opcode.first == Opcode.second) 48481ad6265SDimitry Andric ? FirstInstrDstRC 48581ad6265SDimitry Andric : TII->getRegClass(TII->get(Opcode.second), 0, TRI, *MF); 48681ad6265SDimitry Andric const TargetRegisterClass *SecondInstrOperandRC = 48781ad6265SDimitry Andric (Opcode.first == Opcode.second) 48881ad6265SDimitry Andric ? FirstInstrOperandRC 48981ad6265SDimitry Andric : TII->getRegClass(TII->get(Opcode.second), 1, TRI, *MF); 49081ad6265SDimitry Andric 49181ad6265SDimitry Andric // Get old registers destinations and new register destinations 49204eeddc0SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 49304eeddc0SDimitry Andric Register SrcReg = MI.getOperand(1).getReg(); 49481ad6265SDimitry Andric Register NewTmpReg = MRI->createVirtualRegister(FirstInstrDstRC); 49581ad6265SDimitry Andric // In the situation that DstReg is not Virtual (likely WZR or XZR), we want to 49681ad6265SDimitry Andric // reuse that same destination register. 49781ad6265SDimitry Andric Register NewDstReg = DstReg.isVirtual() 49881ad6265SDimitry Andric ? MRI->createVirtualRegister(SecondInstrDstRC) 49981ad6265SDimitry Andric : DstReg; 50004eeddc0SDimitry Andric 50181ad6265SDimitry Andric // Constrain registers based on their new uses 50281ad6265SDimitry Andric MRI->constrainRegClass(SrcReg, FirstInstrOperandRC); 50381ad6265SDimitry Andric MRI->constrainRegClass(NewTmpReg, SecondInstrOperandRC); 50481ad6265SDimitry Andric if (DstReg != NewDstReg) 50504eeddc0SDimitry Andric MRI->constrainRegClass(NewDstReg, MRI->getRegClass(DstReg)); 50604eeddc0SDimitry Andric 50781ad6265SDimitry Andric // Call the delegating operation to build the instruction 50804eeddc0SDimitry Andric BuildInstr(MI, Opcode, Imm0, Imm1, SrcReg, NewTmpReg, NewDstReg); 50904eeddc0SDimitry Andric 51004eeddc0SDimitry Andric // replaceRegWith changes MI's definition register. Keep it for SSA form until 51181ad6265SDimitry Andric // deleting MI. Only if we made a new destination register. 51281ad6265SDimitry Andric if (DstReg != NewDstReg) { 51381ad6265SDimitry Andric MRI->replaceRegWith(DstReg, NewDstReg); 51404eeddc0SDimitry Andric MI.getOperand(0).setReg(DstReg); 51581ad6265SDimitry Andric } 51604eeddc0SDimitry Andric 51704eeddc0SDimitry Andric // Record the MIs need to be removed. 51881ad6265SDimitry Andric MI.eraseFromParent(); 51904eeddc0SDimitry Andric if (SubregToRegMI) 52081ad6265SDimitry Andric SubregToRegMI->eraseFromParent(); 52181ad6265SDimitry Andric MovMI->eraseFromParent(); 522349cc55cSDimitry Andric 523349cc55cSDimitry Andric return true; 524349cc55cSDimitry Andric } 525349cc55cSDimitry Andric 526349cc55cSDimitry Andric bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) { 527349cc55cSDimitry Andric if (skipFunction(MF.getFunction())) 528349cc55cSDimitry Andric return false; 529349cc55cSDimitry Andric 530349cc55cSDimitry Andric TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo()); 53104eeddc0SDimitry Andric TRI = static_cast<const AArch64RegisterInfo *>( 53204eeddc0SDimitry Andric MF.getSubtarget().getRegisterInfo()); 533349cc55cSDimitry Andric MLI = &getAnalysis<MachineLoopInfo>(); 534349cc55cSDimitry Andric MRI = &MF.getRegInfo(); 535349cc55cSDimitry Andric 53604eeddc0SDimitry Andric assert(MRI->isSSA() && "Expected to be run on SSA form!"); 537349cc55cSDimitry Andric 538349cc55cSDimitry Andric bool Changed = false; 539349cc55cSDimitry Andric 540349cc55cSDimitry Andric for (MachineBasicBlock &MBB : MF) { 54181ad6265SDimitry Andric for (MachineInstr &MI : make_early_inc_range(MBB)) { 542349cc55cSDimitry Andric switch (MI.getOpcode()) { 543349cc55cSDimitry Andric default: 544349cc55cSDimitry Andric break; 545*bdd1243dSDimitry Andric case AArch64::INSERT_SUBREG: 546*bdd1243dSDimitry Andric Changed = visitINSERT(MI); 547*bdd1243dSDimitry Andric break; 548349cc55cSDimitry Andric case AArch64::ANDWrr: 54981ad6265SDimitry Andric Changed = visitAND<uint32_t>(AArch64::ANDWri, MI); 550349cc55cSDimitry Andric break; 551349cc55cSDimitry Andric case AArch64::ANDXrr: 55281ad6265SDimitry Andric Changed = visitAND<uint64_t>(AArch64::ANDXri, MI); 553349cc55cSDimitry Andric break; 554349cc55cSDimitry Andric case AArch64::ORRWrs: 55581ad6265SDimitry Andric Changed = visitORR(MI); 55604eeddc0SDimitry Andric break; 55704eeddc0SDimitry Andric case AArch64::ADDWrr: 55881ad6265SDimitry Andric Changed = visitADDSUB<uint32_t>(AArch64::ADDWri, AArch64::SUBWri, MI); 55904eeddc0SDimitry Andric break; 56004eeddc0SDimitry Andric case AArch64::SUBWrr: 56181ad6265SDimitry Andric Changed = visitADDSUB<uint32_t>(AArch64::SUBWri, AArch64::ADDWri, MI); 56204eeddc0SDimitry Andric break; 56304eeddc0SDimitry Andric case AArch64::ADDXrr: 56481ad6265SDimitry Andric Changed = visitADDSUB<uint64_t>(AArch64::ADDXri, AArch64::SUBXri, MI); 56504eeddc0SDimitry Andric break; 56604eeddc0SDimitry Andric case AArch64::SUBXrr: 56781ad6265SDimitry Andric Changed = visitADDSUB<uint64_t>(AArch64::SUBXri, AArch64::ADDXri, MI); 56881ad6265SDimitry Andric break; 56981ad6265SDimitry Andric case AArch64::ADDSWrr: 57081ad6265SDimitry Andric Changed = visitADDSSUBS<uint32_t>({AArch64::ADDWri, AArch64::ADDSWri}, 57181ad6265SDimitry Andric {AArch64::SUBWri, AArch64::SUBSWri}, 57281ad6265SDimitry Andric MI); 57381ad6265SDimitry Andric break; 57481ad6265SDimitry Andric case AArch64::SUBSWrr: 57581ad6265SDimitry Andric Changed = visitADDSSUBS<uint32_t>({AArch64::SUBWri, AArch64::SUBSWri}, 57681ad6265SDimitry Andric {AArch64::ADDWri, AArch64::ADDSWri}, 57781ad6265SDimitry Andric MI); 57881ad6265SDimitry Andric break; 57981ad6265SDimitry Andric case AArch64::ADDSXrr: 58081ad6265SDimitry Andric Changed = visitADDSSUBS<uint64_t>({AArch64::ADDXri, AArch64::ADDSXri}, 58181ad6265SDimitry Andric {AArch64::SUBXri, AArch64::SUBSXri}, 58281ad6265SDimitry Andric MI); 58381ad6265SDimitry Andric break; 58481ad6265SDimitry Andric case AArch64::SUBSXrr: 58581ad6265SDimitry Andric Changed = visitADDSSUBS<uint64_t>({AArch64::SUBXri, AArch64::SUBSXri}, 58681ad6265SDimitry Andric {AArch64::ADDXri, AArch64::ADDSXri}, 58781ad6265SDimitry Andric MI); 58804eeddc0SDimitry Andric break; 589349cc55cSDimitry Andric } 590349cc55cSDimitry Andric } 591349cc55cSDimitry Andric } 592349cc55cSDimitry Andric 593349cc55cSDimitry Andric return Changed; 594349cc55cSDimitry Andric } 595349cc55cSDimitry Andric 596349cc55cSDimitry Andric FunctionPass *llvm::createAArch64MIPeepholeOptPass() { 597349cc55cSDimitry Andric return new AArch64MIPeepholeOpt(); 598349cc55cSDimitry Andric } 599