1 //=- AArch64RedundantCopyElimination.cpp - Remove useless copy for AArch64 -=// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 // This pass removes unnecessary copies/moves in BBs based on a dominating 8 // condition. 9 // 10 // We handle three cases: 11 // 1. For BBs that are targets of CBZ/CBNZ instructions, we know the value of 12 // the CBZ/CBNZ source register is zero on the taken/not-taken path. For 13 // instance, the copy instruction in the code below can be removed because 14 // the CBZW jumps to %bb.2 when w0 is zero. 15 // 16 // %bb.1: 17 // cbz w0, .LBB0_2 18 // .LBB0_2: 19 // mov w0, wzr ; <-- redundant 20 // 21 // 2. If the flag setting instruction defines a register other than WZR/XZR, we 22 // can remove a zero copy in some cases. 23 // 24 // %bb.0: 25 // subs w0, w1, w2 26 // str w0, [x1] 27 // b.ne .LBB0_2 28 // %bb.1: 29 // mov w0, wzr ; <-- redundant 30 // str w0, [x2] 31 // .LBB0_2 32 // 33 // 3. Finally, if the flag setting instruction is a comparison against a 34 // constant (i.e., ADDS[W|X]ri, SUBS[W|X]ri), we can remove a mov immediate 35 // in some cases. 36 // 37 // %bb.0: 38 // subs xzr, x0, #1 39 // b.eq .LBB0_1 40 // .LBB0_1: 41 // orr x0, xzr, #0x1 ; <-- redundant 42 // 43 // This pass should be run after register allocation. 44 // 45 // FIXME: This could also be extended to check the whole dominance subtree below 46 // the comparison if the compile time regression is acceptable. 47 // 48 // FIXME: Add support for handling CCMP instructions. 49 // FIXME: If the known register value is zero, we should be able to rewrite uses 50 // to use WZR/XZR directly in some cases. 51 //===----------------------------------------------------------------------===// 52 #include "AArch64.h" 53 #include "llvm/ADT/Optional.h" 54 #include "llvm/ADT/SetVector.h" 55 #include "llvm/ADT/Statistic.h" 56 #include "llvm/ADT/iterator_range.h" 57 #include "llvm/CodeGen/LiveRegUnits.h" 58 #include "llvm/CodeGen/MachineFunctionPass.h" 59 #include "llvm/CodeGen/MachineRegisterInfo.h" 60 #include "llvm/Support/Debug.h" 61 62 using namespace llvm; 63 64 #define DEBUG_TYPE "aarch64-copyelim" 65 66 STATISTIC(NumCopiesRemoved, "Number of copies removed."); 67 68 namespace { 69 class AArch64RedundantCopyElimination : public MachineFunctionPass { 70 const MachineRegisterInfo *MRI; 71 const TargetRegisterInfo *TRI; 72 73 // DomBBClobberedRegs is used when computing known values in the dominating 74 // BB. 75 LiveRegUnits DomBBClobberedRegs, DomBBUsedRegs; 76 77 // OptBBClobberedRegs is used when optimizing away redundant copies/moves. 78 LiveRegUnits OptBBClobberedRegs, OptBBUsedRegs; 79 80 public: 81 static char ID; 82 AArch64RedundantCopyElimination() : MachineFunctionPass(ID) { 83 initializeAArch64RedundantCopyEliminationPass( 84 *PassRegistry::getPassRegistry()); 85 } 86 87 struct RegImm { 88 MCPhysReg Reg; 89 int32_t Imm; 90 RegImm(MCPhysReg Reg, int32_t Imm) : Reg(Reg), Imm(Imm) {} 91 }; 92 93 bool knownRegValInBlock(MachineInstr &CondBr, MachineBasicBlock *MBB, 94 SmallVectorImpl<RegImm> &KnownRegs, 95 MachineBasicBlock::iterator &FirstUse); 96 bool optimizeBlock(MachineBasicBlock *MBB); 97 bool runOnMachineFunction(MachineFunction &MF) override; 98 MachineFunctionProperties getRequiredProperties() const override { 99 return MachineFunctionProperties().set( 100 MachineFunctionProperties::Property::NoVRegs); 101 } 102 StringRef getPassName() const override { 103 return "AArch64 Redundant Copy Elimination"; 104 } 105 }; 106 char AArch64RedundantCopyElimination::ID = 0; 107 } 108 109 INITIALIZE_PASS(AArch64RedundantCopyElimination, "aarch64-copyelim", 110 "AArch64 redundant copy elimination pass", false, false) 111 112 /// It's possible to determine the value of a register based on a dominating 113 /// condition. To do so, this function checks to see if the basic block \p MBB 114 /// is the target of a conditional branch \p CondBr with an equality comparison. 115 /// If the branch is a CBZ/CBNZ, we know the value of its source operand is zero 116 /// in \p MBB for some cases. Otherwise, we find and inspect the NZCV setting 117 /// instruction (e.g., SUBS, ADDS). If this instruction defines a register 118 /// other than WZR/XZR, we know the value of the destination register is zero in 119 /// \p MMB for some cases. In addition, if the NZCV setting instruction is 120 /// comparing against a constant we know the other source register is equal to 121 /// the constant in \p MBB for some cases. If we find any constant values, push 122 /// a physical register and constant value pair onto the KnownRegs vector and 123 /// return true. Otherwise, return false if no known values were found. 124 bool AArch64RedundantCopyElimination::knownRegValInBlock( 125 MachineInstr &CondBr, MachineBasicBlock *MBB, 126 SmallVectorImpl<RegImm> &KnownRegs, MachineBasicBlock::iterator &FirstUse) { 127 unsigned Opc = CondBr.getOpcode(); 128 129 // Check if the current basic block is the target block to which the 130 // CBZ/CBNZ instruction jumps when its Wt/Xt is zero. 131 if (((Opc == AArch64::CBZW || Opc == AArch64::CBZX) && 132 MBB == CondBr.getOperand(1).getMBB()) || 133 ((Opc == AArch64::CBNZW || Opc == AArch64::CBNZX) && 134 MBB != CondBr.getOperand(1).getMBB())) { 135 FirstUse = CondBr; 136 KnownRegs.push_back(RegImm(CondBr.getOperand(0).getReg(), 0)); 137 return true; 138 } 139 140 // Otherwise, must be a conditional branch. 141 if (Opc != AArch64::Bcc) 142 return false; 143 144 // Must be an equality check (i.e., == or !=). 145 AArch64CC::CondCode CC = (AArch64CC::CondCode)CondBr.getOperand(0).getImm(); 146 if (CC != AArch64CC::EQ && CC != AArch64CC::NE) 147 return false; 148 149 MachineBasicBlock *BrTarget = CondBr.getOperand(1).getMBB(); 150 if ((CC == AArch64CC::EQ && BrTarget != MBB) || 151 (CC == AArch64CC::NE && BrTarget == MBB)) 152 return false; 153 154 // Stop if we get to the beginning of PredMBB. 155 MachineBasicBlock *PredMBB = *MBB->pred_begin(); 156 assert(PredMBB == CondBr.getParent() && 157 "Conditional branch not in predecessor block!"); 158 if (CondBr == PredMBB->begin()) 159 return false; 160 161 // Registers clobbered in PredMBB between CondBr instruction and current 162 // instruction being checked in loop. 163 DomBBClobberedRegs.clear(); 164 DomBBUsedRegs.clear(); 165 166 // Find compare instruction that sets NZCV used by CondBr. 167 MachineBasicBlock::reverse_iterator RIt = CondBr.getReverseIterator(); 168 for (MachineInstr &PredI : make_range(std::next(RIt), PredMBB->rend())) { 169 170 bool IsCMN = false; 171 switch (PredI.getOpcode()) { 172 default: 173 break; 174 175 // CMN is an alias for ADDS with a dead destination register. 176 case AArch64::ADDSWri: 177 case AArch64::ADDSXri: 178 IsCMN = true; 179 LLVM_FALLTHROUGH; 180 // CMP is an alias for SUBS with a dead destination register. 181 case AArch64::SUBSWri: 182 case AArch64::SUBSXri: { 183 // Sometimes the first operand is a FrameIndex. Bail if tht happens. 184 if (!PredI.getOperand(1).isReg()) 185 return false; 186 MCPhysReg DstReg = PredI.getOperand(0).getReg(); 187 MCPhysReg SrcReg = PredI.getOperand(1).getReg(); 188 189 bool Res = false; 190 // If we're comparing against a non-symbolic immediate and the source 191 // register of the compare is not modified (including a self-clobbering 192 // compare) between the compare and conditional branch we known the value 193 // of the 1st source operand. 194 if (PredI.getOperand(2).isImm() && DomBBClobberedRegs.available(SrcReg) && 195 SrcReg != DstReg) { 196 // We've found the instruction that sets NZCV. 197 int32_t KnownImm = PredI.getOperand(2).getImm(); 198 int32_t Shift = PredI.getOperand(3).getImm(); 199 KnownImm <<= Shift; 200 if (IsCMN) 201 KnownImm = -KnownImm; 202 FirstUse = PredI; 203 KnownRegs.push_back(RegImm(SrcReg, KnownImm)); 204 Res = true; 205 } 206 207 // If this instructions defines something other than WZR/XZR, we know it's 208 // result is zero in some cases. 209 if (DstReg == AArch64::WZR || DstReg == AArch64::XZR) 210 return Res; 211 212 // The destination register must not be modified between the NZCV setting 213 // instruction and the conditional branch. 214 if (!DomBBClobberedRegs.available(DstReg)) 215 return Res; 216 217 FirstUse = PredI; 218 KnownRegs.push_back(RegImm(DstReg, 0)); 219 return true; 220 } 221 222 // Look for NZCV setting instructions that define something other than 223 // WZR/XZR. 224 case AArch64::ADCSWr: 225 case AArch64::ADCSXr: 226 case AArch64::ADDSWrr: 227 case AArch64::ADDSWrs: 228 case AArch64::ADDSWrx: 229 case AArch64::ADDSXrr: 230 case AArch64::ADDSXrs: 231 case AArch64::ADDSXrx: 232 case AArch64::ADDSXrx64: 233 case AArch64::ANDSWri: 234 case AArch64::ANDSWrr: 235 case AArch64::ANDSWrs: 236 case AArch64::ANDSXri: 237 case AArch64::ANDSXrr: 238 case AArch64::ANDSXrs: 239 case AArch64::BICSWrr: 240 case AArch64::BICSWrs: 241 case AArch64::BICSXrs: 242 case AArch64::BICSXrr: 243 case AArch64::SBCSWr: 244 case AArch64::SBCSXr: 245 case AArch64::SUBSWrr: 246 case AArch64::SUBSWrs: 247 case AArch64::SUBSWrx: 248 case AArch64::SUBSXrr: 249 case AArch64::SUBSXrs: 250 case AArch64::SUBSXrx: 251 case AArch64::SUBSXrx64: { 252 MCPhysReg DstReg = PredI.getOperand(0).getReg(); 253 if (DstReg == AArch64::WZR || DstReg == AArch64::XZR) 254 return false; 255 256 // The destination register of the NZCV setting instruction must not be 257 // modified before the conditional branch. 258 if (!DomBBClobberedRegs.available(DstReg)) 259 return false; 260 261 // We've found the instruction that sets NZCV whose DstReg == 0. 262 FirstUse = PredI; 263 KnownRegs.push_back(RegImm(DstReg, 0)); 264 return true; 265 } 266 } 267 268 // Bail if we see an instruction that defines NZCV that we don't handle. 269 if (PredI.definesRegister(AArch64::NZCV)) 270 return false; 271 272 // Track clobbered and used registers. 273 LiveRegUnits::accumulateUsedDefed(PredI, DomBBClobberedRegs, DomBBUsedRegs, 274 TRI); 275 } 276 return false; 277 } 278 279 bool AArch64RedundantCopyElimination::optimizeBlock(MachineBasicBlock *MBB) { 280 // Check if the current basic block has a single predecessor. 281 if (MBB->pred_size() != 1) 282 return false; 283 284 // Check if the predecessor has two successors, implying the block ends in a 285 // conditional branch. 286 MachineBasicBlock *PredMBB = *MBB->pred_begin(); 287 if (PredMBB->succ_size() != 2) 288 return false; 289 290 MachineBasicBlock::iterator CondBr = PredMBB->getLastNonDebugInstr(); 291 if (CondBr == PredMBB->end()) 292 return false; 293 294 // Keep track of the earliest point in the PredMBB block where kill markers 295 // need to be removed if a COPY is removed. 296 MachineBasicBlock::iterator FirstUse; 297 // After calling knownRegValInBlock, FirstUse will either point to a CBZ/CBNZ 298 // or a compare (i.e., SUBS). In the latter case, we must take care when 299 // updating FirstUse when scanning for COPY instructions. In particular, if 300 // there's a COPY in between the compare and branch the COPY should not 301 // update FirstUse. 302 bool SeenFirstUse = false; 303 // Registers that contain a known value at the start of MBB. 304 SmallVector<RegImm, 4> KnownRegs; 305 306 MachineBasicBlock::iterator Itr = std::next(CondBr); 307 do { 308 --Itr; 309 310 if (!knownRegValInBlock(*Itr, MBB, KnownRegs, FirstUse)) 311 continue; 312 313 // Reset the clobbered and used register units. 314 OptBBClobberedRegs.clear(); 315 OptBBUsedRegs.clear(); 316 317 // Look backward in PredMBB for COPYs from the known reg to find other 318 // registers that are known to be a constant value. 319 for (auto PredI = Itr;; --PredI) { 320 if (FirstUse == PredI) 321 SeenFirstUse = true; 322 323 if (PredI->isCopy()) { 324 MCPhysReg CopyDstReg = PredI->getOperand(0).getReg(); 325 MCPhysReg CopySrcReg = PredI->getOperand(1).getReg(); 326 for (auto &KnownReg : KnownRegs) { 327 if (!OptBBClobberedRegs.available(KnownReg.Reg)) 328 continue; 329 // If we have X = COPY Y, and Y is known to be zero, then now X is 330 // known to be zero. 331 if (CopySrcReg == KnownReg.Reg && 332 OptBBClobberedRegs.available(CopyDstReg)) { 333 KnownRegs.push_back(RegImm(CopyDstReg, KnownReg.Imm)); 334 if (SeenFirstUse) 335 FirstUse = PredI; 336 break; 337 } 338 // If we have X = COPY Y, and X is known to be zero, then now Y is 339 // known to be zero. 340 if (CopyDstReg == KnownReg.Reg && 341 OptBBClobberedRegs.available(CopySrcReg)) { 342 KnownRegs.push_back(RegImm(CopySrcReg, KnownReg.Imm)); 343 if (SeenFirstUse) 344 FirstUse = PredI; 345 break; 346 } 347 } 348 } 349 350 // Stop if we get to the beginning of PredMBB. 351 if (PredI == PredMBB->begin()) 352 break; 353 354 LiveRegUnits::accumulateUsedDefed(*PredI, OptBBClobberedRegs, 355 OptBBUsedRegs, TRI); 356 // Stop if all of the known-zero regs have been clobbered. 357 if (all_of(KnownRegs, [&](RegImm KnownReg) { 358 return !OptBBClobberedRegs.available(KnownReg.Reg); 359 })) 360 break; 361 } 362 break; 363 364 } while (Itr != PredMBB->begin() && Itr->isTerminator()); 365 366 // We've not found a registers with a known value, time to bail out. 367 if (KnownRegs.empty()) 368 return false; 369 370 bool Changed = false; 371 // UsedKnownRegs is the set of KnownRegs that have had uses added to MBB. 372 SmallSetVector<unsigned, 4> UsedKnownRegs; 373 MachineBasicBlock::iterator LastChange = MBB->begin(); 374 // Remove redundant copy/move instructions unless KnownReg is modified. 375 for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;) { 376 MachineInstr *MI = &*I; 377 ++I; 378 bool RemovedMI = false; 379 bool IsCopy = MI->isCopy(); 380 bool IsMoveImm = MI->isMoveImmediate(); 381 if (IsCopy || IsMoveImm) { 382 Register DefReg = MI->getOperand(0).getReg(); 383 Register SrcReg = IsCopy ? MI->getOperand(1).getReg() : Register(); 384 int64_t SrcImm = IsMoveImm ? MI->getOperand(1).getImm() : 0; 385 if (!MRI->isReserved(DefReg) && 386 ((IsCopy && (SrcReg == AArch64::XZR || SrcReg == AArch64::WZR)) || 387 IsMoveImm)) { 388 for (RegImm &KnownReg : KnownRegs) { 389 if (KnownReg.Reg != DefReg && 390 !TRI->isSuperRegister(DefReg, KnownReg.Reg)) 391 continue; 392 393 // For a copy, the known value must be a zero. 394 if (IsCopy && KnownReg.Imm != 0) 395 continue; 396 397 if (IsMoveImm) { 398 // For a move immediate, the known immediate must match the source 399 // immediate. 400 if (KnownReg.Imm != SrcImm) 401 continue; 402 403 // Don't remove a move immediate that implicitly defines the upper 404 // bits when only the lower 32 bits are known. 405 MCPhysReg CmpReg = KnownReg.Reg; 406 if (any_of(MI->implicit_operands(), [CmpReg](MachineOperand &O) { 407 return !O.isDead() && O.isReg() && O.isDef() && 408 O.getReg() != CmpReg; 409 })) 410 continue; 411 } 412 413 if (IsCopy) 414 LLVM_DEBUG(dbgs() << "Remove redundant Copy : " << *MI); 415 else 416 LLVM_DEBUG(dbgs() << "Remove redundant Move : " << *MI); 417 418 MI->eraseFromParent(); 419 Changed = true; 420 LastChange = I; 421 NumCopiesRemoved++; 422 UsedKnownRegs.insert(KnownReg.Reg); 423 RemovedMI = true; 424 break; 425 } 426 } 427 } 428 429 // Skip to the next instruction if we removed the COPY/MovImm. 430 if (RemovedMI) 431 continue; 432 433 // Remove any regs the MI clobbers from the KnownConstRegs set. 434 for (unsigned RI = 0; RI < KnownRegs.size();) 435 if (MI->modifiesRegister(KnownRegs[RI].Reg, TRI)) { 436 std::swap(KnownRegs[RI], KnownRegs[KnownRegs.size() - 1]); 437 KnownRegs.pop_back(); 438 // Don't increment RI since we need to now check the swapped-in 439 // KnownRegs[RI]. 440 } else { 441 ++RI; 442 } 443 444 // Continue until the KnownRegs set is empty. 445 if (KnownRegs.empty()) 446 break; 447 } 448 449 if (!Changed) 450 return false; 451 452 // Add newly used regs to the block's live-in list if they aren't there 453 // already. 454 for (MCPhysReg KnownReg : UsedKnownRegs) 455 if (!MBB->isLiveIn(KnownReg)) 456 MBB->addLiveIn(KnownReg); 457 458 // Clear kills in the range where changes were made. This is conservative, 459 // but should be okay since kill markers are being phased out. 460 LLVM_DEBUG(dbgs() << "Clearing kill flags.\n\tFirstUse: " << *FirstUse 461 << "\tLastChange: " << *LastChange); 462 for (MachineInstr &MMI : make_range(FirstUse, PredMBB->end())) 463 MMI.clearKillInfo(); 464 for (MachineInstr &MMI : make_range(MBB->begin(), LastChange)) 465 MMI.clearKillInfo(); 466 467 return true; 468 } 469 470 bool AArch64RedundantCopyElimination::runOnMachineFunction( 471 MachineFunction &MF) { 472 if (skipFunction(MF.getFunction())) 473 return false; 474 TRI = MF.getSubtarget().getRegisterInfo(); 475 MRI = &MF.getRegInfo(); 476 477 // Resize the clobbered and used register unit trackers. We do this once per 478 // function. 479 DomBBClobberedRegs.init(*TRI); 480 DomBBUsedRegs.init(*TRI); 481 OptBBClobberedRegs.init(*TRI); 482 OptBBUsedRegs.init(*TRI); 483 484 bool Changed = false; 485 for (MachineBasicBlock &MBB : MF) 486 Changed |= optimizeBlock(&MBB); 487 return Changed; 488 } 489 490 FunctionPass *llvm::createAArch64RedundantCopyEliminationPass() { 491 return new AArch64RedundantCopyElimination(); 492 } 493