1 //=- AArch64RedundantCopyElimination.cpp - Remove useless copy for AArch64 -=// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 // This pass removes unnecessary copies/moves in BBs based on a dominating 8 // condition. 9 // 10 // We handle three cases: 11 // 1. For BBs that are targets of CBZ/CBNZ instructions, we know the value of 12 // the CBZ/CBNZ source register is zero on the taken/not-taken path. For 13 // instance, the copy instruction in the code below can be removed because 14 // the CBZW jumps to %bb.2 when w0 is zero. 15 // 16 // %bb.1: 17 // cbz w0, .LBB0_2 18 // .LBB0_2: 19 // mov w0, wzr ; <-- redundant 20 // 21 // 2. If the flag setting instruction defines a register other than WZR/XZR, we 22 // can remove a zero copy in some cases. 23 // 24 // %bb.0: 25 // subs w0, w1, w2 26 // str w0, [x1] 27 // b.ne .LBB0_2 28 // %bb.1: 29 // mov w0, wzr ; <-- redundant 30 // str w0, [x2] 31 // .LBB0_2 32 // 33 // 3. Finally, if the flag setting instruction is a comparison against a 34 // constant (i.e., ADDS[W|X]ri, SUBS[W|X]ri), we can remove a mov immediate 35 // in some cases. 36 // 37 // %bb.0: 38 // subs xzr, x0, #1 39 // b.eq .LBB0_1 40 // .LBB0_1: 41 // orr x0, xzr, #0x1 ; <-- redundant 42 // 43 // This pass should be run after register allocation. 44 // 45 // FIXME: This could also be extended to check the whole dominance subtree below 46 // the comparison if the compile time regression is acceptable. 47 // 48 // FIXME: Add support for handling CCMP instructions. 49 // FIXME: If the known register value is zero, we should be able to rewrite uses 50 // to use WZR/XZR directly in some cases. 51 //===----------------------------------------------------------------------===// 52 #include "AArch64.h" 53 #include "llvm/ADT/SetVector.h" 54 #include "llvm/ADT/Statistic.h" 55 #include "llvm/ADT/iterator_range.h" 56 #include "llvm/CodeGen/LiveRegUnits.h" 57 #include "llvm/CodeGen/MachineFunctionPass.h" 58 #include "llvm/CodeGen/MachineRegisterInfo.h" 59 #include "llvm/Support/Debug.h" 60 61 using namespace llvm; 62 63 #define DEBUG_TYPE "aarch64-copyelim" 64 65 STATISTIC(NumCopiesRemoved, "Number of copies removed."); 66 67 namespace { 68 class AArch64RedundantCopyElimination : public MachineFunctionPass { 69 const MachineRegisterInfo *MRI; 70 const TargetRegisterInfo *TRI; 71 72 // DomBBClobberedRegs is used when computing known values in the dominating 73 // BB. 74 LiveRegUnits DomBBClobberedRegs, DomBBUsedRegs; 75 76 // OptBBClobberedRegs is used when optimizing away redundant copies/moves. 77 LiveRegUnits OptBBClobberedRegs, OptBBUsedRegs; 78 79 public: 80 static char ID; 81 AArch64RedundantCopyElimination() : MachineFunctionPass(ID) { 82 initializeAArch64RedundantCopyEliminationPass( 83 *PassRegistry::getPassRegistry()); 84 } 85 86 struct RegImm { 87 MCPhysReg Reg; 88 int32_t Imm; 89 RegImm(MCPhysReg Reg, int32_t Imm) : Reg(Reg), Imm(Imm) {} 90 }; 91 92 bool knownRegValInBlock(MachineInstr &CondBr, MachineBasicBlock *MBB, 93 SmallVectorImpl<RegImm> &KnownRegs, 94 MachineBasicBlock::iterator &FirstUse); 95 bool optimizeBlock(MachineBasicBlock *MBB); 96 bool runOnMachineFunction(MachineFunction &MF) override; 97 MachineFunctionProperties getRequiredProperties() const override { 98 return MachineFunctionProperties().set( 99 MachineFunctionProperties::Property::NoVRegs); 100 } 101 StringRef getPassName() const override { 102 return "AArch64 Redundant Copy Elimination"; 103 } 104 }; 105 char AArch64RedundantCopyElimination::ID = 0; 106 } 107 108 INITIALIZE_PASS(AArch64RedundantCopyElimination, "aarch64-copyelim", 109 "AArch64 redundant copy elimination pass", false, false) 110 111 /// It's possible to determine the value of a register based on a dominating 112 /// condition. To do so, this function checks to see if the basic block \p MBB 113 /// is the target of a conditional branch \p CondBr with an equality comparison. 114 /// If the branch is a CBZ/CBNZ, we know the value of its source operand is zero 115 /// in \p MBB for some cases. Otherwise, we find and inspect the NZCV setting 116 /// instruction (e.g., SUBS, ADDS). If this instruction defines a register 117 /// other than WZR/XZR, we know the value of the destination register is zero in 118 /// \p MMB for some cases. In addition, if the NZCV setting instruction is 119 /// comparing against a constant we know the other source register is equal to 120 /// the constant in \p MBB for some cases. If we find any constant values, push 121 /// a physical register and constant value pair onto the KnownRegs vector and 122 /// return true. Otherwise, return false if no known values were found. 123 bool AArch64RedundantCopyElimination::knownRegValInBlock( 124 MachineInstr &CondBr, MachineBasicBlock *MBB, 125 SmallVectorImpl<RegImm> &KnownRegs, MachineBasicBlock::iterator &FirstUse) { 126 unsigned Opc = CondBr.getOpcode(); 127 128 // Check if the current basic block is the target block to which the 129 // CBZ/CBNZ instruction jumps when its Wt/Xt is zero. 130 if (((Opc == AArch64::CBZW || Opc == AArch64::CBZX) && 131 MBB == CondBr.getOperand(1).getMBB()) || 132 ((Opc == AArch64::CBNZW || Opc == AArch64::CBNZX) && 133 MBB != CondBr.getOperand(1).getMBB())) { 134 FirstUse = CondBr; 135 KnownRegs.push_back(RegImm(CondBr.getOperand(0).getReg(), 0)); 136 return true; 137 } 138 139 // Otherwise, must be a conditional branch. 140 if (Opc != AArch64::Bcc) 141 return false; 142 143 // Must be an equality check (i.e., == or !=). 144 AArch64CC::CondCode CC = (AArch64CC::CondCode)CondBr.getOperand(0).getImm(); 145 if (CC != AArch64CC::EQ && CC != AArch64CC::NE) 146 return false; 147 148 MachineBasicBlock *BrTarget = CondBr.getOperand(1).getMBB(); 149 if ((CC == AArch64CC::EQ && BrTarget != MBB) || 150 (CC == AArch64CC::NE && BrTarget == MBB)) 151 return false; 152 153 // Stop if we get to the beginning of PredMBB. 154 MachineBasicBlock *PredMBB = *MBB->pred_begin(); 155 assert(PredMBB == CondBr.getParent() && 156 "Conditional branch not in predecessor block!"); 157 if (CondBr == PredMBB->begin()) 158 return false; 159 160 // Registers clobbered in PredMBB between CondBr instruction and current 161 // instruction being checked in loop. 162 DomBBClobberedRegs.clear(); 163 DomBBUsedRegs.clear(); 164 165 // Find compare instruction that sets NZCV used by CondBr. 166 MachineBasicBlock::reverse_iterator RIt = CondBr.getReverseIterator(); 167 for (MachineInstr &PredI : make_range(std::next(RIt), PredMBB->rend())) { 168 169 bool IsCMN = false; 170 switch (PredI.getOpcode()) { 171 default: 172 break; 173 174 // CMN is an alias for ADDS with a dead destination register. 175 case AArch64::ADDSWri: 176 case AArch64::ADDSXri: 177 IsCMN = true; 178 [[fallthrough]]; 179 // CMP is an alias for SUBS with a dead destination register. 180 case AArch64::SUBSWri: 181 case AArch64::SUBSXri: { 182 // Sometimes the first operand is a FrameIndex. Bail if tht happens. 183 if (!PredI.getOperand(1).isReg()) 184 return false; 185 MCPhysReg DstReg = PredI.getOperand(0).getReg(); 186 MCPhysReg SrcReg = PredI.getOperand(1).getReg(); 187 188 bool Res = false; 189 // If we're comparing against a non-symbolic immediate and the source 190 // register of the compare is not modified (including a self-clobbering 191 // compare) between the compare and conditional branch we known the value 192 // of the 1st source operand. 193 if (PredI.getOperand(2).isImm() && DomBBClobberedRegs.available(SrcReg) && 194 SrcReg != DstReg) { 195 // We've found the instruction that sets NZCV. 196 int32_t KnownImm = PredI.getOperand(2).getImm(); 197 int32_t Shift = PredI.getOperand(3).getImm(); 198 KnownImm <<= Shift; 199 if (IsCMN) 200 KnownImm = -KnownImm; 201 FirstUse = PredI; 202 KnownRegs.push_back(RegImm(SrcReg, KnownImm)); 203 Res = true; 204 } 205 206 // If this instructions defines something other than WZR/XZR, we know it's 207 // result is zero in some cases. 208 if (DstReg == AArch64::WZR || DstReg == AArch64::XZR) 209 return Res; 210 211 // The destination register must not be modified between the NZCV setting 212 // instruction and the conditional branch. 213 if (!DomBBClobberedRegs.available(DstReg)) 214 return Res; 215 216 FirstUse = PredI; 217 KnownRegs.push_back(RegImm(DstReg, 0)); 218 return true; 219 } 220 221 // Look for NZCV setting instructions that define something other than 222 // WZR/XZR. 223 case AArch64::ADCSWr: 224 case AArch64::ADCSXr: 225 case AArch64::ADDSWrr: 226 case AArch64::ADDSWrs: 227 case AArch64::ADDSWrx: 228 case AArch64::ADDSXrr: 229 case AArch64::ADDSXrs: 230 case AArch64::ADDSXrx: 231 case AArch64::ADDSXrx64: 232 case AArch64::ANDSWri: 233 case AArch64::ANDSWrr: 234 case AArch64::ANDSWrs: 235 case AArch64::ANDSXri: 236 case AArch64::ANDSXrr: 237 case AArch64::ANDSXrs: 238 case AArch64::BICSWrr: 239 case AArch64::BICSWrs: 240 case AArch64::BICSXrs: 241 case AArch64::BICSXrr: 242 case AArch64::SBCSWr: 243 case AArch64::SBCSXr: 244 case AArch64::SUBSWrr: 245 case AArch64::SUBSWrs: 246 case AArch64::SUBSWrx: 247 case AArch64::SUBSXrr: 248 case AArch64::SUBSXrs: 249 case AArch64::SUBSXrx: 250 case AArch64::SUBSXrx64: { 251 MCPhysReg DstReg = PredI.getOperand(0).getReg(); 252 if (DstReg == AArch64::WZR || DstReg == AArch64::XZR) 253 return false; 254 255 // The destination register of the NZCV setting instruction must not be 256 // modified before the conditional branch. 257 if (!DomBBClobberedRegs.available(DstReg)) 258 return false; 259 260 // We've found the instruction that sets NZCV whose DstReg == 0. 261 FirstUse = PredI; 262 KnownRegs.push_back(RegImm(DstReg, 0)); 263 return true; 264 } 265 } 266 267 // Bail if we see an instruction that defines NZCV that we don't handle. 268 if (PredI.definesRegister(AArch64::NZCV)) 269 return false; 270 271 // Track clobbered and used registers. 272 LiveRegUnits::accumulateUsedDefed(PredI, DomBBClobberedRegs, DomBBUsedRegs, 273 TRI); 274 } 275 return false; 276 } 277 278 bool AArch64RedundantCopyElimination::optimizeBlock(MachineBasicBlock *MBB) { 279 // Check if the current basic block has a single predecessor. 280 if (MBB->pred_size() != 1) 281 return false; 282 283 // Check if the predecessor has two successors, implying the block ends in a 284 // conditional branch. 285 MachineBasicBlock *PredMBB = *MBB->pred_begin(); 286 if (PredMBB->succ_size() != 2) 287 return false; 288 289 MachineBasicBlock::iterator CondBr = PredMBB->getLastNonDebugInstr(); 290 if (CondBr == PredMBB->end()) 291 return false; 292 293 // Keep track of the earliest point in the PredMBB block where kill markers 294 // need to be removed if a COPY is removed. 295 MachineBasicBlock::iterator FirstUse; 296 // After calling knownRegValInBlock, FirstUse will either point to a CBZ/CBNZ 297 // or a compare (i.e., SUBS). In the latter case, we must take care when 298 // updating FirstUse when scanning for COPY instructions. In particular, if 299 // there's a COPY in between the compare and branch the COPY should not 300 // update FirstUse. 301 bool SeenFirstUse = false; 302 // Registers that contain a known value at the start of MBB. 303 SmallVector<RegImm, 4> KnownRegs; 304 305 MachineBasicBlock::iterator Itr = std::next(CondBr); 306 do { 307 --Itr; 308 309 if (!knownRegValInBlock(*Itr, MBB, KnownRegs, FirstUse)) 310 continue; 311 312 // Reset the clobbered and used register units. 313 OptBBClobberedRegs.clear(); 314 OptBBUsedRegs.clear(); 315 316 // Look backward in PredMBB for COPYs from the known reg to find other 317 // registers that are known to be a constant value. 318 for (auto PredI = Itr;; --PredI) { 319 if (FirstUse == PredI) 320 SeenFirstUse = true; 321 322 if (PredI->isCopy()) { 323 MCPhysReg CopyDstReg = PredI->getOperand(0).getReg(); 324 MCPhysReg CopySrcReg = PredI->getOperand(1).getReg(); 325 for (auto &KnownReg : KnownRegs) { 326 if (!OptBBClobberedRegs.available(KnownReg.Reg)) 327 continue; 328 // If we have X = COPY Y, and Y is known to be zero, then now X is 329 // known to be zero. 330 if (CopySrcReg == KnownReg.Reg && 331 OptBBClobberedRegs.available(CopyDstReg)) { 332 KnownRegs.push_back(RegImm(CopyDstReg, KnownReg.Imm)); 333 if (SeenFirstUse) 334 FirstUse = PredI; 335 break; 336 } 337 // If we have X = COPY Y, and X is known to be zero, then now Y is 338 // known to be zero. 339 if (CopyDstReg == KnownReg.Reg && 340 OptBBClobberedRegs.available(CopySrcReg)) { 341 KnownRegs.push_back(RegImm(CopySrcReg, KnownReg.Imm)); 342 if (SeenFirstUse) 343 FirstUse = PredI; 344 break; 345 } 346 } 347 } 348 349 // Stop if we get to the beginning of PredMBB. 350 if (PredI == PredMBB->begin()) 351 break; 352 353 LiveRegUnits::accumulateUsedDefed(*PredI, OptBBClobberedRegs, 354 OptBBUsedRegs, TRI); 355 // Stop if all of the known-zero regs have been clobbered. 356 if (all_of(KnownRegs, [&](RegImm KnownReg) { 357 return !OptBBClobberedRegs.available(KnownReg.Reg); 358 })) 359 break; 360 } 361 break; 362 363 } while (Itr != PredMBB->begin() && Itr->isTerminator()); 364 365 // We've not found a registers with a known value, time to bail out. 366 if (KnownRegs.empty()) 367 return false; 368 369 bool Changed = false; 370 // UsedKnownRegs is the set of KnownRegs that have had uses added to MBB. 371 SmallSetVector<unsigned, 4> UsedKnownRegs; 372 MachineBasicBlock::iterator LastChange = MBB->begin(); 373 // Remove redundant copy/move instructions unless KnownReg is modified. 374 for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;) { 375 MachineInstr *MI = &*I; 376 ++I; 377 bool RemovedMI = false; 378 bool IsCopy = MI->isCopy(); 379 bool IsMoveImm = MI->isMoveImmediate(); 380 if (IsCopy || IsMoveImm) { 381 Register DefReg = MI->getOperand(0).getReg(); 382 Register SrcReg = IsCopy ? MI->getOperand(1).getReg() : Register(); 383 int64_t SrcImm = IsMoveImm ? MI->getOperand(1).getImm() : 0; 384 if (!MRI->isReserved(DefReg) && 385 ((IsCopy && (SrcReg == AArch64::XZR || SrcReg == AArch64::WZR)) || 386 IsMoveImm)) { 387 for (RegImm &KnownReg : KnownRegs) { 388 if (KnownReg.Reg != DefReg && 389 !TRI->isSuperRegister(DefReg, KnownReg.Reg)) 390 continue; 391 392 // For a copy, the known value must be a zero. 393 if (IsCopy && KnownReg.Imm != 0) 394 continue; 395 396 if (IsMoveImm) { 397 // For a move immediate, the known immediate must match the source 398 // immediate. 399 if (KnownReg.Imm != SrcImm) 400 continue; 401 402 // Don't remove a move immediate that implicitly defines the upper 403 // bits when only the lower 32 bits are known. 404 MCPhysReg CmpReg = KnownReg.Reg; 405 if (any_of(MI->implicit_operands(), [CmpReg](MachineOperand &O) { 406 return !O.isDead() && O.isReg() && O.isDef() && 407 O.getReg() != CmpReg; 408 })) 409 continue; 410 411 // Don't remove a move immediate that implicitly defines the upper 412 // bits as different. 413 if (TRI->isSuperRegister(DefReg, KnownReg.Reg) && KnownReg.Imm < 0) 414 continue; 415 } 416 417 if (IsCopy) 418 LLVM_DEBUG(dbgs() << "Remove redundant Copy : " << *MI); 419 else 420 LLVM_DEBUG(dbgs() << "Remove redundant Move : " << *MI); 421 422 MI->eraseFromParent(); 423 Changed = true; 424 LastChange = I; 425 NumCopiesRemoved++; 426 UsedKnownRegs.insert(KnownReg.Reg); 427 RemovedMI = true; 428 break; 429 } 430 } 431 } 432 433 // Skip to the next instruction if we removed the COPY/MovImm. 434 if (RemovedMI) 435 continue; 436 437 // Remove any regs the MI clobbers from the KnownConstRegs set. 438 for (unsigned RI = 0; RI < KnownRegs.size();) 439 if (MI->modifiesRegister(KnownRegs[RI].Reg, TRI)) { 440 std::swap(KnownRegs[RI], KnownRegs[KnownRegs.size() - 1]); 441 KnownRegs.pop_back(); 442 // Don't increment RI since we need to now check the swapped-in 443 // KnownRegs[RI]. 444 } else { 445 ++RI; 446 } 447 448 // Continue until the KnownRegs set is empty. 449 if (KnownRegs.empty()) 450 break; 451 } 452 453 if (!Changed) 454 return false; 455 456 // Add newly used regs to the block's live-in list if they aren't there 457 // already. 458 for (MCPhysReg KnownReg : UsedKnownRegs) 459 if (!MBB->isLiveIn(KnownReg)) 460 MBB->addLiveIn(KnownReg); 461 462 // Clear kills in the range where changes were made. This is conservative, 463 // but should be okay since kill markers are being phased out. 464 LLVM_DEBUG(dbgs() << "Clearing kill flags.\n\tFirstUse: " << *FirstUse 465 << "\tLastChange: " << *LastChange); 466 for (MachineInstr &MMI : make_range(FirstUse, PredMBB->end())) 467 MMI.clearKillInfo(); 468 for (MachineInstr &MMI : make_range(MBB->begin(), LastChange)) 469 MMI.clearKillInfo(); 470 471 return true; 472 } 473 474 bool AArch64RedundantCopyElimination::runOnMachineFunction( 475 MachineFunction &MF) { 476 if (skipFunction(MF.getFunction())) 477 return false; 478 TRI = MF.getSubtarget().getRegisterInfo(); 479 MRI = &MF.getRegInfo(); 480 481 // Resize the clobbered and used register unit trackers. We do this once per 482 // function. 483 DomBBClobberedRegs.init(*TRI); 484 DomBBUsedRegs.init(*TRI); 485 OptBBClobberedRegs.init(*TRI); 486 OptBBUsedRegs.init(*TRI); 487 488 bool Changed = false; 489 for (MachineBasicBlock &MBB : MF) 490 Changed |= optimizeBlock(&MBB); 491 return Changed; 492 } 493 494 FunctionPass *llvm::createAArch64RedundantCopyEliminationPass() { 495 return new AArch64RedundantCopyElimination(); 496 } 497