//===- GCNDPPCombine.cpp - optimization for DPP instructions -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// The pass combines V_MOV_B32_dpp instruction with its VALU uses as a DPP src0
// operand. If any of the use instructions cannot be combined with the mov, the
// whole sequence is reverted.
//
// $old = ...
// $dpp_value = V_MOV_B32_dpp $old, $vgpr_to_be_read_from_other_lane,
//                            dpp_controls..., $row_mask, $bank_mask, $bound_ctrl
// $res = VALU $dpp_value [, src1]
//
// to
//
// $res = VALU_DPP $combined_old, $vgpr_to_be_read_from_other_lane, [src1,]
//                 dpp_controls..., $row_mask, $bank_mask, $combined_bound_ctrl
//
// Combining rules:
//
// if $row_mask and $bank_mask are fully enabled (0xF) and
//    $bound_ctrl==DPP_BOUND_ZERO or $old==0
// -> $combined_old = undef,
//    $combined_bound_ctrl = DPP_BOUND_ZERO
//
// if the VALU op is binary and
//    $bound_ctrl==DPP_BOUND_OFF and
//    $old==identity value (immediate) for the VALU op
// -> $combined_old = src1,
//    $combined_bound_ctrl = DPP_BOUND_OFF
//
// Otherwise cancel.
//
// The mov_dpp instruction should reside in the same BB as all its uses.
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include <limits>

using namespace llvm;

#define DEBUG_TYPE "gcn-dpp-combine"

STATISTIC(NumDPPMovsCombined, "Number of DPP moves combined.");

namespace {

class GCNDPPCombine : public MachineFunctionPass {
  MachineRegisterInfo *MRI;
  const SIInstrInfo *TII;

  using RegSubRegPair = TargetInstrInfo::RegSubRegPair;

  MachineOperand *getOldOpndValue(MachineOperand &OldOpnd) const;

  MachineInstr *createDPPInst(MachineInstr &OrigMI, MachineInstr &MovMI,
                              RegSubRegPair CombOldVGPR,
                              MachineOperand *OldOpnd, bool CombBCZ) const;

  MachineInstr *createDPPInst(MachineInstr &OrigMI, MachineInstr &MovMI,
                              RegSubRegPair CombOldVGPR, bool CombBCZ) const;

  bool hasNoImmOrEqual(MachineInstr &MI, unsigned OpndName, int64_t Value,
                       int64_t Mask = -1) const;

  bool combineDPPMov(MachineInstr &MI) const;

public:
  static char ID;

  GCNDPPCombine() : MachineFunctionPass(ID) {
    initializeGCNDPPCombinePass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override { return "GCN DPP Combine"; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::IsSSA);
  }

private:
  int getDPPOp(unsigned Op) const;
};

} // end anonymous namespace

INITIALIZE_PASS(GCNDPPCombine, DEBUG_TYPE, "GCN DPP Combine", false, false)

char GCNDPPCombine::ID = 0;

char &llvm::GCNDPPCombineID = GCNDPPCombine::ID;

FunctionPass *llvm::createGCNDPPCombinePass() { return new GCNDPPCombine(); }
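// Returns the DPP variant of the given pseudo opcode, going through the
// VOP3 -> e32 mapping when no direct DPP form exists; returns -1 if there is
// no DPP form or it has no valid MC opcode on the current subtarget.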
int GCNDPPCombine::getDPPOp(unsigned Op) const {
  auto DPP32 = AMDGPU::getDPPOp32(Op);
  if (DPP32 == -1) {
    auto E32 = AMDGPU::getVOPe32(Op);
    DPP32 = (E32 == -1) ? -1 : AMDGPU::getDPPOp32(E32);
  }
  return (DPP32 == -1 || TII->pseudoToMCOpcode(DPP32) == -1) ? -1 : DPP32;
}

// Tracks the register operand definition and returns:
// 1. immediate operand used to initialize the register if found
// 2. nullptr if the register operand is undef
// 3. the operand itself otherwise
MachineOperand *GCNDPPCombine::getOldOpndValue(MachineOperand &OldOpnd) const {
  auto *Def = getVRegSubRegDef(getRegSubRegPair(OldOpnd), *MRI);
  if (!Def)
    return nullptr;

  switch (Def->getOpcode()) {
  default: break;
  case AMDGPU::IMPLICIT_DEF:
    return nullptr;
  case AMDGPU::COPY:
  case AMDGPU::V_MOV_B32_e32: {
    auto &Op1 = Def->getOperand(1);
    if (Op1.isImm())
      return &Op1;
    break;
  }
  }
  return &OldOpnd;
}
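// Builds the combined DPP instruction: vdst, the source modifiers and
// src1/src2 come from OrigMI, while src0 and the DPP control operands
// (dpp_ctrl, row_mask, bank_mask) come from MovMI; the old operand is
// CombOldVGPR and bound_ctrl is CombBCZ. Returns nullptr (leaving the
// function unchanged) if any operand is illegal for the DPP encoding.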
MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
                                           MachineInstr &MovMI,
                                           RegSubRegPair CombOldVGPR,
                                           bool CombBCZ) const {
  assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp);

  auto OrigOp = OrigMI.getOpcode();
  auto DPPOp = getDPPOp(OrigOp);
  if (DPPOp == -1) {
    LLVM_DEBUG(dbgs() << "  failed: no DPP opcode\n");
    return nullptr;
  }

  auto DPPInst = BuildMI(*OrigMI.getParent(), OrigMI, OrigMI.getDebugLoc(),
                         TII->get(DPPOp))
                     .setMIFlags(OrigMI.getFlags());

  bool Fail = false;
  do {
    auto *Dst = TII->getNamedOperand(OrigMI, AMDGPU::OpName::vdst);
    assert(Dst);
    DPPInst.add(*Dst);
    int NumOperands = 1;

    const int OldIdx = AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::old);
    if (OldIdx != -1) {
      assert(OldIdx == NumOperands);
      assert(isOfRegClass(CombOldVGPR, AMDGPU::VGPR_32RegClass, *MRI));
      auto *Def = getVRegSubRegDef(CombOldVGPR, *MRI);
      DPPInst.addReg(CombOldVGPR.Reg, Def ? 0 : RegState::Undef,
                     CombOldVGPR.SubReg);
      ++NumOperands;
    } else {
      // TODO: this discards MAC/FMA instructions for now, let's add it later
      LLVM_DEBUG(dbgs() << "  failed: no old operand in DPP instruction,"
                           " TBD\n");
      Fail = true;
      break;
    }

    if (auto *Mod0 = TII->getNamedOperand(OrigMI,
                                          AMDGPU::OpName::src0_modifiers)) {
      assert(NumOperands == AMDGPU::getNamedOperandIdx(
                                DPPOp, AMDGPU::OpName::src0_modifiers));
      assert(0LL == (Mod0->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG)));
      DPPInst.addImm(Mod0->getImm());
      ++NumOperands;
    } else if (AMDGPU::getNamedOperandIdx(
                   DPPOp, AMDGPU::OpName::src0_modifiers) != -1) {
      DPPInst.addImm(0);
      ++NumOperands;
    }
    auto *Src0 = TII->getNamedOperand(MovMI, AMDGPU::OpName::src0);
    assert(Src0);
    if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src0)) {
      LLVM_DEBUG(dbgs() << "  failed: src0 is illegal\n");
      Fail = true;
      break;
    }
    DPPInst.add(*Src0);
    DPPInst->getOperand(NumOperands).setIsKill(false);
    ++NumOperands;

    if (auto *Mod1 = TII->getNamedOperand(OrigMI,
                                          AMDGPU::OpName::src1_modifiers)) {
      assert(NumOperands == AMDGPU::getNamedOperandIdx(
                                DPPOp, AMDGPU::OpName::src1_modifiers));
      assert(0LL == (Mod1->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG)));
      DPPInst.addImm(Mod1->getImm());
      ++NumOperands;
    } else if (AMDGPU::getNamedOperandIdx(
                   DPPOp, AMDGPU::OpName::src1_modifiers) != -1) {
      DPPInst.addImm(0);
      ++NumOperands;
    }
    if (auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1)) {
      if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src1)) {
        LLVM_DEBUG(dbgs() << "  failed: src1 is illegal\n");
        Fail = true;
        break;
      }
      DPPInst.add(*Src1);
      ++NumOperands;
    }

    if (auto *Src2 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src2)) {
      if (!TII->getNamedOperand(*DPPInst.getInstr(), AMDGPU::OpName::src2) ||
          !TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src2)) {
        LLVM_DEBUG(dbgs() << "  failed: src2 is illegal\n");
        Fail = true;
        break;
      }
      DPPInst.add(*Src2);
    }

    DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::dpp_ctrl));
    DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask));
    DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask));
    DPPInst.addImm(CombBCZ ? 1 : 0);
  } while (false);

  if (Fail) {
    DPPInst.getInstr()->eraseFromParent();
    return nullptr;
  }
  LLVM_DEBUG(dbgs() << "  combined: " << *DPPInst.getInstr());
  return DPPInst.getInstr();
}
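// Returns true if the immediate in OldOpnd is the identity element for
// OrigMIOp (e.g. 0 for add/or/xor, all-ones for and/umin, INT32_MAX for smin,
// INT32_MIN for smax, 1 for the 24-bit multiplies), so that lanes falling
// back to the old value produce the same result as applying the op to src1.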
static bool isIdentityValue(unsigned OrigMIOp, MachineOperand *OldOpnd) {
  assert(OldOpnd->isImm());
  switch (OrigMIOp) {
  default: break;
  case AMDGPU::V_ADD_U32_e32:
  case AMDGPU::V_ADD_U32_e64:
  case AMDGPU::V_ADD_CO_U32_e32:
  case AMDGPU::V_ADD_CO_U32_e64:
  case AMDGPU::V_OR_B32_e32:
  case AMDGPU::V_OR_B32_e64:
  case AMDGPU::V_SUBREV_U32_e32:
  case AMDGPU::V_SUBREV_U32_e64:
  case AMDGPU::V_SUBREV_CO_U32_e32:
  case AMDGPU::V_SUBREV_CO_U32_e64:
  case AMDGPU::V_MAX_U32_e32:
  case AMDGPU::V_MAX_U32_e64:
  case AMDGPU::V_XOR_B32_e32:
  case AMDGPU::V_XOR_B32_e64:
    if (OldOpnd->getImm() == 0)
      return true;
    break;
  case AMDGPU::V_AND_B32_e32:
  case AMDGPU::V_AND_B32_e64:
  case AMDGPU::V_MIN_U32_e32:
  case AMDGPU::V_MIN_U32_e64:
    if (static_cast<uint32_t>(OldOpnd->getImm()) ==
        std::numeric_limits<uint32_t>::max())
      return true;
    break;
  case AMDGPU::V_MIN_I32_e32:
  case AMDGPU::V_MIN_I32_e64:
    if (static_cast<int32_t>(OldOpnd->getImm()) ==
        std::numeric_limits<int32_t>::max())
      return true;
    break;
  case AMDGPU::V_MAX_I32_e32:
  case AMDGPU::V_MAX_I32_e64:
    if (static_cast<int32_t>(OldOpnd->getImm()) ==
        std::numeric_limits<int32_t>::min())
      return true;
    break;
  case AMDGPU::V_MUL_I32_I24_e32:
  case AMDGPU::V_MUL_I32_I24_e64:
  case AMDGPU::V_MUL_U32_U24_e32:
  case AMDGPU::V_MUL_U32_U24_e64:
    if (OldOpnd->getImm() == 1)
      return true;
    break;
  }
  return false;
}
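// Wrapper around createDPPInst that applies the second combining rule from
// the file header: when bound_ctrl is off and the old value is a known
// immediate, the combine is only valid if that immediate is the identity for
// OrigMI's opcode, in which case src1 becomes the combined old register.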
MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
                                           MachineInstr &MovMI,
                                           RegSubRegPair CombOldVGPR,
                                           MachineOperand *OldOpndValue,
                                           bool CombBCZ) const {
  assert(CombOldVGPR.Reg);
  if (!CombBCZ && OldOpndValue && OldOpndValue->isImm()) {
    auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1);
    if (!Src1 || !Src1->isReg()) {
      LLVM_DEBUG(dbgs() << "  failed: no src1 or it isn't a register\n");
      return nullptr;
    }
    if (!isIdentityValue(OrigMI.getOpcode(), OldOpndValue)) {
      LLVM_DEBUG(dbgs() << "  failed: old immediate isn't an identity\n");
      return nullptr;
    }
    CombOldVGPR = getRegSubRegPair(*Src1);
    if (!isOfRegClass(CombOldVGPR, AMDGPU::VGPR_32RegClass, *MRI)) {
      LLVM_DEBUG(dbgs() << "  failed: src1 isn't a VGPR32 register\n");
      return nullptr;
    }
  }
  return createDPPInst(OrigMI, MovMI, CombOldVGPR, CombBCZ);
}

// Returns true if MI doesn't have an OpndName immediate operand, or if that
// operand, masked by Mask, equals Value.
bool GCNDPPCombine::hasNoImmOrEqual(MachineInstr &MI, unsigned OpndName,
                                    int64_t Value, int64_t Mask) const {
  auto *Imm = TII->getNamedOperand(MI, OpndName);
  if (!Imm)
    return true;

  assert(Imm->isImm());
  return (Imm->getImm() & Mask) == Value;
}
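// Driver of the combine: verifies the DPP mov is combinable at all (virtual
// registers only, EXEC unchanged between the mov and its uses, masks and
// bound_ctrl/old satisfying one of the rules above), then walks all uses
// (looking through REG_SEQUENCE) and rewrites each into a DPP VALU
// instruction, committing the change only if every use could be combined.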
bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
  assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp);
  LLVM_DEBUG(dbgs() << "\nDPP combine: " << MovMI);

  auto *DstOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst);
  assert(DstOpnd && DstOpnd->isReg());
  auto DPPMovReg = DstOpnd->getReg();
  if (DPPMovReg.isPhysical()) {
    LLVM_DEBUG(dbgs() << "  failed: dpp move writes physreg\n");
    return false;
  }
  if (execMayBeModifiedBeforeAnyUse(*MRI, DPPMovReg, MovMI)) {
    LLVM_DEBUG(dbgs() << "  failed: EXEC mask should remain the same"
                         " for all uses\n");
    return false;
  }

  auto *RowMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask);
  assert(RowMaskOpnd && RowMaskOpnd->isImm());
  auto *BankMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask);
  assert(BankMaskOpnd && BankMaskOpnd->isImm());
  const bool MaskAllLanes =
      RowMaskOpnd->getImm() == 0xF && BankMaskOpnd->getImm() == 0xF;

  auto *BCZOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::bound_ctrl);
  assert(BCZOpnd && BCZOpnd->isImm());
  bool BoundCtrlZero = BCZOpnd->getImm();

  auto *OldOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::old);
  auto *SrcOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::src0);
  assert(OldOpnd && OldOpnd->isReg());
  assert(SrcOpnd && SrcOpnd->isReg());
  if (OldOpnd->getReg().isPhysical() || SrcOpnd->getReg().isPhysical()) {
    LLVM_DEBUG(dbgs() << "  failed: dpp move reads physreg\n");
    return false;
  }

  auto *const OldOpndValue = getOldOpndValue(*OldOpnd);
  // OldOpndValue is either undef (IMPLICIT_DEF) or immediate or something else
  // We could use: assert(!OldOpndValue || OldOpndValue->isImm())
  // but the third option is used to distinguish undef from non-immediate
  // to reuse IMPLICIT_DEF instruction later
  assert(!OldOpndValue || OldOpndValue->isImm() || OldOpndValue == OldOpnd);

  bool CombBCZ = false;

  if (MaskAllLanes && BoundCtrlZero) { // [1]
    CombBCZ = true;
  } else {
    if (!OldOpndValue || !OldOpndValue->isImm()) {
      LLVM_DEBUG(dbgs() << "  failed: the DPP mov isn't combinable\n");
      return false;
    }

    if (OldOpndValue->getParent()->getParent() != MovMI.getParent()) {
      LLVM_DEBUG(dbgs()
                 << "  failed: old reg def and mov should be in the same BB\n");
      return false;
    }

    if (OldOpndValue->getImm() == 0) {
      if (MaskAllLanes) {
        assert(!BoundCtrlZero); // by check [1]
        CombBCZ = true;
      }
    } else if (BoundCtrlZero) {
      assert(!MaskAllLanes); // by check [1]
      LLVM_DEBUG(dbgs() << "  failed: old!=0 with bound_ctrl:0 and not all"
                           " lanes masked isn't combinable\n");
      return false;
    }
  }

  LLVM_DEBUG(dbgs() << "  old=";
             if (!OldOpndValue)
               dbgs() << "undef";
             else
               dbgs() << *OldOpndValue;
             dbgs() << ", bound_ctrl=" << CombBCZ << '\n');

  SmallVector<MachineInstr *, 4> OrigMIs, DPPMIs;
  DenseMap<MachineInstr *, SmallVector<unsigned, 4>> RegSeqWithOpNos;
  auto CombOldVGPR = getRegSubRegPair(*OldOpnd);
  // Try to reuse the previous old reg if it's undefined (IMPLICIT_DEF).
  if (CombBCZ && OldOpndValue) { // CombOldVGPR should be undef
    CombOldVGPR = RegSubRegPair(
        MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass));
    auto UndefInst = BuildMI(*MovMI.getParent(), MovMI, MovMI.getDebugLoc(),
                             TII->get(AMDGPU::IMPLICIT_DEF), CombOldVGPR.Reg);
    DPPMIs.push_back(UndefInst.getInstr());
  }

  OrigMIs.push_back(&MovMI);
  bool Rollback = true;
  SmallVector<MachineOperand *, 16> Uses;

  for (auto &Use : MRI->use_nodbg_operands(DPPMovReg)) {
    Uses.push_back(&Use);
  }

  while (!Uses.empty()) {
    MachineOperand *Use = Uses.pop_back_val();
    Rollback = true;

    auto &OrigMI = *Use->getParent();
    LLVM_DEBUG(dbgs() << "  try: " << OrigMI);

    auto OrigOp = OrigMI.getOpcode();
    if (OrigOp == AMDGPU::REG_SEQUENCE) {
      Register FwdReg = OrigMI.getOperand(0).getReg();
      unsigned FwdSubReg = 0;

      if (execMayBeModifiedBeforeAnyUse(*MRI, FwdReg, OrigMI)) {
        LLVM_DEBUG(dbgs() << "  failed: EXEC mask should remain the same"
                             " for all uses\n");
        break;
      }

      unsigned OpNo, E = OrigMI.getNumOperands();
      for (OpNo = 1; OpNo < E; OpNo += 2) {
        if (OrigMI.getOperand(OpNo).getReg() == DPPMovReg) {
          FwdSubReg = OrigMI.getOperand(OpNo + 1).getImm();
          break;
        }
      }

      if (!FwdSubReg)
        break;

      for (auto &Op : MRI->use_nodbg_operands(FwdReg)) {
        if (Op.getSubReg() == FwdSubReg)
          Uses.push_back(&Op);
      }
      RegSeqWithOpNos[&OrigMI].push_back(OpNo);
      continue;
    }

    if (TII->isVOP3(OrigOp)) {
      if (!TII->hasVALU32BitEncoding(OrigOp)) {
        LLVM_DEBUG(dbgs() << "  failed: VOP3 has no e32 equivalent\n");
        break;
      }
      // Check if modifiers other than abs|neg are set (opsel for example).
      const int64_t Mask = ~(SISrcMods::ABS | SISrcMods::NEG);
      if (!hasNoImmOrEqual(OrigMI, AMDGPU::OpName::src0_modifiers, 0, Mask) ||
          !hasNoImmOrEqual(OrigMI, AMDGPU::OpName::src1_modifiers, 0, Mask) ||
          !hasNoImmOrEqual(OrigMI, AMDGPU::OpName::clamp, 0) ||
          !hasNoImmOrEqual(OrigMI, AMDGPU::OpName::omod, 0)) {
        LLVM_DEBUG(dbgs() << "  failed: VOP3 has non-default modifiers\n");
        break;
      }
    } else if (!TII->isVOP1(OrigOp) && !TII->isVOP2(OrigOp)) {
      LLVM_DEBUG(dbgs() << "  failed: not VOP1/2/3\n");
      break;
    }

    auto *Src0 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src0);
    auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1);
    if (Use != Src0 && !(Use == Src1 && OrigMI.isCommutable())) { // [1]
      LLVM_DEBUG(dbgs() << "  failed: no suitable operands\n");
      break;
    }

    assert(Src0 && "Src1 without Src0?");
    if (Src1 && Src1->isIdenticalTo(*Src0)) {
      assert(Src1->isReg());
      LLVM_DEBUG(
          dbgs()
          << "  " << OrigMI
          << "  failed: DPP register is used more than once per instruction\n");
      break;
    }

    LLVM_DEBUG(dbgs() << "  combining: " << OrigMI);
    if (Use == Src0) {
      if (auto *DPPInst = createDPPInst(OrigMI, MovMI, CombOldVGPR,
                                        OldOpndValue, CombBCZ)) {
        DPPMIs.push_back(DPPInst);
        Rollback = false;
      }
    } else {
      assert(Use == Src1 && OrigMI.isCommutable()); // by check [1]
      auto *BB = OrigMI.getParent();
      auto *NewMI = BB->getParent()->CloneMachineInstr(&OrigMI);
      BB->insert(OrigMI, NewMI);
      if (TII->commuteInstruction(*NewMI)) {
        LLVM_DEBUG(dbgs() << "  commuted: " << *NewMI);
        if (auto *DPPInst = createDPPInst(*NewMI, MovMI, CombOldVGPR,
                                          OldOpndValue, CombBCZ)) {
          DPPMIs.push_back(DPPInst);
          Rollback = false;
        }
      } else
        LLVM_DEBUG(dbgs() << "  failed: cannot be commuted\n");
      NewMI->eraseFromParent();
    }
    if (Rollback)
      break;
    OrigMIs.push_back(&OrigMI);
  }

  Rollback |= !Uses.empty();

  for (auto *MI : *(Rollback ? &DPPMIs : &OrigMIs))
    MI->eraseFromParent();

  if (!Rollback) {
    for (auto &S : RegSeqWithOpNos) {
      if (MRI->use_nodbg_empty(S.first->getOperand(0).getReg())) {
        S.first->eraseFromParent();
        continue;
      }
      while (!S.second.empty())
        S.first->getOperand(S.second.pop_back_val()).setIsUndef(true);
    }
  }

  return !Rollback;
}
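// Blocks are walked bottom-up, and the iterator is advanced before the
// current instruction is processed so that combineDPPMov may safely erase it.
// V_MOV_B64_DPP_PSEUDO is first split into two 32-bit DPP movs, each of which
// is then combined independently.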
bool GCNDPPCombine::runOnMachineFunction(MachineFunction &MF) {
  auto &ST = MF.getSubtarget<GCNSubtarget>();
  if (!ST.hasDPP() || skipFunction(MF.getFunction()))
    return false;

  MRI = &MF.getRegInfo();
  TII = ST.getInstrInfo();

  bool Changed = false;
  for (auto &MBB : MF) {
    for (auto I = MBB.rbegin(), E = MBB.rend(); I != E;) {
      auto &MI = *I++;
      if (MI.getOpcode() == AMDGPU::V_MOV_B32_dpp && combineDPPMov(MI)) {
        Changed = true;
        ++NumDPPMovsCombined;
      } else if (MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO) {
        auto Split = TII->expandMovDPP64(MI);
        for (auto M : {Split.first, Split.second}) {
          if (combineDPPMov(*M))
            ++NumDPPMovsCombined;
        }
        Changed = true;
      }
    }
  }
  return Changed;
}