xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp (revision 179219ea046f46927d6478d43431e8b541703539)
1 //=======- GCNDPPCombine.cpp - optimization for DPP instructions ---==========//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
// The pass combines a V_MOV_B32_dpp instruction with its VALU uses as a DPP
// src0 operand. If any of the use instructions cannot be combined with the
// mov, the whole sequence is reverted.
11 //
12 // $old = ...
13 // $dpp_value = V_MOV_B32_dpp $old, $vgpr_to_be_read_from_other_lane,
14 //                            dpp_controls..., $row_mask, $bank_mask, $bound_ctrl
15 // $res = VALU $dpp_value [, src1]
16 //
17 // to
18 //
19 // $res = VALU_DPP $combined_old, $vgpr_to_be_read_from_other_lane, [src1,]
20 //                 dpp_controls..., $row_mask, $bank_mask, $combined_bound_ctrl
21 //
22 // Combining rules :
23 //
24 // if $row_mask and $bank_mask are fully enabled (0xF) and
25 //    $bound_ctrl==DPP_BOUND_ZERO or $old==0
26 // -> $combined_old = undef,
27 //    $combined_bound_ctrl = DPP_BOUND_ZERO
28 //
29 // if the VALU op is binary and
30 //    $bound_ctrl==DPP_BOUND_OFF and
31 //    $old==identity value (immediate) for the VALU op
32 // -> $combined_old = src1,
33 //    $combined_bound_ctrl = DPP_BOUND_OFF
34 //
35 // Otherwise cancel.
36 //
37 // The mov_dpp instruction should reside in the same BB as all its uses
38 //===----------------------------------------------------------------------===//
39 
40 #include "AMDGPU.h"
41 #include "GCNSubtarget.h"
42 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
43 #include "llvm/ADT/Statistic.h"
44 #include "llvm/CodeGen/MachineFunctionPass.h"
45 
46 using namespace llvm;
47 
48 #define DEBUG_TYPE "gcn-dpp-combine"
49 
50 STATISTIC(NumDPPMovsCombined, "Number of DPP moves combined.");
51 
52 namespace {
53 
54 class GCNDPPCombine : public MachineFunctionPass {
55   MachineRegisterInfo *MRI;
56   const SIInstrInfo *TII;
57 
58   using RegSubRegPair = TargetInstrInfo::RegSubRegPair;
59 
60   MachineOperand *getOldOpndValue(MachineOperand &OldOpnd) const;
61 
62   MachineInstr *createDPPInst(MachineInstr &OrigMI,
63                               MachineInstr &MovMI,
64                               RegSubRegPair CombOldVGPR,
65                               MachineOperand *OldOpnd,
66                               bool CombBCZ) const;
67 
68   MachineInstr *createDPPInst(MachineInstr &OrigMI,
69                               MachineInstr &MovMI,
70                               RegSubRegPair CombOldVGPR,
71                               bool CombBCZ) const;
72 
73   bool hasNoImmOrEqual(MachineInstr &MI,
74                        unsigned OpndName,
75                        int64_t Value,
76                        int64_t Mask = -1) const;
77 
78   bool combineDPPMov(MachineInstr &MI) const;
79 
80 public:
81   static char ID;
82 
83   GCNDPPCombine() : MachineFunctionPass(ID) {
84     initializeGCNDPPCombinePass(*PassRegistry::getPassRegistry());
85   }
86 
87   bool runOnMachineFunction(MachineFunction &MF) override;
88 
89   StringRef getPassName() const override { return "GCN DPP Combine"; }
90 
91   void getAnalysisUsage(AnalysisUsage &AU) const override {
92     AU.setPreservesCFG();
93     MachineFunctionPass::getAnalysisUsage(AU);
94   }
95 
96   MachineFunctionProperties getRequiredProperties() const override {
97     return MachineFunctionProperties()
98       .set(MachineFunctionProperties::Property::IsSSA);
99   }
100 
101 private:
102   int getDPPOp(unsigned Op) const;
103 };
104 
105 } // end anonymous namespace
106 
107 INITIALIZE_PASS(GCNDPPCombine, DEBUG_TYPE, "GCN DPP Combine", false, false)
108 
109 char GCNDPPCombine::ID = 0;
110 
111 char &llvm::GCNDPPCombineID = GCNDPPCombine::ID;
112 
113 FunctionPass *llvm::createGCNDPPCombinePass() {
114   return new GCNDPPCombine();
115 }
116 
117 int GCNDPPCombine::getDPPOp(unsigned Op) const {
118   auto DPP32 = AMDGPU::getDPPOp32(Op);
119   if (DPP32 == -1) {
120     auto E32 = AMDGPU::getVOPe32(Op);
121     DPP32 = (E32 == -1)? -1 : AMDGPU::getDPPOp32(E32);
122   }
123   return (DPP32 == -1 || TII->pseudoToMCOpcode(DPP32) == -1) ? -1 : DPP32;
124 }
125 
126 // tracks the register operand definition and returns:
127 //   1. immediate operand used to initialize the register if found
128 //   2. nullptr if the register operand is undef
129 //   3. the operand itself otherwise
130 MachineOperand *GCNDPPCombine::getOldOpndValue(MachineOperand &OldOpnd) const {
131   auto *Def = getVRegSubRegDef(getRegSubRegPair(OldOpnd), *MRI);
132   if (!Def)
133     return nullptr;
134 
135   switch(Def->getOpcode()) {
136   default: break;
137   case AMDGPU::IMPLICIT_DEF:
138     return nullptr;
139   case AMDGPU::COPY:
140   case AMDGPU::V_MOV_B32_e32: {
141     auto &Op1 = Def->getOperand(1);
142     if (Op1.isImm())
143       return &Op1;
144     break;
145   }
146   }
147   return &OldOpnd;
148 }
149 
150 MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
151                                            MachineInstr &MovMI,
152                                            RegSubRegPair CombOldVGPR,
153                                            bool CombBCZ) const {
154   assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp);
155 
156   auto OrigOp = OrigMI.getOpcode();
157   auto DPPOp = getDPPOp(OrigOp);
158   if (DPPOp == -1) {
159     LLVM_DEBUG(dbgs() << "  failed: no DPP opcode\n");
160     return nullptr;
161   }
162 
163   auto DPPInst = BuildMI(*OrigMI.getParent(), OrigMI,
164                          OrigMI.getDebugLoc(), TII->get(DPPOp))
165     .setMIFlags(OrigMI.getFlags());
166 
167   bool Fail = false;
168   do {
169     auto *Dst = TII->getNamedOperand(OrigMI, AMDGPU::OpName::vdst);
170     assert(Dst);
171     DPPInst.add(*Dst);
172     int NumOperands = 1;
173 
174     const int OldIdx = AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::old);
175     if (OldIdx != -1) {
176       assert(OldIdx == NumOperands);
177       assert(isOfRegClass(CombOldVGPR, AMDGPU::VGPR_32RegClass, *MRI));
178       auto *Def = getVRegSubRegDef(CombOldVGPR, *MRI);
179       DPPInst.addReg(CombOldVGPR.Reg, Def ? 0 : RegState::Undef,
180                      CombOldVGPR.SubReg);
181       ++NumOperands;
182     } else {
183       // TODO: this discards MAC/FMA instructions for now, let's add it later
184       LLVM_DEBUG(dbgs() << "  failed: no old operand in DPP instruction,"
185                            " TBD\n");
186       Fail = true;
187       break;
188     }
189 
190     if (auto *Mod0 = TII->getNamedOperand(OrigMI,
191                                           AMDGPU::OpName::src0_modifiers)) {
192       assert(NumOperands == AMDGPU::getNamedOperandIdx(DPPOp,
193                                           AMDGPU::OpName::src0_modifiers));
194       assert(0LL == (Mod0->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG)));
195       DPPInst.addImm(Mod0->getImm());
196       ++NumOperands;
197     } else if (AMDGPU::getNamedOperandIdx(DPPOp,
198                    AMDGPU::OpName::src0_modifiers) != -1) {
199       DPPInst.addImm(0);
200       ++NumOperands;
201     }
202     auto *Src0 = TII->getNamedOperand(MovMI, AMDGPU::OpName::src0);
203     assert(Src0);
204     if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src0)) {
205       LLVM_DEBUG(dbgs() << "  failed: src0 is illegal\n");
206       Fail = true;
207       break;
208     }
209     DPPInst.add(*Src0);
210     DPPInst->getOperand(NumOperands).setIsKill(false);
211     ++NumOperands;
212 
213     if (auto *Mod1 = TII->getNamedOperand(OrigMI,
214                                           AMDGPU::OpName::src1_modifiers)) {
215       assert(NumOperands == AMDGPU::getNamedOperandIdx(DPPOp,
216                                           AMDGPU::OpName::src1_modifiers));
217       assert(0LL == (Mod1->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG)));
218       DPPInst.addImm(Mod1->getImm());
219       ++NumOperands;
220     } else if (AMDGPU::getNamedOperandIdx(DPPOp,
221                    AMDGPU::OpName::src1_modifiers) != -1) {
222       DPPInst.addImm(0);
223       ++NumOperands;
224     }
225     if (auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1)) {
226       if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src1)) {
227         LLVM_DEBUG(dbgs() << "  failed: src1 is illegal\n");
228         Fail = true;
229         break;
230       }
231       DPPInst.add(*Src1);
232       ++NumOperands;
233     }
234 
235     if (auto *Src2 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src2)) {
236       if (!TII->getNamedOperand(*DPPInst.getInstr(), AMDGPU::OpName::src2) ||
237           !TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src2)) {
238         LLVM_DEBUG(dbgs() << "  failed: src2 is illegal\n");
239         Fail = true;
240         break;
241       }
242       DPPInst.add(*Src2);
243     }
244 
245     DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::dpp_ctrl));
246     DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask));
247     DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask));
248     DPPInst.addImm(CombBCZ ? 1 : 0);
249   } while (false);
250 
251   if (Fail) {
252     DPPInst.getInstr()->eraseFromParent();
253     return nullptr;
254   }
255   LLVM_DEBUG(dbgs() << "  combined:  " << *DPPInst.getInstr());
256   return DPPInst.getInstr();
257 }
258 
259 static bool isIdentityValue(unsigned OrigMIOp, MachineOperand *OldOpnd) {
260   assert(OldOpnd->isImm());
261   switch (OrigMIOp) {
262   default: break;
263   case AMDGPU::V_ADD_U32_e32:
264   case AMDGPU::V_ADD_U32_e64:
265   case AMDGPU::V_ADD_CO_U32_e32:
266   case AMDGPU::V_ADD_CO_U32_e64:
267   case AMDGPU::V_OR_B32_e32:
268   case AMDGPU::V_OR_B32_e64:
269   case AMDGPU::V_SUBREV_U32_e32:
270   case AMDGPU::V_SUBREV_U32_e64:
271   case AMDGPU::V_SUBREV_CO_U32_e32:
272   case AMDGPU::V_SUBREV_CO_U32_e64:
273   case AMDGPU::V_MAX_U32_e32:
274   case AMDGPU::V_MAX_U32_e64:
275   case AMDGPU::V_XOR_B32_e32:
276   case AMDGPU::V_XOR_B32_e64:
277     if (OldOpnd->getImm() == 0)
278       return true;
279     break;
280   case AMDGPU::V_AND_B32_e32:
281   case AMDGPU::V_AND_B32_e64:
282   case AMDGPU::V_MIN_U32_e32:
283   case AMDGPU::V_MIN_U32_e64:
284     if (static_cast<uint32_t>(OldOpnd->getImm()) ==
285         std::numeric_limits<uint32_t>::max())
286       return true;
287     break;
288   case AMDGPU::V_MIN_I32_e32:
289   case AMDGPU::V_MIN_I32_e64:
290     if (static_cast<int32_t>(OldOpnd->getImm()) ==
291         std::numeric_limits<int32_t>::max())
292       return true;
293     break;
294   case AMDGPU::V_MAX_I32_e32:
295   case AMDGPU::V_MAX_I32_e64:
296     if (static_cast<int32_t>(OldOpnd->getImm()) ==
297         std::numeric_limits<int32_t>::min())
298       return true;
299     break;
300   case AMDGPU::V_MUL_I32_I24_e32:
301   case AMDGPU::V_MUL_I32_I24_e64:
302   case AMDGPU::V_MUL_U32_U24_e32:
303   case AMDGPU::V_MUL_U32_U24_e64:
304     if (OldOpnd->getImm() == 1)
305       return true;
306     break;
307   }
308   return false;
309 }
310 
311 MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
312                                            MachineInstr &MovMI,
313                                            RegSubRegPair CombOldVGPR,
314                                            MachineOperand *OldOpndValue,
315                                            bool CombBCZ) const {
316   assert(CombOldVGPR.Reg);
317   if (!CombBCZ && OldOpndValue && OldOpndValue->isImm()) {
318     auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1);
319     if (!Src1 || !Src1->isReg()) {
320       LLVM_DEBUG(dbgs() << "  failed: no src1 or it isn't a register\n");
321       return nullptr;
322     }
323     if (!isIdentityValue(OrigMI.getOpcode(), OldOpndValue)) {
324       LLVM_DEBUG(dbgs() << "  failed: old immediate isn't an identity\n");
325       return nullptr;
326     }
327     CombOldVGPR = getRegSubRegPair(*Src1);
328     if (!isOfRegClass(CombOldVGPR, AMDGPU::VGPR_32RegClass, *MRI)) {
329       LLVM_DEBUG(dbgs() << "  failed: src1 isn't a VGPR32 register\n");
330       return nullptr;
331     }
332   }
333   return createDPPInst(OrigMI, MovMI, CombOldVGPR, CombBCZ);
334 }
335 
336 // returns true if MI doesn't have OpndName immediate operand or the
337 // operand has Value
338 bool GCNDPPCombine::hasNoImmOrEqual(MachineInstr &MI, unsigned OpndName,
339                                     int64_t Value, int64_t Mask) const {
340   auto *Imm = TII->getNamedOperand(MI, OpndName);
341   if (!Imm)
342     return true;
343 
344   assert(Imm->isImm());
345   return (Imm->getImm() & Mask) == Value;
346 }
347 
348 bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
349   assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp);
350   LLVM_DEBUG(dbgs() << "\nDPP combine: " << MovMI);
351 
352   auto *DstOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst);
353   assert(DstOpnd && DstOpnd->isReg());
354   auto DPPMovReg = DstOpnd->getReg();
355   if (DPPMovReg.isPhysical()) {
356     LLVM_DEBUG(dbgs() << "  failed: dpp move writes physreg\n");
357     return false;
358   }
359   if (execMayBeModifiedBeforeAnyUse(*MRI, DPPMovReg, MovMI)) {
360     LLVM_DEBUG(dbgs() << "  failed: EXEC mask should remain the same"
361                          " for all uses\n");
362     return false;
363   }
364 
365   auto *RowMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask);
366   assert(RowMaskOpnd && RowMaskOpnd->isImm());
367   auto *BankMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask);
368   assert(BankMaskOpnd && BankMaskOpnd->isImm());
369   const bool MaskAllLanes = RowMaskOpnd->getImm() == 0xF &&
370                             BankMaskOpnd->getImm() == 0xF;
371 
372   auto *BCZOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::bound_ctrl);
373   assert(BCZOpnd && BCZOpnd->isImm());
374   bool BoundCtrlZero = BCZOpnd->getImm();
375 
376   auto *OldOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::old);
377   auto *SrcOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::src0);
378   assert(OldOpnd && OldOpnd->isReg());
379   assert(SrcOpnd && SrcOpnd->isReg());
380   if (OldOpnd->getReg().isPhysical() || SrcOpnd->getReg().isPhysical()) {
381     LLVM_DEBUG(dbgs() << "  failed: dpp move reads physreg\n");
382     return false;
383   }
384 
385   auto * const OldOpndValue = getOldOpndValue(*OldOpnd);
386   // OldOpndValue is either undef (IMPLICIT_DEF) or immediate or something else
387   // We could use: assert(!OldOpndValue || OldOpndValue->isImm())
388   // but the third option is used to distinguish undef from non-immediate
389   // to reuse IMPLICIT_DEF instruction later
390   assert(!OldOpndValue || OldOpndValue->isImm() || OldOpndValue == OldOpnd);
391 
392   bool CombBCZ = false;
393 
394   if (MaskAllLanes && BoundCtrlZero) { // [1]
395     CombBCZ = true;
396   } else {
397     if (!OldOpndValue || !OldOpndValue->isImm()) {
398       LLVM_DEBUG(dbgs() << "  failed: the DPP mov isn't combinable\n");
399       return false;
400     }
401 
402     if (OldOpndValue->getParent()->getParent() != MovMI.getParent()) {
403       LLVM_DEBUG(dbgs() <<
404         "  failed: old reg def and mov should be in the same BB\n");
405       return false;
406     }
407 
408     if (OldOpndValue->getImm() == 0) {
409       if (MaskAllLanes) {
410         assert(!BoundCtrlZero); // by check [1]
411         CombBCZ = true;
412       }
413     } else if (BoundCtrlZero) {
414       assert(!MaskAllLanes); // by check [1]
415       LLVM_DEBUG(dbgs() <<
416         "  failed: old!=0 and bctrl:0 and not all lanes isn't combinable\n");
417       return false;
418     }
419   }
420 
421   LLVM_DEBUG(dbgs() << "  old=";
422     if (!OldOpndValue)
423       dbgs() << "undef";
424     else
425       dbgs() << *OldOpndValue;
426     dbgs() << ", bound_ctrl=" << CombBCZ << '\n');
427 
428   SmallVector<MachineInstr*, 4> OrigMIs, DPPMIs;
429   DenseMap<MachineInstr*, SmallVector<unsigned, 4>> RegSeqWithOpNos;
430   auto CombOldVGPR = getRegSubRegPair(*OldOpnd);
431   // try to reuse previous old reg if its undefined (IMPLICIT_DEF)
432   if (CombBCZ && OldOpndValue) { // CombOldVGPR should be undef
433     CombOldVGPR = RegSubRegPair(
434       MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass));
435     auto UndefInst = BuildMI(*MovMI.getParent(), MovMI, MovMI.getDebugLoc(),
436                              TII->get(AMDGPU::IMPLICIT_DEF), CombOldVGPR.Reg);
437     DPPMIs.push_back(UndefInst.getInstr());
438   }
439 
440   OrigMIs.push_back(&MovMI);
441   bool Rollback = true;
442   SmallVector<MachineOperand*, 16> Uses;
443 
444   for (auto &Use : MRI->use_nodbg_operands(DPPMovReg)) {
445     Uses.push_back(&Use);
446   }
447 
448   while (!Uses.empty()) {
449     MachineOperand *Use = Uses.pop_back_val();
450     Rollback = true;
451 
452     auto &OrigMI = *Use->getParent();
453     LLVM_DEBUG(dbgs() << "  try: " << OrigMI);
454 
455     auto OrigOp = OrigMI.getOpcode();
456     if (OrigOp == AMDGPU::REG_SEQUENCE) {
457       Register FwdReg = OrigMI.getOperand(0).getReg();
458       unsigned FwdSubReg = 0;
459 
460       if (execMayBeModifiedBeforeAnyUse(*MRI, FwdReg, OrigMI)) {
461         LLVM_DEBUG(dbgs() << "  failed: EXEC mask should remain the same"
462                              " for all uses\n");
463         break;
464       }
465 
466       unsigned OpNo, E = OrigMI.getNumOperands();
467       for (OpNo = 1; OpNo < E; OpNo += 2) {
468         if (OrigMI.getOperand(OpNo).getReg() == DPPMovReg) {
469           FwdSubReg = OrigMI.getOperand(OpNo + 1).getImm();
470           break;
471         }
472       }
473 
474       if (!FwdSubReg)
475         break;
476 
477       for (auto &Op : MRI->use_nodbg_operands(FwdReg)) {
478         if (Op.getSubReg() == FwdSubReg)
479           Uses.push_back(&Op);
480       }
481       RegSeqWithOpNos[&OrigMI].push_back(OpNo);
482       continue;
483     }
484 
485     if (TII->isVOP3(OrigOp)) {
486       if (!TII->hasVALU32BitEncoding(OrigOp)) {
487         LLVM_DEBUG(dbgs() << "  failed: VOP3 hasn't e32 equivalent\n");
488         break;
489       }
490       // check if other than abs|neg modifiers are set (opsel for example)
491       const int64_t Mask = ~(SISrcMods::ABS | SISrcMods::NEG);
492       if (!hasNoImmOrEqual(OrigMI, AMDGPU::OpName::src0_modifiers, 0, Mask) ||
493           !hasNoImmOrEqual(OrigMI, AMDGPU::OpName::src1_modifiers, 0, Mask) ||
494           !hasNoImmOrEqual(OrigMI, AMDGPU::OpName::clamp, 0) ||
495           !hasNoImmOrEqual(OrigMI, AMDGPU::OpName::omod, 0)) {
496         LLVM_DEBUG(dbgs() << "  failed: VOP3 has non-default modifiers\n");
497         break;
498       }
499     } else if (!TII->isVOP1(OrigOp) && !TII->isVOP2(OrigOp)) {
500       LLVM_DEBUG(dbgs() << "  failed: not VOP1/2/3\n");
501       break;
502     }
503 
504     auto *Src0 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src0);
505     auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1);
506     if (Use != Src0 && !(Use == Src1 && OrigMI.isCommutable())) { // [1]
507       LLVM_DEBUG(dbgs() << "  failed: no suitable operands\n");
508       break;
509     }
510 
511     assert(Src0 && "Src1 without Src0?");
512     if (Src1 && Src1->isIdenticalTo(*Src0)) {
513       assert(Src1->isReg());
514       LLVM_DEBUG(
515           dbgs()
516           << "  " << OrigMI
517           << "  failed: DPP register is used more than once per instruction\n");
518       break;
519     }
520 
521     LLVM_DEBUG(dbgs() << "  combining: " << OrigMI);
522     if (Use == Src0) {
523       if (auto *DPPInst = createDPPInst(OrigMI, MovMI, CombOldVGPR,
524                                         OldOpndValue, CombBCZ)) {
525         DPPMIs.push_back(DPPInst);
526         Rollback = false;
527       }
528     } else {
529       assert(Use == Src1 && OrigMI.isCommutable()); // by check [1]
530       auto *BB = OrigMI.getParent();
531       auto *NewMI = BB->getParent()->CloneMachineInstr(&OrigMI);
532       BB->insert(OrigMI, NewMI);
533       if (TII->commuteInstruction(*NewMI)) {
534         LLVM_DEBUG(dbgs() << "  commuted:  " << *NewMI);
535         if (auto *DPPInst = createDPPInst(*NewMI, MovMI, CombOldVGPR,
536                                           OldOpndValue, CombBCZ)) {
537           DPPMIs.push_back(DPPInst);
538           Rollback = false;
539         }
540       } else
541         LLVM_DEBUG(dbgs() << "  failed: cannot be commuted\n");
542       NewMI->eraseFromParent();
543     }
544     if (Rollback)
545       break;
546     OrigMIs.push_back(&OrigMI);
547   }
548 
549   Rollback |= !Uses.empty();
550 
551   for (auto *MI : *(Rollback? &DPPMIs : &OrigMIs))
552     MI->eraseFromParent();
553 
554   if (!Rollback) {
555     for (auto &S : RegSeqWithOpNos) {
556       if (MRI->use_nodbg_empty(S.first->getOperand(0).getReg())) {
557         S.first->eraseFromParent();
558         continue;
559       }
560       while (!S.second.empty())
561         S.first->getOperand(S.second.pop_back_val()).setIsUndef(true);
562     }
563   }
564 
565   return !Rollback;
566 }
567 
568 bool GCNDPPCombine::runOnMachineFunction(MachineFunction &MF) {
569   auto &ST = MF.getSubtarget<GCNSubtarget>();
570   if (!ST.hasDPP() || skipFunction(MF.getFunction()))
571     return false;
572 
573   MRI = &MF.getRegInfo();
574   TII = ST.getInstrInfo();
575 
576   bool Changed = false;
577   for (auto &MBB : MF) {
578     for (auto I = MBB.rbegin(), E = MBB.rend(); I != E;) {
579       auto &MI = *I++;
580       if (MI.getOpcode() == AMDGPU::V_MOV_B32_dpp && combineDPPMov(MI)) {
581         Changed = true;
582         ++NumDPPMovsCombined;
583       } else if (MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO) {
584         auto Split = TII->expandMovDPP64(MI);
585         for (auto M : { Split.first, Split.second }) {
586           if (combineDPPMov(*M))
587             ++NumDPPMovsCombined;
588         }
589         Changed = true;
590       }
591     }
592   }
593   return Changed;
594 }
595