xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp (revision 9e5787d2284e187abb5b654d924394a65772e004)
1 //=======- GCNDPPCombine.cpp - optimization for DPP instructions ---==========//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
// The pass combines a V_MOV_B32_dpp instruction with its VALU uses as a DPP
// src0 operand. If any of the use instructions cannot be combined with the
// mov, the whole sequence is reverted.
11 //
12 // $old = ...
13 // $dpp_value = V_MOV_B32_dpp $old, $vgpr_to_be_read_from_other_lane,
14 //                            dpp_controls..., $row_mask, $bank_mask, $bound_ctrl
15 // $res = VALU $dpp_value [, src1]
16 //
17 // to
18 //
19 // $res = VALU_DPP $combined_old, $vgpr_to_be_read_from_other_lane, [src1,]
20 //                 dpp_controls..., $row_mask, $bank_mask, $combined_bound_ctrl
21 //
22 // Combining rules :
23 //
24 // if $row_mask and $bank_mask are fully enabled (0xF) and
25 //    $bound_ctrl==DPP_BOUND_ZERO or $old==0
26 // -> $combined_old = undef,
27 //    $combined_bound_ctrl = DPP_BOUND_ZERO
28 //
29 // if the VALU op is binary and
30 //    $bound_ctrl==DPP_BOUND_OFF and
31 //    $old==identity value (immediate) for the VALU op
32 // -> $combined_old = src1,
33 //    $combined_bound_ctrl = DPP_BOUND_OFF
34 //
35 // Otherwise cancel.
36 //
37 // The mov_dpp instruction should reside in the same BB as all its uses
38 //===----------------------------------------------------------------------===//
39 
40 #include "AMDGPU.h"
41 #include "AMDGPUSubtarget.h"
42 #include "SIInstrInfo.h"
43 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
44 #include "llvm/ADT/DenseMap.h"
45 #include "llvm/ADT/SmallVector.h"
46 #include "llvm/ADT/Statistic.h"
47 #include "llvm/CodeGen/MachineBasicBlock.h"
48 #include "llvm/CodeGen/MachineFunction.h"
49 #include "llvm/CodeGen/MachineFunctionPass.h"
50 #include "llvm/CodeGen/MachineInstr.h"
51 #include "llvm/CodeGen/MachineInstrBuilder.h"
52 #include "llvm/CodeGen/MachineOperand.h"
53 #include "llvm/CodeGen/MachineRegisterInfo.h"
54 #include "llvm/CodeGen/TargetRegisterInfo.h"
55 #include "llvm/Pass.h"
56 #include <cassert>
57 
58 using namespace llvm;
59 
60 #define DEBUG_TYPE "gcn-dpp-combine"
61 
62 STATISTIC(NumDPPMovsCombined, "Number of DPP moves combined.");
63 
64 namespace {
65 
66 class GCNDPPCombine : public MachineFunctionPass {
67   MachineRegisterInfo *MRI;
68   const SIInstrInfo *TII;
69 
70   using RegSubRegPair = TargetInstrInfo::RegSubRegPair;
71 
72   MachineOperand *getOldOpndValue(MachineOperand &OldOpnd) const;
73 
74   MachineInstr *createDPPInst(MachineInstr &OrigMI,
75                               MachineInstr &MovMI,
76                               RegSubRegPair CombOldVGPR,
77                               MachineOperand *OldOpnd,
78                               bool CombBCZ) const;
79 
80   MachineInstr *createDPPInst(MachineInstr &OrigMI,
81                               MachineInstr &MovMI,
82                               RegSubRegPair CombOldVGPR,
83                               bool CombBCZ) const;
84 
85   bool hasNoImmOrEqual(MachineInstr &MI,
86                        unsigned OpndName,
87                        int64_t Value,
88                        int64_t Mask = -1) const;
89 
90   bool combineDPPMov(MachineInstr &MI) const;
91 
92 public:
93   static char ID;
94 
95   GCNDPPCombine() : MachineFunctionPass(ID) {
96     initializeGCNDPPCombinePass(*PassRegistry::getPassRegistry());
97   }
98 
99   bool runOnMachineFunction(MachineFunction &MF) override;
100 
101   StringRef getPassName() const override { return "GCN DPP Combine"; }
102 
103   void getAnalysisUsage(AnalysisUsage &AU) const override {
104     AU.setPreservesCFG();
105     MachineFunctionPass::getAnalysisUsage(AU);
106   }
107 
108   MachineFunctionProperties getRequiredProperties() const override {
109     return MachineFunctionProperties()
110       .set(MachineFunctionProperties::Property::IsSSA);
111   }
112 
113 private:
114   int getDPPOp(unsigned Op) const;
115 };
116 
117 } // end anonymous namespace
118 
119 INITIALIZE_PASS(GCNDPPCombine, DEBUG_TYPE, "GCN DPP Combine", false, false)
120 
121 char GCNDPPCombine::ID = 0;
122 
123 char &llvm::GCNDPPCombineID = GCNDPPCombine::ID;
124 
125 FunctionPass *llvm::createGCNDPPCombinePass() {
126   return new GCNDPPCombine();
127 }
128 
129 int GCNDPPCombine::getDPPOp(unsigned Op) const {
130   auto DPP32 = AMDGPU::getDPPOp32(Op);
131   if (DPP32 == -1) {
132     auto E32 = AMDGPU::getVOPe32(Op);
133     DPP32 = (E32 == -1)? -1 : AMDGPU::getDPPOp32(E32);
134   }
135   return (DPP32 == -1 || TII->pseudoToMCOpcode(DPP32) == -1) ? -1 : DPP32;
136 }
137 
138 // tracks the register operand definition and returns:
139 //   1. immediate operand used to initialize the register if found
140 //   2. nullptr if the register operand is undef
141 //   3. the operand itself otherwise
142 MachineOperand *GCNDPPCombine::getOldOpndValue(MachineOperand &OldOpnd) const {
143   auto *Def = getVRegSubRegDef(getRegSubRegPair(OldOpnd), *MRI);
144   if (!Def)
145     return nullptr;
146 
147   switch(Def->getOpcode()) {
148   default: break;
149   case AMDGPU::IMPLICIT_DEF:
150     return nullptr;
151   case AMDGPU::COPY:
152   case AMDGPU::V_MOV_B32_e32: {
153     auto &Op1 = Def->getOperand(1);
154     if (Op1.isImm())
155       return &Op1;
156     break;
157   }
158   }
159   return &OldOpnd;
160 }
161 
162 MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
163                                            MachineInstr &MovMI,
164                                            RegSubRegPair CombOldVGPR,
165                                            bool CombBCZ) const {
166   assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp);
167 
168   auto OrigOp = OrigMI.getOpcode();
169   auto DPPOp = getDPPOp(OrigOp);
170   if (DPPOp == -1) {
171     LLVM_DEBUG(dbgs() << "  failed: no DPP opcode\n");
172     return nullptr;
173   }
174 
175   auto DPPInst = BuildMI(*OrigMI.getParent(), OrigMI,
176                          OrigMI.getDebugLoc(), TII->get(DPPOp))
177     .setMIFlags(OrigMI.getFlags());
178 
179   bool Fail = false;
180   do {
181     auto *Dst = TII->getNamedOperand(OrigMI, AMDGPU::OpName::vdst);
182     assert(Dst);
183     DPPInst.add(*Dst);
184     int NumOperands = 1;
185 
186     const int OldIdx = AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::old);
187     if (OldIdx != -1) {
188       assert(OldIdx == NumOperands);
189       assert(isOfRegClass(CombOldVGPR, AMDGPU::VGPR_32RegClass, *MRI));
190       auto *Def = getVRegSubRegDef(CombOldVGPR, *MRI);
191       DPPInst.addReg(CombOldVGPR.Reg, Def ? 0 : RegState::Undef,
192                      CombOldVGPR.SubReg);
193       ++NumOperands;
194     } else {
195       // TODO: this discards MAC/FMA instructions for now, let's add it later
196       LLVM_DEBUG(dbgs() << "  failed: no old operand in DPP instruction,"
197                            " TBD\n");
198       Fail = true;
199       break;
200     }
201 
202     if (auto *Mod0 = TII->getNamedOperand(OrigMI,
203                                           AMDGPU::OpName::src0_modifiers)) {
204       assert(NumOperands == AMDGPU::getNamedOperandIdx(DPPOp,
205                                           AMDGPU::OpName::src0_modifiers));
206       assert(0LL == (Mod0->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG)));
207       DPPInst.addImm(Mod0->getImm());
208       ++NumOperands;
209     } else if (AMDGPU::getNamedOperandIdx(DPPOp,
210                    AMDGPU::OpName::src0_modifiers) != -1) {
211       DPPInst.addImm(0);
212       ++NumOperands;
213     }
214     auto *Src0 = TII->getNamedOperand(MovMI, AMDGPU::OpName::src0);
215     assert(Src0);
216     if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src0)) {
217       LLVM_DEBUG(dbgs() << "  failed: src0 is illegal\n");
218       Fail = true;
219       break;
220     }
221     DPPInst.add(*Src0);
222     DPPInst->getOperand(NumOperands).setIsKill(false);
223     ++NumOperands;
224 
225     if (auto *Mod1 = TII->getNamedOperand(OrigMI,
226                                           AMDGPU::OpName::src1_modifiers)) {
227       assert(NumOperands == AMDGPU::getNamedOperandIdx(DPPOp,
228                                           AMDGPU::OpName::src1_modifiers));
229       assert(0LL == (Mod1->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG)));
230       DPPInst.addImm(Mod1->getImm());
231       ++NumOperands;
232     } else if (AMDGPU::getNamedOperandIdx(DPPOp,
233                    AMDGPU::OpName::src1_modifiers) != -1) {
234       DPPInst.addImm(0);
235       ++NumOperands;
236     }
237     if (auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1)) {
238       if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src1)) {
239         LLVM_DEBUG(dbgs() << "  failed: src1 is illegal\n");
240         Fail = true;
241         break;
242       }
243       DPPInst.add(*Src1);
244       ++NumOperands;
245     }
246 
247     if (auto *Src2 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src2)) {
248       if (!TII->getNamedOperand(*DPPInst.getInstr(), AMDGPU::OpName::src2) ||
249           !TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src2)) {
250         LLVM_DEBUG(dbgs() << "  failed: src2 is illegal\n");
251         Fail = true;
252         break;
253       }
254       DPPInst.add(*Src2);
255     }
256 
257     DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::dpp_ctrl));
258     DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask));
259     DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask));
260     DPPInst.addImm(CombBCZ ? 1 : 0);
261   } while (false);
262 
263   if (Fail) {
264     DPPInst.getInstr()->eraseFromParent();
265     return nullptr;
266   }
267   LLVM_DEBUG(dbgs() << "  combined:  " << *DPPInst.getInstr());
268   return DPPInst.getInstr();
269 }
270 
271 static bool isIdentityValue(unsigned OrigMIOp, MachineOperand *OldOpnd) {
272   assert(OldOpnd->isImm());
273   switch (OrigMIOp) {
274   default: break;
275   case AMDGPU::V_ADD_U32_e32:
276   case AMDGPU::V_ADD_U32_e64:
277   case AMDGPU::V_ADD_I32_e32:
278   case AMDGPU::V_ADD_I32_e64:
279   case AMDGPU::V_OR_B32_e32:
280   case AMDGPU::V_OR_B32_e64:
281   case AMDGPU::V_SUBREV_U32_e32:
282   case AMDGPU::V_SUBREV_U32_e64:
283   case AMDGPU::V_SUBREV_I32_e32:
284   case AMDGPU::V_SUBREV_I32_e64:
285   case AMDGPU::V_MAX_U32_e32:
286   case AMDGPU::V_MAX_U32_e64:
287   case AMDGPU::V_XOR_B32_e32:
288   case AMDGPU::V_XOR_B32_e64:
289     if (OldOpnd->getImm() == 0)
290       return true;
291     break;
292   case AMDGPU::V_AND_B32_e32:
293   case AMDGPU::V_AND_B32_e64:
294   case AMDGPU::V_MIN_U32_e32:
295   case AMDGPU::V_MIN_U32_e64:
296     if (static_cast<uint32_t>(OldOpnd->getImm()) ==
297         std::numeric_limits<uint32_t>::max())
298       return true;
299     break;
300   case AMDGPU::V_MIN_I32_e32:
301   case AMDGPU::V_MIN_I32_e64:
302     if (static_cast<int32_t>(OldOpnd->getImm()) ==
303         std::numeric_limits<int32_t>::max())
304       return true;
305     break;
306   case AMDGPU::V_MAX_I32_e32:
307   case AMDGPU::V_MAX_I32_e64:
308     if (static_cast<int32_t>(OldOpnd->getImm()) ==
309         std::numeric_limits<int32_t>::min())
310       return true;
311     break;
312   case AMDGPU::V_MUL_I32_I24_e32:
313   case AMDGPU::V_MUL_I32_I24_e64:
314   case AMDGPU::V_MUL_U32_U24_e32:
315   case AMDGPU::V_MUL_U32_U24_e64:
316     if (OldOpnd->getImm() == 1)
317       return true;
318     break;
319   }
320   return false;
321 }
322 
323 MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
324                                            MachineInstr &MovMI,
325                                            RegSubRegPair CombOldVGPR,
326                                            MachineOperand *OldOpndValue,
327                                            bool CombBCZ) const {
328   assert(CombOldVGPR.Reg);
329   if (!CombBCZ && OldOpndValue && OldOpndValue->isImm()) {
330     auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1);
331     if (!Src1 || !Src1->isReg()) {
332       LLVM_DEBUG(dbgs() << "  failed: no src1 or it isn't a register\n");
333       return nullptr;
334     }
335     if (!isIdentityValue(OrigMI.getOpcode(), OldOpndValue)) {
336       LLVM_DEBUG(dbgs() << "  failed: old immediate isn't an identity\n");
337       return nullptr;
338     }
339     CombOldVGPR = getRegSubRegPair(*Src1);
340     if (!isOfRegClass(CombOldVGPR, AMDGPU::VGPR_32RegClass, *MRI)) {
341       LLVM_DEBUG(dbgs() << "  failed: src1 isn't a VGPR32 register\n");
342       return nullptr;
343     }
344   }
345   return createDPPInst(OrigMI, MovMI, CombOldVGPR, CombBCZ);
346 }
347 
348 // returns true if MI doesn't have OpndName immediate operand or the
349 // operand has Value
350 bool GCNDPPCombine::hasNoImmOrEqual(MachineInstr &MI, unsigned OpndName,
351                                     int64_t Value, int64_t Mask) const {
352   auto *Imm = TII->getNamedOperand(MI, OpndName);
353   if (!Imm)
354     return true;
355 
356   assert(Imm->isImm());
357   return (Imm->getImm() & Mask) == Value;
358 }
359 
360 bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
361   assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp);
362   LLVM_DEBUG(dbgs() << "\nDPP combine: " << MovMI);
363 
364   auto *DstOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst);
365   assert(DstOpnd && DstOpnd->isReg());
366   auto DPPMovReg = DstOpnd->getReg();
367   if (DPPMovReg.isPhysical()) {
368     LLVM_DEBUG(dbgs() << "  failed: dpp move writes physreg\n");
369     return false;
370   }
371   if (execMayBeModifiedBeforeAnyUse(*MRI, DPPMovReg, MovMI)) {
372     LLVM_DEBUG(dbgs() << "  failed: EXEC mask should remain the same"
373                          " for all uses\n");
374     return false;
375   }
376 
377   auto *RowMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask);
378   assert(RowMaskOpnd && RowMaskOpnd->isImm());
379   auto *BankMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask);
380   assert(BankMaskOpnd && BankMaskOpnd->isImm());
381   const bool MaskAllLanes = RowMaskOpnd->getImm() == 0xF &&
382                             BankMaskOpnd->getImm() == 0xF;
383 
384   auto *BCZOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::bound_ctrl);
385   assert(BCZOpnd && BCZOpnd->isImm());
386   bool BoundCtrlZero = BCZOpnd->getImm();
387 
388   auto *OldOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::old);
389   auto *SrcOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::src0);
390   assert(OldOpnd && OldOpnd->isReg());
391   assert(SrcOpnd && SrcOpnd->isReg());
392   if (OldOpnd->getReg().isPhysical() || SrcOpnd->getReg().isPhysical()) {
393     LLVM_DEBUG(dbgs() << "  failed: dpp move reads physreg\n");
394     return false;
395   }
396 
397   auto * const OldOpndValue = getOldOpndValue(*OldOpnd);
398   // OldOpndValue is either undef (IMPLICIT_DEF) or immediate or something else
399   // We could use: assert(!OldOpndValue || OldOpndValue->isImm())
400   // but the third option is used to distinguish undef from non-immediate
401   // to reuse IMPLICIT_DEF instruction later
402   assert(!OldOpndValue || OldOpndValue->isImm() || OldOpndValue == OldOpnd);
403 
404   bool CombBCZ = false;
405 
406   if (MaskAllLanes && BoundCtrlZero) { // [1]
407     CombBCZ = true;
408   } else {
409     if (!OldOpndValue || !OldOpndValue->isImm()) {
410       LLVM_DEBUG(dbgs() << "  failed: the DPP mov isn't combinable\n");
411       return false;
412     }
413 
414     if (OldOpndValue->getParent()->getParent() != MovMI.getParent()) {
415       LLVM_DEBUG(dbgs() <<
416         "  failed: old reg def and mov should be in the same BB\n");
417       return false;
418     }
419 
420     if (OldOpndValue->getImm() == 0) {
421       if (MaskAllLanes) {
422         assert(!BoundCtrlZero); // by check [1]
423         CombBCZ = true;
424       }
425     } else if (BoundCtrlZero) {
426       assert(!MaskAllLanes); // by check [1]
427       LLVM_DEBUG(dbgs() <<
428         "  failed: old!=0 and bctrl:0 and not all lanes isn't combinable\n");
429       return false;
430     }
431   }
432 
433   LLVM_DEBUG(dbgs() << "  old=";
434     if (!OldOpndValue)
435       dbgs() << "undef";
436     else
437       dbgs() << *OldOpndValue;
438     dbgs() << ", bound_ctrl=" << CombBCZ << '\n');
439 
440   SmallVector<MachineInstr*, 4> OrigMIs, DPPMIs;
441   DenseMap<MachineInstr*, SmallVector<unsigned, 4>> RegSeqWithOpNos;
442   auto CombOldVGPR = getRegSubRegPair(*OldOpnd);
443   // try to reuse previous old reg if its undefined (IMPLICIT_DEF)
444   if (CombBCZ && OldOpndValue) { // CombOldVGPR should be undef
445     CombOldVGPR = RegSubRegPair(
446       MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass));
447     auto UndefInst = BuildMI(*MovMI.getParent(), MovMI, MovMI.getDebugLoc(),
448                              TII->get(AMDGPU::IMPLICIT_DEF), CombOldVGPR.Reg);
449     DPPMIs.push_back(UndefInst.getInstr());
450   }
451 
452   OrigMIs.push_back(&MovMI);
453   bool Rollback = true;
454   SmallVector<MachineOperand*, 16> Uses;
455 
456   for (auto &Use : MRI->use_nodbg_operands(DPPMovReg)) {
457     Uses.push_back(&Use);
458   }
459 
460   while (!Uses.empty()) {
461     MachineOperand *Use = Uses.pop_back_val();
462     Rollback = true;
463 
464     auto &OrigMI = *Use->getParent();
465     LLVM_DEBUG(dbgs() << "  try: " << OrigMI);
466 
467     auto OrigOp = OrigMI.getOpcode();
468     if (OrigOp == AMDGPU::REG_SEQUENCE) {
469       Register FwdReg = OrigMI.getOperand(0).getReg();
470       unsigned FwdSubReg = 0;
471 
472       if (execMayBeModifiedBeforeAnyUse(*MRI, FwdReg, OrigMI)) {
473         LLVM_DEBUG(dbgs() << "  failed: EXEC mask should remain the same"
474                              " for all uses\n");
475         break;
476       }
477 
478       unsigned OpNo, E = OrigMI.getNumOperands();
479       for (OpNo = 1; OpNo < E; OpNo += 2) {
480         if (OrigMI.getOperand(OpNo).getReg() == DPPMovReg) {
481           FwdSubReg = OrigMI.getOperand(OpNo + 1).getImm();
482           break;
483         }
484       }
485 
486       if (!FwdSubReg)
487         break;
488 
489       for (auto &Op : MRI->use_nodbg_operands(FwdReg)) {
490         if (Op.getSubReg() == FwdSubReg)
491           Uses.push_back(&Op);
492       }
493       RegSeqWithOpNos[&OrigMI].push_back(OpNo);
494       continue;
495     }
496 
497     if (TII->isVOP3(OrigOp)) {
498       if (!TII->hasVALU32BitEncoding(OrigOp)) {
499         LLVM_DEBUG(dbgs() << "  failed: VOP3 hasn't e32 equivalent\n");
500         break;
501       }
502       // check if other than abs|neg modifiers are set (opsel for example)
503       const int64_t Mask = ~(SISrcMods::ABS | SISrcMods::NEG);
504       if (!hasNoImmOrEqual(OrigMI, AMDGPU::OpName::src0_modifiers, 0, Mask) ||
505           !hasNoImmOrEqual(OrigMI, AMDGPU::OpName::src1_modifiers, 0, Mask) ||
506           !hasNoImmOrEqual(OrigMI, AMDGPU::OpName::clamp, 0) ||
507           !hasNoImmOrEqual(OrigMI, AMDGPU::OpName::omod, 0)) {
508         LLVM_DEBUG(dbgs() << "  failed: VOP3 has non-default modifiers\n");
509         break;
510       }
511     } else if (!TII->isVOP1(OrigOp) && !TII->isVOP2(OrigOp)) {
512       LLVM_DEBUG(dbgs() << "  failed: not VOP1/2/3\n");
513       break;
514     }
515 
516     auto *Src0 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src0);
517     auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1);
518     if (Use != Src0 && !(Use == Src1 && OrigMI.isCommutable())) { // [1]
519       LLVM_DEBUG(dbgs() << "  failed: no suitable operands\n");
520       break;
521     }
522 
523     assert(Src0 && "Src1 without Src0?");
524     if (Src1 && Src1->isIdenticalTo(*Src0)) {
525       assert(Src1->isReg());
526       LLVM_DEBUG(
527           dbgs()
528           << "  " << OrigMI
529           << "  failed: DPP register is used more than once per instruction\n");
530       break;
531     }
532 
533     LLVM_DEBUG(dbgs() << "  combining: " << OrigMI);
534     if (Use == Src0) {
535       if (auto *DPPInst = createDPPInst(OrigMI, MovMI, CombOldVGPR,
536                                         OldOpndValue, CombBCZ)) {
537         DPPMIs.push_back(DPPInst);
538         Rollback = false;
539       }
540     } else {
541       assert(Use == Src1 && OrigMI.isCommutable()); // by check [1]
542       auto *BB = OrigMI.getParent();
543       auto *NewMI = BB->getParent()->CloneMachineInstr(&OrigMI);
544       BB->insert(OrigMI, NewMI);
545       if (TII->commuteInstruction(*NewMI)) {
546         LLVM_DEBUG(dbgs() << "  commuted:  " << *NewMI);
547         if (auto *DPPInst = createDPPInst(*NewMI, MovMI, CombOldVGPR,
548                                           OldOpndValue, CombBCZ)) {
549           DPPMIs.push_back(DPPInst);
550           Rollback = false;
551         }
552       } else
553         LLVM_DEBUG(dbgs() << "  failed: cannot be commuted\n");
554       NewMI->eraseFromParent();
555     }
556     if (Rollback)
557       break;
558     OrigMIs.push_back(&OrigMI);
559   }
560 
561   Rollback |= !Uses.empty();
562 
563   for (auto *MI : *(Rollback? &DPPMIs : &OrigMIs))
564     MI->eraseFromParent();
565 
566   if (!Rollback) {
567     for (auto &S : RegSeqWithOpNos) {
568       if (MRI->use_nodbg_empty(S.first->getOperand(0).getReg())) {
569         S.first->eraseFromParent();
570         continue;
571       }
572       while (!S.second.empty())
573         S.first->getOperand(S.second.pop_back_val()).setIsUndef(true);
574     }
575   }
576 
577   return !Rollback;
578 }
579 
580 bool GCNDPPCombine::runOnMachineFunction(MachineFunction &MF) {
581   auto &ST = MF.getSubtarget<GCNSubtarget>();
582   if (!ST.hasDPP() || skipFunction(MF.getFunction()))
583     return false;
584 
585   MRI = &MF.getRegInfo();
586   TII = ST.getInstrInfo();
587 
588   bool Changed = false;
589   for (auto &MBB : MF) {
590     for (auto I = MBB.rbegin(), E = MBB.rend(); I != E;) {
591       auto &MI = *I++;
592       if (MI.getOpcode() == AMDGPU::V_MOV_B32_dpp && combineDPPMov(MI)) {
593         Changed = true;
594         ++NumDPPMovsCombined;
595       } else if (MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO) {
596         auto Split = TII->expandMovDPP64(MI);
597         for (auto M : { Split.first, Split.second }) {
598           if (combineDPPMov(*M))
599             ++NumDPPMovsCombined;
600         }
601         Changed = true;
602       }
603     }
604   }
605   return Changed;
606 }
607