1 //=== lib/CodeGen/GlobalISel/AMDGPURegBankCombiner.cpp ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass does combining of machine instructions at the generic MI level,
10 // after register banks are known.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "AMDGPU.h"
15 #include "AMDGPULegalizerInfo.h"
16 #include "AMDGPURegisterBankInfo.h"
17 #include "GCNSubtarget.h"
18 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
19 #include "SIMachineFunctionInfo.h"
20 #include "llvm/CodeGen/GlobalISel/Combiner.h"
21 #include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
22 #include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
23 #include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
24 #include "llvm/CodeGen/GlobalISel/GISelValueTracking.h"
25 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
26 #include "llvm/CodeGen/MachineDominators.h"
27 #include "llvm/CodeGen/TargetPassConfig.h"
28 #include "llvm/Target/TargetMachine.h"
29
30 #define GET_GICOMBINER_DEPS
31 #include "AMDGPUGenPreLegalizeGICombiner.inc"
32 #undef GET_GICOMBINER_DEPS
33
34 #define DEBUG_TYPE "amdgpu-regbank-combiner"
35
36 using namespace llvm;
37 using namespace MIPatternMatch;
38
39 namespace {
40 #define GET_GICOMBINER_TYPES
41 #include "AMDGPUGenRegBankGICombiner.inc"
42 #undef GET_GICOMBINER_TYPES
43
44 class AMDGPURegBankCombinerImpl : public Combiner {
45 protected:
46 const AMDGPURegBankCombinerImplRuleConfig &RuleConfig;
47 const GCNSubtarget &STI;
48 const RegisterBankInfo &RBI;
49 const TargetRegisterInfo &TRI;
50 const SIInstrInfo &TII;
51 const CombinerHelper Helper;
52
53 public:
54 AMDGPURegBankCombinerImpl(
55 MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
56 GISelValueTracking &VT, GISelCSEInfo *CSEInfo,
57 const AMDGPURegBankCombinerImplRuleConfig &RuleConfig,
58 const GCNSubtarget &STI, MachineDominatorTree *MDT,
59 const LegalizerInfo *LI);
60
getName()61 static const char *getName() { return "AMDGPURegBankCombinerImpl"; }
62
63 bool tryCombineAll(MachineInstr &I) const override;
64
65 bool isVgprRegBank(Register Reg) const;
66 Register getAsVgpr(Register Reg) const;
67
68 struct MinMaxMedOpc {
69 unsigned Min, Max, Med;
70 };
71
72 struct Med3MatchInfo {
73 unsigned Opc;
74 Register Val0, Val1, Val2;
75 };
76
77 MinMaxMedOpc getMinMaxPair(unsigned Opc) const;
78
79 template <class m_Cst, typename CstTy>
80 bool matchMed(MachineInstr &MI, MachineRegisterInfo &MRI, MinMaxMedOpc MMMOpc,
81 Register &Val, CstTy &K0, CstTy &K1) const;
82
83 bool matchIntMinMaxToMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo) const;
84 bool matchFPMinMaxToMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo) const;
85 bool matchFPMinMaxToClamp(MachineInstr &MI, Register &Reg) const;
86 bool matchFPMed3ToClamp(MachineInstr &MI, Register &Reg) const;
87 void applyMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo) const;
88 void applyClamp(MachineInstr &MI, Register &Reg) const;
89
90 void applyCanonicalizeZextShiftAmt(MachineInstr &MI, MachineInstr &Ext) const;
91
92 private:
93 SIModeRegisterDefaults getMode() const;
94 bool getIEEE() const;
95 bool getDX10Clamp() const;
96 bool isFminnumIeee(const MachineInstr &MI) const;
97 bool isFCst(MachineInstr *MI) const;
98 bool isClampZeroToOne(MachineInstr *K0, MachineInstr *K1) const;
99
100 #define GET_GICOMBINER_CLASS_MEMBERS
101 #define AMDGPUSubtarget GCNSubtarget
102 #include "AMDGPUGenRegBankGICombiner.inc"
103 #undef GET_GICOMBINER_CLASS_MEMBERS
104 #undef AMDGPUSubtarget
105 };
106
107 #define GET_GICOMBINER_IMPL
108 #define AMDGPUSubtarget GCNSubtarget
109 #include "AMDGPUGenRegBankGICombiner.inc"
110 #undef AMDGPUSubtarget
111 #undef GET_GICOMBINER_IMPL
112
/// Builds the combiner implementation for \p MF.
///
/// The register bank, register and instruction info objects are all obtained
/// from \p STI. The CombinerHelper is created in post-legalize mode
/// (IsPreLegalize = false) and receives the value tracking analysis, the
/// optional dominator tree \p MDT and the legalizer info \p LI.
AMDGPURegBankCombinerImpl::AMDGPURegBankCombinerImpl(
    MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
    GISelValueTracking &VT, GISelCSEInfo *CSEInfo,
    const AMDGPURegBankCombinerImplRuleConfig &RuleConfig,
    const GCNSubtarget &STI, MachineDominatorTree *MDT, const LegalizerInfo *LI)
    : Combiner(MF, CInfo, TPC, &VT, CSEInfo), RuleConfig(RuleConfig), STI(STI),
      RBI(*STI.getRegBankInfo()), TRI(*STI.getRegisterInfo()),
      TII(*STI.getInstrInfo()),
      Helper(Observer, B, /*IsPreLegalize*/ false, &VT, MDT, LI),
// The remaining member initializers come from the generated combiner file.
#define GET_GICOMBINER_CONSTRUCTOR_INITS
#include "AMDGPUGenRegBankGICombiner.inc"
#undef GET_GICOMBINER_CONSTRUCTOR_INITS
{
}
127
isVgprRegBank(Register Reg) const128 bool AMDGPURegBankCombinerImpl::isVgprRegBank(Register Reg) const {
129 return RBI.getRegBank(Reg, MRI, TRI)->getID() == AMDGPU::VGPRRegBankID;
130 }
131
getAsVgpr(Register Reg) const132 Register AMDGPURegBankCombinerImpl::getAsVgpr(Register Reg) const {
133 if (isVgprRegBank(Reg))
134 return Reg;
135
136 // Search for existing copy of Reg to vgpr.
137 for (MachineInstr &Use : MRI.use_instructions(Reg)) {
138 Register Def = Use.getOperand(0).getReg();
139 if (Use.getOpcode() == AMDGPU::COPY && isVgprRegBank(Def))
140 return Def;
141 }
142
143 // Copy Reg to vgpr.
144 Register VgprReg = B.buildCopy(MRI.getType(Reg), Reg).getReg(0);
145 MRI.setRegBank(VgprReg, RBI.getRegBank(AMDGPU::VGPRRegBankID));
146 return VgprReg;
147 }
148
149 AMDGPURegBankCombinerImpl::MinMaxMedOpc
getMinMaxPair(unsigned Opc) const150 AMDGPURegBankCombinerImpl::getMinMaxPair(unsigned Opc) const {
151 switch (Opc) {
152 default:
153 llvm_unreachable("Unsupported opcode");
154 case AMDGPU::G_SMAX:
155 case AMDGPU::G_SMIN:
156 return {AMDGPU::G_SMIN, AMDGPU::G_SMAX, AMDGPU::G_AMDGPU_SMED3};
157 case AMDGPU::G_UMAX:
158 case AMDGPU::G_UMIN:
159 return {AMDGPU::G_UMIN, AMDGPU::G_UMAX, AMDGPU::G_AMDGPU_UMED3};
160 case AMDGPU::G_FMAXNUM:
161 case AMDGPU::G_FMINNUM:
162 return {AMDGPU::G_FMINNUM, AMDGPU::G_FMAXNUM, AMDGPU::G_AMDGPU_FMED3};
163 case AMDGPU::G_FMAXNUM_IEEE:
164 case AMDGPU::G_FMINNUM_IEEE:
165 return {AMDGPU::G_FMINNUM_IEEE, AMDGPU::G_FMAXNUM_IEEE,
166 AMDGPU::G_AMDGPU_FMED3};
167 }
168 }
169
170 template <class m_Cst, typename CstTy>
matchMed(MachineInstr & MI,MachineRegisterInfo & MRI,MinMaxMedOpc MMMOpc,Register & Val,CstTy & K0,CstTy & K1) const171 bool AMDGPURegBankCombinerImpl::matchMed(MachineInstr &MI,
172 MachineRegisterInfo &MRI,
173 MinMaxMedOpc MMMOpc, Register &Val,
174 CstTy &K0, CstTy &K1) const {
175 // 4 operand commutes of: min(max(Val, K0), K1).
176 // Find K1 from outer instr: min(max(...), K1) or min(K1, max(...)).
177 // Find K0 and Val from inner instr: max(K0, Val) or max(Val, K0).
178 // 4 operand commutes of: max(min(Val, K1), K0).
179 // Find K0 from outer instr: max(min(...), K0) or max(K0, min(...)).
180 // Find K1 and Val from inner instr: min(K1, Val) or min(Val, K1).
181 return mi_match(
182 MI, MRI,
183 m_any_of(
184 m_CommutativeBinOp(
185 MMMOpc.Min, m_CommutativeBinOp(MMMOpc.Max, m_Reg(Val), m_Cst(K0)),
186 m_Cst(K1)),
187 m_CommutativeBinOp(
188 MMMOpc.Max, m_CommutativeBinOp(MMMOpc.Min, m_Reg(Val), m_Cst(K1)),
189 m_Cst(K0))));
190 }
191
matchIntMinMaxToMed3(MachineInstr & MI,Med3MatchInfo & MatchInfo) const192 bool AMDGPURegBankCombinerImpl::matchIntMinMaxToMed3(
193 MachineInstr &MI, Med3MatchInfo &MatchInfo) const {
194 Register Dst = MI.getOperand(0).getReg();
195 if (!isVgprRegBank(Dst))
196 return false;
197
198 // med3 for i16 is only available on gfx9+, and not available for v2i16.
199 LLT Ty = MRI.getType(Dst);
200 if ((Ty != LLT::scalar(16) || !STI.hasMed3_16()) && Ty != LLT::scalar(32))
201 return false;
202
203 MinMaxMedOpc OpcodeTriple = getMinMaxPair(MI.getOpcode());
204 Register Val;
205 std::optional<ValueAndVReg> K0, K1;
206 // Match min(max(Val, K0), K1) or max(min(Val, K1), K0). Then see if K0 <= K1.
207 if (!matchMed<GCstAndRegMatch>(MI, MRI, OpcodeTriple, Val, K0, K1))
208 return false;
209
210 if (OpcodeTriple.Med == AMDGPU::G_AMDGPU_SMED3 && K0->Value.sgt(K1->Value))
211 return false;
212 if (OpcodeTriple.Med == AMDGPU::G_AMDGPU_UMED3 && K0->Value.ugt(K1->Value))
213 return false;
214
215 MatchInfo = {OpcodeTriple.Med, Val, K0->VReg, K1->VReg};
216 return true;
217 }
218
219 // fmed3(NaN, K0, K1) = min(min(NaN, K0), K1)
220 // ieee = true : min/max(SNaN, K) = QNaN, min/max(QNaN, K) = K
221 // ieee = false : min/max(NaN, K) = K
222 // clamp(NaN) = dx10_clamp ? 0.0 : NaN
223 // Consider values of min(max(Val, K0), K1) and max(min(Val, K1), K0) as input.
224 // Other operand commutes (see matchMed) give same result since min and max are
225 // commutative.
226
// Try to replace fp min(max(Val, K0), K1) or max(min(Val, K1), K0), K0<=K1
228 // with fmed3(Val, K0, K1) or clamp(Val). Clamp requires K0 = 0.0 and K1 = 1.0.
229 // Val = SNaN only for ieee = true
230 // fmed3(SNaN, K0, K1) = min(min(SNaN, K0), K1) = min(QNaN, K1) = K1
231 // min(max(SNaN, K0), K1) = min(QNaN, K1) = K1
232 // max(min(SNaN, K1), K0) = max(K1, K0) = K1
233 // Val = NaN,ieee = false or Val = QNaN,ieee = true
234 // fmed3(NaN, K0, K1) = min(min(NaN, K0), K1) = min(K0, K1) = K0
235 // min(max(NaN, K0), K1) = min(K0, K1) = K0 (can clamp when dx10_clamp = true)
236 // max(min(NaN, K1), K0) = max(K1, K0) = K1 != K0
matchFPMinMaxToMed3(MachineInstr & MI,Med3MatchInfo & MatchInfo) const237 bool AMDGPURegBankCombinerImpl::matchFPMinMaxToMed3(
238 MachineInstr &MI, Med3MatchInfo &MatchInfo) const {
239 Register Dst = MI.getOperand(0).getReg();
240 LLT Ty = MRI.getType(Dst);
241
242 // med3 for f16 is only available on gfx9+, and not available for v2f16.
243 if ((Ty != LLT::scalar(16) || !STI.hasMed3_16()) && Ty != LLT::scalar(32))
244 return false;
245
246 auto OpcodeTriple = getMinMaxPair(MI.getOpcode());
247
248 Register Val;
249 std::optional<FPValueAndVReg> K0, K1;
250 // Match min(max(Val, K0), K1) or max(min(Val, K1), K0). Then see if K0 <= K1.
251 if (!matchMed<GFCstAndRegMatch>(MI, MRI, OpcodeTriple, Val, K0, K1))
252 return false;
253
254 if (K0->Value > K1->Value)
255 return false;
256
257 // For IEEE=false perform combine only when it's safe to assume that there are
258 // no NaN inputs. Most often MI is marked with nnan fast math flag.
259 // For IEEE=true consider NaN inputs. fmed3(NaN, K0, K1) is equivalent to
260 // min(min(NaN, K0), K1). Safe to fold for min(max(Val, K0), K1) since inner
261 // nodes(max/min) have same behavior when one input is NaN and other isn't.
262 // Don't consider max(min(SNaN, K1), K0) since there is no isKnownNeverQNaN,
263 // also post-legalizer inputs to min/max are fcanonicalized (never SNaN).
264 if ((getIEEE() && isFminnumIeee(MI)) || isKnownNeverNaN(Dst, MRI)) {
265 // Don't fold single use constant that can't be inlined.
266 if ((!MRI.hasOneNonDBGUse(K0->VReg) || TII.isInlineConstant(K0->Value)) &&
267 (!MRI.hasOneNonDBGUse(K1->VReg) || TII.isInlineConstant(K1->Value))) {
268 MatchInfo = {OpcodeTriple.Med, Val, K0->VReg, K1->VReg};
269 return true;
270 }
271 }
272
273 return false;
274 }
275
matchFPMinMaxToClamp(MachineInstr & MI,Register & Reg) const276 bool AMDGPURegBankCombinerImpl::matchFPMinMaxToClamp(MachineInstr &MI,
277 Register &Reg) const {
278 // Clamp is available on all types after regbankselect (f16, f32, f64, v2f16).
279 auto OpcodeTriple = getMinMaxPair(MI.getOpcode());
280 Register Val;
281 std::optional<FPValueAndVReg> K0, K1;
282 // Match min(max(Val, K0), K1) or max(min(Val, K1), K0).
283 if (!matchMed<GFCstOrSplatGFCstMatch>(MI, MRI, OpcodeTriple, Val, K0, K1))
284 return false;
285
286 if (!K0->Value.isExactlyValue(0.0) || !K1->Value.isExactlyValue(1.0))
287 return false;
288
289 // For IEEE=false perform combine only when it's safe to assume that there are
290 // no NaN inputs. Most often MI is marked with nnan fast math flag.
291 // For IEEE=true consider NaN inputs. Only min(max(QNaN, 0.0), 1.0) evaluates
292 // to 0.0 requires dx10_clamp = true.
293 if ((getIEEE() && getDX10Clamp() && isFminnumIeee(MI) &&
294 isKnownNeverSNaN(Val, MRI)) ||
295 isKnownNeverNaN(MI.getOperand(0).getReg(), MRI)) {
296 Reg = Val;
297 return true;
298 }
299
300 return false;
301 }
302
303 // Replacing fmed3(NaN, 0.0, 1.0) with clamp. Requires dx10_clamp = true.
304 // Val = SNaN only for ieee = true. It is important which operand is NaN.
305 // min(min(SNaN, 0.0), 1.0) = min(QNaN, 1.0) = 1.0
306 // min(min(SNaN, 1.0), 0.0) = min(QNaN, 0.0) = 0.0
307 // min(min(0.0, 1.0), SNaN) = min(0.0, SNaN) = QNaN
308 // Val = NaN,ieee = false or Val = QNaN,ieee = true
309 // min(min(NaN, 0.0), 1.0) = min(0.0, 1.0) = 0.0
310 // min(min(NaN, 1.0), 0.0) = min(1.0, 0.0) = 0.0
311 // min(min(0.0, 1.0), NaN) = min(0.0, NaN) = 0.0
matchFPMed3ToClamp(MachineInstr & MI,Register & Reg) const312 bool AMDGPURegBankCombinerImpl::matchFPMed3ToClamp(MachineInstr &MI,
313 Register &Reg) const {
314 // In llvm-ir, clamp is often represented as an intrinsic call to
315 // @llvm.amdgcn.fmed3.f32(%Val, 0.0, 1.0). Check for other operand orders.
316 MachineInstr *Src0 = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);
317 MachineInstr *Src1 = getDefIgnoringCopies(MI.getOperand(2).getReg(), MRI);
318 MachineInstr *Src2 = getDefIgnoringCopies(MI.getOperand(3).getReg(), MRI);
319
320 if (isFCst(Src0) && !isFCst(Src1))
321 std::swap(Src0, Src1);
322 if (isFCst(Src1) && !isFCst(Src2))
323 std::swap(Src1, Src2);
324 if (isFCst(Src0) && !isFCst(Src1))
325 std::swap(Src0, Src1);
326 if (!isClampZeroToOne(Src1, Src2))
327 return false;
328
329 Register Val = Src0->getOperand(0).getReg();
330
331 auto isOp3Zero = [&]() {
332 MachineInstr *Op3 = getDefIgnoringCopies(MI.getOperand(4).getReg(), MRI);
333 if (Op3->getOpcode() == TargetOpcode::G_FCONSTANT)
334 return Op3->getOperand(1).getFPImm()->isExactlyValue(0.0);
335 return false;
336 };
337 // For IEEE=false perform combine only when it's safe to assume that there are
338 // no NaN inputs. Most often MI is marked with nnan fast math flag.
339 // For IEEE=true consider NaN inputs. Requires dx10_clamp = true. Safe to fold
340 // when Val could be QNaN. If Val can also be SNaN third input should be 0.0.
341 if (isKnownNeverNaN(MI.getOperand(0).getReg(), MRI) ||
342 (getIEEE() && getDX10Clamp() &&
343 (isKnownNeverSNaN(Val, MRI) || isOp3Zero()))) {
344 Reg = Val;
345 return true;
346 }
347
348 return false;
349 }
350
applyClamp(MachineInstr & MI,Register & Reg) const351 void AMDGPURegBankCombinerImpl::applyClamp(MachineInstr &MI,
352 Register &Reg) const {
353 B.buildInstr(AMDGPU::G_AMDGPU_CLAMP, {MI.getOperand(0)}, {Reg},
354 MI.getFlags());
355 MI.eraseFromParent();
356 }
357
applyMed3(MachineInstr & MI,Med3MatchInfo & MatchInfo) const358 void AMDGPURegBankCombinerImpl::applyMed3(MachineInstr &MI,
359 Med3MatchInfo &MatchInfo) const {
360 B.buildInstr(MatchInfo.Opc, {MI.getOperand(0)},
361 {getAsVgpr(MatchInfo.Val0), getAsVgpr(MatchInfo.Val1),
362 getAsVgpr(MatchInfo.Val2)},
363 MI.getFlags());
364 MI.eraseFromParent();
365 }
366
applyCanonicalizeZextShiftAmt(MachineInstr & MI,MachineInstr & Ext) const367 void AMDGPURegBankCombinerImpl::applyCanonicalizeZextShiftAmt(
368 MachineInstr &MI, MachineInstr &Ext) const {
369 unsigned ShOpc = MI.getOpcode();
370 assert(ShOpc == AMDGPU::G_SHL || ShOpc == AMDGPU::G_LSHR ||
371 ShOpc == AMDGPU::G_ASHR);
372 assert(Ext.getOpcode() == AMDGPU::G_ZEXT);
373
374 Register AmtReg = Ext.getOperand(1).getReg();
375 Register ShDst = MI.getOperand(0).getReg();
376 Register ShSrc = MI.getOperand(1).getReg();
377
378 LLT ExtAmtTy = MRI.getType(Ext.getOperand(0).getReg());
379 LLT AmtTy = MRI.getType(AmtReg);
380
381 auto &RB = *MRI.getRegBank(AmtReg);
382
383 auto NewExt = B.buildAnyExt(ExtAmtTy, AmtReg);
384 auto Mask = B.buildConstant(
385 ExtAmtTy, maskTrailingOnes<uint64_t>(AmtTy.getScalarSizeInBits()));
386 auto And = B.buildAnd(ExtAmtTy, NewExt, Mask);
387 B.buildInstr(ShOpc, {ShDst}, {ShSrc, And});
388
389 MRI.setRegBank(NewExt.getReg(0), RB);
390 MRI.setRegBank(Mask.getReg(0), RB);
391 MRI.setRegBank(And.getReg(0), RB);
392 MI.eraseFromParent();
393 }
394
getMode() const395 SIModeRegisterDefaults AMDGPURegBankCombinerImpl::getMode() const {
396 return MF.getInfo<SIMachineFunctionInfo>()->getMode();
397 }
398
getIEEE() const399 bool AMDGPURegBankCombinerImpl::getIEEE() const { return getMode().IEEE; }
400
getDX10Clamp() const401 bool AMDGPURegBankCombinerImpl::getDX10Clamp() const {
402 return getMode().DX10Clamp;
403 }
404
isFminnumIeee(const MachineInstr & MI) const405 bool AMDGPURegBankCombinerImpl::isFminnumIeee(const MachineInstr &MI) const {
406 return MI.getOpcode() == AMDGPU::G_FMINNUM_IEEE;
407 }
408
isFCst(MachineInstr * MI) const409 bool AMDGPURegBankCombinerImpl::isFCst(MachineInstr *MI) const {
410 return MI->getOpcode() == AMDGPU::G_FCONSTANT;
411 }
412
isClampZeroToOne(MachineInstr * K0,MachineInstr * K1) const413 bool AMDGPURegBankCombinerImpl::isClampZeroToOne(MachineInstr *K0,
414 MachineInstr *K1) const {
415 if (isFCst(K0) && isFCst(K1)) {
416 const ConstantFP *KO_FPImm = K0->getOperand(1).getFPImm();
417 const ConstantFP *K1_FPImm = K1->getOperand(1).getFPImm();
418 return (KO_FPImm->isExactlyValue(0.0) && K1_FPImm->isExactlyValue(1.0)) ||
419 (KO_FPImm->isExactlyValue(1.0) && K1_FPImm->isExactlyValue(0.0));
420 }
421 return false;
422 }
423
424 // Pass boilerplate
425 // ================
426
427 class AMDGPURegBankCombiner : public MachineFunctionPass {
428 public:
429 static char ID;
430
431 AMDGPURegBankCombiner(bool IsOptNone = false);
432
getPassName() const433 StringRef getPassName() const override { return "AMDGPURegBankCombiner"; }
434
435 bool runOnMachineFunction(MachineFunction &MF) override;
436
437 void getAnalysisUsage(AnalysisUsage &AU) const override;
438
439 private:
440 bool IsOptNone;
441 AMDGPURegBankCombinerImplRuleConfig RuleConfig;
442 };
443 } // end anonymous namespace
444
getAnalysisUsage(AnalysisUsage & AU) const445 void AMDGPURegBankCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
446 AU.addRequired<TargetPassConfig>();
447 AU.setPreservesCFG();
448 getSelectionDAGFallbackAnalysisUsage(AU);
449 AU.addRequired<GISelValueTrackingAnalysisLegacy>();
450 AU.addPreserved<GISelValueTrackingAnalysisLegacy>();
451 if (!IsOptNone) {
452 AU.addRequired<MachineDominatorTreeWrapperPass>();
453 AU.addPreserved<MachineDominatorTreeWrapperPass>();
454 }
455 MachineFunctionPass::getAnalysisUsage(AU);
456 }
457
AMDGPURegBankCombiner(bool IsOptNone)458 AMDGPURegBankCombiner::AMDGPURegBankCombiner(bool IsOptNone)
459 : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
460 if (!RuleConfig.parseCommandLineOption())
461 report_fatal_error("Invalid rule identifier");
462 }
463
runOnMachineFunction(MachineFunction & MF)464 bool AMDGPURegBankCombiner::runOnMachineFunction(MachineFunction &MF) {
465 if (MF.getProperties().hasFailedISel())
466 return false;
467 auto *TPC = &getAnalysis<TargetPassConfig>();
468 const Function &F = MF.getFunction();
469 bool EnableOpt =
470 MF.getTarget().getOptLevel() != CodeGenOptLevel::None && !skipFunction(F);
471
472 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
473 GISelValueTracking *VT =
474 &getAnalysis<GISelValueTrackingAnalysisLegacy>().get(MF);
475
476 const auto *LI = ST.getLegalizerInfo();
477 MachineDominatorTree *MDT =
478 IsOptNone ? nullptr
479 : &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
480
481 CombinerInfo CInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true,
482 LI, EnableOpt, F.hasOptSize(), F.hasMinSize());
483 // Disable fixed-point iteration to reduce compile-time
484 CInfo.MaxIterations = 1;
485 CInfo.ObserverLvl = CombinerInfo::ObserverLevel::SinglePass;
486 // RegBankSelect seems not to leave dead instructions, so a full DCE pass is
487 // unnecessary.
488 CInfo.EnableFullDCE = false;
489 AMDGPURegBankCombinerImpl Impl(MF, CInfo, TPC, *VT, /*CSEInfo*/ nullptr,
490 RuleConfig, ST, MDT, LI);
491 return Impl.combineMachineInstrs();
492 }
493
// Pass identification member; it is handed to the MachineFunctionPass base
// class by the constructor.
char AMDGPURegBankCombiner::ID = 0;
// Pass registration under the DEBUG_TYPE name, listing the analyses the pass
// depends on.
INITIALIZE_PASS_BEGIN(AMDGPURegBankCombiner, DEBUG_TYPE,
                      "Combine AMDGPU machine instrs after regbankselect",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(GISelValueTrackingAnalysisLegacy)
INITIALIZE_PASS_END(AMDGPURegBankCombiner, DEBUG_TYPE,
                    "Combine AMDGPU machine instrs after regbankselect", false,
                    false)
503
504 FunctionPass *llvm::createAMDGPURegBankCombiner(bool IsOptNone) {
505 return new AMDGPURegBankCombiner(IsOptNone);
506 }
507