//=== lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp --------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPUCombinerHelper.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;
using namespace MIPatternMatch;

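// Returns true if a G_FNEG of the value defined by \p MI can be folded into
// \p MI itself, by negating its source operands and/or inverting the
// operation (e.g. fmin <-> fmax).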
LLVM_READNONE
static bool fnegFoldsIntoMI(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  case AMDGPU::G_FADD:
  case AMDGPU::G_FSUB:
  case AMDGPU::G_FMUL:
  case AMDGPU::G_FMA:
  case AMDGPU::G_FMAD:
  case AMDGPU::G_FMINNUM:
  case AMDGPU::G_FMAXNUM:
  case AMDGPU::G_FMINNUM_IEEE:
  case AMDGPU::G_FMAXNUM_IEEE:
  case AMDGPU::G_FSIN:
  case AMDGPU::G_FPEXT:
  case AMDGPU::G_INTRINSIC_TRUNC:
  case AMDGPU::G_FPTRUNC:
  case AMDGPU::G_FRINT:
  case AMDGPU::G_FNEARBYINT:
  case AMDGPU::G_INTRINSIC_ROUND:
  case AMDGPU::G_INTRINSIC_ROUNDEVEN:
  case AMDGPU::G_FCANONICALIZE:
  case AMDGPU::G_AMDGPU_RCP_IFLAG:
  case AMDGPU::G_AMDGPU_FMIN_LEGACY:
  case AMDGPU::G_AMDGPU_FMAX_LEGACY:
    return true;
  case AMDGPU::G_INTRINSIC: {
    unsigned IntrinsicID = MI.getIntrinsicID();
    switch (IntrinsicID) {
    case Intrinsic::amdgcn_rcp:
    case Intrinsic::amdgcn_rcp_legacy:
    case Intrinsic::amdgcn_sin:
    case Intrinsic::amdgcn_fmul_legacy:
    case Intrinsic::amdgcn_fmed3:
    case Intrinsic::amdgcn_fma_legacy:
      return true;
    default:
      return false;
    }
  }
  default:
    return false;
  }
}

/// \returns true if the operation will definitely need to use a 64-bit
/// encoding, and thus will use a VOP3 encoding regardless of the source
/// modifiers.
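///
/// For example (illustrative): a 3-source op such as G_FMA has four MI
/// operands (one def plus three sources), so it is treated as always needing
/// VOP3, while a 2-source op such as G_FADD is not.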
LLVM_READONLY
static bool opMustUseVOP3Encoding(const MachineInstr &MI,
                                  const MachineRegisterInfo &MRI) {
  return MI.getNumOperands() >
             (MI.getOpcode() == AMDGPU::G_INTRINSIC ? 4u : 3u) ||
         MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits() == 64;
}

// Most FP instructions support source modifiers.
LLVM_READONLY
static bool hasSourceMods(const MachineInstr &MI) {
  if (!MI.memoperands().empty())
    return false;

  switch (MI.getOpcode()) {
  case AMDGPU::COPY:
  case AMDGPU::G_SELECT:
  case AMDGPU::G_FDIV:
  case AMDGPU::G_FREM:
  case TargetOpcode::INLINEASM:
  case TargetOpcode::INLINEASM_BR:
  case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
  case AMDGPU::G_BITCAST:
  case AMDGPU::G_ANYEXT:
  case AMDGPU::G_BUILD_VECTOR:
  case AMDGPU::G_BUILD_VECTOR_TRUNC:
  case AMDGPU::G_PHI:
    return false;
  case AMDGPU::G_INTRINSIC: {
    unsigned IntrinsicID = MI.getIntrinsicID();
    switch (IntrinsicID) {
    case Intrinsic::amdgcn_interp_p1:
    case Intrinsic::amdgcn_interp_p2:
    case Intrinsic::amdgcn_interp_mov:
    case Intrinsic::amdgcn_interp_p1_f16:
    case Intrinsic::amdgcn_interp_p2_f16:
    case Intrinsic::amdgcn_div_scale:
      return false;
    default:
      return true;
    }
  }
  default:
    return true;
  }
}

static bool allUsesHaveSourceMods(MachineInstr &MI, MachineRegisterInfo &MRI,
                                  unsigned CostThreshold = 4) {
  // Some users (such as 3-operand FMA/MAD) must use a VOP3 encoding, and for
  // them a source modifier is truly free. If there are multiple users, and a
  // source modifier would force each of them into a VOP3 encoding, there will
  // be a code size increase. Try to avoid increasing code size unless we know
  // it will save on the instruction count.
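  //
  // Illustrative note: with the default CostThreshold of 4, up to four users
  // that would be pushed into the larger encoding are tolerated; one more and
  // the fold is rejected.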
  unsigned NumMayIncreaseSize = 0;
  Register Dst = MI.getOperand(0).getReg();
  for (const MachineInstr &Use : MRI.use_nodbg_instructions(Dst)) {
    if (!hasSourceMods(Use))
      return false;

    if (!opMustUseVOP3Encoding(Use, MRI)) {
      if (++NumMayIncreaseSize > CostThreshold)
        return false;
    }
  }
  return true;
}

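// Returns true if the sign of zero may be ignored for \p MI, either because
// the no-signed-zeros FP option is enabled globally or because \p MI carries
// the nsz flag.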
static bool mayIgnoreSignedZero(MachineInstr &MI) {
  const TargetOptions &Options = MI.getMF()->getTarget().Options;
  return Options.NoSignedZerosFPMath || MI.getFlag(MachineInstr::MIFlag::FmNsz);
}

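// Returns true if \p APF is bitwise equal to 1/(2*pi) (~0.15915494) in half,
// single, or double precision.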
static bool isInv2Pi(const APFloat &APF) {
  static const APFloat KF16(APFloat::IEEEhalf(), APInt(16, 0x3118));
  static const APFloat KF32(APFloat::IEEEsingle(), APInt(32, 0x3e22f983));
  static const APFloat KF64(APFloat::IEEEdouble(),
                            APInt(64, 0x3fc45f306dc9c882));

  return APF.bitwiseIsEqual(KF16) || APF.bitwiseIsEqual(KF32) ||
         APF.bitwiseIsEqual(KF64);
}

// +0.0 and 1/(2*pi) do not have negated inline immediates, so there is an
// additional cost to negate them.
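// (For reference, the FP inline immediates are roughly 0.0, +/-0.5, +/-1.0,
// +/-2.0, +/-4.0 and, on subtargets that support it, 1/(2*pi); neither -0.0
// nor -1/(2*pi) is in that set.)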
static bool isConstantCostlierToNegate(MachineInstr &MI, Register Reg,
                                       MachineRegisterInfo &MRI) {
  Optional<FPValueAndVReg> FPValReg;
  if (mi_match(Reg, MRI, m_GFCstOrSplat(FPValReg))) {
    if (FPValReg->Value.isZero() && !FPValReg->Value.isNegative())
      return true;

    const GCNSubtarget &ST = MI.getMF()->getSubtarget<GCNSubtarget>();
    if (ST.hasInv2PiInlineImm() && isInv2Pi(FPValReg->Value))
      return true;
  }
  return false;
}

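// Returns the opposite min/max opcode for \p Opc; folding an fneg through a
// min/max uses the identity -min(a, b) == max(-a, -b) (and vice versa).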
static unsigned inverseMinMax(unsigned Opc) {
  switch (Opc) {
  case AMDGPU::G_FMAXNUM:
    return AMDGPU::G_FMINNUM;
  case AMDGPU::G_FMINNUM:
    return AMDGPU::G_FMAXNUM;
  case AMDGPU::G_FMAXNUM_IEEE:
    return AMDGPU::G_FMINNUM_IEEE;
  case AMDGPU::G_FMINNUM_IEEE:
    return AMDGPU::G_FMAXNUM_IEEE;
  case AMDGPU::G_AMDGPU_FMAX_LEGACY:
    return AMDGPU::G_AMDGPU_FMIN_LEGACY;
  case AMDGPU::G_AMDGPU_FMIN_LEGACY:
    return AMDGPU::G_AMDGPU_FMAX_LEGACY;
  default:
    llvm_unreachable("invalid min/max opcode");
  }
}

bool AMDGPUCombinerHelper::matchFoldableFneg(MachineInstr &MI,
                                             MachineInstr *&MatchInfo) {
  Register Src = MI.getOperand(1).getReg();
  MatchInfo = MRI.getVRegDef(Src);

  // If the input has multiple uses and we can either fold the negate down, or
  // the other uses cannot, give up. This both prevents unprofitable
  // transformations and infinite loops: we won't repeatedly try to fold around
  // a negate that has no 'good' form.
  if (MRI.hasOneNonDBGUse(Src)) {
    if (allUsesHaveSourceMods(MI, MRI, 0))
      return false;
  } else {
    if (fnegFoldsIntoMI(*MatchInfo) &&
        (allUsesHaveSourceMods(MI, MRI) ||
         !allUsesHaveSourceMods(*MatchInfo, MRI)))
      return false;
  }

  switch (MatchInfo->getOpcode()) {
  case AMDGPU::G_FMINNUM:
  case AMDGPU::G_FMAXNUM:
  case AMDGPU::G_FMINNUM_IEEE:
  case AMDGPU::G_FMAXNUM_IEEE:
  case AMDGPU::G_AMDGPU_FMIN_LEGACY:
  case AMDGPU::G_AMDGPU_FMAX_LEGACY:
    // 0 doesn't have a negated inline immediate.
    return !isConstantCostlierToNegate(*MatchInfo,
                                       MatchInfo->getOperand(2).getReg(), MRI);
  case AMDGPU::G_FADD:
  case AMDGPU::G_FSUB:
  case AMDGPU::G_FMA:
  case AMDGPU::G_FMAD:
    return mayIgnoreSignedZero(*MatchInfo);
  case AMDGPU::G_FMUL:
  case AMDGPU::G_FPEXT:
  case AMDGPU::G_INTRINSIC_TRUNC:
  case AMDGPU::G_FPTRUNC:
  case AMDGPU::G_FRINT:
  case AMDGPU::G_FNEARBYINT:
  case AMDGPU::G_INTRINSIC_ROUND:
  case AMDGPU::G_INTRINSIC_ROUNDEVEN:
  case AMDGPU::G_FSIN:
  case AMDGPU::G_FCANONICALIZE:
  case AMDGPU::G_AMDGPU_RCP_IFLAG:
    return true;
  case AMDGPU::G_INTRINSIC: {
    unsigned IntrinsicID = MatchInfo->getIntrinsicID();
    switch (IntrinsicID) {
    case Intrinsic::amdgcn_rcp:
    case Intrinsic::amdgcn_rcp_legacy:
    case Intrinsic::amdgcn_sin:
    case Intrinsic::amdgcn_fmul_legacy:
    case Intrinsic::amdgcn_fmed3:
      return true;
    case Intrinsic::amdgcn_fma_legacy:
      return mayIgnoreSignedZero(*MatchInfo);
    default:
      return false;
    }
  }
  default:
    return false;
  }
}

void AMDGPUCombinerHelper::applyFoldableFneg(MachineInstr &MI,
                                             MachineInstr *&MatchInfo) {
  // Transform:
  // %A = inst %Op1, ...
  // %B = fneg %A
  //
  // into:
  //
  // (if %A has one use, specifically fneg above)
  // %B = inst (maybe fneg %Op1), ...
  //
  // (if %A has multiple uses)
  // %B = inst (maybe fneg %Op1), ...
  // %A = fneg %B
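  //
  // Concrete sketch of the multi-use case (illustrative; the register names
  // are made up):
  //   %A = G_FMUL %X, %Y
  //   %B = G_FNEG %A
  //   %C = G_FADD %A, %Z
  // becomes:
  //   %NegY = G_FNEG %Y
  //   %B = G_FMUL %X, %NegY    ; %B now holds -(X * Y)
  //   %A = G_FNEG %B           ; rebuilt for the remaining use in %C
  //   %C = G_FADD %A, %Z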

  // Replace the register in Op with a register holding its negated value,
  // reusing the source of an existing fneg when possible; otherwise build a
  // new G_FNEG.
  auto NegateOperand = [&](MachineOperand &Op) {
    Register Reg = Op.getReg();
    if (!mi_match(Reg, MRI, m_GFNeg(m_Reg(Reg))))
      Reg = Builder.buildFNeg(MRI.getType(Reg), Reg).getReg(0);
    replaceRegOpWith(MRI, Op, Reg);
  };

  // Replace one of the two operand registers with a register holding its
  // negated value.
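  // For a product, negating either factor negates the result
  // (-(a * b) == (-a) * b == a * (-b)), so it is enough to flip whichever
  // operand is cheaper, preferring one that is already defined by an fneg.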
  auto NegateEitherOperand = [&](MachineOperand &X, MachineOperand &Y) {
    Register XReg = X.getReg();
    Register YReg = Y.getReg();
    if (mi_match(XReg, MRI, m_GFNeg(m_Reg(XReg))))
      replaceRegOpWith(MRI, X, XReg);
    else if (mi_match(YReg, MRI, m_GFNeg(m_Reg(YReg))))
      replaceRegOpWith(MRI, Y, YReg);
    else {
      YReg = Builder.buildFNeg(MRI.getType(YReg), YReg).getReg(0);
      replaceRegOpWith(MRI, Y, YReg);
    }
  };

  Builder.setInstrAndDebugLoc(*MatchInfo);

  // Negate the appropriate operands so that the value produced by MatchInfo is
  // negated.
  switch (MatchInfo->getOpcode()) {
  case AMDGPU::G_FADD:
  case AMDGPU::G_FSUB:
    NegateOperand(MatchInfo->getOperand(1));
    NegateOperand(MatchInfo->getOperand(2));
    break;
  case AMDGPU::G_FMUL:
    NegateEitherOperand(MatchInfo->getOperand(1), MatchInfo->getOperand(2));
    break;
  case AMDGPU::G_FMINNUM:
  case AMDGPU::G_FMAXNUM:
  case AMDGPU::G_FMINNUM_IEEE:
  case AMDGPU::G_FMAXNUM_IEEE:
  case AMDGPU::G_AMDGPU_FMIN_LEGACY:
  case AMDGPU::G_AMDGPU_FMAX_LEGACY: {
    NegateOperand(MatchInfo->getOperand(1));
    NegateOperand(MatchInfo->getOperand(2));
    unsigned Opposite = inverseMinMax(MatchInfo->getOpcode());
    replaceOpcodeWith(*MatchInfo, Opposite);
    break;
  }
  case AMDGPU::G_FMA:
  case AMDGPU::G_FMAD:
    NegateEitherOperand(MatchInfo->getOperand(1), MatchInfo->getOperand(2));
    NegateOperand(MatchInfo->getOperand(3));
    break;
  case AMDGPU::G_FPEXT:
  case AMDGPU::G_INTRINSIC_TRUNC:
  case AMDGPU::G_FRINT:
  case AMDGPU::G_FNEARBYINT:
  case AMDGPU::G_INTRINSIC_ROUND:
  case AMDGPU::G_INTRINSIC_ROUNDEVEN:
  case AMDGPU::G_FSIN:
  case AMDGPU::G_FCANONICALIZE:
  case AMDGPU::G_AMDGPU_RCP_IFLAG:
  case AMDGPU::G_FPTRUNC:
    NegateOperand(MatchInfo->getOperand(1));
    break;
  case AMDGPU::G_INTRINSIC: {
    unsigned IntrinsicID = MatchInfo->getIntrinsicID();
    switch (IntrinsicID) {
    case Intrinsic::amdgcn_rcp:
    case Intrinsic::amdgcn_rcp_legacy:
    case Intrinsic::amdgcn_sin:
      NegateOperand(MatchInfo->getOperand(2));
      break;
    case Intrinsic::amdgcn_fmul_legacy:
      NegateEitherOperand(MatchInfo->getOperand(2), MatchInfo->getOperand(3));
      break;
    case Intrinsic::amdgcn_fmed3:
      NegateOperand(MatchInfo->getOperand(2));
      NegateOperand(MatchInfo->getOperand(3));
      NegateOperand(MatchInfo->getOperand(4));
      break;
    case Intrinsic::amdgcn_fma_legacy:
      NegateEitherOperand(MatchInfo->getOperand(2), MatchInfo->getOperand(3));
      NegateOperand(MatchInfo->getOperand(4));
      break;
    default:
      llvm_unreachable("folding fneg not supported for this intrinsic");
    }
    break;
  }
  default:
    llvm_unreachable("folding fneg not supported for this instruction");
  }

  Register Dst = MI.getOperand(0).getReg();
  Register MatchInfoDst = MatchInfo->getOperand(0).getReg();

  if (MRI.hasOneNonDBGUse(MatchInfoDst)) {
    // MatchInfo now produces the negated value, so use it in place of the old
    // Dst.
    replaceRegWith(MRI, Dst, MatchInfoDst);
  } else {
    // We want to swap all uses of Dst with uses of MatchInfoDst and vice
    // versa, but replaceRegWith would replace defs as well. It is easier to
    // replace one def with a new register.
    LLT Type = MRI.getType(Dst);
    Register NegatedMatchInfo = MRI.createGenericVirtualRegister(Type);
    replaceRegOpWith(MRI, MatchInfo->getOperand(0), NegatedMatchInfo);

    // MatchInfo now produces the negated value, so use it in place of the old
    // Dst.
    replaceRegWith(MRI, Dst, NegatedMatchInfo);

    // Recreate the non-negated value for the other uses of the old
    // MatchInfoDst.
    Builder.setInstrAndDebugLoc(MI);
    Builder.buildFNeg(MatchInfoDst, NegatedMatchInfo, MI.getFlags());
  }

  MI.eraseFromParent();
}