//=== lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp ---------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPUCombinerHelper.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;
using namespace MIPatternMatch;

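// Return true if an fneg of this instruction's result can be folded into the
// instruction itself by negating its inputs (and, for min/max, flipping the
// opcode) instead.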
LLVM_READNONE
static bool fnegFoldsIntoMI(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  case AMDGPU::G_FADD:
  case AMDGPU::G_FSUB:
  case AMDGPU::G_FMUL:
  case AMDGPU::G_FMA:
  case AMDGPU::G_FMAD:
  case AMDGPU::G_FMINNUM:
  case AMDGPU::G_FMAXNUM:
  case AMDGPU::G_FMINNUM_IEEE:
  case AMDGPU::G_FMAXNUM_IEEE:
  case AMDGPU::G_FMINIMUM:
  case AMDGPU::G_FMAXIMUM:
  case AMDGPU::G_FSIN:
  case AMDGPU::G_FPEXT:
  case AMDGPU::G_INTRINSIC_TRUNC:
  case AMDGPU::G_FPTRUNC:
  case AMDGPU::G_FRINT:
  case AMDGPU::G_FNEARBYINT:
  case AMDGPU::G_INTRINSIC_ROUND:
  case AMDGPU::G_INTRINSIC_ROUNDEVEN:
  case AMDGPU::G_FCANONICALIZE:
  case AMDGPU::G_AMDGPU_RCP_IFLAG:
  case AMDGPU::G_AMDGPU_FMIN_LEGACY:
  case AMDGPU::G_AMDGPU_FMAX_LEGACY:
    return true;
  case AMDGPU::G_INTRINSIC: {
    Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
    switch (IntrinsicID) {
    case Intrinsic::amdgcn_rcp:
    case Intrinsic::amdgcn_rcp_legacy:
    case Intrinsic::amdgcn_sin:
    case Intrinsic::amdgcn_fmul_legacy:
    case Intrinsic::amdgcn_fmed3:
    case Intrinsic::amdgcn_fma_legacy:
      return true;
    default:
      return false;
    }
  }
  default:
    return false;
  }
}

/// \returns true if the operation will definitely need to use a 64-bit
/// encoding, and thus will use a VOP3 encoding regardless of the source
/// modifiers.
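/// More than a def plus two sources (plus the intrinsic ID operand for
/// intrinsics), or a 64-bit scalar result, does not fit in the 32-bit
/// encodings and already requires the VOP3 form.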
LLVM_READONLY
static bool opMustUseVOP3Encoding(const MachineInstr &MI,
                                  const MachineRegisterInfo &MRI) {
  return MI.getNumOperands() > (isa<GIntrinsic>(MI) ? 4u : 3u) ||
         MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits() == 64;
}

// Most FP instructions support source modifiers.
LLVM_READONLY
static bool hasSourceMods(const MachineInstr &MI) {
  if (!MI.memoperands().empty())
    return false;

  switch (MI.getOpcode()) {
  case AMDGPU::COPY:
  case AMDGPU::G_SELECT:
  case AMDGPU::G_FDIV:
  case AMDGPU::G_FREM:
  case TargetOpcode::INLINEASM:
  case TargetOpcode::INLINEASM_BR:
  case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
  case AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
  case AMDGPU::G_BITCAST:
  case AMDGPU::G_ANYEXT:
  case AMDGPU::G_BUILD_VECTOR:
  case AMDGPU::G_BUILD_VECTOR_TRUNC:
  case AMDGPU::G_PHI:
    return false;
  case AMDGPU::G_INTRINSIC:
  case AMDGPU::G_INTRINSIC_CONVERGENT: {
    Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
    switch (IntrinsicID) {
    case Intrinsic::amdgcn_interp_p1:
    case Intrinsic::amdgcn_interp_p2:
    case Intrinsic::amdgcn_interp_mov:
    case Intrinsic::amdgcn_interp_p1_f16:
    case Intrinsic::amdgcn_interp_p2_f16:
    case Intrinsic::amdgcn_div_scale:
      return false;
    default:
      return true;
    }
  }
  default:
    return true;
  }
}

static bool allUsesHaveSourceMods(MachineInstr &MI, MachineRegisterInfo &MRI,
                                  unsigned CostThreshold = 4) {
  // Some users (such as 3-operand FMA/MAD) must use a VOP3 encoding, and thus
  // it is truly free to use a source modifier there. If there are multiple
  // users and each of them would have to switch to VOP3 to take the modifier,
  // there will be a code size increase. Try to avoid increasing code size
  // unless we know it will save on the instruction count.
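  // Users that are already forced into VOP3 take the modifier for free; count
  // only the remaining users, and give up once more than CostThreshold of them
  // would have to grow to the 64-bit encoding.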
  unsigned NumMayIncreaseSize = 0;
  Register Dst = MI.getOperand(0).getReg();
  for (const MachineInstr &Use : MRI.use_nodbg_instructions(Dst)) {
    if (!hasSourceMods(Use))
      return false;

    if (!opMustUseVOP3Encoding(Use, MRI)) {
      if (++NumMayIncreaseSize > CostThreshold)
        return false;
    }
  }
  return true;
}

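// The sign of zero may be ignored either globally (no-signed-zeros-fp-math)
// or per-instruction via the nsz fast-math flag.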
static bool mayIgnoreSignedZero(MachineInstr &MI) {
  const TargetOptions &Options = MI.getMF()->getTarget().Options;
  return Options.NoSignedZerosFPMath || MI.getFlag(MachineInstr::MIFlag::FmNsz);
}

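// Bitwise match against the half, single and double precision encodings of
// 1.0 / (2.0 * pi).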
static bool isInv2Pi(const APFloat &APF) {
  static const APFloat KF16(APFloat::IEEEhalf(), APInt(16, 0x3118));
  static const APFloat KF32(APFloat::IEEEsingle(), APInt(32, 0x3e22f983));
  static const APFloat KF64(APFloat::IEEEdouble(),
                            APInt(64, 0x3fc45f306dc9c882));

  return APF.bitwiseIsEqual(KF16) || APF.bitwiseIsEqual(KF32) ||
         APF.bitwiseIsEqual(KF64);
}

// +0.0 and 1.0 / (2.0 * pi) have inline immediates, but their negated forms
// do not, so there is an additional cost to negate them.
static bool isConstantCostlierToNegate(MachineInstr &MI, Register Reg,
                                       MachineRegisterInfo &MRI) {
  std::optional<FPValueAndVReg> FPValReg;
  if (mi_match(Reg, MRI, m_GFCstOrSplat(FPValReg))) {
    if (FPValReg->Value.isZero() && !FPValReg->Value.isNegative())
      return true;

    const GCNSubtarget &ST = MI.getMF()->getSubtarget<GCNSubtarget>();
    if (ST.hasInv2PiInlineImm() && isInv2Pi(FPValReg->Value))
      return true;
  }
  return false;
}

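// Map a min/max opcode to its opposite, since fneg(min(a, b)) folds to
// max(fneg(a), fneg(b)) and vice versa.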
static unsigned inverseMinMax(unsigned Opc) {
  switch (Opc) {
  case AMDGPU::G_FMAXNUM:
    return AMDGPU::G_FMINNUM;
  case AMDGPU::G_FMINNUM:
    return AMDGPU::G_FMAXNUM;
  case AMDGPU::G_FMAXNUM_IEEE:
    return AMDGPU::G_FMINNUM_IEEE;
  case AMDGPU::G_FMINNUM_IEEE:
    return AMDGPU::G_FMAXNUM_IEEE;
  case AMDGPU::G_FMAXIMUM:
    return AMDGPU::G_FMINIMUM;
  case AMDGPU::G_FMINIMUM:
    return AMDGPU::G_FMAXIMUM;
  case AMDGPU::G_AMDGPU_FMAX_LEGACY:
    return AMDGPU::G_AMDGPU_FMIN_LEGACY;
  case AMDGPU::G_AMDGPU_FMIN_LEGACY:
    return AMDGPU::G_AMDGPU_FMAX_LEGACY;
  default:
    llvm_unreachable("invalid min/max opcode");
  }
}

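// Match an fneg whose source instruction can absorb the negation by negating
// its own inputs (and, for min/max, flipping the opcode).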
bool AMDGPUCombinerHelper::matchFoldableFneg(MachineInstr &MI,
                                             MachineInstr *&MatchInfo) {
  Register Src = MI.getOperand(1).getReg();
  MatchInfo = MRI.getVRegDef(Src);

  // If the input has multiple uses and we can either fold the negate down, or
  // the other uses cannot, give up. This both prevents unprofitable
  // transformations and infinite loops: we won't repeatedly try to fold around
  // a negate that has no 'good' form.
  if (MRI.hasOneNonDBGUse(Src)) {
    if (allUsesHaveSourceMods(MI, MRI, 0))
      return false;
  } else {
    if (fnegFoldsIntoMI(*MatchInfo) &&
        (allUsesHaveSourceMods(MI, MRI) ||
         !allUsesHaveSourceMods(*MatchInfo, MRI)))
      return false;
  }

  switch (MatchInfo->getOpcode()) {
  case AMDGPU::G_FMINNUM:
  case AMDGPU::G_FMAXNUM:
  case AMDGPU::G_FMINNUM_IEEE:
  case AMDGPU::G_FMAXNUM_IEEE:
  case AMDGPU::G_FMINIMUM:
  case AMDGPU::G_FMAXIMUM:
  case AMDGPU::G_AMDGPU_FMIN_LEGACY:
  case AMDGPU::G_AMDGPU_FMAX_LEGACY:
    // 0 doesn't have a negated inline immediate.
    return !isConstantCostlierToNegate(*MatchInfo,
                                       MatchInfo->getOperand(2).getReg(), MRI);
  case AMDGPU::G_FADD:
  case AMDGPU::G_FSUB:
  case AMDGPU::G_FMA:
  case AMDGPU::G_FMAD:
    return mayIgnoreSignedZero(*MatchInfo);
  case AMDGPU::G_FMUL:
  case AMDGPU::G_FPEXT:
  case AMDGPU::G_INTRINSIC_TRUNC:
  case AMDGPU::G_FPTRUNC:
  case AMDGPU::G_FRINT:
  case AMDGPU::G_FNEARBYINT:
  case AMDGPU::G_INTRINSIC_ROUND:
  case AMDGPU::G_INTRINSIC_ROUNDEVEN:
  case AMDGPU::G_FSIN:
  case AMDGPU::G_FCANONICALIZE:
  case AMDGPU::G_AMDGPU_RCP_IFLAG:
    return true;
  case AMDGPU::G_INTRINSIC:
  case AMDGPU::G_INTRINSIC_CONVERGENT: {
    Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MatchInfo)->getIntrinsicID();
    switch (IntrinsicID) {
    case Intrinsic::amdgcn_rcp:
    case Intrinsic::amdgcn_rcp_legacy:
    case Intrinsic::amdgcn_sin:
    case Intrinsic::amdgcn_fmul_legacy:
    case Intrinsic::amdgcn_fmed3:
      return true;
    case Intrinsic::amdgcn_fma_legacy:
      return mayIgnoreSignedZero(*MatchInfo);
    default:
      return false;
    }
  }
  default:
    return false;
  }
}

void AMDGPUCombinerHelper::applyFoldableFneg(MachineInstr &MI,
                                             MachineInstr *&MatchInfo) {
  // Transform:
  // %A = inst %Op1, ...
  // %B = fneg %A
  //
  // into:
  //
  // (if %A has one use, specifically fneg above)
  // %B = inst (maybe fneg %Op1), ...
  //
  // (if %A has multiple uses)
  // %B = inst (maybe fneg %Op1), ...
  // %A = fneg %B

  // Replace the register in Op with a register holding the negated value.
  auto NegateOperand = [&](MachineOperand &Op) {
    Register Reg = Op.getReg();
    if (!mi_match(Reg, MRI, m_GFNeg(m_Reg(Reg))))
      Reg = Builder.buildFNeg(MRI.getType(Reg), Reg).getReg(0);
    replaceRegOpWith(MRI, Op, Reg);
  };

  // Replace one of the two registers with a register holding the negated
  // value, preferring an operand that is already the result of an fneg.
  auto NegateEitherOperand = [&](MachineOperand &X, MachineOperand &Y) {
    Register XReg = X.getReg();
    Register YReg = Y.getReg();
    if (mi_match(XReg, MRI, m_GFNeg(m_Reg(XReg))))
      replaceRegOpWith(MRI, X, XReg);
    else if (mi_match(YReg, MRI, m_GFNeg(m_Reg(YReg))))
      replaceRegOpWith(MRI, Y, YReg);
    else {
      YReg = Builder.buildFNeg(MRI.getType(YReg), YReg).getReg(0);
      replaceRegOpWith(MRI, Y, YReg);
    }
  };

  Builder.setInstrAndDebugLoc(*MatchInfo);

  // Negate the appropriate operands so that the resulting value of MatchInfo
  // is negated.
  switch (MatchInfo->getOpcode()) {
  case AMDGPU::G_FADD:
  case AMDGPU::G_FSUB:
    NegateOperand(MatchInfo->getOperand(1));
    NegateOperand(MatchInfo->getOperand(2));
    break;
  case AMDGPU::G_FMUL:
    NegateEitherOperand(MatchInfo->getOperand(1), MatchInfo->getOperand(2));
    break;
  case AMDGPU::G_FMINNUM:
  case AMDGPU::G_FMAXNUM:
  case AMDGPU::G_FMINNUM_IEEE:
  case AMDGPU::G_FMAXNUM_IEEE:
  case AMDGPU::G_FMINIMUM:
  case AMDGPU::G_FMAXIMUM:
  case AMDGPU::G_AMDGPU_FMIN_LEGACY:
  case AMDGPU::G_AMDGPU_FMAX_LEGACY: {
    NegateOperand(MatchInfo->getOperand(1));
    NegateOperand(MatchInfo->getOperand(2));
    unsigned Opposite = inverseMinMax(MatchInfo->getOpcode());
    replaceOpcodeWith(*MatchInfo, Opposite);
    break;
  }
  case AMDGPU::G_FMA:
  case AMDGPU::G_FMAD:
    NegateEitherOperand(MatchInfo->getOperand(1), MatchInfo->getOperand(2));
    NegateOperand(MatchInfo->getOperand(3));
    break;
  case AMDGPU::G_FPEXT:
  case AMDGPU::G_INTRINSIC_TRUNC:
  case AMDGPU::G_FRINT:
  case AMDGPU::G_FNEARBYINT:
  case AMDGPU::G_INTRINSIC_ROUND:
  case AMDGPU::G_INTRINSIC_ROUNDEVEN:
  case AMDGPU::G_FSIN:
  case AMDGPU::G_FCANONICALIZE:
  case AMDGPU::G_AMDGPU_RCP_IFLAG:
  case AMDGPU::G_FPTRUNC:
    NegateOperand(MatchInfo->getOperand(1));
    break;
  case AMDGPU::G_INTRINSIC:
  case AMDGPU::G_INTRINSIC_CONVERGENT: {
    Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MatchInfo)->getIntrinsicID();
    switch (IntrinsicID) {
    case Intrinsic::amdgcn_rcp:
    case Intrinsic::amdgcn_rcp_legacy:
    case Intrinsic::amdgcn_sin:
      NegateOperand(MatchInfo->getOperand(2));
      break;
    case Intrinsic::amdgcn_fmul_legacy:
      NegateEitherOperand(MatchInfo->getOperand(2), MatchInfo->getOperand(3));
      break;
    case Intrinsic::amdgcn_fmed3:
      NegateOperand(MatchInfo->getOperand(2));
      NegateOperand(MatchInfo->getOperand(3));
      NegateOperand(MatchInfo->getOperand(4));
      break;
    case Intrinsic::amdgcn_fma_legacy:
      NegateEitherOperand(MatchInfo->getOperand(2), MatchInfo->getOperand(3));
      NegateOperand(MatchInfo->getOperand(4));
      break;
    default:
      llvm_unreachable("folding fneg not supported for this intrinsic");
    }
    break;
  }
  default:
    llvm_unreachable("folding fneg not supported for this instruction");
  }

  Register Dst = MI.getOperand(0).getReg();
  Register MatchInfoDst = MatchInfo->getOperand(0).getReg();

  if (MRI.hasOneNonDBGUse(MatchInfoDst)) {
    // MatchInfo now produces the negated value, so use it instead of the old
    // Dst.
    replaceRegWith(MRI, Dst, MatchInfoDst);
  } else {
    // We want to swap all uses of Dst with uses of MatchInfoDst and vice
    // versa, but replaceRegWith would replace defs as well. It is easier to
    // replace one def with a new register.
    LLT Type = MRI.getType(Dst);
    Register NegatedMatchInfo = MRI.createGenericVirtualRegister(Type);
    replaceRegOpWith(MRI, MatchInfo->getOperand(0), NegatedMatchInfo);

    // MatchInfo now produces the negated value, so use it instead of the old
    // Dst.
    replaceRegWith(MRI, Dst, NegatedMatchInfo);

    // Recreate the non-negated value for the other uses of the old
    // MatchInfoDst.
    auto NextInst = ++MatchInfo->getIterator();
    Builder.setInstrAndDebugLoc(*NextInst);
    Builder.buildFNeg(MatchInfoDst, NegatedMatchInfo, MI.getFlags());
  }

  MI.eraseFromParent();
}

// TODO: Should return converted value / extension source and avoid introducing
// intermediate fptruncs in the apply function.
static bool isFPExtFromF16OrConst(const MachineRegisterInfo &MRI,
                                  Register Reg) {
  const MachineInstr *Def = MRI.getVRegDef(Reg);
  if (Def->getOpcode() == TargetOpcode::G_FPEXT) {
    Register SrcReg = Def->getOperand(1).getReg();
    return MRI.getType(SrcReg) == LLT::scalar(16);
  }

  if (Def->getOpcode() == TargetOpcode::G_FCONSTANT) {
    APFloat Val = Def->getOperand(1).getFPImm()->getValueAPF();
    bool LosesInfo = true;
    Val.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &LosesInfo);
    return !LosesInfo;
  }

  return false;
}

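// \p MI is a G_FPTRUNC to f16 of an f32 value; Src0-2 are the operands feeding
// the promoted fmed3. Expanding in f16 is only sound if the f32 value has a
// single use and every source is either extended from f16 or exactly
// representable in f16.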
bool AMDGPUCombinerHelper::matchExpandPromotedF16FMed3(MachineInstr &MI,
                                                       Register Src0,
                                                       Register Src1,
                                                       Register Src2) {
  assert(MI.getOpcode() == TargetOpcode::G_FPTRUNC);
  Register SrcReg = MI.getOperand(1).getReg();
  if (!MRI.hasOneNonDBGUse(SrcReg) || MRI.getType(SrcReg) != LLT::scalar(32))
    return false;

  return isFPExtFromF16OrConst(MRI, Src0) && isFPExtFromF16OrConst(MRI, Src1) &&
         isFPExtFromF16OrConst(MRI, Src2);
}

void AMDGPUCombinerHelper::applyExpandPromotedF16FMed3(MachineInstr &MI,
                                                       Register Src0,
                                                       Register Src1,
                                                       Register Src2) {
  // We expect fptrunc (fpext x) to fold out, and constant sources to be
  // constant folded.
  Src0 = Builder.buildFPTrunc(LLT::scalar(16), Src0).getReg(0);
  Src1 = Builder.buildFPTrunc(LLT::scalar(16), Src1).getReg(0);
  Src2 = Builder.buildFPTrunc(LLT::scalar(16), Src2).getReg(0);

  LLT Ty = MRI.getType(Src0);
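  // Expand the median of three as min/max ops:
  //   med3(a, b, c) = min(max(a, b), max(min(a, b), c))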
  auto A1 = Builder.buildFMinNumIEEE(Ty, Src0, Src1);
  auto B1 = Builder.buildFMaxNumIEEE(Ty, Src0, Src1);
  auto C1 = Builder.buildFMaxNumIEEE(Ty, A1, Src2);
  Builder.buildFMinNumIEEE(MI.getOperand(0), B1, C1);
  MI.eraseFromParent();
}