//=== lib/CodeGen/GlobalISel/AMDGPUCombinerHelper.cpp ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPUCombinerHelper.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;
using namespace MIPatternMatch;
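// Returns true if an fneg of this instruction's result can be folded into the
// instruction itself, either through source modifiers on its inputs or by
// switching to the opposite min/max opcode.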
LLVM_READNONE
static bool fnegFoldsIntoMI(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  case AMDGPU::G_FADD:
  case AMDGPU::G_FSUB:
  case AMDGPU::G_FMUL:
  case AMDGPU::G_FMA:
  case AMDGPU::G_FMAD:
  case AMDGPU::G_FMINNUM:
  case AMDGPU::G_FMAXNUM:
  case AMDGPU::G_FMINNUM_IEEE:
  case AMDGPU::G_FMAXNUM_IEEE:
  case AMDGPU::G_FSIN:
  case AMDGPU::G_FPEXT:
  case AMDGPU::G_INTRINSIC_TRUNC:
  case AMDGPU::G_FPTRUNC:
  case AMDGPU::G_FRINT:
  case AMDGPU::G_FNEARBYINT:
  case AMDGPU::G_INTRINSIC_ROUND:
  case AMDGPU::G_INTRINSIC_ROUNDEVEN:
  case AMDGPU::G_FCANONICALIZE:
  case AMDGPU::G_AMDGPU_RCP_IFLAG:
  case AMDGPU::G_AMDGPU_FMIN_LEGACY:
  case AMDGPU::G_AMDGPU_FMAX_LEGACY:
    return true;
  case AMDGPU::G_INTRINSIC: {
    unsigned IntrinsicID = MI.getIntrinsicID();
    switch (IntrinsicID) {
    case Intrinsic::amdgcn_rcp:
    case Intrinsic::amdgcn_rcp_legacy:
    case Intrinsic::amdgcn_sin:
    case Intrinsic::amdgcn_fmul_legacy:
    case Intrinsic::amdgcn_fmed3:
    case Intrinsic::amdgcn_fma_legacy:
      return true;
    default:
      return false;
    }
  }
  default:
    return false;
  }
}

/// \returns true if the operation will definitely need to use a 64-bit
/// encoding, and thus will use a VOP3 encoding regardless of the source
/// modifiers.
LLVM_READONLY
static bool opMustUseVOP3Encoding(const MachineInstr &MI,
                                  const MachineRegisterInfo &MRI) {
  return MI.getNumOperands() >
             (MI.getOpcode() == AMDGPU::G_INTRINSIC ? 4u : 3u) ||
         MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits() == 64;
}

// Most FP instructions support source modifiers.
LLVM_READONLY
static bool hasSourceMods(const MachineInstr &MI) {
  if (!MI.memoperands().empty())
    return false;

  switch (MI.getOpcode()) {
  case AMDGPU::COPY:
  case AMDGPU::G_SELECT:
  case AMDGPU::G_FDIV:
  case AMDGPU::G_FREM:
  case TargetOpcode::INLINEASM:
  case TargetOpcode::INLINEASM_BR:
  case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
  case AMDGPU::G_BITCAST:
  case AMDGPU::G_ANYEXT:
  case AMDGPU::G_BUILD_VECTOR:
  case AMDGPU::G_BUILD_VECTOR_TRUNC:
  case AMDGPU::G_PHI:
    return false;
  case AMDGPU::G_INTRINSIC: {
    unsigned IntrinsicID = MI.getIntrinsicID();
    switch (IntrinsicID) {
    case Intrinsic::amdgcn_interp_p1:
    case Intrinsic::amdgcn_interp_p2:
    case Intrinsic::amdgcn_interp_mov:
    case Intrinsic::amdgcn_interp_p1_f16:
    case Intrinsic::amdgcn_interp_p2_f16:
    case Intrinsic::amdgcn_div_scale:
      return false;
    default:
      return true;
    }
  }
  default:
    return true;
  }
}

static bool allUsesHaveSourceMods(MachineInstr &MI, MachineRegisterInfo &MRI,
                                  unsigned CostThreshold = 4) {
  // Some users (such as 3-operand FMA/MAD) must use a VOP3 encoding, and thus
  // it is truly free to use a source modifier in all cases. If there are
  // multiple users and using a source modifier would force each of them into
  // VOP3, there will be a code size increase. Try to avoid increasing code
  // size unless we know it will save on the instruction count.
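  // Count the users that do not already require VOP3; each of these would grow
  // to the 64-bit encoding if it had to carry a source modifier.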
  unsigned NumMayIncreaseSize = 0;
  Register Dst = MI.getOperand(0).getReg();
  for (const MachineInstr &Use : MRI.use_nodbg_instructions(Dst)) {
    if (!hasSourceMods(Use))
      return false;

    if (!opMustUseVOP3Encoding(Use, MRI)) {
      if (++NumMayIncreaseSize > CostThreshold)
        return false;
    }
  }
  return true;
}

static bool mayIgnoreSignedZero(MachineInstr &MI) {
  const TargetOptions &Options = MI.getMF()->getTarget().Options;
  return Options.NoSignedZerosFPMath || MI.getFlag(MachineInstr::MIFlag::FmNsz);
}

static bool isInv2Pi(const APFloat &APF) {
  static const APFloat KF16(APFloat::IEEEhalf(), APInt(16, 0x3118));
  static const APFloat KF32(APFloat::IEEEsingle(), APInt(32, 0x3e22f983));
  static const APFloat KF64(APFloat::IEEEdouble(),
                            APInt(64, 0x3fc45f306dc9c882));

  return APF.bitwiseIsEqual(KF16) || APF.bitwiseIsEqual(KF32) ||
         APF.bitwiseIsEqual(KF64);
}

// Negated +0.0 and 1.0 / (2.0 * pi) do not have inline immediates, so there is
// an additional cost to negate these constants.
static bool isConstantCostlierToNegate(MachineInstr &MI, Register Reg,
                                       MachineRegisterInfo &MRI) {
  Optional<FPValueAndVReg> FPValReg;
  if (mi_match(Reg, MRI, m_GFCstOrSplat(FPValReg))) {
    if (FPValReg->Value.isZero() && !FPValReg->Value.isNegative())
      return true;

    const GCNSubtarget &ST = MI.getMF()->getSubtarget<GCNSubtarget>();
    if (ST.hasInv2PiInlineImm() && isInv2Pi(FPValReg->Value))
      return true;
  }
  return false;
}

static unsigned inverseMinMax(unsigned Opc) {
  switch (Opc) {
  case AMDGPU::G_FMAXNUM:
    return AMDGPU::G_FMINNUM;
  case AMDGPU::G_FMINNUM:
    return AMDGPU::G_FMAXNUM;
  case AMDGPU::G_FMAXNUM_IEEE:
    return AMDGPU::G_FMINNUM_IEEE;
  case AMDGPU::G_FMINNUM_IEEE:
    return AMDGPU::G_FMAXNUM_IEEE;
  case AMDGPU::G_AMDGPU_FMAX_LEGACY:
    return AMDGPU::G_AMDGPU_FMIN_LEGACY;
  case AMDGPU::G_AMDGPU_FMIN_LEGACY:
    return AMDGPU::G_AMDGPU_FMAX_LEGACY;
  default:
    llvm_unreachable("invalid min/max opcode");
  }
}

bool AMDGPUCombinerHelper::matchFoldableFneg(MachineInstr &MI,
                                             MachineInstr *&MatchInfo) {
  Register Src = MI.getOperand(1).getReg();
  MatchInfo = MRI.getVRegDef(Src);

  // If the input has multiple uses and we can either fold the negate down, or
  // the other uses cannot, give up. This both prevents unprofitable
  // transformations and infinite loops: we won't repeatedly try to fold around
  // a negate that has no 'good' form.
  if (MRI.hasOneNonDBGUse(Src)) {
    if (allUsesHaveSourceMods(MI, MRI, 0))
      return false;
  } else {
    if (fnegFoldsIntoMI(*MatchInfo) &&
        (allUsesHaveSourceMods(MI, MRI) ||
         !allUsesHaveSourceMods(*MatchInfo, MRI)))
      return false;
  }

  switch (MatchInfo->getOpcode()) {
  case AMDGPU::G_FMINNUM:
  case AMDGPU::G_FMAXNUM:
  case AMDGPU::G_FMINNUM_IEEE:
  case AMDGPU::G_FMAXNUM_IEEE:
  case AMDGPU::G_AMDGPU_FMIN_LEGACY:
  case AMDGPU::G_AMDGPU_FMAX_LEGACY:
    // 0 doesn't have a negated inline immediate.
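    // (Neither does 1/(2*pi) on subtargets where the positive constant is an
    // inline immediate.)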
    return !isConstantCostlierToNegate(*MatchInfo,
                                       MatchInfo->getOperand(2).getReg(), MRI);
  case AMDGPU::G_FADD:
  case AMDGPU::G_FSUB:
  case AMDGPU::G_FMA:
  case AMDGPU::G_FMAD:
    return mayIgnoreSignedZero(*MatchInfo);
  case AMDGPU::G_FMUL:
  case AMDGPU::G_FPEXT:
  case AMDGPU::G_INTRINSIC_TRUNC:
  case AMDGPU::G_FPTRUNC:
  case AMDGPU::G_FRINT:
  case AMDGPU::G_FNEARBYINT:
  case AMDGPU::G_INTRINSIC_ROUND:
  case AMDGPU::G_INTRINSIC_ROUNDEVEN:
  case AMDGPU::G_FSIN:
  case AMDGPU::G_FCANONICALIZE:
  case AMDGPU::G_AMDGPU_RCP_IFLAG:
    return true;
  case AMDGPU::G_INTRINSIC: {
    unsigned IntrinsicID = MatchInfo->getIntrinsicID();
    switch (IntrinsicID) {
    case Intrinsic::amdgcn_rcp:
    case Intrinsic::amdgcn_rcp_legacy:
    case Intrinsic::amdgcn_sin:
    case Intrinsic::amdgcn_fmul_legacy:
    case Intrinsic::amdgcn_fmed3:
      return true;
    case Intrinsic::amdgcn_fma_legacy:
      return mayIgnoreSignedZero(*MatchInfo);
    default:
      return false;
    }
  }
  default:
    return false;
  }
}

void AMDGPUCombinerHelper::applyFoldableFneg(MachineInstr &MI,
                                             MachineInstr *&MatchInfo) {
  // Transform:
  // %A = inst %Op1, ...
  // %B = fneg %A
  //
  // into:
  //
  // (if %A has one use, specifically fneg above)
  // %B = inst (maybe fneg %Op1), ...
  //
  // (if %A has multiple uses)
  // %B = inst (maybe fneg %Op1), ...
  // %A = fneg %B

  // Replace the register in Op with a register holding the negated value.
  auto NegateOperand = [&](MachineOperand &Op) {
    Register Reg = Op.getReg();
    if (!mi_match(Reg, MRI, m_GFNeg(m_Reg(Reg))))
      Reg = Builder.buildFNeg(MRI.getType(Reg), Reg).getReg(0);
    replaceRegOpWith(MRI, Op, Reg);
  };

  // Replace either X's or Y's register with a register holding the negated
  // value.
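  // Prefer an operand that is already an fneg so the existing negation can be
  // stripped; otherwise a new fneg is built for Y. Negating a single operand
  // is sufficient to negate the product.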
  auto NegateEitherOperand = [&](MachineOperand &X, MachineOperand &Y) {
    Register XReg = X.getReg();
    Register YReg = Y.getReg();
    if (mi_match(XReg, MRI, m_GFNeg(m_Reg(XReg))))
      replaceRegOpWith(MRI, X, XReg);
    else if (mi_match(YReg, MRI, m_GFNeg(m_Reg(YReg))))
      replaceRegOpWith(MRI, Y, YReg);
    else {
      YReg = Builder.buildFNeg(MRI.getType(YReg), YReg).getReg(0);
      replaceRegOpWith(MRI, Y, YReg);
    }
  };

  Builder.setInstrAndDebugLoc(*MatchInfo);

  // Negate the appropriate operands so that the resulting value of MatchInfo
  // is negated.
  switch (MatchInfo->getOpcode()) {
  case AMDGPU::G_FADD:
  case AMDGPU::G_FSUB:
    NegateOperand(MatchInfo->getOperand(1));
    NegateOperand(MatchInfo->getOperand(2));
    break;
  case AMDGPU::G_FMUL:
    NegateEitherOperand(MatchInfo->getOperand(1), MatchInfo->getOperand(2));
    break;
  case AMDGPU::G_FMINNUM:
  case AMDGPU::G_FMAXNUM:
  case AMDGPU::G_FMINNUM_IEEE:
  case AMDGPU::G_FMAXNUM_IEEE:
  case AMDGPU::G_AMDGPU_FMIN_LEGACY:
  case AMDGPU::G_AMDGPU_FMAX_LEGACY: {
    NegateOperand(MatchInfo->getOperand(1));
    NegateOperand(MatchInfo->getOperand(2));
    unsigned Opposite = inverseMinMax(MatchInfo->getOpcode());
    replaceOpcodeWith(*MatchInfo, Opposite);
    break;
  }
  case AMDGPU::G_FMA:
  case AMDGPU::G_FMAD:
    NegateEitherOperand(MatchInfo->getOperand(1), MatchInfo->getOperand(2));
    NegateOperand(MatchInfo->getOperand(3));
    break;
  case AMDGPU::G_FPEXT:
  case AMDGPU::G_INTRINSIC_TRUNC:
  case AMDGPU::G_FRINT:
  case AMDGPU::G_FNEARBYINT:
  case AMDGPU::G_INTRINSIC_ROUND:
  case AMDGPU::G_INTRINSIC_ROUNDEVEN:
  case AMDGPU::G_FSIN:
  case AMDGPU::G_FCANONICALIZE:
  case AMDGPU::G_AMDGPU_RCP_IFLAG:
  case AMDGPU::G_FPTRUNC:
    NegateOperand(MatchInfo->getOperand(1));
    break;
  case AMDGPU::G_INTRINSIC: {
    unsigned IntrinsicID = MatchInfo->getIntrinsicID();
    switch (IntrinsicID) {
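    // For G_INTRINSIC, operand 1 is the intrinsic ID, so value operands start
    // at index 2.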
    case Intrinsic::amdgcn_rcp:
    case Intrinsic::amdgcn_rcp_legacy:
    case Intrinsic::amdgcn_sin:
      NegateOperand(MatchInfo->getOperand(2));
      break;
    case Intrinsic::amdgcn_fmul_legacy:
      NegateEitherOperand(MatchInfo->getOperand(2), MatchInfo->getOperand(3));
      break;
    case Intrinsic::amdgcn_fmed3:
      NegateOperand(MatchInfo->getOperand(2));
      NegateOperand(MatchInfo->getOperand(3));
      NegateOperand(MatchInfo->getOperand(4));
      break;
    case Intrinsic::amdgcn_fma_legacy:
      NegateEitherOperand(MatchInfo->getOperand(2), MatchInfo->getOperand(3));
      NegateOperand(MatchInfo->getOperand(4));
      break;
    default:
      llvm_unreachable("folding fneg not supported for this intrinsic");
    }
    break;
  }
  default:
    llvm_unreachable("folding fneg not supported for this instruction");
  }

  Register Dst = MI.getOperand(0).getReg();
  Register MatchInfoDst = MatchInfo->getOperand(0).getReg();

  if (MRI.hasOneNonDBGUse(MatchInfoDst)) {
    // MatchInfo now has the negated value, so use that instead of the old Dst.
    replaceRegWith(MRI, Dst, MatchInfoDst);
  } else {
    // We want to swap all uses of Dst with uses of MatchInfoDst and vice versa
    // but replaceRegWith will replace defs as well. It is easier to replace
    // one def with a new register.
    LLT Type = MRI.getType(Dst);
    Register NegatedMatchInfo = MRI.createGenericVirtualRegister(Type);
    replaceRegOpWith(MRI, MatchInfo->getOperand(0), NegatedMatchInfo);

    // MatchInfo now has the negated value, so use that instead of the old Dst.
    replaceRegWith(MRI, Dst, NegatedMatchInfo);

    // Recreate the non-negated value for the other uses of the old
    // MatchInfoDst.
    Builder.setInstrAndDebugLoc(MI);
    Builder.buildFNeg(MatchInfoDst, NegatedMatchInfo, MI.getFlags());
  }

  MI.eraseFromParent();
  return;
}