//=== lib/CodeGen/GlobalISel/AMDGPUPreLegalizerCombiner.cpp ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass does combining of machine instructions at the generic MI level,
// before the legalizer.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUCombinerHelper.h"
#include "AMDGPULegalizerInfo.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/Target/TargetMachine.h"

#define GET_GICOMBINER_DEPS
3106c3fb27SDimitry Andric #include "AMDGPUGenPreLegalizeGICombiner.inc" 3206c3fb27SDimitry Andric #undef GET_GICOMBINER_DEPS 3306c3fb27SDimitry Andric 345ffd83dbSDimitry Andric #define DEBUG_TYPE "amdgpu-prelegalizer-combiner" 355ffd83dbSDimitry Andric 365ffd83dbSDimitry Andric using namespace llvm; 375ffd83dbSDimitry Andric using namespace MIPatternMatch; 3806c3fb27SDimitry Andric namespace { 395ffd83dbSDimitry Andric 4006c3fb27SDimitry Andric #define GET_GICOMBINER_TYPES 4106c3fb27SDimitry Andric #include "AMDGPUGenPreLegalizeGICombiner.inc" 4206c3fb27SDimitry Andric #undef GET_GICOMBINER_TYPES 4306c3fb27SDimitry Andric 44*5f757f3fSDimitry Andric class AMDGPUPreLegalizerCombinerImpl : public Combiner { 45fe6060f1SDimitry Andric protected: 4606c3fb27SDimitry Andric const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig; 4706c3fb27SDimitry Andric const GCNSubtarget &STI; 48*5f757f3fSDimitry Andric // TODO: Make CombinerHelper methods const. 49*5f757f3fSDimitry Andric mutable AMDGPUCombinerHelper Helper; 50fe6060f1SDimitry Andric 51fe6060f1SDimitry Andric public: 5206c3fb27SDimitry Andric AMDGPUPreLegalizerCombinerImpl( 53*5f757f3fSDimitry Andric MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC, 54*5f757f3fSDimitry Andric GISelKnownBits &KB, GISelCSEInfo *CSEInfo, 5506c3fb27SDimitry Andric const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig, 56*5f757f3fSDimitry Andric const GCNSubtarget &STI, MachineDominatorTree *MDT, 57*5f757f3fSDimitry Andric const LegalizerInfo *LI); 5806c3fb27SDimitry Andric 5906c3fb27SDimitry Andric static const char *getName() { return "AMDGPUPreLegalizerCombinerImpl"; } 6006c3fb27SDimitry Andric 61*5f757f3fSDimitry Andric bool tryCombineAllImpl(MachineInstr &MI) const; 62*5f757f3fSDimitry Andric bool tryCombineAll(MachineInstr &I) const override; 63fe6060f1SDimitry Andric 64fe6060f1SDimitry Andric struct ClampI64ToI16MatchInfo { 65fe6060f1SDimitry Andric int64_t Cmp1 = 0; 66fe6060f1SDimitry Andric int64_t Cmp2 
= 0; 67fe6060f1SDimitry Andric Register Origin; 68fe6060f1SDimitry Andric }; 69fe6060f1SDimitry Andric 7006c3fb27SDimitry Andric bool matchClampI64ToI16(MachineInstr &MI, const MachineRegisterInfo &MRI, 7106c3fb27SDimitry Andric const MachineFunction &MF, 7206c3fb27SDimitry Andric ClampI64ToI16MatchInfo &MatchInfo) const; 73fe6060f1SDimitry Andric 74fe6060f1SDimitry Andric void applyClampI64ToI16(MachineInstr &MI, 7506c3fb27SDimitry Andric const ClampI64ToI16MatchInfo &MatchInfo) const; 7606c3fb27SDimitry Andric 7706c3fb27SDimitry Andric private: 7806c3fb27SDimitry Andric #define GET_GICOMBINER_CLASS_MEMBERS 7906c3fb27SDimitry Andric #define AMDGPUSubtarget GCNSubtarget 8006c3fb27SDimitry Andric #include "AMDGPUGenPreLegalizeGICombiner.inc" 8106c3fb27SDimitry Andric #undef GET_GICOMBINER_CLASS_MEMBERS 8206c3fb27SDimitry Andric #undef AMDGPUSubtarget 83fe6060f1SDimitry Andric }; 84fe6060f1SDimitry Andric 8506c3fb27SDimitry Andric #define GET_GICOMBINER_IMPL 8606c3fb27SDimitry Andric #define AMDGPUSubtarget GCNSubtarget 8706c3fb27SDimitry Andric #include "AMDGPUGenPreLegalizeGICombiner.inc" 8806c3fb27SDimitry Andric #undef AMDGPUSubtarget 8906c3fb27SDimitry Andric #undef GET_GICOMBINER_IMPL 9006c3fb27SDimitry Andric 9106c3fb27SDimitry Andric AMDGPUPreLegalizerCombinerImpl::AMDGPUPreLegalizerCombinerImpl( 92*5f757f3fSDimitry Andric MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC, 93*5f757f3fSDimitry Andric GISelKnownBits &KB, GISelCSEInfo *CSEInfo, 9406c3fb27SDimitry Andric const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig, 95*5f757f3fSDimitry Andric const GCNSubtarget &STI, MachineDominatorTree *MDT, const LegalizerInfo *LI) 96*5f757f3fSDimitry Andric : Combiner(MF, CInfo, TPC, &KB, CSEInfo), RuleConfig(RuleConfig), STI(STI), 97*5f757f3fSDimitry Andric Helper(Observer, B, /*IsPreLegalize*/ true, &KB, MDT, LI), 9806c3fb27SDimitry Andric #define GET_GICOMBINER_CONSTRUCTOR_INITS 9906c3fb27SDimitry Andric #include 
"AMDGPUGenPreLegalizeGICombiner.inc" 10006c3fb27SDimitry Andric #undef GET_GICOMBINER_CONSTRUCTOR_INITS 10106c3fb27SDimitry Andric { 10206c3fb27SDimitry Andric } 10306c3fb27SDimitry Andric 104*5f757f3fSDimitry Andric bool AMDGPUPreLegalizerCombinerImpl::tryCombineAll(MachineInstr &MI) const { 105*5f757f3fSDimitry Andric if (tryCombineAllImpl(MI)) 106*5f757f3fSDimitry Andric return true; 107*5f757f3fSDimitry Andric 108*5f757f3fSDimitry Andric switch (MI.getOpcode()) { 109*5f757f3fSDimitry Andric case TargetOpcode::G_CONCAT_VECTORS: 110*5f757f3fSDimitry Andric return Helper.tryCombineConcatVectors(MI); 111*5f757f3fSDimitry Andric case TargetOpcode::G_SHUFFLE_VECTOR: 112*5f757f3fSDimitry Andric return Helper.tryCombineShuffleVector(MI); 113*5f757f3fSDimitry Andric } 114*5f757f3fSDimitry Andric 115*5f757f3fSDimitry Andric return false; 116*5f757f3fSDimitry Andric } 117*5f757f3fSDimitry Andric 11806c3fb27SDimitry Andric bool AMDGPUPreLegalizerCombinerImpl::matchClampI64ToI16( 11906c3fb27SDimitry Andric MachineInstr &MI, const MachineRegisterInfo &MRI, const MachineFunction &MF, 12006c3fb27SDimitry Andric ClampI64ToI16MatchInfo &MatchInfo) const { 121fe6060f1SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Invalid instruction!"); 122fe6060f1SDimitry Andric 123fe6060f1SDimitry Andric // Try to find a pattern where an i64 value should get clamped to short. 
124fe6060f1SDimitry Andric const LLT SrcType = MRI.getType(MI.getOperand(1).getReg()); 125fe6060f1SDimitry Andric if (SrcType != LLT::scalar(64)) 126fe6060f1SDimitry Andric return false; 127fe6060f1SDimitry Andric 128fe6060f1SDimitry Andric const LLT DstType = MRI.getType(MI.getOperand(0).getReg()); 129fe6060f1SDimitry Andric if (DstType != LLT::scalar(16)) 130fe6060f1SDimitry Andric return false; 131fe6060f1SDimitry Andric 132fe6060f1SDimitry Andric Register Base; 133fe6060f1SDimitry Andric 134fe6060f1SDimitry Andric auto IsApplicableForCombine = [&MatchInfo]() -> bool { 135fe6060f1SDimitry Andric const auto Cmp1 = MatchInfo.Cmp1; 136fe6060f1SDimitry Andric const auto Cmp2 = MatchInfo.Cmp2; 137fe6060f1SDimitry Andric const auto Diff = std::abs(Cmp2 - Cmp1); 138fe6060f1SDimitry Andric 139fe6060f1SDimitry Andric // If the difference between both comparison values is 0 or 1, there is no 140fe6060f1SDimitry Andric // need to clamp. 141fe6060f1SDimitry Andric if (Diff == 0 || Diff == 1) 142fe6060f1SDimitry Andric return false; 143fe6060f1SDimitry Andric 144fe6060f1SDimitry Andric const int64_t Min = std::numeric_limits<int16_t>::min(); 145fe6060f1SDimitry Andric const int64_t Max = std::numeric_limits<int16_t>::max(); 146fe6060f1SDimitry Andric 147fe6060f1SDimitry Andric // Check if the comparison values are between SHORT_MIN and SHORT_MAX. 148fe6060f1SDimitry Andric return ((Cmp2 >= Cmp1 && Cmp1 >= Min && Cmp2 <= Max) || 149fe6060f1SDimitry Andric (Cmp1 >= Cmp2 && Cmp1 <= Max && Cmp2 >= Min)); 150fe6060f1SDimitry Andric }; 151fe6060f1SDimitry Andric 152fe6060f1SDimitry Andric // Try to match a combination of min / max MIR opcodes. 
153fe6060f1SDimitry Andric if (mi_match(MI.getOperand(1).getReg(), MRI, 154fe6060f1SDimitry Andric m_GSMin(m_Reg(Base), m_ICst(MatchInfo.Cmp1)))) { 155fe6060f1SDimitry Andric if (mi_match(Base, MRI, 156fe6060f1SDimitry Andric m_GSMax(m_Reg(MatchInfo.Origin), m_ICst(MatchInfo.Cmp2)))) { 157fe6060f1SDimitry Andric return IsApplicableForCombine(); 158fe6060f1SDimitry Andric } 159fe6060f1SDimitry Andric } 160fe6060f1SDimitry Andric 161fe6060f1SDimitry Andric if (mi_match(MI.getOperand(1).getReg(), MRI, 162fe6060f1SDimitry Andric m_GSMax(m_Reg(Base), m_ICst(MatchInfo.Cmp1)))) { 163fe6060f1SDimitry Andric if (mi_match(Base, MRI, 164fe6060f1SDimitry Andric m_GSMin(m_Reg(MatchInfo.Origin), m_ICst(MatchInfo.Cmp2)))) { 165fe6060f1SDimitry Andric return IsApplicableForCombine(); 166fe6060f1SDimitry Andric } 167fe6060f1SDimitry Andric } 168fe6060f1SDimitry Andric 169fe6060f1SDimitry Andric return false; 170fe6060f1SDimitry Andric } 171fe6060f1SDimitry Andric 172fe6060f1SDimitry Andric // We want to find a combination of instructions that 173fe6060f1SDimitry Andric // gets generated when an i64 gets clamped to i16. 174fe6060f1SDimitry Andric // The corresponding pattern is: 175fe6060f1SDimitry Andric // G_MAX / G_MAX for i16 <= G_TRUNC i64. 
176fe6060f1SDimitry Andric // This can be efficiently written as following: 177fe6060f1SDimitry Andric // v_cvt_pk_i16_i32 v0, v0, v1 178fe6060f1SDimitry Andric // v_med3_i32 v0, Clamp_Min, v0, Clamp_Max 17906c3fb27SDimitry Andric void AMDGPUPreLegalizerCombinerImpl::applyClampI64ToI16( 18006c3fb27SDimitry Andric MachineInstr &MI, const ClampI64ToI16MatchInfo &MatchInfo) const { 181fe6060f1SDimitry Andric 182fe6060f1SDimitry Andric Register Src = MatchInfo.Origin; 183fe6060f1SDimitry Andric assert(MI.getParent()->getParent()->getRegInfo().getType(Src) == 184fe6060f1SDimitry Andric LLT::scalar(64)); 185fe6060f1SDimitry Andric const LLT S32 = LLT::scalar(32); 186fe6060f1SDimitry Andric 187fe6060f1SDimitry Andric B.setInstrAndDebugLoc(MI); 188fe6060f1SDimitry Andric 189fe6060f1SDimitry Andric auto Unmerge = B.buildUnmerge(S32, Src); 190fe6060f1SDimitry Andric 191fe6060f1SDimitry Andric assert(MI.getOpcode() != AMDGPU::G_AMDGPU_CVT_PK_I16_I32); 192fe6060f1SDimitry Andric 193fe6060f1SDimitry Andric const LLT V2S16 = LLT::fixed_vector(2, 16); 194fe6060f1SDimitry Andric auto CvtPk = 195fe6060f1SDimitry Andric B.buildInstr(AMDGPU::G_AMDGPU_CVT_PK_I16_I32, {V2S16}, 196fe6060f1SDimitry Andric {Unmerge.getReg(0), Unmerge.getReg(1)}, MI.getFlags()); 197fe6060f1SDimitry Andric 198fe6060f1SDimitry Andric auto MinBoundary = std::min(MatchInfo.Cmp1, MatchInfo.Cmp2); 199fe6060f1SDimitry Andric auto MaxBoundary = std::max(MatchInfo.Cmp1, MatchInfo.Cmp2); 200fe6060f1SDimitry Andric auto MinBoundaryDst = B.buildConstant(S32, MinBoundary); 201fe6060f1SDimitry Andric auto MaxBoundaryDst = B.buildConstant(S32, MaxBoundary); 202fe6060f1SDimitry Andric 203fe6060f1SDimitry Andric auto Bitcast = B.buildBitcast({S32}, CvtPk); 204fe6060f1SDimitry Andric 205fe6060f1SDimitry Andric auto Med3 = B.buildInstr( 206fe6060f1SDimitry Andric AMDGPU::G_AMDGPU_SMED3, {S32}, 207fe6060f1SDimitry Andric {MinBoundaryDst.getReg(0), Bitcast.getReg(0), MaxBoundaryDst.getReg(0)}, 208fe6060f1SDimitry Andric 
MI.getFlags()); 209fe6060f1SDimitry Andric 210fe6060f1SDimitry Andric B.buildTrunc(MI.getOperand(0).getReg(), Med3); 211fe6060f1SDimitry Andric 212fe6060f1SDimitry Andric MI.eraseFromParent(); 213fe6060f1SDimitry Andric } 214fe6060f1SDimitry Andric 2155ffd83dbSDimitry Andric // Pass boilerplate 2165ffd83dbSDimitry Andric // ================ 2175ffd83dbSDimitry Andric 2185ffd83dbSDimitry Andric class AMDGPUPreLegalizerCombiner : public MachineFunctionPass { 2195ffd83dbSDimitry Andric public: 2205ffd83dbSDimitry Andric static char ID; 2215ffd83dbSDimitry Andric 2225ffd83dbSDimitry Andric AMDGPUPreLegalizerCombiner(bool IsOptNone = false); 2235ffd83dbSDimitry Andric 2245ffd83dbSDimitry Andric StringRef getPassName() const override { 2255ffd83dbSDimitry Andric return "AMDGPUPreLegalizerCombiner"; 2265ffd83dbSDimitry Andric } 2275ffd83dbSDimitry Andric 2285ffd83dbSDimitry Andric bool runOnMachineFunction(MachineFunction &MF) override; 2295ffd83dbSDimitry Andric 2305ffd83dbSDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override; 23106c3fb27SDimitry Andric 2325ffd83dbSDimitry Andric private: 2335ffd83dbSDimitry Andric bool IsOptNone; 234*5f757f3fSDimitry Andric AMDGPUPreLegalizerCombinerImplRuleConfig RuleConfig; 2355ffd83dbSDimitry Andric }; 2365ffd83dbSDimitry Andric } // end anonymous namespace 2375ffd83dbSDimitry Andric 2385ffd83dbSDimitry Andric void AMDGPUPreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const { 2395ffd83dbSDimitry Andric AU.addRequired<TargetPassConfig>(); 2405ffd83dbSDimitry Andric AU.setPreservesCFG(); 2415ffd83dbSDimitry Andric getSelectionDAGFallbackAnalysisUsage(AU); 2425ffd83dbSDimitry Andric AU.addRequired<GISelKnownBitsAnalysis>(); 2435ffd83dbSDimitry Andric AU.addPreserved<GISelKnownBitsAnalysis>(); 2445ffd83dbSDimitry Andric if (!IsOptNone) { 2455ffd83dbSDimitry Andric AU.addRequired<MachineDominatorTree>(); 2465ffd83dbSDimitry Andric AU.addPreserved<MachineDominatorTree>(); 2475ffd83dbSDimitry Andric } 
248fe6060f1SDimitry Andric 249fe6060f1SDimitry Andric AU.addRequired<GISelCSEAnalysisWrapperPass>(); 250fe6060f1SDimitry Andric AU.addPreserved<GISelCSEAnalysisWrapperPass>(); 2515ffd83dbSDimitry Andric MachineFunctionPass::getAnalysisUsage(AU); 2525ffd83dbSDimitry Andric } 2535ffd83dbSDimitry Andric 2545ffd83dbSDimitry Andric AMDGPUPreLegalizerCombiner::AMDGPUPreLegalizerCombiner(bool IsOptNone) 2555ffd83dbSDimitry Andric : MachineFunctionPass(ID), IsOptNone(IsOptNone) { 2565ffd83dbSDimitry Andric initializeAMDGPUPreLegalizerCombinerPass(*PassRegistry::getPassRegistry()); 257*5f757f3fSDimitry Andric 258*5f757f3fSDimitry Andric if (!RuleConfig.parseCommandLineOption()) 259*5f757f3fSDimitry Andric report_fatal_error("Invalid rule identifier"); 2605ffd83dbSDimitry Andric } 2615ffd83dbSDimitry Andric 2625ffd83dbSDimitry Andric bool AMDGPUPreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) { 2635ffd83dbSDimitry Andric if (MF.getProperties().hasProperty( 2645ffd83dbSDimitry Andric MachineFunctionProperties::Property::FailedISel)) 2655ffd83dbSDimitry Andric return false; 2665ffd83dbSDimitry Andric auto *TPC = &getAnalysis<TargetPassConfig>(); 2675ffd83dbSDimitry Andric const Function &F = MF.getFunction(); 2685ffd83dbSDimitry Andric bool EnableOpt = 269*5f757f3fSDimitry Andric MF.getTarget().getOptLevel() != CodeGenOptLevel::None && !skipFunction(F); 2705ffd83dbSDimitry Andric GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF); 271*5f757f3fSDimitry Andric 272fe6060f1SDimitry Andric // Enable CSE. 273fe6060f1SDimitry Andric GISelCSEAnalysisWrapper &Wrapper = 274fe6060f1SDimitry Andric getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper(); 275fe6060f1SDimitry Andric auto *CSEInfo = &Wrapper.get(TPC->getCSEConfig()); 276fe6060f1SDimitry Andric 277*5f757f3fSDimitry Andric const GCNSubtarget &STI = MF.getSubtarget<GCNSubtarget>(); 278*5f757f3fSDimitry Andric MachineDominatorTree *MDT = 279*5f757f3fSDimitry Andric IsOptNone ? 
nullptr : &getAnalysis<MachineDominatorTree>(); 280*5f757f3fSDimitry Andric CombinerInfo CInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false, 281*5f757f3fSDimitry Andric nullptr, EnableOpt, F.hasOptSize(), F.hasMinSize()); 282*5f757f3fSDimitry Andric AMDGPUPreLegalizerCombinerImpl Impl(MF, CInfo, TPC, *KB, CSEInfo, RuleConfig, 283*5f757f3fSDimitry Andric STI, MDT, STI.getLegalizerInfo()); 284*5f757f3fSDimitry Andric return Impl.combineMachineInstrs(); 2855ffd83dbSDimitry Andric } 2865ffd83dbSDimitry Andric 2875ffd83dbSDimitry Andric char AMDGPUPreLegalizerCombiner::ID = 0; 2885ffd83dbSDimitry Andric INITIALIZE_PASS_BEGIN(AMDGPUPreLegalizerCombiner, DEBUG_TYPE, 2895ffd83dbSDimitry Andric "Combine AMDGPU machine instrs before legalization", 2905ffd83dbSDimitry Andric false, false) 2915ffd83dbSDimitry Andric INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) 2925ffd83dbSDimitry Andric INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis) 2935ffd83dbSDimitry Andric INITIALIZE_PASS_END(AMDGPUPreLegalizerCombiner, DEBUG_TYPE, 2945ffd83dbSDimitry Andric "Combine AMDGPU machine instrs before legalization", false, 2955ffd83dbSDimitry Andric false) 2965ffd83dbSDimitry Andric 2975ffd83dbSDimitry Andric namespace llvm { 2985ffd83dbSDimitry Andric FunctionPass *createAMDGPUPreLegalizeCombiner(bool IsOptNone) { 2995ffd83dbSDimitry Andric return new AMDGPUPreLegalizerCombiner(IsOptNone); 3005ffd83dbSDimitry Andric } 3015ffd83dbSDimitry Andric } // end namespace llvm 302