//=== lib/CodeGen/GlobalISel/AMDGPUPreLegalizerCombiner.cpp ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass does combining of machine instructions at the generic MI level,
// before the legalizer.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUCombinerHelper.h"
#include "AMDGPULegalizerInfo.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/Target/TargetMachine.h"

#define GET_GICOMBINER_DEPS
#include "AMDGPUGenPreLegalizeGICombiner.inc"
#undef GET_GICOMBINER_DEPS

#define DEBUG_TYPE "amdgpu-prelegalizer-combiner"

using namespace llvm;
using namespace MIPatternMatch;
namespace {

#define GET_GICOMBINER_TYPES
#include "AMDGPUGenPreLegalizeGICombiner.inc"
#undef GET_GICOMBINER_TYPES

class AMDGPUPreLegalizerCombinerImpl : public Combiner {
protected:
  const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig;
  const GCNSubtarget &STI;
  // TODO: Make CombinerHelper methods const.
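  // Helper is mutable because the tryCombine* entry points below are const
  // while CombinerHelper's methods are not (see the TODO above).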
  mutable AMDGPUCombinerHelper Helper;

public:
  AMDGPUPreLegalizerCombinerImpl(
      MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
      GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
      const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig,
      const GCNSubtarget &STI, MachineDominatorTree *MDT,
      const LegalizerInfo *LI);

  static const char *getName() { return "AMDGPUPreLegalizerCombinerImpl"; }

  bool tryCombineAllImpl(MachineInstr &MI) const;
  bool tryCombineAll(MachineInstr &I) const override;

  struct ClampI64ToI16MatchInfo {
    int64_t Cmp1 = 0;
    int64_t Cmp2 = 0;
    Register Origin;
  };

  bool matchClampI64ToI16(MachineInstr &MI, const MachineRegisterInfo &MRI,
                          const MachineFunction &MF,
                          ClampI64ToI16MatchInfo &MatchInfo) const;

  void applyClampI64ToI16(MachineInstr &MI,
                          const ClampI64ToI16MatchInfo &MatchInfo) const;

private:
#define GET_GICOMBINER_CLASS_MEMBERS
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenPreLegalizeGICombiner.inc"
#undef GET_GICOMBINER_CLASS_MEMBERS
#undef AMDGPUSubtarget
};

#define GET_GICOMBINER_IMPL
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenPreLegalizeGICombiner.inc"
#undef AMDGPUSubtarget
#undef GET_GICOMBINER_IMPL

AMDGPUPreLegalizerCombinerImpl::AMDGPUPreLegalizerCombinerImpl(
    MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
    GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
    const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig,
    const GCNSubtarget &STI, MachineDominatorTree *MDT, const LegalizerInfo *LI)
    : Combiner(MF, CInfo, TPC, &KB, CSEInfo), RuleConfig(RuleConfig), STI(STI),
      Helper(Observer, B, /*IsPreLegalize*/ true, &KB, MDT, LI),
#define GET_GICOMBINER_CONSTRUCTOR_INITS
#include "AMDGPUGenPreLegalizeGICombiner.inc"
#undef GET_GICOMBINER_CONSTRUCTOR_INITS
{
}

bool AMDGPUPreLegalizerCombinerImpl::tryCombineAll(MachineInstr &MI) const {
  if (tryCombineAllImpl(MI))
    return true;

  switch (MI.getOpcode()) {
  case TargetOpcode::G_CONCAT_VECTORS:
    return Helper.tryCombineConcatVectors(MI);
  case TargetOpcode::G_SHUFFLE_VECTOR:
    return Helper.tryCombineShuffleVector(MI);
  }

  return false;
}

bool AMDGPUPreLegalizerCombinerImpl::matchClampI64ToI16(
    MachineInstr &MI, const MachineRegisterInfo &MRI, const MachineFunction &MF,
    ClampI64ToI16MatchInfo &MatchInfo) const {
  assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Invalid instruction!");

  // Try to find a pattern where an i64 value should get clamped to short.
  const LLT SrcType = MRI.getType(MI.getOperand(1).getReg());
  if (SrcType != LLT::scalar(64))
    return false;

  const LLT DstType = MRI.getType(MI.getOperand(0).getReg());
  if (DstType != LLT::scalar(16))
    return false;

  Register Base;

  auto IsApplicableForCombine = [&MatchInfo]() -> bool {
    const auto Cmp1 = MatchInfo.Cmp1;
    const auto Cmp2 = MatchInfo.Cmp2;
    const auto Diff = std::abs(Cmp2 - Cmp1);

    // If the difference between both comparison values is 0 or 1, there is no
    // need to clamp.
    if (Diff == 0 || Diff == 1)
      return false;

    const int64_t Min = std::numeric_limits<int16_t>::min();
    const int64_t Max = std::numeric_limits<int16_t>::max();

    // Check if the comparison values are between SHORT_MIN and SHORT_MAX.
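    // Either constant may act as the lower or the upper bound, so accept both
    // orderings.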
    return ((Cmp2 >= Cmp1 && Cmp1 >= Min && Cmp2 <= Max) ||
            (Cmp1 >= Cmp2 && Cmp1 <= Max && Cmp2 >= Min));
  };

  // Try to match a combination of min / max MIR opcodes.
  if (mi_match(MI.getOperand(1).getReg(), MRI,
               m_GSMin(m_Reg(Base), m_ICst(MatchInfo.Cmp1)))) {
    if (mi_match(Base, MRI,
                 m_GSMax(m_Reg(MatchInfo.Origin), m_ICst(MatchInfo.Cmp2)))) {
      return IsApplicableForCombine();
    }
  }

  if (mi_match(MI.getOperand(1).getReg(), MRI,
               m_GSMax(m_Reg(Base), m_ICst(MatchInfo.Cmp1)))) {
    if (mi_match(Base, MRI,
                 m_GSMin(m_Reg(MatchInfo.Origin), m_ICst(MatchInfo.Cmp2)))) {
      return IsApplicableForCombine();
    }
  }

  return false;
}

// We want to find the combination of instructions that
// gets generated when an i64 gets clamped to i16.
// The corresponding pattern is:
// G_SMIN / G_SMAX for i16 <= G_TRUNC i64.
// This can be efficiently written as:
//   v_cvt_pk_i16_i32 v0, v0, v1
//   v_med3_i32 v0, Clamp_Min, v0, Clamp_Max
void AMDGPUPreLegalizerCombinerImpl::applyClampI64ToI16(
    MachineInstr &MI, const ClampI64ToI16MatchInfo &MatchInfo) const {

  Register Src = MatchInfo.Origin;
  assert(MI.getParent()->getParent()->getRegInfo().getType(Src) ==
         LLT::scalar(64));
  const LLT S32 = LLT::scalar(32);

  B.setInstrAndDebugLoc(MI);

  auto Unmerge = B.buildUnmerge(S32, Src);

  assert(MI.getOpcode() != AMDGPU::G_AMDGPU_CVT_PK_I16_I32);

  const LLT V2S16 = LLT::fixed_vector(2, 16);
  auto CvtPk =
      B.buildInstr(AMDGPU::G_AMDGPU_CVT_PK_I16_I32, {V2S16},
                   {Unmerge.getReg(0), Unmerge.getReg(1)}, MI.getFlags());

  auto MinBoundary = std::min(MatchInfo.Cmp1, MatchInfo.Cmp2);
  auto MaxBoundary = std::max(MatchInfo.Cmp1, MatchInfo.Cmp2);
  auto MinBoundaryDst = B.buildConstant(S32, MinBoundary);
  auto MaxBoundaryDst = B.buildConstant(S32, MaxBoundary);

  auto Bitcast = B.buildBitcast({S32}, CvtPk);

  auto Med3 = B.buildInstr(
      AMDGPU::G_AMDGPU_SMED3, {S32},
      {MinBoundaryDst.getReg(0), Bitcast.getReg(0), MaxBoundaryDst.getReg(0)},
      MI.getFlags());

  B.buildTrunc(MI.getOperand(0).getReg(), Med3);

  MI.eraseFromParent();
}

// Pass boilerplate
// ================

class AMDGPUPreLegalizerCombiner : public MachineFunctionPass {
public:
  static char ID;

  AMDGPUPreLegalizerCombiner(bool IsOptNone = false);

  StringRef getPassName() const override {
    return "AMDGPUPreLegalizerCombiner";
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override;

private:
  bool IsOptNone;
  AMDGPUPreLegalizerCombinerImplRuleConfig RuleConfig;
};
} // end anonymous namespace

void AMDGPUPreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<TargetPassConfig>();
  AU.setPreservesCFG();
  getSelectionDAGFallbackAnalysisUsage(AU);
  AU.addRequired<GISelKnownBitsAnalysis>();
  AU.addPreserved<GISelKnownBitsAnalysis>();
  if (!IsOptNone) {
    AU.addRequired<MachineDominatorTree>();
    AU.addPreserved<MachineDominatorTree>();
  }

  AU.addRequired<GISelCSEAnalysisWrapperPass>();
  AU.addPreserved<GISelCSEAnalysisWrapperPass>();
  MachineFunctionPass::getAnalysisUsage(AU);
}

AMDGPUPreLegalizerCombiner::AMDGPUPreLegalizerCombiner(bool IsOptNone)
    : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
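  // Register the pass with the PassRegistry so the legacy pass manager can
  // find it by name.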
  initializeAMDGPUPreLegalizerCombinerPass(*PassRegistry::getPassRegistry());

  if (!RuleConfig.parseCommandLineOption())
    report_fatal_error("Invalid rule identifier");
}

bool AMDGPUPreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
  if (MF.getProperties().hasProperty(
          MachineFunctionProperties::Property::FailedISel))
    return false;
  auto *TPC = &getAnalysis<TargetPassConfig>();
  const Function &F = MF.getFunction();
  bool EnableOpt =
      MF.getTarget().getOptLevel() != CodeGenOptLevel::None && !skipFunction(F);
  GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);

  // Enable CSE.
  GISelCSEAnalysisWrapper &Wrapper =
      getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
  auto *CSEInfo = &Wrapper.get(TPC->getCSEConfig());

  const GCNSubtarget &STI = MF.getSubtarget<GCNSubtarget>();
  MachineDominatorTree *MDT =
      IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
  CombinerInfo CInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
                     nullptr, EnableOpt, F.hasOptSize(), F.hasMinSize());
  AMDGPUPreLegalizerCombinerImpl Impl(MF, CInfo, TPC, *KB, CSEInfo, RuleConfig,
                                      STI, MDT, STI.getLegalizerInfo());
  return Impl.combineMachineInstrs();
}

char AMDGPUPreLegalizerCombiner::ID = 0;
INITIALIZE_PASS_BEGIN(AMDGPUPreLegalizerCombiner, DEBUG_TYPE,
                      "Combine AMDGPU machine instrs before legalization",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
INITIALIZE_PASS_END(AMDGPUPreLegalizerCombiner, DEBUG_TYPE,
                    "Combine AMDGPU machine instrs before legalization", false,
                    false)

namespace llvm {
FunctionPass *createAMDGPUPreLegalizeCombiner(bool IsOptNone) {
  return new AMDGPUPreLegalizerCombiner(IsOptNone);
}
} // end namespace llvm