//===- AMDGPURegBankLegalizeRules --------------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUREGBANKLEGALIZERULES_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUREGBANKLEGALIZERULES_H

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include <cassert>
#include <functional>
#include <initializer_list>
#include <utility>

namespace llvm {

class LLT;
class MachineRegisterInfo;
class MachineInstr;
class GCNSubtarget;
class MachineFunction;
template <typename T> class GenericUniformityInfo;
template <typename T> class GenericSSAContext;
using MachineSSAContext = GenericSSAContext<MachineFunction>;
using MachineUniformityInfo = GenericUniformityInfo<MachineSSAContext>;

namespace AMDGPU {

/// \returns true if \p Ty is a pointer type with size \p Width.
bool isAnyPtr(LLT Ty, unsigned Width);

// IDs used to build predicate for RegBankLegalizeRule. Predicate can have one
// or more IDs and each represents a check for 'uniform or divergent' + LLT or
// just LLT on register operand.
// Most often checking one operand is enough to decide which RegBankLLTMapping
// to apply (see Fast Rules), IDs are useful when two or more operands need to
// be checked.
// Naming scheme used by the enumerators below: an unprefixed ID names a type
// only, while the 'Uni' and 'Div' prefixed IDs name the same type on a uniform
// or divergent operand respectively.
// Enumerator order defines the numeric values; append new IDs with care.
enum UniformityLLTOpPredicateID {
  // NOTE(review): presumably a "no check for this operand" placeholder --
  // confirm against PredicateMapping::match.
  _,

  // scalars
  S1,
  S16,
  S32,
  S64,
  S128,

  UniS1,
  UniS16,
  UniS32,
  UniS64,
  UniS128,

  DivS1,
  DivS16,
  DivS32,
  DivS64,
  DivS128,

  // pointers
  P0,
  P1,
  P3,
  P4,
  P5,
  Ptr32,
  Ptr64,
  Ptr128,

  UniP0,
  UniP1,
  UniP3,
  UniP4,
  UniP5,
  UniPtr32,
  UniPtr64,
  UniPtr128,

  DivP0,
  DivP1,
  DivP3,
  DivP4,
  DivP5,
  DivPtr32,
  DivPtr64,
  DivPtr128,

  // vectors
  V2S16,
  V2S32,
  V3S32,
  V4S32,

  UniV2S16,

  DivV2S16,

  // B types
  B32,
  B64,
  B96,
  B128,
  B256,
  B512,

  UniB32,
  UniB64,
  UniB96,
  UniB128,
  UniB256,
  UniB512,

  DivB32,
  DivB64,
  DivB96,
  DivB128,
  DivB256,
  DivB512,
};

// How to apply register bank on register operand.
// In most cases, this serves as a LLT and register bank assert.
// Can change operands and insert copies, extends, truncs, and read-any-lanes.
// Anything more complicated requires LoweringMethod.
// Per-operand apply IDs, grouped as: special operand kinds, sgpr IDs, vgpr
// IDs, destination-only modifiers, and source-only modifiers.
// Enumerator order defines the numeric values; append new IDs with care.
enum RegBankLLTMappingApplyID {
  InvalidMapping,
  None,
  IntrId,
  Imm,
  Vcc,

  // sgpr scalars, pointers, vectors and B-types
  Sgpr16,
  Sgpr32,
  Sgpr64,
  Sgpr128,
  SgprP1,
  SgprP3,
  SgprP4,
  SgprP5,
  SgprPtr32,
  SgprPtr64,
  SgprPtr128,
  SgprV2S16,
  SgprV4S32,
  SgprV2S32,
  SgprB32,
  SgprB64,
  SgprB96,
  SgprB128,
  SgprB256,
  SgprB512,

  // vgpr scalars, pointers, vectors and B-types
  Vgpr16,
  Vgpr32,
  Vgpr64,
  Vgpr128,
  VgprP0,
  VgprP1,
  VgprP3,
  VgprP4,
  VgprP5,
  VgprPtr32,
  VgprPtr64,
  VgprPtr128,
  VgprV2S16,
  VgprV2S32,
  VgprB32,
  VgprB64,
  VgprB96,
  VgprB128,
  VgprB256,
  VgprB512,
  VgprV4S32,

  // Dst only modifiers: read-any-lane and truncs
  UniInVcc,
  UniInVgprS32,
  UniInVgprV2S16,
  UniInVgprV4S32,
  UniInVgprB32,
  UniInVgprB64,
  UniInVgprB96,
  UniInVgprB128,
  UniInVgprB256,
  UniInVgprB512,

  Sgpr32Trunc,

  // Src only modifiers: waterfalls, extends
  Sgpr32AExt,
  Sgpr32AExtBoolInReg,
  Sgpr32SExt,
  Sgpr32ZExt,
  Vgpr32SExt,
  Vgpr32ZExt,
};

// Instruction needs to be replaced with sequence of instructions. Lowering was
// not done by legalizer since the instruction is available in either sgpr or
// vgpr. For example S64 AND is available on sgpr, for that reason S64 AND is
// legal in context of Legalizer that only checks LLT. But S64 AND is not
// available on vgpr. Lower it to two S32 vgpr ANDs.
205 enum LoweringMethodID { 206 DoNotLower, 207 VccExtToSel, 208 UniExtToSel, 209 UnpackBitShift, 210 S_BFE, 211 V_BFE, 212 VgprToVccCopy, 213 SplitTo32, 214 SplitTo32Select, 215 SplitTo32SExtInReg, 216 Ext32To64, 217 UniCstExt, 218 SplitLoad, 219 WidenLoad, 220 }; 221 222 enum FastRulesTypes { 223 NoFastRules, 224 Standard, // S16, S32, S64, V2S16 225 StandardB, // B32, B64, B96, B128 226 Vector, // S32, V2S32, V3S32, V4S32 227 }; 228 229 struct RegBankLLTMapping { 230 SmallVector<RegBankLLTMappingApplyID, 2> DstOpMapping; 231 SmallVector<RegBankLLTMappingApplyID, 4> SrcOpMapping; 232 LoweringMethodID LoweringMethod; 233 RegBankLLTMapping( 234 std::initializer_list<RegBankLLTMappingApplyID> DstOpMappingList, 235 std::initializer_list<RegBankLLTMappingApplyID> SrcOpMappingList, 236 LoweringMethodID LoweringMethod = DoNotLower); 237 }; 238 239 struct PredicateMapping { 240 SmallVector<UniformityLLTOpPredicateID, 4> OpUniformityAndTypes; 241 std::function<bool(const MachineInstr &)> TestFunc; 242 PredicateMapping( 243 std::initializer_list<UniformityLLTOpPredicateID> OpList, 244 std::function<bool(const MachineInstr &)> TestFunc = nullptr); 245 246 bool match(const MachineInstr &MI, const MachineUniformityInfo &MUI, 247 const MachineRegisterInfo &MRI) const; 248 }; 249 250 struct RegBankLegalizeRule { 251 PredicateMapping Predicate; 252 RegBankLLTMapping OperandMapping; 253 }; 254 255 class SetOfRulesForOpcode { 256 // "Slow Rules". More complex 'Rules[i].Predicate', check them one by one. 257 SmallVector<RegBankLegalizeRule, 4> Rules; 258 259 // "Fast Rules" 260 // Instead of testing each 'Rules[i].Predicate' we do direct access to 261 // RegBankLLTMapping using getFastPredicateSlot. 
For example if: 262 // - FastTypes == Standard Uni[0] holds Mapping in case Op 0 is uniform S32 263 // - FastTypes == Vector Div[3] holds Mapping in case Op 0 is divergent V4S32 264 FastRulesTypes FastTypes = NoFastRules; 265 #define InvMapping RegBankLLTMapping({InvalidMapping}, {InvalidMapping}) 266 RegBankLLTMapping Uni[4] = {InvMapping, InvMapping, InvMapping, InvMapping}; 267 RegBankLLTMapping Div[4] = {InvMapping, InvMapping, InvMapping, InvMapping}; 268 269 public: 270 SetOfRulesForOpcode(); 271 SetOfRulesForOpcode(FastRulesTypes FastTypes); 272 273 const RegBankLLTMapping & 274 findMappingForMI(const MachineInstr &MI, const MachineRegisterInfo &MRI, 275 const MachineUniformityInfo &MUI) const; 276 277 void addRule(RegBankLegalizeRule Rule); 278 279 void addFastRuleDivergent(UniformityLLTOpPredicateID Ty, 280 RegBankLLTMapping RuleApplyIDs); 281 void addFastRuleUniform(UniformityLLTOpPredicateID Ty, 282 RegBankLLTMapping RuleApplyIDs); 283 284 private: 285 int getFastPredicateSlot(UniformityLLTOpPredicateID Ty) const; 286 }; 287 288 // Essentially 'map<Opcode(or intrinsic_opcode), SetOfRulesForOpcode>' but a 289 // little more efficient. 290 class RegBankLegalizeRules { 291 const GCNSubtarget *ST; 292 MachineRegisterInfo *MRI; 293 // Separate maps for G-opcodes and instrinsics since they are in different 294 // enums. Multiple opcodes can share same set of rules. 295 // RulesAlias = map<Opcode, KeyOpcode> 296 // Rules = map<KeyOpcode, SetOfRulesForOpcode> 297 SmallDenseMap<unsigned, unsigned, 256> GRulesAlias; 298 SmallDenseMap<unsigned, SetOfRulesForOpcode, 128> GRules; 299 SmallDenseMap<unsigned, unsigned, 128> IRulesAlias; 300 SmallDenseMap<unsigned, SetOfRulesForOpcode, 64> IRules; 301 class RuleSetInitializer { 302 SetOfRulesForOpcode *RuleSet; 303 304 public: 305 // Used for clang-format line breaks and to force writing all rules for 306 // opcode in same place. 
307 template <class AliasMap, class RulesMap> 308 RuleSetInitializer(std::initializer_list<unsigned> OpcList, 309 AliasMap &RulesAlias, RulesMap &Rules, 310 FastRulesTypes FastTypes = NoFastRules) { 311 unsigned KeyOpcode = *OpcList.begin(); 312 for (unsigned Opc : OpcList) { 313 [[maybe_unused]] auto [_, NewInput] = 314 RulesAlias.try_emplace(Opc, KeyOpcode); 315 assert(NewInput && "Can't redefine existing Rules"); 316 } 317 318 auto [DenseMapIter, NewInput] = Rules.try_emplace(KeyOpcode, FastTypes); 319 assert(NewInput && "Can't redefine existing Rules"); 320 321 RuleSet = &DenseMapIter->second; 322 } 323 324 RuleSetInitializer(const RuleSetInitializer &) = delete; 325 RuleSetInitializer &operator=(const RuleSetInitializer &) = delete; 326 RuleSetInitializer(RuleSetInitializer &&) = delete; 327 RuleSetInitializer &operator=(RuleSetInitializer &&) = delete; 328 ~RuleSetInitializer() = default; 329 330 RuleSetInitializer &Div(UniformityLLTOpPredicateID Ty, 331 RegBankLLTMapping RuleApplyIDs, 332 bool STPred = true) { 333 if (STPred) 334 RuleSet->addFastRuleDivergent(Ty, RuleApplyIDs); 335 return *this; 336 } 337 338 RuleSetInitializer &Uni(UniformityLLTOpPredicateID Ty, 339 RegBankLLTMapping RuleApplyIDs, 340 bool STPred = true) { 341 if (STPred) 342 RuleSet->addFastRuleUniform(Ty, RuleApplyIDs); 343 return *this; 344 } 345 346 RuleSetInitializer &Any(RegBankLegalizeRule Init, bool STPred = true) { 347 if (STPred) 348 RuleSet->addRule(Init); 349 return *this; 350 } 351 }; 352 353 RuleSetInitializer addRulesForGOpcs(std::initializer_list<unsigned> OpcList, 354 FastRulesTypes FastTypes = NoFastRules); 355 356 RuleSetInitializer addRulesForIOpcs(std::initializer_list<unsigned> OpcList, 357 FastRulesTypes FastTypes = NoFastRules); 358 359 public: 360 // Initialize rules for all opcodes. 
361 RegBankLegalizeRules(const GCNSubtarget &ST, MachineRegisterInfo &MRI); 362 363 // In case we don't want to regenerate same rules, we can use already 364 // generated rules but need to refresh references to objects that are 365 // created for this run. refreshRefs(const GCNSubtarget & _ST,MachineRegisterInfo & _MRI)366 void refreshRefs(const GCNSubtarget &_ST, MachineRegisterInfo &_MRI) { 367 ST = &_ST; 368 MRI = &_MRI; 369 }; 370 371 const SetOfRulesForOpcode &getRulesForOpc(MachineInstr &MI) const; 372 }; 373 374 } // end namespace AMDGPU 375 } // end namespace llvm 376 377 #endif 378