xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===- AMDGPURegBankLegalizeRules --------------------------------*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUREGBANKLEGALIZERULES_H
10 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUREGBANKLEGALIZERULES_H
11 
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include <cassert>
#include <functional>
#include <initializer_list>
#include <utility>
15 
16 namespace llvm {
17 
18 class LLT;
19 class MachineRegisterInfo;
20 class MachineInstr;
21 class GCNSubtarget;
22 class MachineFunction;
23 template <typename T> class GenericUniformityInfo;
24 template <typename T> class GenericSSAContext;
25 using MachineSSAContext = GenericSSAContext<MachineFunction>;
26 using MachineUniformityInfo = GenericUniformityInfo<MachineSSAContext>;
27 
28 namespace AMDGPU {
29 
/// \returns true if \p Ty is a pointer type with size \p Width.
/// NOTE(review): \p Width is presumably the pointer size in bits and the
/// address space is presumably not constrained -- confirm against the
/// definition, which is not part of this header.
bool isAnyPtr(LLT Ty, unsigned Width);
32 
// IDs used to build predicate for RegBankLegalizeRule. Predicate can have one
// or more IDs and each represents a check for 'uniform or divergent' + LLT or
// just LLT on register operand.
// Most often checking one operand is enough to decide which RegBankLLTMapping
// to apply (see Fast Rules), IDs are useful when two or more operands need to
// be checked.
//
// Naming scheme: an ID without prefix (e.g. S32) is the 'just LLT' form, while
// the 'Uni' and 'Div' prefixed IDs are the 'uniform' and 'divergent' + LLT
// forms of the same type.
//
// The enumerators have implicit values, so inserting an entry in the middle or
// reordering entries changes the numeric value of everything that follows.
enum UniformityLLTOpPredicateID {
  // NOTE(review): presumably a placeholder meaning "no check for this
  // operand" -- confirm against PredicateMapping::match.
  _,
  // scalars
  S1,
  S16,
  S32,
  S64,
  S128,

  UniS1,
  UniS16,
  UniS32,
  UniS64,
  UniS128,

  DivS1,
  DivS16,
  DivS32,
  DivS64,
  DivS128,

  // pointers
  // NOTE(review): P<N> presumably names address space N and Ptr<W> any
  // pointer of width W (compare isAnyPtr) -- confirm against the .cpp.
  P0,
  P1,
  P3,
  P4,
  P5,
  Ptr32,
  Ptr64,
  Ptr128,

  UniP0,
  UniP1,
  UniP3,
  UniP4,
  UniP5,
  UniPtr32,
  UniPtr64,
  UniPtr128,

  DivP0,
  DivP1,
  DivP3,
  DivP4,
  DivP5,
  DivPtr32,
  DivPtr64,
  DivPtr128,

  // vectors
  V2S16,
  V2S32,
  V3S32,
  V4S32,

  UniV2S16,

  DivV2S16,

  // B types
  // NOTE(review): 'B<W>' presumably matches any LLT that is W bits wide --
  // confirm against the .cpp.
  B32,
  B64,
  B96,
  B128,
  B256,
  B512,

  UniB32,
  UniB64,
  UniB96,
  UniB128,
  UniB256,
  UniB512,

  DivB32,
  DivB64,
  DivB96,
  DivB128,
  DivB256,
  DivB512,
};
120 
// How to apply register bank on register operand.
// In most cases, this serves as a LLT and register bank assert.
// Can change operands and insert copies, extends, truncs, and read-any-lanes.
// Anything more complicated requires LoweringMethod.
//
// The enumerators have implicit values; their order within a group carries no
// documented meaning here, but reordering them changes their numeric values.
enum RegBankLLTMappingApplyID {
  // Placeholder used for fast-rule slots that have no mapping assigned (see
  // the InvMapping initializers in SetOfRulesForOpcode).
  InvalidMapping,
  // NOTE(review): None / IntrId / Imm presumably mark operands that need no
  // register bank (nothing to do, intrinsic ID, immediate) -- confirm.
  None,
  IntrId,
  Imm,
  Vcc,

  // sgpr scalars, pointers, vectors and B-types
  // NOTE(review): there is no SgprP0 counterpart to VgprP0 below -- confirm
  // this is intentional.
  Sgpr16,
  Sgpr32,
  Sgpr64,
  Sgpr128,
  SgprP1,
  SgprP3,
  SgprP4,
  SgprP5,
  SgprPtr32,
  SgprPtr64,
  SgprPtr128,
  SgprV2S16,
  SgprV4S32,
  SgprV2S32,
  SgprB32,
  SgprB64,
  SgprB96,
  SgprB128,
  SgprB256,
  SgprB512,

  // vgpr scalars, pointers, vectors and B-types
  Vgpr16,
  Vgpr32,
  Vgpr64,
  Vgpr128,
  VgprP0,
  VgprP1,
  VgprP3,
  VgprP4,
  VgprP5,
  VgprPtr32,
  VgprPtr64,
  VgprPtr128,
  VgprV2S16,
  VgprV2S32,
  VgprB32,
  VgprB64,
  VgprB96,
  VgprB128,
  VgprB256,
  VgprB512,
  VgprV4S32,

  // Dst only modifiers: read-any-lane and truncs
  UniInVcc,
  UniInVgprS32,
  UniInVgprV2S16,
  UniInVgprV4S32,
  UniInVgprB32,
  UniInVgprB64,
  UniInVgprB96,
  UniInVgprB128,
  UniInVgprB256,
  UniInVgprB512,

  Sgpr32Trunc,

  // Src only modifiers: waterfalls, extends
  Sgpr32AExt,
  Sgpr32AExtBoolInReg,
  Sgpr32SExt,
  Sgpr32ZExt,
  Vgpr32SExt,
  Vgpr32ZExt,
};
199 
// Instruction needs to be replaced with a sequence of instructions. Lowering
// was not done by the legalizer since the instruction is available on either
// sgpr or vgpr.
// For example S64 AND is available on sgpr, for that reason S64 AND is legal in
// context of Legalizer that only checks LLT. But S64 AND is not available on
// vgpr. Lower it to two S32 vgpr ANDs.
//
// DoNotLower is the default lowering of a RegBankLLTMapping, i.e. the
// instruction is kept and only its operands are handled.
enum LoweringMethodID {
  DoNotLower,
  VccExtToSel,
  UniExtToSel,
  UnpackBitShift,
  S_BFE,
  V_BFE,
  VgprToVccCopy,
  SplitTo32,
  SplitTo32Select,
  SplitTo32SExtInReg,
  Ext32To64,
  UniCstExt,
  SplitLoad,
  WidenLoad,
};
221 
// Selects which group of operand types the "Fast Rules" slots of a
// SetOfRulesForOpcode are keyed on. NoFastRules is the default and means the
// rule set does not use fast rules. The trailing comments list the types
// covered by each group.
// NOTE(review): the order of the types in these comments is not necessarily
// the slot order used by getFastPredicateSlot -- confirm in the .cpp.
enum FastRulesTypes {
  NoFastRules,
  Standard,  // S16, S32, S64, V2S16
  StandardB, // B32, B64, B96, B128
  Vector,    // S32, V2S32, V3S32, V4S32
};
228 
// Bundles the register bank apply-IDs for the destination and source operands
// of an instruction together with the lowering method to use.
// NOTE(review): presumably each list holds one entry per def / use operand in
// operand order -- confirm against the code that applies the mapping.
struct RegBankLLTMapping {
  // Apply-IDs for destination operands.
  SmallVector<RegBankLLTMappingApplyID, 2> DstOpMapping;
  // Apply-IDs for source operands.
  SmallVector<RegBankLLTMappingApplyID, 4> SrcOpMapping;
  // How to lower the instruction; DoNotLower unless specified otherwise.
  LoweringMethodID LoweringMethod;
  // LoweringMethod may be omitted, in which case it defaults to DoNotLower.
  RegBankLLTMapping(
      std::initializer_list<RegBankLLTMappingApplyID> DstOpMappingList,
      std::initializer_list<RegBankLLTMappingApplyID> SrcOpMappingList,
      LoweringMethodID LoweringMethod = DoNotLower);
};
238 
// Predicate of a RegBankLegalizeRule: a list of uniformity/LLT IDs plus an
// optional custom test on the instruction.
struct PredicateMapping {
  // Uniformity/LLT checks that make up the predicate.
  SmallVector<UniformityLLTOpPredicateID, 4> OpUniformityAndTypes;
  // Optional extra test on the instruction; empty (nullptr) by default.
  std::function<bool(const MachineInstr &)> TestFunc;
  // TestFunc may be omitted, in which case no custom test is stored.
  PredicateMapping(
      std::initializer_list<UniformityLLTOpPredicateID> OpList,
      std::function<bool(const MachineInstr &)> TestFunc = nullptr);

  // \returns whether this predicate matches \p MI.
  // NOTE(review): presumably checks each ID in OpUniformityAndTypes against
  // the corresponding operand of \p MI and then runs TestFunc if it is set --
  // confirm in the .cpp.
  bool match(const MachineInstr &MI, const MachineUniformityInfo &MUI,
             const MachineRegisterInfo &MRI) const;
};
249 
// A single rule: the predicate that selects it and the operand mapping that
// goes with it. SetOfRulesForOpcode stores these as its "Slow Rules".
struct RegBankLegalizeRule {
  PredicateMapping Predicate;
  RegBankLLTMapping OperandMapping;
};
254 
255 class SetOfRulesForOpcode {
256   // "Slow Rules". More complex 'Rules[i].Predicate', check them one by one.
257   SmallVector<RegBankLegalizeRule, 4> Rules;
258 
259   // "Fast Rules"
260   // Instead of testing each 'Rules[i].Predicate' we do direct access to
261   // RegBankLLTMapping using getFastPredicateSlot. For example if:
262   // - FastTypes == Standard Uni[0] holds Mapping in case Op 0 is uniform S32
263   // - FastTypes == Vector Div[3] holds Mapping in case Op 0 is divergent V4S32
264   FastRulesTypes FastTypes = NoFastRules;
265 #define InvMapping RegBankLLTMapping({InvalidMapping}, {InvalidMapping})
266   RegBankLLTMapping Uni[4] = {InvMapping, InvMapping, InvMapping, InvMapping};
267   RegBankLLTMapping Div[4] = {InvMapping, InvMapping, InvMapping, InvMapping};
268 
269 public:
270   SetOfRulesForOpcode();
271   SetOfRulesForOpcode(FastRulesTypes FastTypes);
272 
273   const RegBankLLTMapping &
274   findMappingForMI(const MachineInstr &MI, const MachineRegisterInfo &MRI,
275                    const MachineUniformityInfo &MUI) const;
276 
277   void addRule(RegBankLegalizeRule Rule);
278 
279   void addFastRuleDivergent(UniformityLLTOpPredicateID Ty,
280                             RegBankLLTMapping RuleApplyIDs);
281   void addFastRuleUniform(UniformityLLTOpPredicateID Ty,
282                           RegBankLLTMapping RuleApplyIDs);
283 
284 private:
285   int getFastPredicateSlot(UniformityLLTOpPredicateID Ty) const;
286 };
287 
288 // Essentially 'map<Opcode(or intrinsic_opcode), SetOfRulesForOpcode>' but a
289 // little more efficient.
290 class RegBankLegalizeRules {
291   const GCNSubtarget *ST;
292   MachineRegisterInfo *MRI;
293   // Separate maps for G-opcodes and instrinsics since they are in different
294   // enums. Multiple opcodes can share same set of rules.
295   // RulesAlias = map<Opcode, KeyOpcode>
296   // Rules = map<KeyOpcode, SetOfRulesForOpcode>
297   SmallDenseMap<unsigned, unsigned, 256> GRulesAlias;
298   SmallDenseMap<unsigned, SetOfRulesForOpcode, 128> GRules;
299   SmallDenseMap<unsigned, unsigned, 128> IRulesAlias;
300   SmallDenseMap<unsigned, SetOfRulesForOpcode, 64> IRules;
301   class RuleSetInitializer {
302     SetOfRulesForOpcode *RuleSet;
303 
304   public:
305     // Used for clang-format line breaks and to force  writing all rules for
306     // opcode in same place.
307     template <class AliasMap, class RulesMap>
308     RuleSetInitializer(std::initializer_list<unsigned> OpcList,
309                        AliasMap &RulesAlias, RulesMap &Rules,
310                        FastRulesTypes FastTypes = NoFastRules) {
311       unsigned KeyOpcode = *OpcList.begin();
312       for (unsigned Opc : OpcList) {
313         [[maybe_unused]] auto [_, NewInput] =
314             RulesAlias.try_emplace(Opc, KeyOpcode);
315         assert(NewInput && "Can't redefine existing Rules");
316       }
317 
318       auto [DenseMapIter, NewInput] = Rules.try_emplace(KeyOpcode, FastTypes);
319       assert(NewInput && "Can't redefine existing Rules");
320 
321       RuleSet = &DenseMapIter->second;
322     }
323 
324     RuleSetInitializer(const RuleSetInitializer &) = delete;
325     RuleSetInitializer &operator=(const RuleSetInitializer &) = delete;
326     RuleSetInitializer(RuleSetInitializer &&) = delete;
327     RuleSetInitializer &operator=(RuleSetInitializer &&) = delete;
328     ~RuleSetInitializer() = default;
329 
330     RuleSetInitializer &Div(UniformityLLTOpPredicateID Ty,
331                             RegBankLLTMapping RuleApplyIDs,
332                             bool STPred = true) {
333       if (STPred)
334         RuleSet->addFastRuleDivergent(Ty, RuleApplyIDs);
335       return *this;
336     }
337 
338     RuleSetInitializer &Uni(UniformityLLTOpPredicateID Ty,
339                             RegBankLLTMapping RuleApplyIDs,
340                             bool STPred = true) {
341       if (STPred)
342         RuleSet->addFastRuleUniform(Ty, RuleApplyIDs);
343       return *this;
344     }
345 
346     RuleSetInitializer &Any(RegBankLegalizeRule Init, bool STPred = true) {
347       if (STPred)
348         RuleSet->addRule(Init);
349       return *this;
350     }
351   };
352 
353   RuleSetInitializer addRulesForGOpcs(std::initializer_list<unsigned> OpcList,
354                                       FastRulesTypes FastTypes = NoFastRules);
355 
356   RuleSetInitializer addRulesForIOpcs(std::initializer_list<unsigned> OpcList,
357                                       FastRulesTypes FastTypes = NoFastRules);
358 
359 public:
360   // Initialize rules for all opcodes.
361   RegBankLegalizeRules(const GCNSubtarget &ST, MachineRegisterInfo &MRI);
362 
363   // In case we don't want to regenerate same rules, we can use already
364   // generated rules but need to refresh references to objects that are
365   // created for this run.
refreshRefs(const GCNSubtarget & _ST,MachineRegisterInfo & _MRI)366   void refreshRefs(const GCNSubtarget &_ST, MachineRegisterInfo &_MRI) {
367     ST = &_ST;
368     MRI = &_MRI;
369   };
370 
371   const SetOfRulesForOpcode &getRulesForOpc(MachineInstr &MI) const;
372 };
373 
374 } // end namespace AMDGPU
375 } // end namespace llvm
376 
377 #endif
378