xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp (revision 0b57cec536236d46e3dba9bd041533462f33dbb7)
1*0b57cec5SDimitry Andric //===- AMDGPULegalizerInfo.cpp -----------------------------------*- C++ -*-==//
2*0b57cec5SDimitry Andric //
3*0b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*0b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5*0b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*0b57cec5SDimitry Andric //
7*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
8*0b57cec5SDimitry Andric /// \file
/// This file implements the targeting of the MachineLegalizer class for
10*0b57cec5SDimitry Andric /// AMDGPU.
11*0b57cec5SDimitry Andric /// \todo This should be generated by TableGen.
12*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
13*0b57cec5SDimitry Andric 
14*0b57cec5SDimitry Andric #include "AMDGPU.h"
15*0b57cec5SDimitry Andric #include "AMDGPULegalizerInfo.h"
16*0b57cec5SDimitry Andric #include "AMDGPUTargetMachine.h"
17*0b57cec5SDimitry Andric #include "SIMachineFunctionInfo.h"
18*0b57cec5SDimitry Andric #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
19*0b57cec5SDimitry Andric #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
20*0b57cec5SDimitry Andric #include "llvm/CodeGen/TargetOpcodes.h"
21*0b57cec5SDimitry Andric #include "llvm/CodeGen/ValueTypes.h"
22*0b57cec5SDimitry Andric #include "llvm/IR/DerivedTypes.h"
23*0b57cec5SDimitry Andric #include "llvm/IR/Type.h"
24*0b57cec5SDimitry Andric #include "llvm/Support/Debug.h"
25*0b57cec5SDimitry Andric 
26*0b57cec5SDimitry Andric #define DEBUG_TYPE "amdgpu-legalinfo"
27*0b57cec5SDimitry Andric 
28*0b57cec5SDimitry Andric using namespace llvm;
29*0b57cec5SDimitry Andric using namespace LegalizeActions;
30*0b57cec5SDimitry Andric using namespace LegalizeMutations;
31*0b57cec5SDimitry Andric using namespace LegalityPredicates;
32*0b57cec5SDimitry Andric 
33*0b57cec5SDimitry Andric 
34*0b57cec5SDimitry Andric static LegalityPredicate isMultiple32(unsigned TypeIdx,
35*0b57cec5SDimitry Andric                                       unsigned MaxSize = 512) {
36*0b57cec5SDimitry Andric   return [=](const LegalityQuery &Query) {
37*0b57cec5SDimitry Andric     const LLT Ty = Query.Types[TypeIdx];
38*0b57cec5SDimitry Andric     const LLT EltTy = Ty.getScalarType();
39*0b57cec5SDimitry Andric     return Ty.getSizeInBits() <= MaxSize && EltTy.getSizeInBits() % 32 == 0;
40*0b57cec5SDimitry Andric   };
41*0b57cec5SDimitry Andric }
42*0b57cec5SDimitry Andric 
43*0b57cec5SDimitry Andric static LegalityPredicate isSmallOddVector(unsigned TypeIdx) {
44*0b57cec5SDimitry Andric   return [=](const LegalityQuery &Query) {
45*0b57cec5SDimitry Andric     const LLT Ty = Query.Types[TypeIdx];
46*0b57cec5SDimitry Andric     return Ty.isVector() &&
47*0b57cec5SDimitry Andric            Ty.getNumElements() % 2 != 0 &&
48*0b57cec5SDimitry Andric            Ty.getElementType().getSizeInBits() < 32;
49*0b57cec5SDimitry Andric   };
50*0b57cec5SDimitry Andric }
51*0b57cec5SDimitry Andric 
52*0b57cec5SDimitry Andric static LegalizeMutation oneMoreElement(unsigned TypeIdx) {
53*0b57cec5SDimitry Andric   return [=](const LegalityQuery &Query) {
54*0b57cec5SDimitry Andric     const LLT Ty = Query.Types[TypeIdx];
55*0b57cec5SDimitry Andric     const LLT EltTy = Ty.getElementType();
56*0b57cec5SDimitry Andric     return std::make_pair(TypeIdx, LLT::vector(Ty.getNumElements() + 1, EltTy));
57*0b57cec5SDimitry Andric   };
58*0b57cec5SDimitry Andric }
59*0b57cec5SDimitry Andric 
60*0b57cec5SDimitry Andric static LegalizeMutation fewerEltsToSize64Vector(unsigned TypeIdx) {
61*0b57cec5SDimitry Andric   return [=](const LegalityQuery &Query) {
62*0b57cec5SDimitry Andric     const LLT Ty = Query.Types[TypeIdx];
63*0b57cec5SDimitry Andric     const LLT EltTy = Ty.getElementType();
64*0b57cec5SDimitry Andric     unsigned Size = Ty.getSizeInBits();
65*0b57cec5SDimitry Andric     unsigned Pieces = (Size + 63) / 64;
66*0b57cec5SDimitry Andric     unsigned NewNumElts = (Ty.getNumElements() + 1) / Pieces;
67*0b57cec5SDimitry Andric     return std::make_pair(TypeIdx, LLT::scalarOrVector(NewNumElts, EltTy));
68*0b57cec5SDimitry Andric   };
69*0b57cec5SDimitry Andric }
70*0b57cec5SDimitry Andric 
71*0b57cec5SDimitry Andric static LegalityPredicate vectorWiderThan(unsigned TypeIdx, unsigned Size) {
72*0b57cec5SDimitry Andric   return [=](const LegalityQuery &Query) {
73*0b57cec5SDimitry Andric     const LLT QueryTy = Query.Types[TypeIdx];
74*0b57cec5SDimitry Andric     return QueryTy.isVector() && QueryTy.getSizeInBits() > Size;
75*0b57cec5SDimitry Andric   };
76*0b57cec5SDimitry Andric }
77*0b57cec5SDimitry Andric 
78*0b57cec5SDimitry Andric static LegalityPredicate numElementsNotEven(unsigned TypeIdx) {
79*0b57cec5SDimitry Andric   return [=](const LegalityQuery &Query) {
80*0b57cec5SDimitry Andric     const LLT QueryTy = Query.Types[TypeIdx];
81*0b57cec5SDimitry Andric     return QueryTy.isVector() && QueryTy.getNumElements() % 2 != 0;
82*0b57cec5SDimitry Andric   };
83*0b57cec5SDimitry Andric }
84*0b57cec5SDimitry Andric 
85*0b57cec5SDimitry Andric // Any combination of 32 or 64-bit elements up to 512 bits, and multiples of
86*0b57cec5SDimitry Andric // v2s16.
87*0b57cec5SDimitry Andric static LegalityPredicate isRegisterType(unsigned TypeIdx) {
88*0b57cec5SDimitry Andric   return [=](const LegalityQuery &Query) {
89*0b57cec5SDimitry Andric     const LLT Ty = Query.Types[TypeIdx];
90*0b57cec5SDimitry Andric     if (Ty.isVector()) {
91*0b57cec5SDimitry Andric       const int EltSize = Ty.getElementType().getSizeInBits();
92*0b57cec5SDimitry Andric       return EltSize == 32 || EltSize == 64 ||
93*0b57cec5SDimitry Andric             (EltSize == 16 && Ty.getNumElements() % 2 == 0) ||
94*0b57cec5SDimitry Andric              EltSize == 128 || EltSize == 256;
95*0b57cec5SDimitry Andric     }
96*0b57cec5SDimitry Andric 
97*0b57cec5SDimitry Andric     return Ty.getSizeInBits() % 32 == 0 && Ty.getSizeInBits() <= 512;
98*0b57cec5SDimitry Andric   };
99*0b57cec5SDimitry Andric }
100*0b57cec5SDimitry Andric 
101*0b57cec5SDimitry Andric AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
102*0b57cec5SDimitry Andric                                          const GCNTargetMachine &TM)
103*0b57cec5SDimitry Andric   :  ST(ST_) {
104*0b57cec5SDimitry Andric   using namespace TargetOpcode;
105*0b57cec5SDimitry Andric 
106*0b57cec5SDimitry Andric   auto GetAddrSpacePtr = [&TM](unsigned AS) {
107*0b57cec5SDimitry Andric     return LLT::pointer(AS, TM.getPointerSizeInBits(AS));
108*0b57cec5SDimitry Andric   };
109*0b57cec5SDimitry Andric 
110*0b57cec5SDimitry Andric   const LLT S1 = LLT::scalar(1);
111*0b57cec5SDimitry Andric   const LLT S8 = LLT::scalar(8);
112*0b57cec5SDimitry Andric   const LLT S16 = LLT::scalar(16);
113*0b57cec5SDimitry Andric   const LLT S32 = LLT::scalar(32);
114*0b57cec5SDimitry Andric   const LLT S64 = LLT::scalar(64);
115*0b57cec5SDimitry Andric   const LLT S128 = LLT::scalar(128);
116*0b57cec5SDimitry Andric   const LLT S256 = LLT::scalar(256);
117*0b57cec5SDimitry Andric   const LLT S512 = LLT::scalar(512);
118*0b57cec5SDimitry Andric 
119*0b57cec5SDimitry Andric   const LLT V2S16 = LLT::vector(2, 16);
120*0b57cec5SDimitry Andric   const LLT V4S16 = LLT::vector(4, 16);
121*0b57cec5SDimitry Andric 
122*0b57cec5SDimitry Andric   const LLT V2S32 = LLT::vector(2, 32);
123*0b57cec5SDimitry Andric   const LLT V3S32 = LLT::vector(3, 32);
124*0b57cec5SDimitry Andric   const LLT V4S32 = LLT::vector(4, 32);
125*0b57cec5SDimitry Andric   const LLT V5S32 = LLT::vector(5, 32);
126*0b57cec5SDimitry Andric   const LLT V6S32 = LLT::vector(6, 32);
127*0b57cec5SDimitry Andric   const LLT V7S32 = LLT::vector(7, 32);
128*0b57cec5SDimitry Andric   const LLT V8S32 = LLT::vector(8, 32);
129*0b57cec5SDimitry Andric   const LLT V9S32 = LLT::vector(9, 32);
130*0b57cec5SDimitry Andric   const LLT V10S32 = LLT::vector(10, 32);
131*0b57cec5SDimitry Andric   const LLT V11S32 = LLT::vector(11, 32);
132*0b57cec5SDimitry Andric   const LLT V12S32 = LLT::vector(12, 32);
133*0b57cec5SDimitry Andric   const LLT V13S32 = LLT::vector(13, 32);
134*0b57cec5SDimitry Andric   const LLT V14S32 = LLT::vector(14, 32);
135*0b57cec5SDimitry Andric   const LLT V15S32 = LLT::vector(15, 32);
136*0b57cec5SDimitry Andric   const LLT V16S32 = LLT::vector(16, 32);
137*0b57cec5SDimitry Andric 
138*0b57cec5SDimitry Andric   const LLT V2S64 = LLT::vector(2, 64);
139*0b57cec5SDimitry Andric   const LLT V3S64 = LLT::vector(3, 64);
140*0b57cec5SDimitry Andric   const LLT V4S64 = LLT::vector(4, 64);
141*0b57cec5SDimitry Andric   const LLT V5S64 = LLT::vector(5, 64);
142*0b57cec5SDimitry Andric   const LLT V6S64 = LLT::vector(6, 64);
143*0b57cec5SDimitry Andric   const LLT V7S64 = LLT::vector(7, 64);
144*0b57cec5SDimitry Andric   const LLT V8S64 = LLT::vector(8, 64);
145*0b57cec5SDimitry Andric 
146*0b57cec5SDimitry Andric   std::initializer_list<LLT> AllS32Vectors =
147*0b57cec5SDimitry Andric     {V2S32, V3S32, V4S32, V5S32, V6S32, V7S32, V8S32,
148*0b57cec5SDimitry Andric      V9S32, V10S32, V11S32, V12S32, V13S32, V14S32, V15S32, V16S32};
149*0b57cec5SDimitry Andric   std::initializer_list<LLT> AllS64Vectors =
150*0b57cec5SDimitry Andric     {V2S64, V3S64, V4S64, V5S64, V6S64, V7S64, V8S64};
151*0b57cec5SDimitry Andric 
152*0b57cec5SDimitry Andric   const LLT GlobalPtr = GetAddrSpacePtr(AMDGPUAS::GLOBAL_ADDRESS);
153*0b57cec5SDimitry Andric   const LLT ConstantPtr = GetAddrSpacePtr(AMDGPUAS::CONSTANT_ADDRESS);
154*0b57cec5SDimitry Andric   const LLT LocalPtr = GetAddrSpacePtr(AMDGPUAS::LOCAL_ADDRESS);
155*0b57cec5SDimitry Andric   const LLT FlatPtr = GetAddrSpacePtr(AMDGPUAS::FLAT_ADDRESS);
156*0b57cec5SDimitry Andric   const LLT PrivatePtr = GetAddrSpacePtr(AMDGPUAS::PRIVATE_ADDRESS);
157*0b57cec5SDimitry Andric 
158*0b57cec5SDimitry Andric   const LLT CodePtr = FlatPtr;
159*0b57cec5SDimitry Andric 
160*0b57cec5SDimitry Andric   const std::initializer_list<LLT> AddrSpaces64 = {
161*0b57cec5SDimitry Andric     GlobalPtr, ConstantPtr, FlatPtr
162*0b57cec5SDimitry Andric   };
163*0b57cec5SDimitry Andric 
164*0b57cec5SDimitry Andric   const std::initializer_list<LLT> AddrSpaces32 = {
165*0b57cec5SDimitry Andric     LocalPtr, PrivatePtr
166*0b57cec5SDimitry Andric   };
167*0b57cec5SDimitry Andric 
168*0b57cec5SDimitry Andric   const std::initializer_list<LLT> FPTypesBase = {
169*0b57cec5SDimitry Andric     S32, S64
170*0b57cec5SDimitry Andric   };
171*0b57cec5SDimitry Andric 
172*0b57cec5SDimitry Andric   const std::initializer_list<LLT> FPTypes16 = {
173*0b57cec5SDimitry Andric     S32, S64, S16
174*0b57cec5SDimitry Andric   };
175*0b57cec5SDimitry Andric 
176*0b57cec5SDimitry Andric   const std::initializer_list<LLT> FPTypesPK16 = {
177*0b57cec5SDimitry Andric     S32, S64, S16, V2S16
178*0b57cec5SDimitry Andric   };
179*0b57cec5SDimitry Andric 
180*0b57cec5SDimitry Andric   setAction({G_BRCOND, S1}, Legal);
181*0b57cec5SDimitry Andric 
182*0b57cec5SDimitry Andric   // TODO: All multiples of 32, vectors of pointers, all v2s16 pairs, more
183*0b57cec5SDimitry Andric   // elements for v3s16
184*0b57cec5SDimitry Andric   getActionDefinitionsBuilder(G_PHI)
185*0b57cec5SDimitry Andric     .legalFor({S32, S64, V2S16, V4S16, S1, S128, S256})
186*0b57cec5SDimitry Andric     .legalFor(AllS32Vectors)
187*0b57cec5SDimitry Andric     .legalFor(AllS64Vectors)
188*0b57cec5SDimitry Andric     .legalFor(AddrSpaces64)
189*0b57cec5SDimitry Andric     .legalFor(AddrSpaces32)
190*0b57cec5SDimitry Andric     .clampScalar(0, S32, S256)
191*0b57cec5SDimitry Andric     .widenScalarToNextPow2(0, 32)
192*0b57cec5SDimitry Andric     .clampMaxNumElements(0, S32, 16)
193*0b57cec5SDimitry Andric     .moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
194*0b57cec5SDimitry Andric     .legalIf(isPointer(0));
195*0b57cec5SDimitry Andric 
196*0b57cec5SDimitry Andric   if (ST.has16BitInsts()) {
197*0b57cec5SDimitry Andric     getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL})
198*0b57cec5SDimitry Andric       .legalFor({S32, S16})
199*0b57cec5SDimitry Andric       .clampScalar(0, S16, S32)
200*0b57cec5SDimitry Andric       .scalarize(0);
201*0b57cec5SDimitry Andric   } else {
202*0b57cec5SDimitry Andric     getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL})
203*0b57cec5SDimitry Andric       .legalFor({S32})
204*0b57cec5SDimitry Andric       .clampScalar(0, S32, S32)
205*0b57cec5SDimitry Andric       .scalarize(0);
206*0b57cec5SDimitry Andric   }
207*0b57cec5SDimitry Andric 
208*0b57cec5SDimitry Andric   getActionDefinitionsBuilder({G_UMULH, G_SMULH})
209*0b57cec5SDimitry Andric     .legalFor({S32})
210*0b57cec5SDimitry Andric     .clampScalar(0, S32, S32)
211*0b57cec5SDimitry Andric     .scalarize(0);
212*0b57cec5SDimitry Andric 
213*0b57cec5SDimitry Andric   // Report legal for any types we can handle anywhere. For the cases only legal
214*0b57cec5SDimitry Andric   // on the SALU, RegBankSelect will be able to re-legalize.
215*0b57cec5SDimitry Andric   getActionDefinitionsBuilder({G_AND, G_OR, G_XOR})
216*0b57cec5SDimitry Andric     .legalFor({S32, S1, S64, V2S32, S16, V2S16, V4S16})
217*0b57cec5SDimitry Andric     .clampScalar(0, S32, S64)
218*0b57cec5SDimitry Andric     .moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
219*0b57cec5SDimitry Andric     .fewerElementsIf(vectorWiderThan(0, 32), fewerEltsToSize64Vector(0))
220*0b57cec5SDimitry Andric     .widenScalarToNextPow2(0)
221*0b57cec5SDimitry Andric     .scalarize(0);
222*0b57cec5SDimitry Andric 
223*0b57cec5SDimitry Andric   getActionDefinitionsBuilder({G_UADDO, G_SADDO, G_USUBO, G_SSUBO,
224*0b57cec5SDimitry Andric                                G_UADDE, G_SADDE, G_USUBE, G_SSUBE})
225*0b57cec5SDimitry Andric     .legalFor({{S32, S1}})
226*0b57cec5SDimitry Andric     .clampScalar(0, S32, S32);
227*0b57cec5SDimitry Andric 
228*0b57cec5SDimitry Andric   getActionDefinitionsBuilder(G_BITCAST)
229*0b57cec5SDimitry Andric     .legalForCartesianProduct({S32, V2S16})
230*0b57cec5SDimitry Andric     .legalForCartesianProduct({S64, V2S32, V4S16})
231*0b57cec5SDimitry Andric     .legalForCartesianProduct({V2S64, V4S32})
232*0b57cec5SDimitry Andric     // Don't worry about the size constraint.
233*0b57cec5SDimitry Andric     .legalIf(all(isPointer(0), isPointer(1)));
234*0b57cec5SDimitry Andric 
235*0b57cec5SDimitry Andric   if (ST.has16BitInsts()) {
236*0b57cec5SDimitry Andric     getActionDefinitionsBuilder(G_FCONSTANT)
237*0b57cec5SDimitry Andric       .legalFor({S32, S64, S16})
238*0b57cec5SDimitry Andric       .clampScalar(0, S16, S64);
239*0b57cec5SDimitry Andric   } else {
240*0b57cec5SDimitry Andric     getActionDefinitionsBuilder(G_FCONSTANT)
241*0b57cec5SDimitry Andric       .legalFor({S32, S64})
242*0b57cec5SDimitry Andric       .clampScalar(0, S32, S64);
243*0b57cec5SDimitry Andric   }
244*0b57cec5SDimitry Andric 
245*0b57cec5SDimitry Andric   getActionDefinitionsBuilder(G_IMPLICIT_DEF)
246*0b57cec5SDimitry Andric     .legalFor({S1, S32, S64, V2S32, V4S32, V2S16, V4S16, GlobalPtr,
247*0b57cec5SDimitry Andric                ConstantPtr, LocalPtr, FlatPtr, PrivatePtr})
248*0b57cec5SDimitry Andric     .moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
249*0b57cec5SDimitry Andric     .clampScalarOrElt(0, S32, S512)
250*0b57cec5SDimitry Andric     .legalIf(isMultiple32(0))
251*0b57cec5SDimitry Andric     .widenScalarToNextPow2(0, 32)
252*0b57cec5SDimitry Andric     .clampMaxNumElements(0, S32, 16);
253*0b57cec5SDimitry Andric 
254*0b57cec5SDimitry Andric 
255*0b57cec5SDimitry Andric   // FIXME: i1 operands to intrinsics should always be legal, but other i1
256*0b57cec5SDimitry Andric   // values may not be legal.  We need to figure out how to distinguish
257*0b57cec5SDimitry Andric   // between these two scenarios.
258*0b57cec5SDimitry Andric   getActionDefinitionsBuilder(G_CONSTANT)
259*0b57cec5SDimitry Andric     .legalFor({S1, S32, S64, GlobalPtr,
260*0b57cec5SDimitry Andric                LocalPtr, ConstantPtr, PrivatePtr, FlatPtr })
261*0b57cec5SDimitry Andric     .clampScalar(0, S32, S64)
262*0b57cec5SDimitry Andric     .widenScalarToNextPow2(0)
263*0b57cec5SDimitry Andric     .legalIf(isPointer(0));
264*0b57cec5SDimitry Andric 
265*0b57cec5SDimitry Andric   setAction({G_FRAME_INDEX, PrivatePtr}, Legal);
266*0b57cec5SDimitry Andric 
267*0b57cec5SDimitry Andric   auto &FPOpActions = getActionDefinitionsBuilder(
268*0b57cec5SDimitry Andric     { G_FADD, G_FMUL, G_FNEG, G_FABS, G_FMA, G_FCANONICALIZE})
269*0b57cec5SDimitry Andric     .legalFor({S32, S64});
270*0b57cec5SDimitry Andric 
271*0b57cec5SDimitry Andric   if (ST.has16BitInsts()) {
272*0b57cec5SDimitry Andric     if (ST.hasVOP3PInsts())
273*0b57cec5SDimitry Andric       FPOpActions.legalFor({S16, V2S16});
274*0b57cec5SDimitry Andric     else
275*0b57cec5SDimitry Andric       FPOpActions.legalFor({S16});
276*0b57cec5SDimitry Andric   }
277*0b57cec5SDimitry Andric 
278*0b57cec5SDimitry Andric   auto &MinNumMaxNum = getActionDefinitionsBuilder({
279*0b57cec5SDimitry Andric       G_FMINNUM, G_FMAXNUM, G_FMINNUM_IEEE, G_FMAXNUM_IEEE});
280*0b57cec5SDimitry Andric 
281*0b57cec5SDimitry Andric   if (ST.hasVOP3PInsts()) {
282*0b57cec5SDimitry Andric     MinNumMaxNum.customFor(FPTypesPK16)
283*0b57cec5SDimitry Andric       .clampMaxNumElements(0, S16, 2)
284*0b57cec5SDimitry Andric       .clampScalar(0, S16, S64)
285*0b57cec5SDimitry Andric       .scalarize(0);
286*0b57cec5SDimitry Andric   } else if (ST.has16BitInsts()) {
287*0b57cec5SDimitry Andric     MinNumMaxNum.customFor(FPTypes16)
288*0b57cec5SDimitry Andric       .clampScalar(0, S16, S64)
289*0b57cec5SDimitry Andric       .scalarize(0);
290*0b57cec5SDimitry Andric   } else {
291*0b57cec5SDimitry Andric     MinNumMaxNum.customFor(FPTypesBase)
292*0b57cec5SDimitry Andric       .clampScalar(0, S32, S64)
293*0b57cec5SDimitry Andric       .scalarize(0);
294*0b57cec5SDimitry Andric   }
295*0b57cec5SDimitry Andric 
296*0b57cec5SDimitry Andric   // TODO: Implement
297*0b57cec5SDimitry Andric   getActionDefinitionsBuilder({G_FMINIMUM, G_FMAXIMUM}).lower();
298*0b57cec5SDimitry Andric 
299*0b57cec5SDimitry Andric   if (ST.hasVOP3PInsts())
300*0b57cec5SDimitry Andric     FPOpActions.clampMaxNumElements(0, S16, 2);
301*0b57cec5SDimitry Andric   FPOpActions
302*0b57cec5SDimitry Andric     .scalarize(0)
303*0b57cec5SDimitry Andric     .clampScalar(0, ST.has16BitInsts() ? S16 : S32, S64);
304*0b57cec5SDimitry Andric 
305*0b57cec5SDimitry Andric   if (ST.has16BitInsts()) {
306*0b57cec5SDimitry Andric     getActionDefinitionsBuilder(G_FSQRT)
307*0b57cec5SDimitry Andric       .legalFor({S32, S64, S16})
308*0b57cec5SDimitry Andric       .scalarize(0)
309*0b57cec5SDimitry Andric       .clampScalar(0, S16, S64);
310*0b57cec5SDimitry Andric   } else {
311*0b57cec5SDimitry Andric     getActionDefinitionsBuilder(G_FSQRT)
312*0b57cec5SDimitry Andric       .legalFor({S32, S64})
313*0b57cec5SDimitry Andric       .scalarize(0)
314*0b57cec5SDimitry Andric       .clampScalar(0, S32, S64);
315*0b57cec5SDimitry Andric   }
316*0b57cec5SDimitry Andric 
317*0b57cec5SDimitry Andric   getActionDefinitionsBuilder(G_FPTRUNC)
318*0b57cec5SDimitry Andric     .legalFor({{S32, S64}, {S16, S32}})
319*0b57cec5SDimitry Andric     .scalarize(0);
320*0b57cec5SDimitry Andric 
321*0b57cec5SDimitry Andric   getActionDefinitionsBuilder(G_FPEXT)
322*0b57cec5SDimitry Andric     .legalFor({{S64, S32}, {S32, S16}})
323*0b57cec5SDimitry Andric     .lowerFor({{S64, S16}}) // FIXME: Implement
324*0b57cec5SDimitry Andric     .scalarize(0);
325*0b57cec5SDimitry Andric 
326*0b57cec5SDimitry Andric   // TODO: Verify V_BFI_B32 is generated from expanded bit ops.
327*0b57cec5SDimitry Andric   getActionDefinitionsBuilder(G_FCOPYSIGN).lower();
328*0b57cec5SDimitry Andric 
329*0b57cec5SDimitry Andric   getActionDefinitionsBuilder(G_FSUB)
330*0b57cec5SDimitry Andric       // Use actual fsub instruction
331*0b57cec5SDimitry Andric       .legalFor({S32})
332*0b57cec5SDimitry Andric       // Must use fadd + fneg
333*0b57cec5SDimitry Andric       .lowerFor({S64, S16, V2S16})
334*0b57cec5SDimitry Andric       .scalarize(0)
335*0b57cec5SDimitry Andric       .clampScalar(0, S32, S64);
336*0b57cec5SDimitry Andric 
337*0b57cec5SDimitry Andric   getActionDefinitionsBuilder({G_SEXT, G_ZEXT, G_ANYEXT})
338*0b57cec5SDimitry Andric     .legalFor({{S64, S32}, {S32, S16}, {S64, S16},
339*0b57cec5SDimitry Andric                {S32, S1}, {S64, S1}, {S16, S1},
340*0b57cec5SDimitry Andric                // FIXME: Hack
341*0b57cec5SDimitry Andric                {S64, LLT::scalar(33)},
342*0b57cec5SDimitry Andric                {S32, S8}, {S128, S32}, {S128, S64}, {S32, LLT::scalar(24)}})
343*0b57cec5SDimitry Andric     .scalarize(0);
344*0b57cec5SDimitry Andric 
345*0b57cec5SDimitry Andric   getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
346*0b57cec5SDimitry Andric     .legalFor({{S32, S32}, {S64, S32}})
347*0b57cec5SDimitry Andric     .lowerFor({{S32, S64}})
348*0b57cec5SDimitry Andric     .customFor({{S64, S64}})
349*0b57cec5SDimitry Andric     .scalarize(0);
350*0b57cec5SDimitry Andric 
351*0b57cec5SDimitry Andric   getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
352*0b57cec5SDimitry Andric     .legalFor({{S32, S32}, {S32, S64}})
353*0b57cec5SDimitry Andric     .scalarize(0);
354*0b57cec5SDimitry Andric 
355*0b57cec5SDimitry Andric   getActionDefinitionsBuilder(G_INTRINSIC_ROUND)
356*0b57cec5SDimitry Andric     .legalFor({S32, S64})
357*0b57cec5SDimitry Andric     .scalarize(0);
358*0b57cec5SDimitry Andric 
359*0b57cec5SDimitry Andric   if (ST.getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS) {
360*0b57cec5SDimitry Andric     getActionDefinitionsBuilder({G_INTRINSIC_TRUNC, G_FCEIL, G_FRINT})
361*0b57cec5SDimitry Andric       .legalFor({S32, S64})
362*0b57cec5SDimitry Andric       .clampScalar(0, S32, S64)
363*0b57cec5SDimitry Andric       .scalarize(0);
364*0b57cec5SDimitry Andric   } else {
365*0b57cec5SDimitry Andric     getActionDefinitionsBuilder({G_INTRINSIC_TRUNC, G_FCEIL, G_FRINT})
366*0b57cec5SDimitry Andric       .legalFor({S32})
367*0b57cec5SDimitry Andric       .customFor({S64})
368*0b57cec5SDimitry Andric       .clampScalar(0, S32, S64)
369*0b57cec5SDimitry Andric       .scalarize(0);
370*0b57cec5SDimitry Andric   }
371*0b57cec5SDimitry Andric 
372*0b57cec5SDimitry Andric   getActionDefinitionsBuilder(G_GEP)
373*0b57cec5SDimitry Andric     .legalForCartesianProduct(AddrSpaces64, {S64})
374*0b57cec5SDimitry Andric     .legalForCartesianProduct(AddrSpaces32, {S32})
375*0b57cec5SDimitry Andric     .scalarize(0);
376*0b57cec5SDimitry Andric 
377*0b57cec5SDimitry Andric   setAction({G_BLOCK_ADDR, CodePtr}, Legal);
378*0b57cec5SDimitry Andric 
379*0b57cec5SDimitry Andric   auto &CmpBuilder =
380*0b57cec5SDimitry Andric     getActionDefinitionsBuilder(G_ICMP)
381*0b57cec5SDimitry Andric     .legalForCartesianProduct(
382*0b57cec5SDimitry Andric       {S1}, {S32, S64, GlobalPtr, LocalPtr, ConstantPtr, PrivatePtr, FlatPtr})
383*0b57cec5SDimitry Andric     .legalFor({{S1, S32}, {S1, S64}});
384*0b57cec5SDimitry Andric   if (ST.has16BitInsts()) {
385*0b57cec5SDimitry Andric     CmpBuilder.legalFor({{S1, S16}});
386*0b57cec5SDimitry Andric   }
387*0b57cec5SDimitry Andric 
388*0b57cec5SDimitry Andric   CmpBuilder
389*0b57cec5SDimitry Andric     .widenScalarToNextPow2(1)
390*0b57cec5SDimitry Andric     .clampScalar(1, S32, S64)
391*0b57cec5SDimitry Andric     .scalarize(0)
392*0b57cec5SDimitry Andric     .legalIf(all(typeIs(0, S1), isPointer(1)));
393*0b57cec5SDimitry Andric 
394*0b57cec5SDimitry Andric   getActionDefinitionsBuilder(G_FCMP)
395*0b57cec5SDimitry Andric     .legalForCartesianProduct({S1}, ST.has16BitInsts() ? FPTypes16 : FPTypesBase)
396*0b57cec5SDimitry Andric     .widenScalarToNextPow2(1)
397*0b57cec5SDimitry Andric     .clampScalar(1, S32, S64)
398*0b57cec5SDimitry Andric     .scalarize(0);
399*0b57cec5SDimitry Andric 
400*0b57cec5SDimitry Andric   // FIXME: fexp, flog2, flog10 needs to be custom lowered.
401*0b57cec5SDimitry Andric   getActionDefinitionsBuilder({G_FPOW, G_FEXP, G_FEXP2,
402*0b57cec5SDimitry Andric                                G_FLOG, G_FLOG2, G_FLOG10})
403*0b57cec5SDimitry Andric     .legalFor({S32})
404*0b57cec5SDimitry Andric     .scalarize(0);
405*0b57cec5SDimitry Andric 
406*0b57cec5SDimitry Andric   // The 64-bit versions produce 32-bit results, but only on the SALU.
407*0b57cec5SDimitry Andric   getActionDefinitionsBuilder({G_CTLZ, G_CTLZ_ZERO_UNDEF,
408*0b57cec5SDimitry Andric                                G_CTTZ, G_CTTZ_ZERO_UNDEF,
409*0b57cec5SDimitry Andric                                G_CTPOP})
410*0b57cec5SDimitry Andric     .legalFor({{S32, S32}, {S32, S64}})
411*0b57cec5SDimitry Andric     .clampScalar(0, S32, S32)
412*0b57cec5SDimitry Andric     .clampScalar(1, S32, S64)
413*0b57cec5SDimitry Andric     .scalarize(0)
414*0b57cec5SDimitry Andric     .widenScalarToNextPow2(0, 32)
415*0b57cec5SDimitry Andric     .widenScalarToNextPow2(1, 32);
416*0b57cec5SDimitry Andric 
417*0b57cec5SDimitry Andric   // TODO: Expand for > s32
418*0b57cec5SDimitry Andric   getActionDefinitionsBuilder(G_BSWAP)
419*0b57cec5SDimitry Andric     .legalFor({S32})
420*0b57cec5SDimitry Andric     .clampScalar(0, S32, S32)
421*0b57cec5SDimitry Andric     .scalarize(0);
422*0b57cec5SDimitry Andric 
423*0b57cec5SDimitry Andric   if (ST.has16BitInsts()) {
424*0b57cec5SDimitry Andric     if (ST.hasVOP3PInsts()) {
425*0b57cec5SDimitry Andric       getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX})
426*0b57cec5SDimitry Andric         .legalFor({S32, S16, V2S16})
427*0b57cec5SDimitry Andric         .moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
428*0b57cec5SDimitry Andric         .clampMaxNumElements(0, S16, 2)
429*0b57cec5SDimitry Andric         .clampScalar(0, S16, S32)
430*0b57cec5SDimitry Andric         .widenScalarToNextPow2(0)
431*0b57cec5SDimitry Andric         .scalarize(0);
432*0b57cec5SDimitry Andric     } else {
433*0b57cec5SDimitry Andric       getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX})
434*0b57cec5SDimitry Andric         .legalFor({S32, S16})
435*0b57cec5SDimitry Andric         .widenScalarToNextPow2(0)
436*0b57cec5SDimitry Andric         .clampScalar(0, S16, S32)
437*0b57cec5SDimitry Andric         .scalarize(0);
438*0b57cec5SDimitry Andric     }
439*0b57cec5SDimitry Andric   } else {
440*0b57cec5SDimitry Andric     getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX})
441*0b57cec5SDimitry Andric       .legalFor({S32})
442*0b57cec5SDimitry Andric       .clampScalar(0, S32, S32)
443*0b57cec5SDimitry Andric       .widenScalarToNextPow2(0)
444*0b57cec5SDimitry Andric       .scalarize(0);
445*0b57cec5SDimitry Andric   }
446*0b57cec5SDimitry Andric 
447*0b57cec5SDimitry Andric   auto smallerThan = [](unsigned TypeIdx0, unsigned TypeIdx1) {
448*0b57cec5SDimitry Andric     return [=](const LegalityQuery &Query) {
449*0b57cec5SDimitry Andric       return Query.Types[TypeIdx0].getSizeInBits() <
450*0b57cec5SDimitry Andric              Query.Types[TypeIdx1].getSizeInBits();
451*0b57cec5SDimitry Andric     };
452*0b57cec5SDimitry Andric   };
453*0b57cec5SDimitry Andric 
454*0b57cec5SDimitry Andric   auto greaterThan = [](unsigned TypeIdx0, unsigned TypeIdx1) {
455*0b57cec5SDimitry Andric     return [=](const LegalityQuery &Query) {
456*0b57cec5SDimitry Andric       return Query.Types[TypeIdx0].getSizeInBits() >
457*0b57cec5SDimitry Andric              Query.Types[TypeIdx1].getSizeInBits();
458*0b57cec5SDimitry Andric     };
459*0b57cec5SDimitry Andric   };
460*0b57cec5SDimitry Andric 
461*0b57cec5SDimitry Andric   getActionDefinitionsBuilder(G_INTTOPTR)
462*0b57cec5SDimitry Andric     // List the common cases
463*0b57cec5SDimitry Andric     .legalForCartesianProduct(AddrSpaces64, {S64})
464*0b57cec5SDimitry Andric     .legalForCartesianProduct(AddrSpaces32, {S32})
465*0b57cec5SDimitry Andric     .scalarize(0)
466*0b57cec5SDimitry Andric     // Accept any address space as long as the size matches
467*0b57cec5SDimitry Andric     .legalIf(sameSize(0, 1))
468*0b57cec5SDimitry Andric     .widenScalarIf(smallerThan(1, 0),
469*0b57cec5SDimitry Andric       [](const LegalityQuery &Query) {
470*0b57cec5SDimitry Andric         return std::make_pair(1, LLT::scalar(Query.Types[0].getSizeInBits()));
471*0b57cec5SDimitry Andric       })
472*0b57cec5SDimitry Andric     .narrowScalarIf(greaterThan(1, 0),
473*0b57cec5SDimitry Andric       [](const LegalityQuery &Query) {
474*0b57cec5SDimitry Andric         return std::make_pair(1, LLT::scalar(Query.Types[0].getSizeInBits()));
475*0b57cec5SDimitry Andric       });
476*0b57cec5SDimitry Andric 
477*0b57cec5SDimitry Andric   getActionDefinitionsBuilder(G_PTRTOINT)
478*0b57cec5SDimitry Andric     // List the common cases
479*0b57cec5SDimitry Andric     .legalForCartesianProduct(AddrSpaces64, {S64})
480*0b57cec5SDimitry Andric     .legalForCartesianProduct(AddrSpaces32, {S32})
481*0b57cec5SDimitry Andric     .scalarize(0)
482*0b57cec5SDimitry Andric     // Accept any address space as long as the size matches
483*0b57cec5SDimitry Andric     .legalIf(sameSize(0, 1))
484*0b57cec5SDimitry Andric     .widenScalarIf(smallerThan(0, 1),
485*0b57cec5SDimitry Andric       [](const LegalityQuery &Query) {
486*0b57cec5SDimitry Andric         return std::make_pair(0, LLT::scalar(Query.Types[1].getSizeInBits()));
487*0b57cec5SDimitry Andric       })
488*0b57cec5SDimitry Andric     .narrowScalarIf(
489*0b57cec5SDimitry Andric       greaterThan(0, 1),
490*0b57cec5SDimitry Andric       [](const LegalityQuery &Query) {
491*0b57cec5SDimitry Andric         return std::make_pair(0, LLT::scalar(Query.Types[1].getSizeInBits()));
492*0b57cec5SDimitry Andric       });
493*0b57cec5SDimitry Andric 
494*0b57cec5SDimitry Andric   if (ST.hasFlatAddressSpace()) {
495*0b57cec5SDimitry Andric     getActionDefinitionsBuilder(G_ADDRSPACE_CAST)
496*0b57cec5SDimitry Andric       .scalarize(0)
497*0b57cec5SDimitry Andric       .custom();
498*0b57cec5SDimitry Andric   }
499*0b57cec5SDimitry Andric 
500*0b57cec5SDimitry Andric   // TODO: Should load to s16 be legal? Most loads extend to 32-bits, but we
501*0b57cec5SDimitry Andric   // handle some operations by just promoting the register during
502*0b57cec5SDimitry Andric   // selection. There are also d16 loads on GFX9+ which preserve the high bits.
503*0b57cec5SDimitry Andric   getActionDefinitionsBuilder({G_LOAD, G_STORE})
504*0b57cec5SDimitry Andric     .narrowScalarIf([](const LegalityQuery &Query) {
505*0b57cec5SDimitry Andric         unsigned Size = Query.Types[0].getSizeInBits();
506*0b57cec5SDimitry Andric         unsigned MemSize = Query.MMODescrs[0].SizeInBits;
507*0b57cec5SDimitry Andric         return (Size > 32 && MemSize < Size);
508*0b57cec5SDimitry Andric       },
509*0b57cec5SDimitry Andric       [](const LegalityQuery &Query) {
510*0b57cec5SDimitry Andric         return std::make_pair(0, LLT::scalar(32));
511*0b57cec5SDimitry Andric       })
512*0b57cec5SDimitry Andric     .fewerElementsIf([=](const LegalityQuery &Query) {
513*0b57cec5SDimitry Andric         unsigned MemSize = Query.MMODescrs[0].SizeInBits;
514*0b57cec5SDimitry Andric         return (MemSize == 96) &&
515*0b57cec5SDimitry Andric                Query.Types[0].isVector() &&
516*0b57cec5SDimitry Andric                !ST.hasDwordx3LoadStores();
517*0b57cec5SDimitry Andric       },
518*0b57cec5SDimitry Andric       [=](const LegalityQuery &Query) {
519*0b57cec5SDimitry Andric         return std::make_pair(0, V2S32);
520*0b57cec5SDimitry Andric       })
521*0b57cec5SDimitry Andric     .legalIf([=](const LegalityQuery &Query) {
522*0b57cec5SDimitry Andric         const LLT &Ty0 = Query.Types[0];
523*0b57cec5SDimitry Andric 
524*0b57cec5SDimitry Andric         unsigned Size = Ty0.getSizeInBits();
525*0b57cec5SDimitry Andric         unsigned MemSize = Query.MMODescrs[0].SizeInBits;
526*0b57cec5SDimitry Andric         if (Size < 32 || (Size > 32 && MemSize < Size))
527*0b57cec5SDimitry Andric           return false;
528*0b57cec5SDimitry Andric 
529*0b57cec5SDimitry Andric         if (Ty0.isVector() && Size != MemSize)
530*0b57cec5SDimitry Andric           return false;
531*0b57cec5SDimitry Andric 
532*0b57cec5SDimitry Andric         // TODO: Decompose private loads into 4-byte components.
533*0b57cec5SDimitry Andric         // TODO: Illegal flat loads on SI
534*0b57cec5SDimitry Andric         switch (MemSize) {
535*0b57cec5SDimitry Andric         case 8:
536*0b57cec5SDimitry Andric         case 16:
537*0b57cec5SDimitry Andric           return Size == 32;
538*0b57cec5SDimitry Andric         case 32:
539*0b57cec5SDimitry Andric         case 64:
540*0b57cec5SDimitry Andric         case 128:
541*0b57cec5SDimitry Andric           return true;
542*0b57cec5SDimitry Andric 
543*0b57cec5SDimitry Andric         case 96:
544*0b57cec5SDimitry Andric           return ST.hasDwordx3LoadStores();
545*0b57cec5SDimitry Andric 
546*0b57cec5SDimitry Andric         case 256:
547*0b57cec5SDimitry Andric         case 512:
548*0b57cec5SDimitry Andric           // TODO: Possibly support loads of i256 and i512 .  This will require
549*0b57cec5SDimitry Andric           // adding i256 and i512 types to MVT in order for to be able to use
550*0b57cec5SDimitry Andric           // TableGen.
551*0b57cec5SDimitry Andric           // TODO: Add support for other vector types, this will require
552*0b57cec5SDimitry Andric           //       defining more value mappings for the new types.
553*0b57cec5SDimitry Andric           return Ty0.isVector() && (Ty0.getScalarType().getSizeInBits() == 32 ||
554*0b57cec5SDimitry Andric                                     Ty0.getScalarType().getSizeInBits() == 64);
555*0b57cec5SDimitry Andric 
556*0b57cec5SDimitry Andric         default:
557*0b57cec5SDimitry Andric           return false;
558*0b57cec5SDimitry Andric         }
559*0b57cec5SDimitry Andric       })
560*0b57cec5SDimitry Andric     .clampScalar(0, S32, S64);
561*0b57cec5SDimitry Andric 
562*0b57cec5SDimitry Andric 
563*0b57cec5SDimitry Andric   // FIXME: Handle alignment requirements.
564*0b57cec5SDimitry Andric   auto &ExtLoads = getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD})
565*0b57cec5SDimitry Andric     .legalForTypesWithMemDesc({
566*0b57cec5SDimitry Andric         {S32, GlobalPtr, 8, 8},
567*0b57cec5SDimitry Andric         {S32, GlobalPtr, 16, 8},
568*0b57cec5SDimitry Andric         {S32, LocalPtr, 8, 8},
569*0b57cec5SDimitry Andric         {S32, LocalPtr, 16, 8},
570*0b57cec5SDimitry Andric         {S32, PrivatePtr, 8, 8},
571*0b57cec5SDimitry Andric         {S32, PrivatePtr, 16, 8}});
572*0b57cec5SDimitry Andric   if (ST.hasFlatAddressSpace()) {
573*0b57cec5SDimitry Andric     ExtLoads.legalForTypesWithMemDesc({{S32, FlatPtr, 8, 8},
574*0b57cec5SDimitry Andric                                        {S32, FlatPtr, 16, 8}});
575*0b57cec5SDimitry Andric   }
576*0b57cec5SDimitry Andric 
577*0b57cec5SDimitry Andric   ExtLoads.clampScalar(0, S32, S32)
578*0b57cec5SDimitry Andric           .widenScalarToNextPow2(0)
579*0b57cec5SDimitry Andric           .unsupportedIfMemSizeNotPow2()
580*0b57cec5SDimitry Andric           .lower();
581*0b57cec5SDimitry Andric 
582*0b57cec5SDimitry Andric   auto &Atomics = getActionDefinitionsBuilder(
583*0b57cec5SDimitry Andric     {G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, G_ATOMICRMW_SUB,
584*0b57cec5SDimitry Andric      G_ATOMICRMW_AND, G_ATOMICRMW_OR, G_ATOMICRMW_XOR,
585*0b57cec5SDimitry Andric      G_ATOMICRMW_MAX, G_ATOMICRMW_MIN, G_ATOMICRMW_UMAX,
586*0b57cec5SDimitry Andric      G_ATOMICRMW_UMIN, G_ATOMIC_CMPXCHG})
587*0b57cec5SDimitry Andric     .legalFor({{S32, GlobalPtr}, {S32, LocalPtr},
588*0b57cec5SDimitry Andric                {S64, GlobalPtr}, {S64, LocalPtr}});
589*0b57cec5SDimitry Andric   if (ST.hasFlatAddressSpace()) {
590*0b57cec5SDimitry Andric     Atomics.legalFor({{S32, FlatPtr}, {S64, FlatPtr}});
591*0b57cec5SDimitry Andric   }
592*0b57cec5SDimitry Andric 
593*0b57cec5SDimitry Andric   // TODO: Pointer types, any 32-bit or 64-bit vector
594*0b57cec5SDimitry Andric   getActionDefinitionsBuilder(G_SELECT)
595*0b57cec5SDimitry Andric     .legalForCartesianProduct({S32, S64, S16, V2S32, V2S16, V4S16,
596*0b57cec5SDimitry Andric           GlobalPtr, LocalPtr, FlatPtr, PrivatePtr,
597*0b57cec5SDimitry Andric           LLT::vector(2, LocalPtr), LLT::vector(2, PrivatePtr)}, {S1})
598*0b57cec5SDimitry Andric     .clampScalar(0, S16, S64)
599*0b57cec5SDimitry Andric     .moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
600*0b57cec5SDimitry Andric     .fewerElementsIf(numElementsNotEven(0), scalarize(0))
601*0b57cec5SDimitry Andric     .scalarize(1)
602*0b57cec5SDimitry Andric     .clampMaxNumElements(0, S32, 2)
603*0b57cec5SDimitry Andric     .clampMaxNumElements(0, LocalPtr, 2)
604*0b57cec5SDimitry Andric     .clampMaxNumElements(0, PrivatePtr, 2)
605*0b57cec5SDimitry Andric     .scalarize(0)
606*0b57cec5SDimitry Andric     .widenScalarToNextPow2(0)
607*0b57cec5SDimitry Andric     .legalIf(all(isPointer(0), typeIs(1, S1)));
608*0b57cec5SDimitry Andric 
609*0b57cec5SDimitry Andric   // TODO: Only the low 4/5/6 bits of the shift amount are observed, so we can
610*0b57cec5SDimitry Andric   // be more flexible with the shift amount type.
611*0b57cec5SDimitry Andric   auto &Shifts = getActionDefinitionsBuilder({G_SHL, G_LSHR, G_ASHR})
612*0b57cec5SDimitry Andric     .legalFor({{S32, S32}, {S64, S32}});
613*0b57cec5SDimitry Andric   if (ST.has16BitInsts()) {
614*0b57cec5SDimitry Andric     if (ST.hasVOP3PInsts()) {
615*0b57cec5SDimitry Andric       Shifts.legalFor({{S16, S32}, {S16, S16}, {V2S16, V2S16}})
616*0b57cec5SDimitry Andric             .clampMaxNumElements(0, S16, 2);
617*0b57cec5SDimitry Andric     } else
618*0b57cec5SDimitry Andric       Shifts.legalFor({{S16, S32}, {S16, S16}});
619*0b57cec5SDimitry Andric 
620*0b57cec5SDimitry Andric     Shifts.clampScalar(1, S16, S32);
621*0b57cec5SDimitry Andric     Shifts.clampScalar(0, S16, S64);
622*0b57cec5SDimitry Andric     Shifts.widenScalarToNextPow2(0, 16);
623*0b57cec5SDimitry Andric   } else {
624*0b57cec5SDimitry Andric     // Make sure we legalize the shift amount type first, as the general
625*0b57cec5SDimitry Andric     // expansion for the shifted type will produce much worse code if it hasn't
626*0b57cec5SDimitry Andric     // been truncated already.
627*0b57cec5SDimitry Andric     Shifts.clampScalar(1, S32, S32);
628*0b57cec5SDimitry Andric     Shifts.clampScalar(0, S32, S64);
629*0b57cec5SDimitry Andric     Shifts.widenScalarToNextPow2(0, 32);
630*0b57cec5SDimitry Andric   }
631*0b57cec5SDimitry Andric   Shifts.scalarize(0);
632*0b57cec5SDimitry Andric 
633*0b57cec5SDimitry Andric   for (unsigned Op : {G_EXTRACT_VECTOR_ELT, G_INSERT_VECTOR_ELT}) {
634*0b57cec5SDimitry Andric     unsigned VecTypeIdx = Op == G_EXTRACT_VECTOR_ELT ? 1 : 0;
635*0b57cec5SDimitry Andric     unsigned EltTypeIdx = Op == G_EXTRACT_VECTOR_ELT ? 0 : 1;
636*0b57cec5SDimitry Andric     unsigned IdxTypeIdx = 2;
637*0b57cec5SDimitry Andric 
638*0b57cec5SDimitry Andric     getActionDefinitionsBuilder(Op)
639*0b57cec5SDimitry Andric       .customIf([=](const LegalityQuery &Query) {
640*0b57cec5SDimitry Andric           const LLT EltTy = Query.Types[EltTypeIdx];
641*0b57cec5SDimitry Andric           const LLT VecTy = Query.Types[VecTypeIdx];
642*0b57cec5SDimitry Andric           const LLT IdxTy = Query.Types[IdxTypeIdx];
643*0b57cec5SDimitry Andric           return (EltTy.getSizeInBits() == 16 ||
644*0b57cec5SDimitry Andric                   EltTy.getSizeInBits() % 32 == 0) &&
645*0b57cec5SDimitry Andric                  VecTy.getSizeInBits() % 32 == 0 &&
646*0b57cec5SDimitry Andric                  VecTy.getSizeInBits() <= 512 &&
647*0b57cec5SDimitry Andric                  IdxTy.getSizeInBits() == 32;
648*0b57cec5SDimitry Andric         })
649*0b57cec5SDimitry Andric       .clampScalar(EltTypeIdx, S32, S64)
650*0b57cec5SDimitry Andric       .clampScalar(VecTypeIdx, S32, S64)
651*0b57cec5SDimitry Andric       .clampScalar(IdxTypeIdx, S32, S32);
652*0b57cec5SDimitry Andric   }
653*0b57cec5SDimitry Andric 
654*0b57cec5SDimitry Andric   getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
655*0b57cec5SDimitry Andric     .unsupportedIf([=](const LegalityQuery &Query) {
656*0b57cec5SDimitry Andric         const LLT &EltTy = Query.Types[1].getElementType();
657*0b57cec5SDimitry Andric         return Query.Types[0] != EltTy;
658*0b57cec5SDimitry Andric       });
659*0b57cec5SDimitry Andric 
660*0b57cec5SDimitry Andric   for (unsigned Op : {G_EXTRACT, G_INSERT}) {
661*0b57cec5SDimitry Andric     unsigned BigTyIdx = Op == G_EXTRACT ? 1 : 0;
662*0b57cec5SDimitry Andric     unsigned LitTyIdx = Op == G_EXTRACT ? 0 : 1;
663*0b57cec5SDimitry Andric 
664*0b57cec5SDimitry Andric     // FIXME: Doesn't handle extract of illegal sizes.
665*0b57cec5SDimitry Andric     getActionDefinitionsBuilder(Op)
666*0b57cec5SDimitry Andric       .legalIf([=](const LegalityQuery &Query) {
667*0b57cec5SDimitry Andric           const LLT BigTy = Query.Types[BigTyIdx];
668*0b57cec5SDimitry Andric           const LLT LitTy = Query.Types[LitTyIdx];
669*0b57cec5SDimitry Andric           return (BigTy.getSizeInBits() % 32 == 0) &&
670*0b57cec5SDimitry Andric                  (LitTy.getSizeInBits() % 16 == 0);
671*0b57cec5SDimitry Andric         })
672*0b57cec5SDimitry Andric       .widenScalarIf(
673*0b57cec5SDimitry Andric         [=](const LegalityQuery &Query) {
674*0b57cec5SDimitry Andric           const LLT BigTy = Query.Types[BigTyIdx];
675*0b57cec5SDimitry Andric           return (BigTy.getScalarSizeInBits() < 16);
676*0b57cec5SDimitry Andric         },
677*0b57cec5SDimitry Andric         LegalizeMutations::widenScalarOrEltToNextPow2(BigTyIdx, 16))
678*0b57cec5SDimitry Andric       .widenScalarIf(
679*0b57cec5SDimitry Andric         [=](const LegalityQuery &Query) {
680*0b57cec5SDimitry Andric           const LLT LitTy = Query.Types[LitTyIdx];
681*0b57cec5SDimitry Andric           return (LitTy.getScalarSizeInBits() < 16);
682*0b57cec5SDimitry Andric         },
683*0b57cec5SDimitry Andric         LegalizeMutations::widenScalarOrEltToNextPow2(LitTyIdx, 16))
684*0b57cec5SDimitry Andric       .moreElementsIf(isSmallOddVector(BigTyIdx), oneMoreElement(BigTyIdx))
685*0b57cec5SDimitry Andric       .widenScalarToNextPow2(BigTyIdx, 32);
686*0b57cec5SDimitry Andric 
687*0b57cec5SDimitry Andric   }
688*0b57cec5SDimitry Andric 
689*0b57cec5SDimitry Andric   getActionDefinitionsBuilder(G_BUILD_VECTOR)
690*0b57cec5SDimitry Andric       .legalForCartesianProduct(AllS32Vectors, {S32})
691*0b57cec5SDimitry Andric       .legalForCartesianProduct(AllS64Vectors, {S64})
692*0b57cec5SDimitry Andric       .clampNumElements(0, V16S32, V16S32)
693*0b57cec5SDimitry Andric       .clampNumElements(0, V2S64, V8S64)
694*0b57cec5SDimitry Andric       .minScalarSameAs(1, 0)
695*0b57cec5SDimitry Andric       .legalIf(isRegisterType(0))
696*0b57cec5SDimitry Andric       .minScalarOrElt(0, S32);
697*0b57cec5SDimitry Andric 
698*0b57cec5SDimitry Andric   getActionDefinitionsBuilder(G_CONCAT_VECTORS)
699*0b57cec5SDimitry Andric     .legalIf(isRegisterType(0));
700*0b57cec5SDimitry Andric 
701*0b57cec5SDimitry Andric   // Merge/Unmerge
702*0b57cec5SDimitry Andric   for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
703*0b57cec5SDimitry Andric     unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
704*0b57cec5SDimitry Andric     unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
705*0b57cec5SDimitry Andric 
706*0b57cec5SDimitry Andric     auto notValidElt = [=](const LegalityQuery &Query, unsigned TypeIdx) {
707*0b57cec5SDimitry Andric       const LLT &Ty = Query.Types[TypeIdx];
708*0b57cec5SDimitry Andric       if (Ty.isVector()) {
709*0b57cec5SDimitry Andric         const LLT &EltTy = Ty.getElementType();
710*0b57cec5SDimitry Andric         if (EltTy.getSizeInBits() < 8 || EltTy.getSizeInBits() > 64)
711*0b57cec5SDimitry Andric           return true;
712*0b57cec5SDimitry Andric         if (!isPowerOf2_32(EltTy.getSizeInBits()))
713*0b57cec5SDimitry Andric           return true;
714*0b57cec5SDimitry Andric       }
715*0b57cec5SDimitry Andric       return false;
716*0b57cec5SDimitry Andric     };
717*0b57cec5SDimitry Andric 
718*0b57cec5SDimitry Andric     getActionDefinitionsBuilder(Op)
719*0b57cec5SDimitry Andric       .widenScalarToNextPow2(LitTyIdx, /*Min*/ 16)
720*0b57cec5SDimitry Andric       // Clamp the little scalar to s8-s256 and make it a power of 2. It's not
721*0b57cec5SDimitry Andric       // worth considering the multiples of 64 since 2*192 and 2*384 are not
722*0b57cec5SDimitry Andric       // valid.
723*0b57cec5SDimitry Andric       .clampScalar(LitTyIdx, S16, S256)
724*0b57cec5SDimitry Andric       .widenScalarToNextPow2(LitTyIdx, /*Min*/ 32)
725*0b57cec5SDimitry Andric 
726*0b57cec5SDimitry Andric       // Break up vectors with weird elements into scalars
727*0b57cec5SDimitry Andric       .fewerElementsIf(
728*0b57cec5SDimitry Andric         [=](const LegalityQuery &Query) { return notValidElt(Query, 0); },
729*0b57cec5SDimitry Andric         scalarize(0))
730*0b57cec5SDimitry Andric       .fewerElementsIf(
731*0b57cec5SDimitry Andric         [=](const LegalityQuery &Query) { return notValidElt(Query, 1); },
732*0b57cec5SDimitry Andric         scalarize(1))
733*0b57cec5SDimitry Andric       .clampScalar(BigTyIdx, S32, S512)
734*0b57cec5SDimitry Andric       .widenScalarIf(
735*0b57cec5SDimitry Andric         [=](const LegalityQuery &Query) {
736*0b57cec5SDimitry Andric           const LLT &Ty = Query.Types[BigTyIdx];
737*0b57cec5SDimitry Andric           return !isPowerOf2_32(Ty.getSizeInBits()) &&
738*0b57cec5SDimitry Andric                  Ty.getSizeInBits() % 16 != 0;
739*0b57cec5SDimitry Andric         },
740*0b57cec5SDimitry Andric         [=](const LegalityQuery &Query) {
741*0b57cec5SDimitry Andric           // Pick the next power of 2, or a multiple of 64 over 128.
742*0b57cec5SDimitry Andric           // Whichever is smaller.
743*0b57cec5SDimitry Andric           const LLT &Ty = Query.Types[BigTyIdx];
744*0b57cec5SDimitry Andric           unsigned NewSizeInBits = 1 << Log2_32_Ceil(Ty.getSizeInBits() + 1);
745*0b57cec5SDimitry Andric           if (NewSizeInBits >= 256) {
746*0b57cec5SDimitry Andric             unsigned RoundedTo = alignTo<64>(Ty.getSizeInBits() + 1);
747*0b57cec5SDimitry Andric             if (RoundedTo < NewSizeInBits)
748*0b57cec5SDimitry Andric               NewSizeInBits = RoundedTo;
749*0b57cec5SDimitry Andric           }
750*0b57cec5SDimitry Andric           return std::make_pair(BigTyIdx, LLT::scalar(NewSizeInBits));
751*0b57cec5SDimitry Andric         })
752*0b57cec5SDimitry Andric       .legalIf([=](const LegalityQuery &Query) {
753*0b57cec5SDimitry Andric           const LLT &BigTy = Query.Types[BigTyIdx];
754*0b57cec5SDimitry Andric           const LLT &LitTy = Query.Types[LitTyIdx];
755*0b57cec5SDimitry Andric 
756*0b57cec5SDimitry Andric           if (BigTy.isVector() && BigTy.getSizeInBits() < 32)
757*0b57cec5SDimitry Andric             return false;
758*0b57cec5SDimitry Andric           if (LitTy.isVector() && LitTy.getSizeInBits() < 32)
759*0b57cec5SDimitry Andric             return false;
760*0b57cec5SDimitry Andric 
761*0b57cec5SDimitry Andric           return BigTy.getSizeInBits() % 16 == 0 &&
762*0b57cec5SDimitry Andric                  LitTy.getSizeInBits() % 16 == 0 &&
763*0b57cec5SDimitry Andric                  BigTy.getSizeInBits() <= 512;
764*0b57cec5SDimitry Andric         })
765*0b57cec5SDimitry Andric       // Any vectors left are the wrong size. Scalarize them.
766*0b57cec5SDimitry Andric       .scalarize(0)
767*0b57cec5SDimitry Andric       .scalarize(1);
768*0b57cec5SDimitry Andric   }
769*0b57cec5SDimitry Andric 
770*0b57cec5SDimitry Andric   computeTables();
771*0b57cec5SDimitry Andric   verify(*ST.getInstrInfo());
772*0b57cec5SDimitry Andric }
773*0b57cec5SDimitry Andric 
774*0b57cec5SDimitry Andric bool AMDGPULegalizerInfo::legalizeCustom(MachineInstr &MI,
775*0b57cec5SDimitry Andric                                          MachineRegisterInfo &MRI,
776*0b57cec5SDimitry Andric                                          MachineIRBuilder &MIRBuilder,
777*0b57cec5SDimitry Andric                                          GISelChangeObserver &Observer) const {
778*0b57cec5SDimitry Andric   switch (MI.getOpcode()) {
779*0b57cec5SDimitry Andric   case TargetOpcode::G_ADDRSPACE_CAST:
780*0b57cec5SDimitry Andric     return legalizeAddrSpaceCast(MI, MRI, MIRBuilder);
781*0b57cec5SDimitry Andric   case TargetOpcode::G_FRINT:
782*0b57cec5SDimitry Andric     return legalizeFrint(MI, MRI, MIRBuilder);
783*0b57cec5SDimitry Andric   case TargetOpcode::G_FCEIL:
784*0b57cec5SDimitry Andric     return legalizeFceil(MI, MRI, MIRBuilder);
785*0b57cec5SDimitry Andric   case TargetOpcode::G_INTRINSIC_TRUNC:
786*0b57cec5SDimitry Andric     return legalizeIntrinsicTrunc(MI, MRI, MIRBuilder);
787*0b57cec5SDimitry Andric   case TargetOpcode::G_SITOFP:
788*0b57cec5SDimitry Andric     return legalizeITOFP(MI, MRI, MIRBuilder, true);
789*0b57cec5SDimitry Andric   case TargetOpcode::G_UITOFP:
790*0b57cec5SDimitry Andric     return legalizeITOFP(MI, MRI, MIRBuilder, false);
791*0b57cec5SDimitry Andric   case TargetOpcode::G_FMINNUM:
792*0b57cec5SDimitry Andric   case TargetOpcode::G_FMAXNUM:
793*0b57cec5SDimitry Andric   case TargetOpcode::G_FMINNUM_IEEE:
794*0b57cec5SDimitry Andric   case TargetOpcode::G_FMAXNUM_IEEE:
795*0b57cec5SDimitry Andric     return legalizeMinNumMaxNum(MI, MRI, MIRBuilder);
796*0b57cec5SDimitry Andric   case TargetOpcode::G_EXTRACT_VECTOR_ELT:
797*0b57cec5SDimitry Andric     return legalizeExtractVectorElt(MI, MRI, MIRBuilder);
798*0b57cec5SDimitry Andric   case TargetOpcode::G_INSERT_VECTOR_ELT:
799*0b57cec5SDimitry Andric     return legalizeInsertVectorElt(MI, MRI, MIRBuilder);
800*0b57cec5SDimitry Andric   default:
801*0b57cec5SDimitry Andric     return false;
802*0b57cec5SDimitry Andric   }
803*0b57cec5SDimitry Andric 
804*0b57cec5SDimitry Andric   llvm_unreachable("expected switch to return");
805*0b57cec5SDimitry Andric }
806*0b57cec5SDimitry Andric 
807*0b57cec5SDimitry Andric Register AMDGPULegalizerInfo::getSegmentAperture(
808*0b57cec5SDimitry Andric   unsigned AS,
809*0b57cec5SDimitry Andric   MachineRegisterInfo &MRI,
810*0b57cec5SDimitry Andric   MachineIRBuilder &MIRBuilder) const {
811*0b57cec5SDimitry Andric   MachineFunction &MF = MIRBuilder.getMF();
812*0b57cec5SDimitry Andric   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
813*0b57cec5SDimitry Andric   const LLT S32 = LLT::scalar(32);
814*0b57cec5SDimitry Andric 
815*0b57cec5SDimitry Andric   if (ST.hasApertureRegs()) {
816*0b57cec5SDimitry Andric     // FIXME: Use inline constants (src_{shared, private}_base) instead of
817*0b57cec5SDimitry Andric     // getreg.
818*0b57cec5SDimitry Andric     unsigned Offset = AS == AMDGPUAS::LOCAL_ADDRESS ?
819*0b57cec5SDimitry Andric         AMDGPU::Hwreg::OFFSET_SRC_SHARED_BASE :
820*0b57cec5SDimitry Andric         AMDGPU::Hwreg::OFFSET_SRC_PRIVATE_BASE;
821*0b57cec5SDimitry Andric     unsigned WidthM1 = AS == AMDGPUAS::LOCAL_ADDRESS ?
822*0b57cec5SDimitry Andric         AMDGPU::Hwreg::WIDTH_M1_SRC_SHARED_BASE :
823*0b57cec5SDimitry Andric         AMDGPU::Hwreg::WIDTH_M1_SRC_PRIVATE_BASE;
824*0b57cec5SDimitry Andric     unsigned Encoding =
825*0b57cec5SDimitry Andric         AMDGPU::Hwreg::ID_MEM_BASES << AMDGPU::Hwreg::ID_SHIFT_ |
826*0b57cec5SDimitry Andric         Offset << AMDGPU::Hwreg::OFFSET_SHIFT_ |
827*0b57cec5SDimitry Andric         WidthM1 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_;
828*0b57cec5SDimitry Andric 
829*0b57cec5SDimitry Andric     Register ApertureReg = MRI.createGenericVirtualRegister(S32);
830*0b57cec5SDimitry Andric     Register GetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
831*0b57cec5SDimitry Andric 
832*0b57cec5SDimitry Andric     MIRBuilder.buildInstr(AMDGPU::S_GETREG_B32)
833*0b57cec5SDimitry Andric       .addDef(GetReg)
834*0b57cec5SDimitry Andric       .addImm(Encoding);
835*0b57cec5SDimitry Andric     MRI.setType(GetReg, S32);
836*0b57cec5SDimitry Andric 
837*0b57cec5SDimitry Andric     auto ShiftAmt = MIRBuilder.buildConstant(S32, WidthM1 + 1);
838*0b57cec5SDimitry Andric     MIRBuilder.buildInstr(TargetOpcode::G_SHL)
839*0b57cec5SDimitry Andric       .addDef(ApertureReg)
840*0b57cec5SDimitry Andric       .addUse(GetReg)
841*0b57cec5SDimitry Andric       .addUse(ShiftAmt.getReg(0));
842*0b57cec5SDimitry Andric 
843*0b57cec5SDimitry Andric     return ApertureReg;
844*0b57cec5SDimitry Andric   }
845*0b57cec5SDimitry Andric 
846*0b57cec5SDimitry Andric   Register QueuePtr = MRI.createGenericVirtualRegister(
847*0b57cec5SDimitry Andric     LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
848*0b57cec5SDimitry Andric 
849*0b57cec5SDimitry Andric   // FIXME: Placeholder until we can track the input registers.
850*0b57cec5SDimitry Andric   MIRBuilder.buildConstant(QueuePtr, 0xdeadbeef);
851*0b57cec5SDimitry Andric 
852*0b57cec5SDimitry Andric   // Offset into amd_queue_t for group_segment_aperture_base_hi /
853*0b57cec5SDimitry Andric   // private_segment_aperture_base_hi.
854*0b57cec5SDimitry Andric   uint32_t StructOffset = (AS == AMDGPUAS::LOCAL_ADDRESS) ? 0x40 : 0x44;
855*0b57cec5SDimitry Andric 
856*0b57cec5SDimitry Andric   // FIXME: Don't use undef
857*0b57cec5SDimitry Andric   Value *V = UndefValue::get(PointerType::get(
858*0b57cec5SDimitry Andric                                Type::getInt8Ty(MF.getFunction().getContext()),
859*0b57cec5SDimitry Andric                                AMDGPUAS::CONSTANT_ADDRESS));
860*0b57cec5SDimitry Andric 
861*0b57cec5SDimitry Andric   MachinePointerInfo PtrInfo(V, StructOffset);
862*0b57cec5SDimitry Andric   MachineMemOperand *MMO = MF.getMachineMemOperand(
863*0b57cec5SDimitry Andric     PtrInfo,
864*0b57cec5SDimitry Andric     MachineMemOperand::MOLoad |
865*0b57cec5SDimitry Andric     MachineMemOperand::MODereferenceable |
866*0b57cec5SDimitry Andric     MachineMemOperand::MOInvariant,
867*0b57cec5SDimitry Andric     4,
868*0b57cec5SDimitry Andric     MinAlign(64, StructOffset));
869*0b57cec5SDimitry Andric 
870*0b57cec5SDimitry Andric   Register LoadResult = MRI.createGenericVirtualRegister(S32);
871*0b57cec5SDimitry Andric   Register LoadAddr;
872*0b57cec5SDimitry Andric 
873*0b57cec5SDimitry Andric   MIRBuilder.materializeGEP(LoadAddr, QueuePtr, LLT::scalar(64), StructOffset);
874*0b57cec5SDimitry Andric   MIRBuilder.buildLoad(LoadResult, LoadAddr, *MMO);
875*0b57cec5SDimitry Andric   return LoadResult;
876*0b57cec5SDimitry Andric }
877*0b57cec5SDimitry Andric 
878*0b57cec5SDimitry Andric bool AMDGPULegalizerInfo::legalizeAddrSpaceCast(
879*0b57cec5SDimitry Andric   MachineInstr &MI, MachineRegisterInfo &MRI,
880*0b57cec5SDimitry Andric   MachineIRBuilder &MIRBuilder) const {
881*0b57cec5SDimitry Andric   MachineFunction &MF = MIRBuilder.getMF();
882*0b57cec5SDimitry Andric 
883*0b57cec5SDimitry Andric   MIRBuilder.setInstr(MI);
884*0b57cec5SDimitry Andric 
885*0b57cec5SDimitry Andric   Register Dst = MI.getOperand(0).getReg();
886*0b57cec5SDimitry Andric   Register Src = MI.getOperand(1).getReg();
887*0b57cec5SDimitry Andric 
888*0b57cec5SDimitry Andric   LLT DstTy = MRI.getType(Dst);
889*0b57cec5SDimitry Andric   LLT SrcTy = MRI.getType(Src);
890*0b57cec5SDimitry Andric   unsigned DestAS = DstTy.getAddressSpace();
891*0b57cec5SDimitry Andric   unsigned SrcAS = SrcTy.getAddressSpace();
892*0b57cec5SDimitry Andric 
893*0b57cec5SDimitry Andric   // TODO: Avoid reloading from the queue ptr for each cast, or at least each
894*0b57cec5SDimitry Andric   // vector element.
895*0b57cec5SDimitry Andric   assert(!DstTy.isVector());
896*0b57cec5SDimitry Andric 
897*0b57cec5SDimitry Andric   const AMDGPUTargetMachine &TM
898*0b57cec5SDimitry Andric     = static_cast<const AMDGPUTargetMachine &>(MF.getTarget());
899*0b57cec5SDimitry Andric 
900*0b57cec5SDimitry Andric   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
901*0b57cec5SDimitry Andric   if (ST.getTargetLowering()->isNoopAddrSpaceCast(SrcAS, DestAS)) {
902*0b57cec5SDimitry Andric     MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BITCAST));
903*0b57cec5SDimitry Andric     return true;
904*0b57cec5SDimitry Andric   }
905*0b57cec5SDimitry Andric 
906*0b57cec5SDimitry Andric   if (SrcAS == AMDGPUAS::FLAT_ADDRESS) {
907*0b57cec5SDimitry Andric     assert(DestAS == AMDGPUAS::LOCAL_ADDRESS ||
908*0b57cec5SDimitry Andric            DestAS == AMDGPUAS::PRIVATE_ADDRESS);
909*0b57cec5SDimitry Andric     unsigned NullVal = TM.getNullPointerValue(DestAS);
910*0b57cec5SDimitry Andric 
911*0b57cec5SDimitry Andric     auto SegmentNull = MIRBuilder.buildConstant(DstTy, NullVal);
912*0b57cec5SDimitry Andric     auto FlatNull = MIRBuilder.buildConstant(SrcTy, 0);
913*0b57cec5SDimitry Andric 
914*0b57cec5SDimitry Andric     Register PtrLo32 = MRI.createGenericVirtualRegister(DstTy);
915*0b57cec5SDimitry Andric 
916*0b57cec5SDimitry Andric     // Extract low 32-bits of the pointer.
917*0b57cec5SDimitry Andric     MIRBuilder.buildExtract(PtrLo32, Src, 0);
918*0b57cec5SDimitry Andric 
919*0b57cec5SDimitry Andric     Register CmpRes = MRI.createGenericVirtualRegister(LLT::scalar(1));
920*0b57cec5SDimitry Andric     MIRBuilder.buildICmp(CmpInst::ICMP_NE, CmpRes, Src, FlatNull.getReg(0));
921*0b57cec5SDimitry Andric     MIRBuilder.buildSelect(Dst, CmpRes, PtrLo32, SegmentNull.getReg(0));
922*0b57cec5SDimitry Andric 
923*0b57cec5SDimitry Andric     MI.eraseFromParent();
924*0b57cec5SDimitry Andric     return true;
925*0b57cec5SDimitry Andric   }
926*0b57cec5SDimitry Andric 
927*0b57cec5SDimitry Andric   assert(SrcAS == AMDGPUAS::LOCAL_ADDRESS ||
928*0b57cec5SDimitry Andric          SrcAS == AMDGPUAS::PRIVATE_ADDRESS);
929*0b57cec5SDimitry Andric 
930*0b57cec5SDimitry Andric   auto SegmentNull =
931*0b57cec5SDimitry Andric       MIRBuilder.buildConstant(SrcTy, TM.getNullPointerValue(SrcAS));
932*0b57cec5SDimitry Andric   auto FlatNull =
933*0b57cec5SDimitry Andric       MIRBuilder.buildConstant(DstTy, TM.getNullPointerValue(DestAS));
934*0b57cec5SDimitry Andric 
935*0b57cec5SDimitry Andric   Register ApertureReg = getSegmentAperture(DestAS, MRI, MIRBuilder);
936*0b57cec5SDimitry Andric 
937*0b57cec5SDimitry Andric   Register CmpRes = MRI.createGenericVirtualRegister(LLT::scalar(1));
938*0b57cec5SDimitry Andric   MIRBuilder.buildICmp(CmpInst::ICMP_NE, CmpRes, Src, SegmentNull.getReg(0));
939*0b57cec5SDimitry Andric 
940*0b57cec5SDimitry Andric   Register BuildPtr = MRI.createGenericVirtualRegister(DstTy);
941*0b57cec5SDimitry Andric 
942*0b57cec5SDimitry Andric   // Coerce the type of the low half of the result so we can use merge_values.
943*0b57cec5SDimitry Andric   Register SrcAsInt = MRI.createGenericVirtualRegister(LLT::scalar(32));
944*0b57cec5SDimitry Andric   MIRBuilder.buildInstr(TargetOpcode::G_PTRTOINT)
945*0b57cec5SDimitry Andric     .addDef(SrcAsInt)
946*0b57cec5SDimitry Andric     .addUse(Src);
947*0b57cec5SDimitry Andric 
948*0b57cec5SDimitry Andric   // TODO: Should we allow mismatched types but matching sizes in merges to
949*0b57cec5SDimitry Andric   // avoid the ptrtoint?
950*0b57cec5SDimitry Andric   MIRBuilder.buildMerge(BuildPtr, {SrcAsInt, ApertureReg});
951*0b57cec5SDimitry Andric   MIRBuilder.buildSelect(Dst, CmpRes, BuildPtr, FlatNull.getReg(0));
952*0b57cec5SDimitry Andric 
953*0b57cec5SDimitry Andric   MI.eraseFromParent();
954*0b57cec5SDimitry Andric   return true;
955*0b57cec5SDimitry Andric }
956*0b57cec5SDimitry Andric 
957*0b57cec5SDimitry Andric bool AMDGPULegalizerInfo::legalizeFrint(
958*0b57cec5SDimitry Andric   MachineInstr &MI, MachineRegisterInfo &MRI,
959*0b57cec5SDimitry Andric   MachineIRBuilder &MIRBuilder) const {
960*0b57cec5SDimitry Andric   MIRBuilder.setInstr(MI);
961*0b57cec5SDimitry Andric 
962*0b57cec5SDimitry Andric   Register Src = MI.getOperand(1).getReg();
963*0b57cec5SDimitry Andric   LLT Ty = MRI.getType(Src);
964*0b57cec5SDimitry Andric   assert(Ty.isScalar() && Ty.getSizeInBits() == 64);
965*0b57cec5SDimitry Andric 
966*0b57cec5SDimitry Andric   APFloat C1Val(APFloat::IEEEdouble(), "0x1.0p+52");
967*0b57cec5SDimitry Andric   APFloat C2Val(APFloat::IEEEdouble(), "0x1.fffffffffffffp+51");
968*0b57cec5SDimitry Andric 
969*0b57cec5SDimitry Andric   auto C1 = MIRBuilder.buildFConstant(Ty, C1Val);
970*0b57cec5SDimitry Andric   auto CopySign = MIRBuilder.buildFCopysign(Ty, C1, Src);
971*0b57cec5SDimitry Andric 
972*0b57cec5SDimitry Andric   // TODO: Should this propagate fast-math-flags?
973*0b57cec5SDimitry Andric   auto Tmp1 = MIRBuilder.buildFAdd(Ty, Src, CopySign);
974*0b57cec5SDimitry Andric   auto Tmp2 = MIRBuilder.buildFSub(Ty, Tmp1, CopySign);
975*0b57cec5SDimitry Andric 
976*0b57cec5SDimitry Andric   auto C2 = MIRBuilder.buildFConstant(Ty, C2Val);
977*0b57cec5SDimitry Andric   auto Fabs = MIRBuilder.buildFAbs(Ty, Src);
978*0b57cec5SDimitry Andric 
979*0b57cec5SDimitry Andric   auto Cond = MIRBuilder.buildFCmp(CmpInst::FCMP_OGT, LLT::scalar(1), Fabs, C2);
980*0b57cec5SDimitry Andric   MIRBuilder.buildSelect(MI.getOperand(0).getReg(), Cond, Src, Tmp2);
981*0b57cec5SDimitry Andric   return true;
982*0b57cec5SDimitry Andric }
983*0b57cec5SDimitry Andric 
984*0b57cec5SDimitry Andric bool AMDGPULegalizerInfo::legalizeFceil(
985*0b57cec5SDimitry Andric   MachineInstr &MI, MachineRegisterInfo &MRI,
986*0b57cec5SDimitry Andric   MachineIRBuilder &B) const {
987*0b57cec5SDimitry Andric   B.setInstr(MI);
988*0b57cec5SDimitry Andric 
989*0b57cec5SDimitry Andric   const LLT S1 = LLT::scalar(1);
990*0b57cec5SDimitry Andric   const LLT S64 = LLT::scalar(64);
991*0b57cec5SDimitry Andric 
992*0b57cec5SDimitry Andric   Register Src = MI.getOperand(1).getReg();
993*0b57cec5SDimitry Andric   assert(MRI.getType(Src) == S64);
994*0b57cec5SDimitry Andric 
995*0b57cec5SDimitry Andric   // result = trunc(src)
996*0b57cec5SDimitry Andric   // if (src > 0.0 && src != result)
997*0b57cec5SDimitry Andric   //   result += 1.0
998*0b57cec5SDimitry Andric 
999*0b57cec5SDimitry Andric   auto Trunc = B.buildInstr(TargetOpcode::G_INTRINSIC_TRUNC, {S64}, {Src});
1000*0b57cec5SDimitry Andric 
1001*0b57cec5SDimitry Andric   const auto Zero = B.buildFConstant(S64, 0.0);
1002*0b57cec5SDimitry Andric   const auto One = B.buildFConstant(S64, 1.0);
1003*0b57cec5SDimitry Andric   auto Lt0 = B.buildFCmp(CmpInst::FCMP_OGT, S1, Src, Zero);
1004*0b57cec5SDimitry Andric   auto NeTrunc = B.buildFCmp(CmpInst::FCMP_ONE, S1, Src, Trunc);
1005*0b57cec5SDimitry Andric   auto And = B.buildAnd(S1, Lt0, NeTrunc);
1006*0b57cec5SDimitry Andric   auto Add = B.buildSelect(S64, And, One, Zero);
1007*0b57cec5SDimitry Andric 
1008*0b57cec5SDimitry Andric   // TODO: Should this propagate fast-math-flags?
1009*0b57cec5SDimitry Andric   B.buildFAdd(MI.getOperand(0).getReg(), Trunc, Add);
1010*0b57cec5SDimitry Andric   return true;
1011*0b57cec5SDimitry Andric }
1012*0b57cec5SDimitry Andric 
1013*0b57cec5SDimitry Andric static MachineInstrBuilder extractF64Exponent(unsigned Hi,
1014*0b57cec5SDimitry Andric                                               MachineIRBuilder &B) {
1015*0b57cec5SDimitry Andric   const unsigned FractBits = 52;
1016*0b57cec5SDimitry Andric   const unsigned ExpBits = 11;
1017*0b57cec5SDimitry Andric   LLT S32 = LLT::scalar(32);
1018*0b57cec5SDimitry Andric 
1019*0b57cec5SDimitry Andric   auto Const0 = B.buildConstant(S32, FractBits - 32);
1020*0b57cec5SDimitry Andric   auto Const1 = B.buildConstant(S32, ExpBits);
1021*0b57cec5SDimitry Andric 
1022*0b57cec5SDimitry Andric   auto ExpPart = B.buildIntrinsic(Intrinsic::amdgcn_ubfe, {S32}, false)
1023*0b57cec5SDimitry Andric     .addUse(Const0.getReg(0))
1024*0b57cec5SDimitry Andric     .addUse(Const1.getReg(0));
1025*0b57cec5SDimitry Andric 
1026*0b57cec5SDimitry Andric   return B.buildSub(S32, ExpPart, B.buildConstant(S32, 1023));
1027*0b57cec5SDimitry Andric }
1028*0b57cec5SDimitry Andric 
1029*0b57cec5SDimitry Andric bool AMDGPULegalizerInfo::legalizeIntrinsicTrunc(
1030*0b57cec5SDimitry Andric   MachineInstr &MI, MachineRegisterInfo &MRI,
1031*0b57cec5SDimitry Andric   MachineIRBuilder &B) const {
1032*0b57cec5SDimitry Andric   B.setInstr(MI);
1033*0b57cec5SDimitry Andric 
1034*0b57cec5SDimitry Andric   const LLT S1 = LLT::scalar(1);
1035*0b57cec5SDimitry Andric   const LLT S32 = LLT::scalar(32);
1036*0b57cec5SDimitry Andric   const LLT S64 = LLT::scalar(64);
1037*0b57cec5SDimitry Andric 
1038*0b57cec5SDimitry Andric   Register Src = MI.getOperand(1).getReg();
1039*0b57cec5SDimitry Andric   assert(MRI.getType(Src) == S64);
1040*0b57cec5SDimitry Andric 
1041*0b57cec5SDimitry Andric   // TODO: Should this use extract since the low half is unused?
1042*0b57cec5SDimitry Andric   auto Unmerge = B.buildUnmerge({S32, S32}, Src);
1043*0b57cec5SDimitry Andric   Register Hi = Unmerge.getReg(1);
1044*0b57cec5SDimitry Andric 
1045*0b57cec5SDimitry Andric   // Extract the upper half, since this is where we will find the sign and
1046*0b57cec5SDimitry Andric   // exponent.
1047*0b57cec5SDimitry Andric   auto Exp = extractF64Exponent(Hi, B);
1048*0b57cec5SDimitry Andric 
1049*0b57cec5SDimitry Andric   const unsigned FractBits = 52;
1050*0b57cec5SDimitry Andric 
1051*0b57cec5SDimitry Andric   // Extract the sign bit.
1052*0b57cec5SDimitry Andric   const auto SignBitMask = B.buildConstant(S32, UINT32_C(1) << 31);
1053*0b57cec5SDimitry Andric   auto SignBit = B.buildAnd(S32, Hi, SignBitMask);
1054*0b57cec5SDimitry Andric 
1055*0b57cec5SDimitry Andric   const auto FractMask = B.buildConstant(S64, (UINT64_C(1) << FractBits) - 1);
1056*0b57cec5SDimitry Andric 
1057*0b57cec5SDimitry Andric   const auto Zero32 = B.buildConstant(S32, 0);
1058*0b57cec5SDimitry Andric 
1059*0b57cec5SDimitry Andric   // Extend back to 64-bits.
1060*0b57cec5SDimitry Andric   auto SignBit64 = B.buildMerge(S64, {Zero32.getReg(0), SignBit.getReg(0)});
1061*0b57cec5SDimitry Andric 
1062*0b57cec5SDimitry Andric   auto Shr = B.buildAShr(S64, FractMask, Exp);
1063*0b57cec5SDimitry Andric   auto Not = B.buildNot(S64, Shr);
1064*0b57cec5SDimitry Andric   auto Tmp0 = B.buildAnd(S64, Src, Not);
1065*0b57cec5SDimitry Andric   auto FiftyOne = B.buildConstant(S32, FractBits - 1);
1066*0b57cec5SDimitry Andric 
1067*0b57cec5SDimitry Andric   auto ExpLt0 = B.buildICmp(CmpInst::ICMP_SLT, S1, Exp, Zero32);
1068*0b57cec5SDimitry Andric   auto ExpGt51 = B.buildICmp(CmpInst::ICMP_SGT, S1, Exp, FiftyOne);
1069*0b57cec5SDimitry Andric 
1070*0b57cec5SDimitry Andric   auto Tmp1 = B.buildSelect(S64, ExpLt0, SignBit64, Tmp0);
1071*0b57cec5SDimitry Andric   B.buildSelect(MI.getOperand(0).getReg(), ExpGt51, Src, Tmp1);
1072*0b57cec5SDimitry Andric   return true;
1073*0b57cec5SDimitry Andric }
1074*0b57cec5SDimitry Andric 
1075*0b57cec5SDimitry Andric bool AMDGPULegalizerInfo::legalizeITOFP(
1076*0b57cec5SDimitry Andric   MachineInstr &MI, MachineRegisterInfo &MRI,
1077*0b57cec5SDimitry Andric   MachineIRBuilder &B, bool Signed) const {
1078*0b57cec5SDimitry Andric   B.setInstr(MI);
1079*0b57cec5SDimitry Andric 
1080*0b57cec5SDimitry Andric   Register Dst = MI.getOperand(0).getReg();
1081*0b57cec5SDimitry Andric   Register Src = MI.getOperand(1).getReg();
1082*0b57cec5SDimitry Andric 
1083*0b57cec5SDimitry Andric   const LLT S64 = LLT::scalar(64);
1084*0b57cec5SDimitry Andric   const LLT S32 = LLT::scalar(32);
1085*0b57cec5SDimitry Andric 
1086*0b57cec5SDimitry Andric   assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S64);
1087*0b57cec5SDimitry Andric 
1088*0b57cec5SDimitry Andric   auto Unmerge = B.buildUnmerge({S32, S32}, Src);
1089*0b57cec5SDimitry Andric 
1090*0b57cec5SDimitry Andric   auto CvtHi = Signed ?
1091*0b57cec5SDimitry Andric     B.buildSITOFP(S64, Unmerge.getReg(1)) :
1092*0b57cec5SDimitry Andric     B.buildUITOFP(S64, Unmerge.getReg(1));
1093*0b57cec5SDimitry Andric 
1094*0b57cec5SDimitry Andric   auto CvtLo = B.buildUITOFP(S64, Unmerge.getReg(0));
1095*0b57cec5SDimitry Andric 
1096*0b57cec5SDimitry Andric   auto ThirtyTwo = B.buildConstant(S32, 32);
1097*0b57cec5SDimitry Andric   auto LdExp = B.buildIntrinsic(Intrinsic::amdgcn_ldexp, {S64}, false)
1098*0b57cec5SDimitry Andric     .addUse(CvtHi.getReg(0))
1099*0b57cec5SDimitry Andric     .addUse(ThirtyTwo.getReg(0));
1100*0b57cec5SDimitry Andric 
1101*0b57cec5SDimitry Andric   // TODO: Should this propagate fast-math-flags?
1102*0b57cec5SDimitry Andric   B.buildFAdd(Dst, LdExp, CvtLo);
1103*0b57cec5SDimitry Andric   MI.eraseFromParent();
1104*0b57cec5SDimitry Andric   return true;
1105*0b57cec5SDimitry Andric }
1106*0b57cec5SDimitry Andric 
1107*0b57cec5SDimitry Andric bool AMDGPULegalizerInfo::legalizeMinNumMaxNum(
1108*0b57cec5SDimitry Andric   MachineInstr &MI, MachineRegisterInfo &MRI,
1109*0b57cec5SDimitry Andric   MachineIRBuilder &B) const {
1110*0b57cec5SDimitry Andric   MachineFunction &MF = B.getMF();
1111*0b57cec5SDimitry Andric   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1112*0b57cec5SDimitry Andric 
1113*0b57cec5SDimitry Andric   const bool IsIEEEOp = MI.getOpcode() == AMDGPU::G_FMINNUM_IEEE ||
1114*0b57cec5SDimitry Andric                         MI.getOpcode() == AMDGPU::G_FMAXNUM_IEEE;
1115*0b57cec5SDimitry Andric 
1116*0b57cec5SDimitry Andric   // With ieee_mode disabled, the instructions have the correct behavior
1117*0b57cec5SDimitry Andric   // already for G_FMINNUM/G_FMAXNUM
1118*0b57cec5SDimitry Andric   if (!MFI->getMode().IEEE)
1119*0b57cec5SDimitry Andric     return !IsIEEEOp;
1120*0b57cec5SDimitry Andric 
1121*0b57cec5SDimitry Andric   if (IsIEEEOp)
1122*0b57cec5SDimitry Andric     return true;
1123*0b57cec5SDimitry Andric 
1124*0b57cec5SDimitry Andric   MachineIRBuilder HelperBuilder(MI);
1125*0b57cec5SDimitry Andric   GISelObserverWrapper DummyObserver;
1126*0b57cec5SDimitry Andric   LegalizerHelper Helper(MF, DummyObserver, HelperBuilder);
1127*0b57cec5SDimitry Andric   HelperBuilder.setMBB(*MI.getParent());
1128*0b57cec5SDimitry Andric   return Helper.lowerFMinNumMaxNum(MI) == LegalizerHelper::Legalized;
1129*0b57cec5SDimitry Andric }
1130*0b57cec5SDimitry Andric 
1131*0b57cec5SDimitry Andric bool AMDGPULegalizerInfo::legalizeExtractVectorElt(
1132*0b57cec5SDimitry Andric   MachineInstr &MI, MachineRegisterInfo &MRI,
1133*0b57cec5SDimitry Andric   MachineIRBuilder &B) const {
1134*0b57cec5SDimitry Andric   // TODO: Should move some of this into LegalizerHelper.
1135*0b57cec5SDimitry Andric 
1136*0b57cec5SDimitry Andric   // TODO: Promote dynamic indexing of s16 to s32
1137*0b57cec5SDimitry Andric   // TODO: Dynamic s64 indexing is only legal for SGPR.
1138*0b57cec5SDimitry Andric   Optional<int64_t> IdxVal = getConstantVRegVal(MI.getOperand(2).getReg(), MRI);
1139*0b57cec5SDimitry Andric   if (!IdxVal) // Dynamic case will be selected to register indexing.
1140*0b57cec5SDimitry Andric     return true;
1141*0b57cec5SDimitry Andric 
1142*0b57cec5SDimitry Andric   Register Dst = MI.getOperand(0).getReg();
1143*0b57cec5SDimitry Andric   Register Vec = MI.getOperand(1).getReg();
1144*0b57cec5SDimitry Andric 
1145*0b57cec5SDimitry Andric   LLT VecTy = MRI.getType(Vec);
1146*0b57cec5SDimitry Andric   LLT EltTy = VecTy.getElementType();
1147*0b57cec5SDimitry Andric   assert(EltTy == MRI.getType(Dst));
1148*0b57cec5SDimitry Andric 
1149*0b57cec5SDimitry Andric   B.setInstr(MI);
1150*0b57cec5SDimitry Andric 
1151*0b57cec5SDimitry Andric   if (IdxVal.getValue() < VecTy.getNumElements())
1152*0b57cec5SDimitry Andric     B.buildExtract(Dst, Vec, IdxVal.getValue() * EltTy.getSizeInBits());
1153*0b57cec5SDimitry Andric   else
1154*0b57cec5SDimitry Andric     B.buildUndef(Dst);
1155*0b57cec5SDimitry Andric 
1156*0b57cec5SDimitry Andric   MI.eraseFromParent();
1157*0b57cec5SDimitry Andric   return true;
1158*0b57cec5SDimitry Andric }
1159*0b57cec5SDimitry Andric 
1160*0b57cec5SDimitry Andric bool AMDGPULegalizerInfo::legalizeInsertVectorElt(
1161*0b57cec5SDimitry Andric   MachineInstr &MI, MachineRegisterInfo &MRI,
1162*0b57cec5SDimitry Andric   MachineIRBuilder &B) const {
1163*0b57cec5SDimitry Andric   // TODO: Should move some of this into LegalizerHelper.
1164*0b57cec5SDimitry Andric 
1165*0b57cec5SDimitry Andric   // TODO: Promote dynamic indexing of s16 to s32
1166*0b57cec5SDimitry Andric   // TODO: Dynamic s64 indexing is only legal for SGPR.
1167*0b57cec5SDimitry Andric   Optional<int64_t> IdxVal = getConstantVRegVal(MI.getOperand(3).getReg(), MRI);
1168*0b57cec5SDimitry Andric   if (!IdxVal) // Dynamic case will be selected to register indexing.
1169*0b57cec5SDimitry Andric     return true;
1170*0b57cec5SDimitry Andric 
1171*0b57cec5SDimitry Andric   Register Dst = MI.getOperand(0).getReg();
1172*0b57cec5SDimitry Andric   Register Vec = MI.getOperand(1).getReg();
1173*0b57cec5SDimitry Andric   Register Ins = MI.getOperand(2).getReg();
1174*0b57cec5SDimitry Andric 
1175*0b57cec5SDimitry Andric   LLT VecTy = MRI.getType(Vec);
1176*0b57cec5SDimitry Andric   LLT EltTy = VecTy.getElementType();
1177*0b57cec5SDimitry Andric   assert(EltTy == MRI.getType(Ins));
1178*0b57cec5SDimitry Andric 
1179*0b57cec5SDimitry Andric   B.setInstr(MI);
1180*0b57cec5SDimitry Andric 
1181*0b57cec5SDimitry Andric   if (IdxVal.getValue() < VecTy.getNumElements())
1182*0b57cec5SDimitry Andric     B.buildInsert(Dst, Vec, Ins, IdxVal.getValue() * EltTy.getSizeInBits());
1183*0b57cec5SDimitry Andric   else
1184*0b57cec5SDimitry Andric     B.buildUndef(Dst);
1185*0b57cec5SDimitry Andric 
1186*0b57cec5SDimitry Andric   MI.eraseFromParent();
1187*0b57cec5SDimitry Andric   return true;
1188*0b57cec5SDimitry Andric }
1189*0b57cec5SDimitry Andric 
1190*0b57cec5SDimitry Andric // Return the use branch instruction, otherwise null if the usage is invalid.
1191*0b57cec5SDimitry Andric static MachineInstr *verifyCFIntrinsic(MachineInstr &MI,
1192*0b57cec5SDimitry Andric                                        MachineRegisterInfo &MRI) {
1193*0b57cec5SDimitry Andric   Register CondDef = MI.getOperand(0).getReg();
1194*0b57cec5SDimitry Andric   if (!MRI.hasOneNonDBGUse(CondDef))
1195*0b57cec5SDimitry Andric     return nullptr;
1196*0b57cec5SDimitry Andric 
1197*0b57cec5SDimitry Andric   MachineInstr &UseMI = *MRI.use_instr_nodbg_begin(CondDef);
1198*0b57cec5SDimitry Andric   return UseMI.getParent() == MI.getParent() &&
1199*0b57cec5SDimitry Andric     UseMI.getOpcode() == AMDGPU::G_BRCOND ? &UseMI : nullptr;
1200*0b57cec5SDimitry Andric }
1201*0b57cec5SDimitry Andric 
1202*0b57cec5SDimitry Andric Register AMDGPULegalizerInfo::getLiveInRegister(MachineRegisterInfo &MRI,
1203*0b57cec5SDimitry Andric                                                 Register Reg, LLT Ty) const {
1204*0b57cec5SDimitry Andric   Register LiveIn = MRI.getLiveInVirtReg(Reg);
1205*0b57cec5SDimitry Andric   if (LiveIn)
1206*0b57cec5SDimitry Andric     return LiveIn;
1207*0b57cec5SDimitry Andric 
1208*0b57cec5SDimitry Andric   Register NewReg = MRI.createGenericVirtualRegister(Ty);
1209*0b57cec5SDimitry Andric   MRI.addLiveIn(Reg, NewReg);
1210*0b57cec5SDimitry Andric   return NewReg;
1211*0b57cec5SDimitry Andric }
1212*0b57cec5SDimitry Andric 
1213*0b57cec5SDimitry Andric bool AMDGPULegalizerInfo::loadInputValue(Register DstReg, MachineIRBuilder &B,
1214*0b57cec5SDimitry Andric                                          const ArgDescriptor *Arg) const {
1215*0b57cec5SDimitry Andric   if (!Arg->isRegister())
1216*0b57cec5SDimitry Andric     return false; // TODO: Handle these
1217*0b57cec5SDimitry Andric 
1218*0b57cec5SDimitry Andric   assert(Arg->getRegister() != 0);
1219*0b57cec5SDimitry Andric   assert(Arg->getRegister().isPhysical());
1220*0b57cec5SDimitry Andric 
1221*0b57cec5SDimitry Andric   MachineRegisterInfo &MRI = *B.getMRI();
1222*0b57cec5SDimitry Andric 
1223*0b57cec5SDimitry Andric   LLT Ty = MRI.getType(DstReg);
1224*0b57cec5SDimitry Andric   Register LiveIn = getLiveInRegister(MRI, Arg->getRegister(), Ty);
1225*0b57cec5SDimitry Andric 
1226*0b57cec5SDimitry Andric   if (Arg->isMasked()) {
1227*0b57cec5SDimitry Andric     // TODO: Should we try to emit this once in the entry block?
1228*0b57cec5SDimitry Andric     const LLT S32 = LLT::scalar(32);
1229*0b57cec5SDimitry Andric     const unsigned Mask = Arg->getMask();
1230*0b57cec5SDimitry Andric     const unsigned Shift = countTrailingZeros<unsigned>(Mask);
1231*0b57cec5SDimitry Andric 
1232*0b57cec5SDimitry Andric     auto ShiftAmt = B.buildConstant(S32, Shift);
1233*0b57cec5SDimitry Andric     auto LShr = B.buildLShr(S32, LiveIn, ShiftAmt);
1234*0b57cec5SDimitry Andric     B.buildAnd(DstReg, LShr, B.buildConstant(S32, Mask >> Shift));
1235*0b57cec5SDimitry Andric   } else
1236*0b57cec5SDimitry Andric     B.buildCopy(DstReg, LiveIn);
1237*0b57cec5SDimitry Andric 
1238*0b57cec5SDimitry Andric   // Insert the argument copy if it doens't already exist.
1239*0b57cec5SDimitry Andric   // FIXME: It seems EmitLiveInCopies isn't called anywhere?
1240*0b57cec5SDimitry Andric   if (!MRI.getVRegDef(LiveIn)) {
1241*0b57cec5SDimitry Andric     MachineBasicBlock &EntryMBB = B.getMF().front();
1242*0b57cec5SDimitry Andric     EntryMBB.addLiveIn(Arg->getRegister());
1243*0b57cec5SDimitry Andric     B.setInsertPt(EntryMBB, EntryMBB.begin());
1244*0b57cec5SDimitry Andric     B.buildCopy(LiveIn, Arg->getRegister());
1245*0b57cec5SDimitry Andric   }
1246*0b57cec5SDimitry Andric 
1247*0b57cec5SDimitry Andric   return true;
1248*0b57cec5SDimitry Andric }
1249*0b57cec5SDimitry Andric 
1250*0b57cec5SDimitry Andric bool AMDGPULegalizerInfo::legalizePreloadedArgIntrin(
1251*0b57cec5SDimitry Andric   MachineInstr &MI,
1252*0b57cec5SDimitry Andric   MachineRegisterInfo &MRI,
1253*0b57cec5SDimitry Andric   MachineIRBuilder &B,
1254*0b57cec5SDimitry Andric   AMDGPUFunctionArgInfo::PreloadedValue ArgType) const {
1255*0b57cec5SDimitry Andric   B.setInstr(MI);
1256*0b57cec5SDimitry Andric 
1257*0b57cec5SDimitry Andric   const SIMachineFunctionInfo *MFI = B.getMF().getInfo<SIMachineFunctionInfo>();
1258*0b57cec5SDimitry Andric 
1259*0b57cec5SDimitry Andric   const ArgDescriptor *Arg;
1260*0b57cec5SDimitry Andric   const TargetRegisterClass *RC;
1261*0b57cec5SDimitry Andric   std::tie(Arg, RC) = MFI->getPreloadedValue(ArgType);
1262*0b57cec5SDimitry Andric   if (!Arg) {
1263*0b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "Required arg register missing\n");
1264*0b57cec5SDimitry Andric     return false;
1265*0b57cec5SDimitry Andric   }
1266*0b57cec5SDimitry Andric 
1267*0b57cec5SDimitry Andric   if (loadInputValue(MI.getOperand(0).getReg(), B, Arg)) {
1268*0b57cec5SDimitry Andric     MI.eraseFromParent();
1269*0b57cec5SDimitry Andric     return true;
1270*0b57cec5SDimitry Andric   }
1271*0b57cec5SDimitry Andric 
1272*0b57cec5SDimitry Andric   return false;
1273*0b57cec5SDimitry Andric }
1274*0b57cec5SDimitry Andric 
1275*0b57cec5SDimitry Andric bool AMDGPULegalizerInfo::legalizeImplicitArgPtr(MachineInstr &MI,
1276*0b57cec5SDimitry Andric                                                  MachineRegisterInfo &MRI,
1277*0b57cec5SDimitry Andric                                                  MachineIRBuilder &B) const {
1278*0b57cec5SDimitry Andric   const SIMachineFunctionInfo *MFI = B.getMF().getInfo<SIMachineFunctionInfo>();
1279*0b57cec5SDimitry Andric   if (!MFI->isEntryFunction()) {
1280*0b57cec5SDimitry Andric     return legalizePreloadedArgIntrin(MI, MRI, B,
1281*0b57cec5SDimitry Andric                                       AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR);
1282*0b57cec5SDimitry Andric   }
1283*0b57cec5SDimitry Andric 
1284*0b57cec5SDimitry Andric   B.setInstr(MI);
1285*0b57cec5SDimitry Andric 
1286*0b57cec5SDimitry Andric   uint64_t Offset =
1287*0b57cec5SDimitry Andric     ST.getTargetLowering()->getImplicitParameterOffset(
1288*0b57cec5SDimitry Andric       B.getMF(), AMDGPUTargetLowering::FIRST_IMPLICIT);
1289*0b57cec5SDimitry Andric   Register DstReg = MI.getOperand(0).getReg();
1290*0b57cec5SDimitry Andric   LLT DstTy = MRI.getType(DstReg);
1291*0b57cec5SDimitry Andric   LLT IdxTy = LLT::scalar(DstTy.getSizeInBits());
1292*0b57cec5SDimitry Andric 
1293*0b57cec5SDimitry Andric   const ArgDescriptor *Arg;
1294*0b57cec5SDimitry Andric   const TargetRegisterClass *RC;
1295*0b57cec5SDimitry Andric   std::tie(Arg, RC)
1296*0b57cec5SDimitry Andric     = MFI->getPreloadedValue(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
1297*0b57cec5SDimitry Andric   if (!Arg)
1298*0b57cec5SDimitry Andric     return false;
1299*0b57cec5SDimitry Andric 
1300*0b57cec5SDimitry Andric   Register KernargPtrReg = MRI.createGenericVirtualRegister(DstTy);
1301*0b57cec5SDimitry Andric   if (!loadInputValue(KernargPtrReg, B, Arg))
1302*0b57cec5SDimitry Andric     return false;
1303*0b57cec5SDimitry Andric 
1304*0b57cec5SDimitry Andric   B.buildGEP(DstReg, KernargPtrReg, B.buildConstant(IdxTy, Offset).getReg(0));
1305*0b57cec5SDimitry Andric   MI.eraseFromParent();
1306*0b57cec5SDimitry Andric   return true;
1307*0b57cec5SDimitry Andric }
1308*0b57cec5SDimitry Andric 
1309*0b57cec5SDimitry Andric bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
1310*0b57cec5SDimitry Andric                                             MachineRegisterInfo &MRI,
1311*0b57cec5SDimitry Andric                                             MachineIRBuilder &B) const {
1312*0b57cec5SDimitry Andric   // Replace the use G_BRCOND with the exec manipulate and branch pseudos.
1313*0b57cec5SDimitry Andric   switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) {
1314*0b57cec5SDimitry Andric   case Intrinsic::amdgcn_if: {
1315*0b57cec5SDimitry Andric     if (MachineInstr *BrCond = verifyCFIntrinsic(MI, MRI)) {
1316*0b57cec5SDimitry Andric       const SIRegisterInfo *TRI
1317*0b57cec5SDimitry Andric         = static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
1318*0b57cec5SDimitry Andric 
1319*0b57cec5SDimitry Andric       B.setInstr(*BrCond);
1320*0b57cec5SDimitry Andric       Register Def = MI.getOperand(1).getReg();
1321*0b57cec5SDimitry Andric       Register Use = MI.getOperand(3).getReg();
1322*0b57cec5SDimitry Andric       B.buildInstr(AMDGPU::SI_IF)
1323*0b57cec5SDimitry Andric         .addDef(Def)
1324*0b57cec5SDimitry Andric         .addUse(Use)
1325*0b57cec5SDimitry Andric         .addMBB(BrCond->getOperand(1).getMBB());
1326*0b57cec5SDimitry Andric 
1327*0b57cec5SDimitry Andric       MRI.setRegClass(Def, TRI->getWaveMaskRegClass());
1328*0b57cec5SDimitry Andric       MRI.setRegClass(Use, TRI->getWaveMaskRegClass());
1329*0b57cec5SDimitry Andric       MI.eraseFromParent();
1330*0b57cec5SDimitry Andric       BrCond->eraseFromParent();
1331*0b57cec5SDimitry Andric       return true;
1332*0b57cec5SDimitry Andric     }
1333*0b57cec5SDimitry Andric 
1334*0b57cec5SDimitry Andric     return false;
1335*0b57cec5SDimitry Andric   }
1336*0b57cec5SDimitry Andric   case Intrinsic::amdgcn_loop: {
1337*0b57cec5SDimitry Andric     if (MachineInstr *BrCond = verifyCFIntrinsic(MI, MRI)) {
1338*0b57cec5SDimitry Andric       const SIRegisterInfo *TRI
1339*0b57cec5SDimitry Andric         = static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
1340*0b57cec5SDimitry Andric 
1341*0b57cec5SDimitry Andric       B.setInstr(*BrCond);
1342*0b57cec5SDimitry Andric       Register Reg = MI.getOperand(2).getReg();
1343*0b57cec5SDimitry Andric       B.buildInstr(AMDGPU::SI_LOOP)
1344*0b57cec5SDimitry Andric         .addUse(Reg)
1345*0b57cec5SDimitry Andric         .addMBB(BrCond->getOperand(1).getMBB());
1346*0b57cec5SDimitry Andric       MI.eraseFromParent();
1347*0b57cec5SDimitry Andric       BrCond->eraseFromParent();
1348*0b57cec5SDimitry Andric       MRI.setRegClass(Reg, TRI->getWaveMaskRegClass());
1349*0b57cec5SDimitry Andric       return true;
1350*0b57cec5SDimitry Andric     }
1351*0b57cec5SDimitry Andric 
1352*0b57cec5SDimitry Andric     return false;
1353*0b57cec5SDimitry Andric   }
1354*0b57cec5SDimitry Andric   case Intrinsic::amdgcn_kernarg_segment_ptr:
1355*0b57cec5SDimitry Andric     return legalizePreloadedArgIntrin(
1356*0b57cec5SDimitry Andric       MI, MRI, B, AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
1357*0b57cec5SDimitry Andric   case Intrinsic::amdgcn_implicitarg_ptr:
1358*0b57cec5SDimitry Andric     return legalizeImplicitArgPtr(MI, MRI, B);
1359*0b57cec5SDimitry Andric   case Intrinsic::amdgcn_workitem_id_x:
1360*0b57cec5SDimitry Andric     return legalizePreloadedArgIntrin(MI, MRI, B,
1361*0b57cec5SDimitry Andric                                       AMDGPUFunctionArgInfo::WORKITEM_ID_X);
1362*0b57cec5SDimitry Andric   case Intrinsic::amdgcn_workitem_id_y:
1363*0b57cec5SDimitry Andric     return legalizePreloadedArgIntrin(MI, MRI, B,
1364*0b57cec5SDimitry Andric                                       AMDGPUFunctionArgInfo::WORKITEM_ID_Y);
1365*0b57cec5SDimitry Andric   case Intrinsic::amdgcn_workitem_id_z:
1366*0b57cec5SDimitry Andric     return legalizePreloadedArgIntrin(MI, MRI, B,
1367*0b57cec5SDimitry Andric                                       AMDGPUFunctionArgInfo::WORKITEM_ID_Z);
1368*0b57cec5SDimitry Andric   case Intrinsic::amdgcn_workgroup_id_x:
1369*0b57cec5SDimitry Andric     return legalizePreloadedArgIntrin(MI, MRI, B,
1370*0b57cec5SDimitry Andric                                       AMDGPUFunctionArgInfo::WORKGROUP_ID_X);
1371*0b57cec5SDimitry Andric   case Intrinsic::amdgcn_workgroup_id_y:
1372*0b57cec5SDimitry Andric     return legalizePreloadedArgIntrin(MI, MRI, B,
1373*0b57cec5SDimitry Andric                                       AMDGPUFunctionArgInfo::WORKGROUP_ID_Y);
1374*0b57cec5SDimitry Andric   case Intrinsic::amdgcn_workgroup_id_z:
1375*0b57cec5SDimitry Andric     return legalizePreloadedArgIntrin(MI, MRI, B,
1376*0b57cec5SDimitry Andric                                       AMDGPUFunctionArgInfo::WORKGROUP_ID_Z);
1377*0b57cec5SDimitry Andric   case Intrinsic::amdgcn_dispatch_ptr:
1378*0b57cec5SDimitry Andric     return legalizePreloadedArgIntrin(MI, MRI, B,
1379*0b57cec5SDimitry Andric                                       AMDGPUFunctionArgInfo::DISPATCH_PTR);
1380*0b57cec5SDimitry Andric   case Intrinsic::amdgcn_queue_ptr:
1381*0b57cec5SDimitry Andric     return legalizePreloadedArgIntrin(MI, MRI, B,
1382*0b57cec5SDimitry Andric                                       AMDGPUFunctionArgInfo::QUEUE_PTR);
1383*0b57cec5SDimitry Andric   case Intrinsic::amdgcn_implicit_buffer_ptr:
1384*0b57cec5SDimitry Andric     return legalizePreloadedArgIntrin(
1385*0b57cec5SDimitry Andric       MI, MRI, B, AMDGPUFunctionArgInfo::IMPLICIT_BUFFER_PTR);
1386*0b57cec5SDimitry Andric   case Intrinsic::amdgcn_dispatch_id:
1387*0b57cec5SDimitry Andric     return legalizePreloadedArgIntrin(MI, MRI, B,
1388*0b57cec5SDimitry Andric                                       AMDGPUFunctionArgInfo::DISPATCH_ID);
1389*0b57cec5SDimitry Andric   default:
1390*0b57cec5SDimitry Andric     return true;
1391*0b57cec5SDimitry Andric   }
1392*0b57cec5SDimitry Andric 
1393*0b57cec5SDimitry Andric   return true;
1394*0b57cec5SDimitry Andric }
1395