xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp (revision 8bcb0991864975618c09697b1aca10683346d9f0)
10b57cec5SDimitry Andric //===- AMDGPULegalizerInfo.cpp -----------------------------------*- C++ -*-==//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric /// \file
90b57cec5SDimitry Andric /// This file implements the targeting of the Machinelegalizer class for
100b57cec5SDimitry Andric /// AMDGPU.
110b57cec5SDimitry Andric /// \todo This should be generated by TableGen.
120b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
130b57cec5SDimitry Andric 
14*8bcb0991SDimitry Andric #if defined(_MSC_VER) || defined(__MINGW32__)
15*8bcb0991SDimitry Andric // According to Microsoft, one must set _USE_MATH_DEFINES in order to get M_PI
16*8bcb0991SDimitry Andric // from the Visual C++ cmath / math.h headers:
17*8bcb0991SDimitry Andric // https://docs.microsoft.com/en-us/cpp/c-runtime-library/math-constants?view=vs-2019
18*8bcb0991SDimitry Andric #define _USE_MATH_DEFINES
19*8bcb0991SDimitry Andric #endif
20*8bcb0991SDimitry Andric 
210b57cec5SDimitry Andric #include "AMDGPU.h"
220b57cec5SDimitry Andric #include "AMDGPULegalizerInfo.h"
230b57cec5SDimitry Andric #include "AMDGPUTargetMachine.h"
240b57cec5SDimitry Andric #include "SIMachineFunctionInfo.h"
250b57cec5SDimitry Andric #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
260b57cec5SDimitry Andric #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
270b57cec5SDimitry Andric #include "llvm/CodeGen/TargetOpcodes.h"
280b57cec5SDimitry Andric #include "llvm/CodeGen/ValueTypes.h"
290b57cec5SDimitry Andric #include "llvm/IR/DerivedTypes.h"
30*8bcb0991SDimitry Andric #include "llvm/IR/DiagnosticInfo.h"
310b57cec5SDimitry Andric #include "llvm/IR/Type.h"
320b57cec5SDimitry Andric #include "llvm/Support/Debug.h"
330b57cec5SDimitry Andric 
340b57cec5SDimitry Andric #define DEBUG_TYPE "amdgpu-legalinfo"
350b57cec5SDimitry Andric 
360b57cec5SDimitry Andric using namespace llvm;
370b57cec5SDimitry Andric using namespace LegalizeActions;
380b57cec5SDimitry Andric using namespace LegalizeMutations;
390b57cec5SDimitry Andric using namespace LegalityPredicates;
400b57cec5SDimitry Andric 
410b57cec5SDimitry Andric 
420b57cec5SDimitry Andric static LegalityPredicate isMultiple32(unsigned TypeIdx,
43*8bcb0991SDimitry Andric                                       unsigned MaxSize = 1024) {
440b57cec5SDimitry Andric   return [=](const LegalityQuery &Query) {
450b57cec5SDimitry Andric     const LLT Ty = Query.Types[TypeIdx];
460b57cec5SDimitry Andric     const LLT EltTy = Ty.getScalarType();
470b57cec5SDimitry Andric     return Ty.getSizeInBits() <= MaxSize && EltTy.getSizeInBits() % 32 == 0;
480b57cec5SDimitry Andric   };
490b57cec5SDimitry Andric }
500b57cec5SDimitry Andric 
51*8bcb0991SDimitry Andric static LegalityPredicate sizeIs(unsigned TypeIdx, unsigned Size) {
52*8bcb0991SDimitry Andric   return [=](const LegalityQuery &Query) {
53*8bcb0991SDimitry Andric     return Query.Types[TypeIdx].getSizeInBits() == Size;
54*8bcb0991SDimitry Andric   };
55*8bcb0991SDimitry Andric }
56*8bcb0991SDimitry Andric 
570b57cec5SDimitry Andric static LegalityPredicate isSmallOddVector(unsigned TypeIdx) {
580b57cec5SDimitry Andric   return [=](const LegalityQuery &Query) {
590b57cec5SDimitry Andric     const LLT Ty = Query.Types[TypeIdx];
600b57cec5SDimitry Andric     return Ty.isVector() &&
610b57cec5SDimitry Andric            Ty.getNumElements() % 2 != 0 &&
62*8bcb0991SDimitry Andric            Ty.getElementType().getSizeInBits() < 32 &&
63*8bcb0991SDimitry Andric            Ty.getSizeInBits() % 32 != 0;
64*8bcb0991SDimitry Andric   };
65*8bcb0991SDimitry Andric }
66*8bcb0991SDimitry Andric 
67*8bcb0991SDimitry Andric static LegalityPredicate isWideVec16(unsigned TypeIdx) {
68*8bcb0991SDimitry Andric   return [=](const LegalityQuery &Query) {
69*8bcb0991SDimitry Andric     const LLT Ty = Query.Types[TypeIdx];
70*8bcb0991SDimitry Andric     const LLT EltTy = Ty.getScalarType();
71*8bcb0991SDimitry Andric     return EltTy.getSizeInBits() == 16 && Ty.getNumElements() > 2;
720b57cec5SDimitry Andric   };
730b57cec5SDimitry Andric }
740b57cec5SDimitry Andric 
750b57cec5SDimitry Andric static LegalizeMutation oneMoreElement(unsigned TypeIdx) {
760b57cec5SDimitry Andric   return [=](const LegalityQuery &Query) {
770b57cec5SDimitry Andric     const LLT Ty = Query.Types[TypeIdx];
780b57cec5SDimitry Andric     const LLT EltTy = Ty.getElementType();
790b57cec5SDimitry Andric     return std::make_pair(TypeIdx, LLT::vector(Ty.getNumElements() + 1, EltTy));
800b57cec5SDimitry Andric   };
810b57cec5SDimitry Andric }
820b57cec5SDimitry Andric 
830b57cec5SDimitry Andric static LegalizeMutation fewerEltsToSize64Vector(unsigned TypeIdx) {
840b57cec5SDimitry Andric   return [=](const LegalityQuery &Query) {
850b57cec5SDimitry Andric     const LLT Ty = Query.Types[TypeIdx];
860b57cec5SDimitry Andric     const LLT EltTy = Ty.getElementType();
870b57cec5SDimitry Andric     unsigned Size = Ty.getSizeInBits();
880b57cec5SDimitry Andric     unsigned Pieces = (Size + 63) / 64;
890b57cec5SDimitry Andric     unsigned NewNumElts = (Ty.getNumElements() + 1) / Pieces;
900b57cec5SDimitry Andric     return std::make_pair(TypeIdx, LLT::scalarOrVector(NewNumElts, EltTy));
910b57cec5SDimitry Andric   };
920b57cec5SDimitry Andric }
930b57cec5SDimitry Andric 
94*8bcb0991SDimitry Andric // Increase the number of vector elements to reach the next multiple of 32-bit
95*8bcb0991SDimitry Andric // type.
96*8bcb0991SDimitry Andric static LegalizeMutation moreEltsToNext32Bit(unsigned TypeIdx) {
97*8bcb0991SDimitry Andric   return [=](const LegalityQuery &Query) {
98*8bcb0991SDimitry Andric     const LLT Ty = Query.Types[TypeIdx];
99*8bcb0991SDimitry Andric 
100*8bcb0991SDimitry Andric     const LLT EltTy = Ty.getElementType();
101*8bcb0991SDimitry Andric     const int Size = Ty.getSizeInBits();
102*8bcb0991SDimitry Andric     const int EltSize = EltTy.getSizeInBits();
103*8bcb0991SDimitry Andric     const int NextMul32 = (Size + 31) / 32;
104*8bcb0991SDimitry Andric 
105*8bcb0991SDimitry Andric     assert(EltSize < 32);
106*8bcb0991SDimitry Andric 
107*8bcb0991SDimitry Andric     const int NewNumElts = (32 * NextMul32 + EltSize - 1) / EltSize;
108*8bcb0991SDimitry Andric     return std::make_pair(TypeIdx, LLT::vector(NewNumElts, EltTy));
109*8bcb0991SDimitry Andric   };
110*8bcb0991SDimitry Andric }
111*8bcb0991SDimitry Andric 
112*8bcb0991SDimitry Andric static LegalityPredicate vectorSmallerThan(unsigned TypeIdx, unsigned Size) {
113*8bcb0991SDimitry Andric   return [=](const LegalityQuery &Query) {
114*8bcb0991SDimitry Andric     const LLT QueryTy = Query.Types[TypeIdx];
115*8bcb0991SDimitry Andric     return QueryTy.isVector() && QueryTy.getSizeInBits() < Size;
116*8bcb0991SDimitry Andric   };
117*8bcb0991SDimitry Andric }
118*8bcb0991SDimitry Andric 
1190b57cec5SDimitry Andric static LegalityPredicate vectorWiderThan(unsigned TypeIdx, unsigned Size) {
1200b57cec5SDimitry Andric   return [=](const LegalityQuery &Query) {
1210b57cec5SDimitry Andric     const LLT QueryTy = Query.Types[TypeIdx];
1220b57cec5SDimitry Andric     return QueryTy.isVector() && QueryTy.getSizeInBits() > Size;
1230b57cec5SDimitry Andric   };
1240b57cec5SDimitry Andric }
1250b57cec5SDimitry Andric 
1260b57cec5SDimitry Andric static LegalityPredicate numElementsNotEven(unsigned TypeIdx) {
1270b57cec5SDimitry Andric   return [=](const LegalityQuery &Query) {
1280b57cec5SDimitry Andric     const LLT QueryTy = Query.Types[TypeIdx];
1290b57cec5SDimitry Andric     return QueryTy.isVector() && QueryTy.getNumElements() % 2 != 0;
1300b57cec5SDimitry Andric   };
1310b57cec5SDimitry Andric }
1320b57cec5SDimitry Andric 
133*8bcb0991SDimitry Andric // Any combination of 32 or 64-bit elements up to 1024 bits, and multiples of
1340b57cec5SDimitry Andric // v2s16.
1350b57cec5SDimitry Andric static LegalityPredicate isRegisterType(unsigned TypeIdx) {
1360b57cec5SDimitry Andric   return [=](const LegalityQuery &Query) {
1370b57cec5SDimitry Andric     const LLT Ty = Query.Types[TypeIdx];
1380b57cec5SDimitry Andric     if (Ty.isVector()) {
1390b57cec5SDimitry Andric       const int EltSize = Ty.getElementType().getSizeInBits();
1400b57cec5SDimitry Andric       return EltSize == 32 || EltSize == 64 ||
1410b57cec5SDimitry Andric             (EltSize == 16 && Ty.getNumElements() % 2 == 0) ||
1420b57cec5SDimitry Andric              EltSize == 128 || EltSize == 256;
1430b57cec5SDimitry Andric     }
1440b57cec5SDimitry Andric 
145*8bcb0991SDimitry Andric     return Ty.getSizeInBits() % 32 == 0 && Ty.getSizeInBits() <= 1024;
146*8bcb0991SDimitry Andric   };
147*8bcb0991SDimitry Andric }
148*8bcb0991SDimitry Andric 
149*8bcb0991SDimitry Andric static LegalityPredicate elementTypeIs(unsigned TypeIdx, LLT Type) {
150*8bcb0991SDimitry Andric   return [=](const LegalityQuery &Query) {
151*8bcb0991SDimitry Andric     return Query.Types[TypeIdx].getElementType() == Type;
152*8bcb0991SDimitry Andric   };
153*8bcb0991SDimitry Andric }
154*8bcb0991SDimitry Andric 
155*8bcb0991SDimitry Andric static LegalityPredicate isWideScalarTruncStore(unsigned TypeIdx) {
156*8bcb0991SDimitry Andric   return [=](const LegalityQuery &Query) {
157*8bcb0991SDimitry Andric     const LLT Ty = Query.Types[TypeIdx];
158*8bcb0991SDimitry Andric     return !Ty.isVector() && Ty.getSizeInBits() > 32 &&
159*8bcb0991SDimitry Andric            Query.MMODescrs[0].SizeInBits < Ty.getSizeInBits();
1600b57cec5SDimitry Andric   };
1610b57cec5SDimitry Andric }
1620b57cec5SDimitry Andric 
1630b57cec5SDimitry Andric AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
1640b57cec5SDimitry Andric                                          const GCNTargetMachine &TM)
1650b57cec5SDimitry Andric   :  ST(ST_) {
1660b57cec5SDimitry Andric   using namespace TargetOpcode;
1670b57cec5SDimitry Andric 
1680b57cec5SDimitry Andric   auto GetAddrSpacePtr = [&TM](unsigned AS) {
1690b57cec5SDimitry Andric     return LLT::pointer(AS, TM.getPointerSizeInBits(AS));
1700b57cec5SDimitry Andric   };
1710b57cec5SDimitry Andric 
1720b57cec5SDimitry Andric   const LLT S1 = LLT::scalar(1);
1730b57cec5SDimitry Andric   const LLT S8 = LLT::scalar(8);
1740b57cec5SDimitry Andric   const LLT S16 = LLT::scalar(16);
1750b57cec5SDimitry Andric   const LLT S32 = LLT::scalar(32);
1760b57cec5SDimitry Andric   const LLT S64 = LLT::scalar(64);
177*8bcb0991SDimitry Andric   const LLT S96 = LLT::scalar(96);
1780b57cec5SDimitry Andric   const LLT S128 = LLT::scalar(128);
1790b57cec5SDimitry Andric   const LLT S256 = LLT::scalar(256);
180*8bcb0991SDimitry Andric   const LLT S1024 = LLT::scalar(1024);
1810b57cec5SDimitry Andric 
1820b57cec5SDimitry Andric   const LLT V2S16 = LLT::vector(2, 16);
1830b57cec5SDimitry Andric   const LLT V4S16 = LLT::vector(4, 16);
1840b57cec5SDimitry Andric 
1850b57cec5SDimitry Andric   const LLT V2S32 = LLT::vector(2, 32);
1860b57cec5SDimitry Andric   const LLT V3S32 = LLT::vector(3, 32);
1870b57cec5SDimitry Andric   const LLT V4S32 = LLT::vector(4, 32);
1880b57cec5SDimitry Andric   const LLT V5S32 = LLT::vector(5, 32);
1890b57cec5SDimitry Andric   const LLT V6S32 = LLT::vector(6, 32);
1900b57cec5SDimitry Andric   const LLT V7S32 = LLT::vector(7, 32);
1910b57cec5SDimitry Andric   const LLT V8S32 = LLT::vector(8, 32);
1920b57cec5SDimitry Andric   const LLT V9S32 = LLT::vector(9, 32);
1930b57cec5SDimitry Andric   const LLT V10S32 = LLT::vector(10, 32);
1940b57cec5SDimitry Andric   const LLT V11S32 = LLT::vector(11, 32);
1950b57cec5SDimitry Andric   const LLT V12S32 = LLT::vector(12, 32);
1960b57cec5SDimitry Andric   const LLT V13S32 = LLT::vector(13, 32);
1970b57cec5SDimitry Andric   const LLT V14S32 = LLT::vector(14, 32);
1980b57cec5SDimitry Andric   const LLT V15S32 = LLT::vector(15, 32);
1990b57cec5SDimitry Andric   const LLT V16S32 = LLT::vector(16, 32);
200*8bcb0991SDimitry Andric   const LLT V32S32 = LLT::vector(32, 32);
2010b57cec5SDimitry Andric 
2020b57cec5SDimitry Andric   const LLT V2S64 = LLT::vector(2, 64);
2030b57cec5SDimitry Andric   const LLT V3S64 = LLT::vector(3, 64);
2040b57cec5SDimitry Andric   const LLT V4S64 = LLT::vector(4, 64);
2050b57cec5SDimitry Andric   const LLT V5S64 = LLT::vector(5, 64);
2060b57cec5SDimitry Andric   const LLT V6S64 = LLT::vector(6, 64);
2070b57cec5SDimitry Andric   const LLT V7S64 = LLT::vector(7, 64);
2080b57cec5SDimitry Andric   const LLT V8S64 = LLT::vector(8, 64);
209*8bcb0991SDimitry Andric   const LLT V16S64 = LLT::vector(16, 64);
2100b57cec5SDimitry Andric 
2110b57cec5SDimitry Andric   std::initializer_list<LLT> AllS32Vectors =
2120b57cec5SDimitry Andric     {V2S32, V3S32, V4S32, V5S32, V6S32, V7S32, V8S32,
213*8bcb0991SDimitry Andric      V9S32, V10S32, V11S32, V12S32, V13S32, V14S32, V15S32, V16S32, V32S32};
2140b57cec5SDimitry Andric   std::initializer_list<LLT> AllS64Vectors =
215*8bcb0991SDimitry Andric     {V2S64, V3S64, V4S64, V5S64, V6S64, V7S64, V8S64, V16S64};
2160b57cec5SDimitry Andric 
2170b57cec5SDimitry Andric   const LLT GlobalPtr = GetAddrSpacePtr(AMDGPUAS::GLOBAL_ADDRESS);
2180b57cec5SDimitry Andric   const LLT ConstantPtr = GetAddrSpacePtr(AMDGPUAS::CONSTANT_ADDRESS);
219*8bcb0991SDimitry Andric   const LLT Constant32Ptr = GetAddrSpacePtr(AMDGPUAS::CONSTANT_ADDRESS_32BIT);
2200b57cec5SDimitry Andric   const LLT LocalPtr = GetAddrSpacePtr(AMDGPUAS::LOCAL_ADDRESS);
221*8bcb0991SDimitry Andric   const LLT RegionPtr = GetAddrSpacePtr(AMDGPUAS::REGION_ADDRESS);
2220b57cec5SDimitry Andric   const LLT FlatPtr = GetAddrSpacePtr(AMDGPUAS::FLAT_ADDRESS);
2230b57cec5SDimitry Andric   const LLT PrivatePtr = GetAddrSpacePtr(AMDGPUAS::PRIVATE_ADDRESS);
2240b57cec5SDimitry Andric 
2250b57cec5SDimitry Andric   const LLT CodePtr = FlatPtr;
2260b57cec5SDimitry Andric 
2270b57cec5SDimitry Andric   const std::initializer_list<LLT> AddrSpaces64 = {
2280b57cec5SDimitry Andric     GlobalPtr, ConstantPtr, FlatPtr
2290b57cec5SDimitry Andric   };
2300b57cec5SDimitry Andric 
2310b57cec5SDimitry Andric   const std::initializer_list<LLT> AddrSpaces32 = {
232*8bcb0991SDimitry Andric     LocalPtr, PrivatePtr, Constant32Ptr, RegionPtr
2330b57cec5SDimitry Andric   };
2340b57cec5SDimitry Andric 
2350b57cec5SDimitry Andric   const std::initializer_list<LLT> FPTypesBase = {
2360b57cec5SDimitry Andric     S32, S64
2370b57cec5SDimitry Andric   };
2380b57cec5SDimitry Andric 
2390b57cec5SDimitry Andric   const std::initializer_list<LLT> FPTypes16 = {
2400b57cec5SDimitry Andric     S32, S64, S16
2410b57cec5SDimitry Andric   };
2420b57cec5SDimitry Andric 
2430b57cec5SDimitry Andric   const std::initializer_list<LLT> FPTypesPK16 = {
2440b57cec5SDimitry Andric     S32, S64, S16, V2S16
2450b57cec5SDimitry Andric   };
2460b57cec5SDimitry Andric 
2470b57cec5SDimitry Andric   setAction({G_BRCOND, S1}, Legal);
2480b57cec5SDimitry Andric 
2490b57cec5SDimitry Andric   // TODO: All multiples of 32, vectors of pointers, all v2s16 pairs, more
2500b57cec5SDimitry Andric   // elements for v3s16
2510b57cec5SDimitry Andric   getActionDefinitionsBuilder(G_PHI)
2520b57cec5SDimitry Andric     .legalFor({S32, S64, V2S16, V4S16, S1, S128, S256})
2530b57cec5SDimitry Andric     .legalFor(AllS32Vectors)
2540b57cec5SDimitry Andric     .legalFor(AllS64Vectors)
2550b57cec5SDimitry Andric     .legalFor(AddrSpaces64)
2560b57cec5SDimitry Andric     .legalFor(AddrSpaces32)
2570b57cec5SDimitry Andric     .clampScalar(0, S32, S256)
2580b57cec5SDimitry Andric     .widenScalarToNextPow2(0, 32)
2590b57cec5SDimitry Andric     .clampMaxNumElements(0, S32, 16)
2600b57cec5SDimitry Andric     .moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
2610b57cec5SDimitry Andric     .legalIf(isPointer(0));
2620b57cec5SDimitry Andric 
2630b57cec5SDimitry Andric   if (ST.has16BitInsts()) {
2640b57cec5SDimitry Andric     getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL})
2650b57cec5SDimitry Andric       .legalFor({S32, S16})
2660b57cec5SDimitry Andric       .clampScalar(0, S16, S32)
2670b57cec5SDimitry Andric       .scalarize(0);
2680b57cec5SDimitry Andric   } else {
2690b57cec5SDimitry Andric     getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL})
2700b57cec5SDimitry Andric       .legalFor({S32})
2710b57cec5SDimitry Andric       .clampScalar(0, S32, S32)
2720b57cec5SDimitry Andric       .scalarize(0);
2730b57cec5SDimitry Andric   }
2740b57cec5SDimitry Andric 
2750b57cec5SDimitry Andric   getActionDefinitionsBuilder({G_UMULH, G_SMULH})
2760b57cec5SDimitry Andric     .legalFor({S32})
2770b57cec5SDimitry Andric     .clampScalar(0, S32, S32)
2780b57cec5SDimitry Andric     .scalarize(0);
2790b57cec5SDimitry Andric 
2800b57cec5SDimitry Andric   // Report legal for any types we can handle anywhere. For the cases only legal
2810b57cec5SDimitry Andric   // on the SALU, RegBankSelect will be able to re-legalize.
2820b57cec5SDimitry Andric   getActionDefinitionsBuilder({G_AND, G_OR, G_XOR})
2830b57cec5SDimitry Andric     .legalFor({S32, S1, S64, V2S32, S16, V2S16, V4S16})
2840b57cec5SDimitry Andric     .clampScalar(0, S32, S64)
2850b57cec5SDimitry Andric     .moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
286*8bcb0991SDimitry Andric     .fewerElementsIf(vectorWiderThan(0, 64), fewerEltsToSize64Vector(0))
2870b57cec5SDimitry Andric     .widenScalarToNextPow2(0)
2880b57cec5SDimitry Andric     .scalarize(0);
2890b57cec5SDimitry Andric 
290*8bcb0991SDimitry Andric   getActionDefinitionsBuilder({G_UADDO, G_USUBO,
2910b57cec5SDimitry Andric                                G_UADDE, G_SADDE, G_USUBE, G_SSUBE})
2920b57cec5SDimitry Andric     .legalFor({{S32, S1}})
293*8bcb0991SDimitry Andric     .clampScalar(0, S32, S32)
294*8bcb0991SDimitry Andric     .scalarize(0); // TODO: Implement.
295*8bcb0991SDimitry Andric 
296*8bcb0991SDimitry Andric   getActionDefinitionsBuilder({G_SADDO, G_SSUBO})
297*8bcb0991SDimitry Andric     .lower();
2980b57cec5SDimitry Andric 
2990b57cec5SDimitry Andric   getActionDefinitionsBuilder(G_BITCAST)
3000b57cec5SDimitry Andric     // Don't worry about the size constraint.
301*8bcb0991SDimitry Andric     .legalIf(all(isRegisterType(0), isRegisterType(1)))
302*8bcb0991SDimitry Andric     // FIXME: Testing hack
303*8bcb0991SDimitry Andric     .legalForCartesianProduct({S16, LLT::vector(2, 8), });
3040b57cec5SDimitry Andric 
3050b57cec5SDimitry Andric   getActionDefinitionsBuilder(G_FCONSTANT)
3060b57cec5SDimitry Andric     .legalFor({S32, S64, S16})
3070b57cec5SDimitry Andric     .clampScalar(0, S16, S64);
3080b57cec5SDimitry Andric 
3090b57cec5SDimitry Andric   getActionDefinitionsBuilder(G_IMPLICIT_DEF)
310*8bcb0991SDimitry Andric     .legalFor({S1, S32, S64, S16, V2S32, V4S32, V2S16, V4S16, GlobalPtr,
3110b57cec5SDimitry Andric                ConstantPtr, LocalPtr, FlatPtr, PrivatePtr})
3120b57cec5SDimitry Andric     .moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
313*8bcb0991SDimitry Andric     .clampScalarOrElt(0, S32, S1024)
3140b57cec5SDimitry Andric     .legalIf(isMultiple32(0))
3150b57cec5SDimitry Andric     .widenScalarToNextPow2(0, 32)
3160b57cec5SDimitry Andric     .clampMaxNumElements(0, S32, 16);
3170b57cec5SDimitry Andric 
3180b57cec5SDimitry Andric 
3190b57cec5SDimitry Andric   // FIXME: i1 operands to intrinsics should always be legal, but other i1
3200b57cec5SDimitry Andric   // values may not be legal.  We need to figure out how to distinguish
3210b57cec5SDimitry Andric   // between these two scenarios.
3220b57cec5SDimitry Andric   getActionDefinitionsBuilder(G_CONSTANT)
323*8bcb0991SDimitry Andric     .legalFor({S1, S32, S64, S16, GlobalPtr,
3240b57cec5SDimitry Andric                LocalPtr, ConstantPtr, PrivatePtr, FlatPtr })
3250b57cec5SDimitry Andric     .clampScalar(0, S32, S64)
3260b57cec5SDimitry Andric     .widenScalarToNextPow2(0)
3270b57cec5SDimitry Andric     .legalIf(isPointer(0));
3280b57cec5SDimitry Andric 
3290b57cec5SDimitry Andric   setAction({G_FRAME_INDEX, PrivatePtr}, Legal);
330*8bcb0991SDimitry Andric   getActionDefinitionsBuilder(G_GLOBAL_VALUE)
331*8bcb0991SDimitry Andric     .customFor({LocalPtr, GlobalPtr, ConstantPtr, Constant32Ptr});
332*8bcb0991SDimitry Andric 
3330b57cec5SDimitry Andric 
3340b57cec5SDimitry Andric   auto &FPOpActions = getActionDefinitionsBuilder(
335*8bcb0991SDimitry Andric     { G_FADD, G_FMUL, G_FMA, G_FCANONICALIZE})
3360b57cec5SDimitry Andric     .legalFor({S32, S64});
337*8bcb0991SDimitry Andric   auto &TrigActions = getActionDefinitionsBuilder({G_FSIN, G_FCOS})
338*8bcb0991SDimitry Andric     .customFor({S32, S64});
339*8bcb0991SDimitry Andric   auto &FDIVActions = getActionDefinitionsBuilder(G_FDIV)
340*8bcb0991SDimitry Andric     .customFor({S32, S64});
3410b57cec5SDimitry Andric 
3420b57cec5SDimitry Andric   if (ST.has16BitInsts()) {
3430b57cec5SDimitry Andric     if (ST.hasVOP3PInsts())
3440b57cec5SDimitry Andric       FPOpActions.legalFor({S16, V2S16});
3450b57cec5SDimitry Andric     else
3460b57cec5SDimitry Andric       FPOpActions.legalFor({S16});
347*8bcb0991SDimitry Andric 
348*8bcb0991SDimitry Andric     TrigActions.customFor({S16});
349*8bcb0991SDimitry Andric     FDIVActions.customFor({S16});
3500b57cec5SDimitry Andric   }
3510b57cec5SDimitry Andric 
3520b57cec5SDimitry Andric   auto &MinNumMaxNum = getActionDefinitionsBuilder({
3530b57cec5SDimitry Andric       G_FMINNUM, G_FMAXNUM, G_FMINNUM_IEEE, G_FMAXNUM_IEEE});
3540b57cec5SDimitry Andric 
3550b57cec5SDimitry Andric   if (ST.hasVOP3PInsts()) {
3560b57cec5SDimitry Andric     MinNumMaxNum.customFor(FPTypesPK16)
3570b57cec5SDimitry Andric       .clampMaxNumElements(0, S16, 2)
3580b57cec5SDimitry Andric       .clampScalar(0, S16, S64)
3590b57cec5SDimitry Andric       .scalarize(0);
3600b57cec5SDimitry Andric   } else if (ST.has16BitInsts()) {
3610b57cec5SDimitry Andric     MinNumMaxNum.customFor(FPTypes16)
3620b57cec5SDimitry Andric       .clampScalar(0, S16, S64)
3630b57cec5SDimitry Andric       .scalarize(0);
3640b57cec5SDimitry Andric   } else {
3650b57cec5SDimitry Andric     MinNumMaxNum.customFor(FPTypesBase)
3660b57cec5SDimitry Andric       .clampScalar(0, S32, S64)
3670b57cec5SDimitry Andric       .scalarize(0);
3680b57cec5SDimitry Andric   }
3690b57cec5SDimitry Andric 
3700b57cec5SDimitry Andric   if (ST.hasVOP3PInsts())
3710b57cec5SDimitry Andric     FPOpActions.clampMaxNumElements(0, S16, 2);
372*8bcb0991SDimitry Andric 
3730b57cec5SDimitry Andric   FPOpActions
3740b57cec5SDimitry Andric     .scalarize(0)
3750b57cec5SDimitry Andric     .clampScalar(0, ST.has16BitInsts() ? S16 : S32, S64);
3760b57cec5SDimitry Andric 
377*8bcb0991SDimitry Andric   TrigActions
378*8bcb0991SDimitry Andric     .scalarize(0)
379*8bcb0991SDimitry Andric     .clampScalar(0, ST.has16BitInsts() ? S16 : S32, S64);
380*8bcb0991SDimitry Andric 
381*8bcb0991SDimitry Andric   FDIVActions
382*8bcb0991SDimitry Andric     .scalarize(0)
383*8bcb0991SDimitry Andric     .clampScalar(0, ST.has16BitInsts() ? S16 : S32, S64);
384*8bcb0991SDimitry Andric 
385*8bcb0991SDimitry Andric   getActionDefinitionsBuilder({G_FNEG, G_FABS})
386*8bcb0991SDimitry Andric     .legalFor(FPTypesPK16)
387*8bcb0991SDimitry Andric     .clampMaxNumElements(0, S16, 2)
388*8bcb0991SDimitry Andric     .scalarize(0)
389*8bcb0991SDimitry Andric     .clampScalar(0, S16, S64);
390*8bcb0991SDimitry Andric 
391*8bcb0991SDimitry Andric   // TODO: Implement
392*8bcb0991SDimitry Andric   getActionDefinitionsBuilder({G_FMINIMUM, G_FMAXIMUM}).lower();
393*8bcb0991SDimitry Andric 
3940b57cec5SDimitry Andric   if (ST.has16BitInsts()) {
395*8bcb0991SDimitry Andric     getActionDefinitionsBuilder({G_FSQRT, G_FFLOOR})
3960b57cec5SDimitry Andric       .legalFor({S32, S64, S16})
3970b57cec5SDimitry Andric       .scalarize(0)
3980b57cec5SDimitry Andric       .clampScalar(0, S16, S64);
3990b57cec5SDimitry Andric   } else {
400*8bcb0991SDimitry Andric     getActionDefinitionsBuilder({G_FSQRT, G_FFLOOR})
4010b57cec5SDimitry Andric       .legalFor({S32, S64})
4020b57cec5SDimitry Andric       .scalarize(0)
4030b57cec5SDimitry Andric       .clampScalar(0, S32, S64);
4040b57cec5SDimitry Andric   }
4050b57cec5SDimitry Andric 
4060b57cec5SDimitry Andric   getActionDefinitionsBuilder(G_FPTRUNC)
4070b57cec5SDimitry Andric     .legalFor({{S32, S64}, {S16, S32}})
4080b57cec5SDimitry Andric     .scalarize(0);
4090b57cec5SDimitry Andric 
4100b57cec5SDimitry Andric   getActionDefinitionsBuilder(G_FPEXT)
4110b57cec5SDimitry Andric     .legalFor({{S64, S32}, {S32, S16}})
4120b57cec5SDimitry Andric     .lowerFor({{S64, S16}}) // FIXME: Implement
4130b57cec5SDimitry Andric     .scalarize(0);
4140b57cec5SDimitry Andric 
4150b57cec5SDimitry Andric   // TODO: Verify V_BFI_B32 is generated from expanded bit ops.
4160b57cec5SDimitry Andric   getActionDefinitionsBuilder(G_FCOPYSIGN).lower();
4170b57cec5SDimitry Andric 
4180b57cec5SDimitry Andric   getActionDefinitionsBuilder(G_FSUB)
4190b57cec5SDimitry Andric       // Use actual fsub instruction
4200b57cec5SDimitry Andric       .legalFor({S32})
4210b57cec5SDimitry Andric       // Must use fadd + fneg
4220b57cec5SDimitry Andric       .lowerFor({S64, S16, V2S16})
4230b57cec5SDimitry Andric       .scalarize(0)
4240b57cec5SDimitry Andric       .clampScalar(0, S32, S64);
4250b57cec5SDimitry Andric 
426*8bcb0991SDimitry Andric   // Whether this is legal depends on the floating point mode for the function.
427*8bcb0991SDimitry Andric   auto &FMad = getActionDefinitionsBuilder(G_FMAD);
428*8bcb0991SDimitry Andric   if (ST.hasMadF16())
429*8bcb0991SDimitry Andric     FMad.customFor({S32, S16});
430*8bcb0991SDimitry Andric   else
431*8bcb0991SDimitry Andric     FMad.customFor({S32});
432*8bcb0991SDimitry Andric   FMad.scalarize(0)
433*8bcb0991SDimitry Andric       .lower();
434*8bcb0991SDimitry Andric 
4350b57cec5SDimitry Andric   getActionDefinitionsBuilder({G_SEXT, G_ZEXT, G_ANYEXT})
4360b57cec5SDimitry Andric     .legalFor({{S64, S32}, {S32, S16}, {S64, S16},
4370b57cec5SDimitry Andric                {S32, S1}, {S64, S1}, {S16, S1},
438*8bcb0991SDimitry Andric                {S96, S32},
4390b57cec5SDimitry Andric                // FIXME: Hack
4400b57cec5SDimitry Andric                {S64, LLT::scalar(33)},
4410b57cec5SDimitry Andric                {S32, S8}, {S128, S32}, {S128, S64}, {S32, LLT::scalar(24)}})
4420b57cec5SDimitry Andric     .scalarize(0);
4430b57cec5SDimitry Andric 
444*8bcb0991SDimitry Andric   // TODO: Split s1->s64 during regbankselect for VALU.
445*8bcb0991SDimitry Andric   auto &IToFP = getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
446*8bcb0991SDimitry Andric     .legalFor({{S32, S32}, {S64, S32}, {S16, S32}, {S32, S1}, {S16, S1}, {S64, S1}})
4470b57cec5SDimitry Andric     .lowerFor({{S32, S64}})
448*8bcb0991SDimitry Andric     .customFor({{S64, S64}});
449*8bcb0991SDimitry Andric   if (ST.has16BitInsts())
450*8bcb0991SDimitry Andric     IToFP.legalFor({{S16, S16}});
451*8bcb0991SDimitry Andric   IToFP.clampScalar(1, S32, S64)
4520b57cec5SDimitry Andric        .scalarize(0);
4530b57cec5SDimitry Andric 
454*8bcb0991SDimitry Andric   auto &FPToI = getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
455*8bcb0991SDimitry Andric     .legalFor({{S32, S32}, {S32, S64}, {S32, S16}});
456*8bcb0991SDimitry Andric   if (ST.has16BitInsts())
457*8bcb0991SDimitry Andric     FPToI.legalFor({{S16, S16}});
458*8bcb0991SDimitry Andric   else
459*8bcb0991SDimitry Andric     FPToI.minScalar(1, S32);
460*8bcb0991SDimitry Andric 
461*8bcb0991SDimitry Andric   FPToI.minScalar(0, S32)
4620b57cec5SDimitry Andric        .scalarize(0);
4630b57cec5SDimitry Andric 
4640b57cec5SDimitry Andric   getActionDefinitionsBuilder(G_INTRINSIC_ROUND)
4650b57cec5SDimitry Andric     .legalFor({S32, S64})
4660b57cec5SDimitry Andric     .scalarize(0);
4670b57cec5SDimitry Andric 
4680b57cec5SDimitry Andric   if (ST.getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS) {
4690b57cec5SDimitry Andric     getActionDefinitionsBuilder({G_INTRINSIC_TRUNC, G_FCEIL, G_FRINT})
4700b57cec5SDimitry Andric       .legalFor({S32, S64})
4710b57cec5SDimitry Andric       .clampScalar(0, S32, S64)
4720b57cec5SDimitry Andric       .scalarize(0);
4730b57cec5SDimitry Andric   } else {
4740b57cec5SDimitry Andric     getActionDefinitionsBuilder({G_INTRINSIC_TRUNC, G_FCEIL, G_FRINT})
4750b57cec5SDimitry Andric       .legalFor({S32})
4760b57cec5SDimitry Andric       .customFor({S64})
4770b57cec5SDimitry Andric       .clampScalar(0, S32, S64)
4780b57cec5SDimitry Andric       .scalarize(0);
4790b57cec5SDimitry Andric   }
4800b57cec5SDimitry Andric 
4810b57cec5SDimitry Andric   getActionDefinitionsBuilder(G_GEP)
4820b57cec5SDimitry Andric     .legalForCartesianProduct(AddrSpaces64, {S64})
4830b57cec5SDimitry Andric     .legalForCartesianProduct(AddrSpaces32, {S32})
4840b57cec5SDimitry Andric     .scalarize(0);
4850b57cec5SDimitry Andric 
486*8bcb0991SDimitry Andric   getActionDefinitionsBuilder(G_PTR_MASK)
487*8bcb0991SDimitry Andric     .scalarize(0)
488*8bcb0991SDimitry Andric     .alwaysLegal();
489*8bcb0991SDimitry Andric 
4900b57cec5SDimitry Andric   setAction({G_BLOCK_ADDR, CodePtr}, Legal);
4910b57cec5SDimitry Andric 
4920b57cec5SDimitry Andric   auto &CmpBuilder =
4930b57cec5SDimitry Andric     getActionDefinitionsBuilder(G_ICMP)
4940b57cec5SDimitry Andric     .legalForCartesianProduct(
4950b57cec5SDimitry Andric       {S1}, {S32, S64, GlobalPtr, LocalPtr, ConstantPtr, PrivatePtr, FlatPtr})
4960b57cec5SDimitry Andric     .legalFor({{S1, S32}, {S1, S64}});
4970b57cec5SDimitry Andric   if (ST.has16BitInsts()) {
4980b57cec5SDimitry Andric     CmpBuilder.legalFor({{S1, S16}});
4990b57cec5SDimitry Andric   }
5000b57cec5SDimitry Andric 
5010b57cec5SDimitry Andric   CmpBuilder
5020b57cec5SDimitry Andric     .widenScalarToNextPow2(1)
5030b57cec5SDimitry Andric     .clampScalar(1, S32, S64)
5040b57cec5SDimitry Andric     .scalarize(0)
5050b57cec5SDimitry Andric     .legalIf(all(typeIs(0, S1), isPointer(1)));
5060b57cec5SDimitry Andric 
5070b57cec5SDimitry Andric   getActionDefinitionsBuilder(G_FCMP)
5080b57cec5SDimitry Andric     .legalForCartesianProduct({S1}, ST.has16BitInsts() ? FPTypes16 : FPTypesBase)
5090b57cec5SDimitry Andric     .widenScalarToNextPow2(1)
5100b57cec5SDimitry Andric     .clampScalar(1, S32, S64)
5110b57cec5SDimitry Andric     .scalarize(0);
5120b57cec5SDimitry Andric 
5130b57cec5SDimitry Andric   // FIXME: fexp, flog2, flog10 needs to be custom lowered.
5140b57cec5SDimitry Andric   getActionDefinitionsBuilder({G_FPOW, G_FEXP, G_FEXP2,
5150b57cec5SDimitry Andric                                G_FLOG, G_FLOG2, G_FLOG10})
5160b57cec5SDimitry Andric     .legalFor({S32})
5170b57cec5SDimitry Andric     .scalarize(0);
5180b57cec5SDimitry Andric 
5190b57cec5SDimitry Andric   // The 64-bit versions produce 32-bit results, but only on the SALU.
5200b57cec5SDimitry Andric   getActionDefinitionsBuilder({G_CTLZ, G_CTLZ_ZERO_UNDEF,
5210b57cec5SDimitry Andric                                G_CTTZ, G_CTTZ_ZERO_UNDEF,
5220b57cec5SDimitry Andric                                G_CTPOP})
5230b57cec5SDimitry Andric     .legalFor({{S32, S32}, {S32, S64}})
5240b57cec5SDimitry Andric     .clampScalar(0, S32, S32)
5250b57cec5SDimitry Andric     .clampScalar(1, S32, S64)
5260b57cec5SDimitry Andric     .scalarize(0)
5270b57cec5SDimitry Andric     .widenScalarToNextPow2(0, 32)
5280b57cec5SDimitry Andric     .widenScalarToNextPow2(1, 32);
5290b57cec5SDimitry Andric 
5300b57cec5SDimitry Andric   // TODO: Expand for > s32
531*8bcb0991SDimitry Andric   getActionDefinitionsBuilder({G_BSWAP, G_BITREVERSE})
5320b57cec5SDimitry Andric     .legalFor({S32})
5330b57cec5SDimitry Andric     .clampScalar(0, S32, S32)
5340b57cec5SDimitry Andric     .scalarize(0);
5350b57cec5SDimitry Andric 
5360b57cec5SDimitry Andric   if (ST.has16BitInsts()) {
5370b57cec5SDimitry Andric     if (ST.hasVOP3PInsts()) {
5380b57cec5SDimitry Andric       getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX})
5390b57cec5SDimitry Andric         .legalFor({S32, S16, V2S16})
5400b57cec5SDimitry Andric         .moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
5410b57cec5SDimitry Andric         .clampMaxNumElements(0, S16, 2)
5420b57cec5SDimitry Andric         .clampScalar(0, S16, S32)
5430b57cec5SDimitry Andric         .widenScalarToNextPow2(0)
5440b57cec5SDimitry Andric         .scalarize(0);
5450b57cec5SDimitry Andric     } else {
5460b57cec5SDimitry Andric       getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX})
5470b57cec5SDimitry Andric         .legalFor({S32, S16})
5480b57cec5SDimitry Andric         .widenScalarToNextPow2(0)
5490b57cec5SDimitry Andric         .clampScalar(0, S16, S32)
5500b57cec5SDimitry Andric         .scalarize(0);
5510b57cec5SDimitry Andric     }
5520b57cec5SDimitry Andric   } else {
5530b57cec5SDimitry Andric     getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX})
5540b57cec5SDimitry Andric       .legalFor({S32})
5550b57cec5SDimitry Andric       .clampScalar(0, S32, S32)
5560b57cec5SDimitry Andric       .widenScalarToNextPow2(0)
5570b57cec5SDimitry Andric       .scalarize(0);
5580b57cec5SDimitry Andric   }
5590b57cec5SDimitry Andric 
5600b57cec5SDimitry Andric   auto smallerThan = [](unsigned TypeIdx0, unsigned TypeIdx1) {
                               // Predicate factory: the returned callable is true
                               // when the type at index TypeIdx0 has strictly
                               // fewer bits than the type at index TypeIdx1. It is
                               // the widenScalarIf condition for G_INTTOPTR and
                               // G_PTRTOINT below.
5610b57cec5SDimitry Andric     return [=](const LegalityQuery &Query) {
5620b57cec5SDimitry Andric       return Query.Types[TypeIdx0].getSizeInBits() <
5630b57cec5SDimitry Andric              Query.Types[TypeIdx1].getSizeInBits();
5640b57cec5SDimitry Andric     };
5650b57cec5SDimitry Andric   };
5660b57cec5SDimitry Andric 
5670b57cec5SDimitry Andric   auto greaterThan = [](unsigned TypeIdx0, unsigned TypeIdx1) {
                               // Predicate factory: the returned callable is true
                               // when the type at index TypeIdx0 has strictly
                               // more bits than the type at index TypeIdx1. It is
                               // the narrowScalarIf condition for G_INTTOPTR and
                               // G_PTRTOINT below.
5680b57cec5SDimitry Andric     return [=](const LegalityQuery &Query) {
5690b57cec5SDimitry Andric       return Query.Types[TypeIdx0].getSizeInBits() >
5700b57cec5SDimitry Andric              Query.Types[TypeIdx1].getSizeInBits();
5710b57cec5SDimitry Andric     };
5720b57cec5SDimitry Andric   };
5730b57cec5SDimitry Andric 
5740b57cec5SDimitry Andric   getActionDefinitionsBuilder(G_INTTOPTR)
5750b57cec5SDimitry Andric     // List the common cases
5760b57cec5SDimitry Andric     .legalForCartesianProduct(AddrSpaces64, {S64})
5770b57cec5SDimitry Andric     .legalForCartesianProduct(AddrSpaces32, {S32})
5780b57cec5SDimitry Andric     .scalarize(0)
5790b57cec5SDimitry Andric     // Accept any address space as long as the size matches
5800b57cec5SDimitry Andric     .legalIf(sameSize(0, 1))
5810b57cec5SDimitry Andric     .widenScalarIf(smallerThan(1, 0),
5820b57cec5SDimitry Andric       [](const LegalityQuery &Query) {
5830b57cec5SDimitry Andric         return std::make_pair(1, LLT::scalar(Query.Types[0].getSizeInBits()));
5840b57cec5SDimitry Andric       })
5850b57cec5SDimitry Andric     .narrowScalarIf(greaterThan(1, 0),
5860b57cec5SDimitry Andric       [](const LegalityQuery &Query) {
5870b57cec5SDimitry Andric         return std::make_pair(1, LLT::scalar(Query.Types[0].getSizeInBits()));
5880b57cec5SDimitry Andric       });
5890b57cec5SDimitry Andric 
5900b57cec5SDimitry Andric   getActionDefinitionsBuilder(G_PTRTOINT)
5910b57cec5SDimitry Andric     // List the common cases
5920b57cec5SDimitry Andric     .legalForCartesianProduct(AddrSpaces64, {S64})
5930b57cec5SDimitry Andric     .legalForCartesianProduct(AddrSpaces32, {S32})
5940b57cec5SDimitry Andric     .scalarize(0)
5950b57cec5SDimitry Andric     // Accept any address space as long as the size matches
5960b57cec5SDimitry Andric     .legalIf(sameSize(0, 1))
5970b57cec5SDimitry Andric     .widenScalarIf(smallerThan(0, 1),
5980b57cec5SDimitry Andric       [](const LegalityQuery &Query) {
5990b57cec5SDimitry Andric         return std::make_pair(0, LLT::scalar(Query.Types[1].getSizeInBits()));
6000b57cec5SDimitry Andric       })
6010b57cec5SDimitry Andric     .narrowScalarIf(
6020b57cec5SDimitry Andric       greaterThan(0, 1),
6030b57cec5SDimitry Andric       [](const LegalityQuery &Query) {
6040b57cec5SDimitry Andric         return std::make_pair(0, LLT::scalar(Query.Types[1].getSizeInBits()));
6050b57cec5SDimitry Andric       });
6060b57cec5SDimitry Andric 
6070b57cec5SDimitry Andric   getActionDefinitionsBuilder(G_ADDRSPACE_CAST)
6080b57cec5SDimitry Andric     .scalarize(0)
6090b57cec5SDimitry Andric     .custom();
6100b57cec5SDimitry Andric 
6110b57cec5SDimitry Andric   // TODO: Should load to s16 be legal? Most loads extend to 32-bits, but we
6120b57cec5SDimitry Andric   // handle some operations by just promoting the register during
6130b57cec5SDimitry Andric   // selection. There are also d16 loads on GFX9+ which preserve the high bits.
614*8bcb0991SDimitry Andric   auto maxSizeForAddrSpace = [this](unsigned AS) -> unsigned {
                                // Returns the largest memory access size, in bits,
                                // that the load/store rules below keep as a single
                                // access for address space AS. The result is
                                // compared against MMODescrs[0].SizeInBits by the
                                // splitting predicate and mutations that follow.
                                //   private          -> 32
                                //   local            -> 128 if ST.useDS128(), else 64
                                //   constant, global -> 512
                                //   anything else    -> 128
615*8bcb0991SDimitry Andric     switch (AS) {
616*8bcb0991SDimitry Andric     // FIXME: Private element size.
617*8bcb0991SDimitry Andric     case AMDGPUAS::PRIVATE_ADDRESS:
618*8bcb0991SDimitry Andric       return 32;
619*8bcb0991SDimitry Andric     // FIXME: Check subtarget
620*8bcb0991SDimitry Andric     case AMDGPUAS::LOCAL_ADDRESS:
621*8bcb0991SDimitry Andric       return ST.useDS128() ? 128 : 64;
6220b57cec5SDimitry Andric 
623*8bcb0991SDimitry Andric     // Treat constant and global as identical. SMRD loads are sometimes usable
624*8bcb0991SDimitry Andric     // for global loads (ideally constant address space should be eliminated)
625*8bcb0991SDimitry Andric     // depending on the context. Legality cannot be context dependent, but
626*8bcb0991SDimitry Andric     // RegBankSelect can split the load as necessary depending on the pointer
627*8bcb0991SDimitry Andric     // register bank/uniformity and if the memory is invariant or not written in
628*8bcb0991SDimitry Andric     // a kernel.
629*8bcb0991SDimitry Andric     case AMDGPUAS::CONSTANT_ADDRESS:
630*8bcb0991SDimitry Andric     case AMDGPUAS::GLOBAL_ADDRESS:
631*8bcb0991SDimitry Andric       return 512;
632*8bcb0991SDimitry Andric     default:
633*8bcb0991SDimitry Andric       return 128;
634*8bcb0991SDimitry Andric     }
635*8bcb0991SDimitry Andric   };
636*8bcb0991SDimitry Andric 
637*8bcb0991SDimitry Andric   const auto needToSplitLoad = [=](const LegalityQuery &Query) -> bool {
                                // Legality predicate: true when the memory access
                                // described by Query has to be broken into smaller
                                // pieces. Despite the name it is applied to both
                                // G_LOAD and G_STORE by the loop below. The checks
                                // run in this order and the first hit wins:
                                //   1. vector result wider than the memory size
                                //      (a vector extending load);
                                //   2. memory size above maxSizeForAddrSpace() for
                                //      the pointer's address space;
                                //   3. MemSize / 32 == 3 on a subtarget without
                                //      dwordx3 load/stores;
                                //   4. alignment (in bits) below the memory size
                                //      and the target lowering rejects the
                                //      misaligned access.
638*8bcb0991SDimitry Andric     const LLT DstTy = Query.Types[0];
639*8bcb0991SDimitry Andric 
640*8bcb0991SDimitry Andric     // Split vector extloads.
641*8bcb0991SDimitry Andric     unsigned MemSize = Query.MMODescrs[0].SizeInBits;
642*8bcb0991SDimitry Andric     if (DstTy.isVector() && DstTy.getSizeInBits() > MemSize)
643*8bcb0991SDimitry Andric       return true;
644*8bcb0991SDimitry Andric 
645*8bcb0991SDimitry Andric     const LLT PtrTy = Query.Types[1];
646*8bcb0991SDimitry Andric     unsigned AS = PtrTy.getAddressSpace();
647*8bcb0991SDimitry Andric     if (MemSize > maxSizeForAddrSpace(AS))
648*8bcb0991SDimitry Andric       return true;
649*8bcb0991SDimitry Andric 
650*8bcb0991SDimitry Andric     // Catch weird sized loads that don't evenly divide into the access sizes
651*8bcb0991SDimitry Andric     // TODO: May be able to widen depending on alignment etc.
652*8bcb0991SDimitry Andric     unsigned NumRegs = MemSize / 32;
653*8bcb0991SDimitry Andric     if (NumRegs == 3 && !ST.hasDwordx3LoadStores())
654*8bcb0991SDimitry Andric       return true;
655*8bcb0991SDimitry Andric 
656*8bcb0991SDimitry Andric     unsigned Align = Query.MMODescrs[0].AlignInBits;
657*8bcb0991SDimitry Andric     if (Align < MemSize) {
                                  // Under-aligned access: defer to the target
                                  // lowering hook. Align is in bits here, so it
                                  // is divided by 8 before being passed on.
658*8bcb0991SDimitry Andric       const SITargetLowering *TLI = ST.getTargetLowering();
659*8bcb0991SDimitry Andric       return !TLI->allowsMisalignedMemoryAccessesImpl(MemSize, AS, Align / 8);
660*8bcb0991SDimitry Andric     }
661*8bcb0991SDimitry Andric 
662*8bcb0991SDimitry Andric     return false;
663*8bcb0991SDimitry Andric   };
664*8bcb0991SDimitry Andric 
665*8bcb0991SDimitry Andric   unsigned GlobalAlign32 = ST.hasUnalignedBufferAccess() ? 0 : 32;
666*8bcb0991SDimitry Andric   unsigned GlobalAlign16 = ST.hasUnalignedBufferAccess() ? 0 : 16;
667*8bcb0991SDimitry Andric   unsigned GlobalAlign8 = ST.hasUnalignedBufferAccess() ? 0 : 8;
668*8bcb0991SDimitry Andric 
669*8bcb0991SDimitry Andric   // TODO: Refine based on subtargets which support unaligned access or 128-bit
670*8bcb0991SDimitry Andric   // LDS
671*8bcb0991SDimitry Andric   // TODO: Unsupported flat for SI.
672*8bcb0991SDimitry Andric 
673*8bcb0991SDimitry Andric   for (unsigned Op : {G_LOAD, G_STORE}) {
674*8bcb0991SDimitry Andric     const bool IsStore = Op == G_STORE;
675*8bcb0991SDimitry Andric 
676*8bcb0991SDimitry Andric     auto &Actions = getActionDefinitionsBuilder(Op);
677*8bcb0991SDimitry Andric     // Whitelist the common cases.
678*8bcb0991SDimitry Andric     // TODO: Pointer loads
679*8bcb0991SDimitry Andric     // TODO: Wide constant loads
680*8bcb0991SDimitry Andric     // TODO: Only CI+ has 3x loads
681*8bcb0991SDimitry Andric     // TODO: Loads to s16 on gfx9
682*8bcb0991SDimitry Andric     Actions.legalForTypesWithMemDesc({{S32, GlobalPtr, 32, GlobalAlign32},
683*8bcb0991SDimitry Andric                                       {V2S32, GlobalPtr, 64, GlobalAlign32},
684*8bcb0991SDimitry Andric                                       {V3S32, GlobalPtr, 96, GlobalAlign32},
685*8bcb0991SDimitry Andric                                       {S96, GlobalPtr, 96, GlobalAlign32},
686*8bcb0991SDimitry Andric                                       {V4S32, GlobalPtr, 128, GlobalAlign32},
687*8bcb0991SDimitry Andric                                       {S128, GlobalPtr, 128, GlobalAlign32},
688*8bcb0991SDimitry Andric                                       {S64, GlobalPtr, 64, GlobalAlign32},
689*8bcb0991SDimitry Andric                                       {V2S64, GlobalPtr, 128, GlobalAlign32},
690*8bcb0991SDimitry Andric                                       {V2S16, GlobalPtr, 32, GlobalAlign32},
691*8bcb0991SDimitry Andric                                       {S32, GlobalPtr, 8, GlobalAlign8},
692*8bcb0991SDimitry Andric                                       {S32, GlobalPtr, 16, GlobalAlign16},
693*8bcb0991SDimitry Andric 
694*8bcb0991SDimitry Andric                                       {S32, LocalPtr, 32, 32},
695*8bcb0991SDimitry Andric                                       {S64, LocalPtr, 64, 32},
696*8bcb0991SDimitry Andric                                       {V2S32, LocalPtr, 64, 32},
697*8bcb0991SDimitry Andric                                       {S32, LocalPtr, 8, 8},
698*8bcb0991SDimitry Andric                                       {S32, LocalPtr, 16, 16},
699*8bcb0991SDimitry Andric                                       {V2S16, LocalPtr, 32, 32},
700*8bcb0991SDimitry Andric 
701*8bcb0991SDimitry Andric                                       {S32, PrivatePtr, 32, 32},
702*8bcb0991SDimitry Andric                                       {S32, PrivatePtr, 8, 8},
703*8bcb0991SDimitry Andric                                       {S32, PrivatePtr, 16, 16},
704*8bcb0991SDimitry Andric                                       {V2S16, PrivatePtr, 32, 32},
705*8bcb0991SDimitry Andric 
706*8bcb0991SDimitry Andric                                       {S32, FlatPtr, 32, GlobalAlign32},
707*8bcb0991SDimitry Andric                                       {S32, FlatPtr, 16, GlobalAlign16},
708*8bcb0991SDimitry Andric                                       {S32, FlatPtr, 8, GlobalAlign8},
709*8bcb0991SDimitry Andric                                       {V2S16, FlatPtr, 32, GlobalAlign32},
710*8bcb0991SDimitry Andric 
711*8bcb0991SDimitry Andric                                       {S32, ConstantPtr, 32, GlobalAlign32},
712*8bcb0991SDimitry Andric                                       {V2S32, ConstantPtr, 64, GlobalAlign32},
713*8bcb0991SDimitry Andric                                       {V3S32, ConstantPtr, 96, GlobalAlign32},
714*8bcb0991SDimitry Andric                                       {V4S32, ConstantPtr, 128, GlobalAlign32},
715*8bcb0991SDimitry Andric                                       {S64, ConstantPtr, 64, GlobalAlign32},
716*8bcb0991SDimitry Andric                                       {S128, ConstantPtr, 128, GlobalAlign32},
717*8bcb0991SDimitry Andric                                       {V2S32, ConstantPtr, 32, GlobalAlign32}});
718*8bcb0991SDimitry Andric     Actions
719*8bcb0991SDimitry Andric         .customIf(typeIs(1, Constant32Ptr))
720*8bcb0991SDimitry Andric         .narrowScalarIf(
721*8bcb0991SDimitry Andric             [=](const LegalityQuery &Query) -> bool {
722*8bcb0991SDimitry Andric               return !Query.Types[0].isVector() && needToSplitLoad(Query);
723*8bcb0991SDimitry Andric             },
724*8bcb0991SDimitry Andric             [=](const LegalityQuery &Query) -> std::pair<unsigned, LLT> {
725*8bcb0991SDimitry Andric               const LLT DstTy = Query.Types[0];
726*8bcb0991SDimitry Andric               const LLT PtrTy = Query.Types[1];
727*8bcb0991SDimitry Andric 
728*8bcb0991SDimitry Andric               const unsigned DstSize = DstTy.getSizeInBits();
729*8bcb0991SDimitry Andric               unsigned MemSize = Query.MMODescrs[0].SizeInBits;
730*8bcb0991SDimitry Andric 
731*8bcb0991SDimitry Andric               // Split extloads.
732*8bcb0991SDimitry Andric               if (DstSize > MemSize)
733*8bcb0991SDimitry Andric                 return std::make_pair(0, LLT::scalar(MemSize));
734*8bcb0991SDimitry Andric 
735*8bcb0991SDimitry Andric               if (DstSize > 32 && (DstSize % 32 != 0)) {
736*8bcb0991SDimitry Andric                 // FIXME: Need a way to specify non-extload of larger size if
737*8bcb0991SDimitry Andric                 // suitably aligned.
738*8bcb0991SDimitry Andric                 return std::make_pair(0, LLT::scalar(32 * (DstSize / 32)));
739*8bcb0991SDimitry Andric               }
740*8bcb0991SDimitry Andric 
741*8bcb0991SDimitry Andric               unsigned MaxSize = maxSizeForAddrSpace(PtrTy.getAddressSpace());
742*8bcb0991SDimitry Andric               if (MemSize > MaxSize)
743*8bcb0991SDimitry Andric                 return std::make_pair(0, LLT::scalar(MaxSize));
744*8bcb0991SDimitry Andric 
745*8bcb0991SDimitry Andric               unsigned Align = Query.MMODescrs[0].AlignInBits;
746*8bcb0991SDimitry Andric               return std::make_pair(0, LLT::scalar(Align));
747*8bcb0991SDimitry Andric             })
748*8bcb0991SDimitry Andric         .fewerElementsIf(
749*8bcb0991SDimitry Andric             [=](const LegalityQuery &Query) -> bool {
750*8bcb0991SDimitry Andric               return Query.Types[0].isVector() && needToSplitLoad(Query);
751*8bcb0991SDimitry Andric             },
752*8bcb0991SDimitry Andric             [=](const LegalityQuery &Query) -> std::pair<unsigned, LLT> {
753*8bcb0991SDimitry Andric               const LLT DstTy = Query.Types[0];
754*8bcb0991SDimitry Andric               const LLT PtrTy = Query.Types[1];
755*8bcb0991SDimitry Andric 
756*8bcb0991SDimitry Andric               LLT EltTy = DstTy.getElementType();
757*8bcb0991SDimitry Andric               unsigned MaxSize = maxSizeForAddrSpace(PtrTy.getAddressSpace());
758*8bcb0991SDimitry Andric 
759*8bcb0991SDimitry Andric               // Split if it's too large for the address space.
760*8bcb0991SDimitry Andric               if (Query.MMODescrs[0].SizeInBits > MaxSize) {
761*8bcb0991SDimitry Andric                 unsigned NumElts = DstTy.getNumElements();
762*8bcb0991SDimitry Andric                 unsigned NumPieces = Query.MMODescrs[0].SizeInBits / MaxSize;
763*8bcb0991SDimitry Andric 
764*8bcb0991SDimitry Andric                 // FIXME: Refine when odd breakdowns handled
765*8bcb0991SDimitry Andric                 // The scalars will need to be re-legalized.
766*8bcb0991SDimitry Andric                 if (NumPieces == 1 || NumPieces >= NumElts ||
767*8bcb0991SDimitry Andric                     NumElts % NumPieces != 0)
768*8bcb0991SDimitry Andric                   return std::make_pair(0, EltTy);
769*8bcb0991SDimitry Andric 
770*8bcb0991SDimitry Andric                 return std::make_pair(0,
771*8bcb0991SDimitry Andric                                       LLT::vector(NumElts / NumPieces, EltTy));
772*8bcb0991SDimitry Andric               }
773*8bcb0991SDimitry Andric 
774*8bcb0991SDimitry Andric               // Need to split because of alignment.
775*8bcb0991SDimitry Andric               unsigned Align = Query.MMODescrs[0].AlignInBits;
776*8bcb0991SDimitry Andric               unsigned EltSize = EltTy.getSizeInBits();
777*8bcb0991SDimitry Andric               if (EltSize > Align &&
778*8bcb0991SDimitry Andric                   (EltSize / Align < DstTy.getNumElements())) {
779*8bcb0991SDimitry Andric                 return std::make_pair(0, LLT::vector(EltSize / Align, EltTy));
780*8bcb0991SDimitry Andric               }
781*8bcb0991SDimitry Andric 
782*8bcb0991SDimitry Andric               // May need relegalization for the scalars.
783*8bcb0991SDimitry Andric               return std::make_pair(0, EltTy);
784*8bcb0991SDimitry Andric             })
785*8bcb0991SDimitry Andric         .minScalar(0, S32);
786*8bcb0991SDimitry Andric 
787*8bcb0991SDimitry Andric     if (IsStore)
788*8bcb0991SDimitry Andric       Actions.narrowScalarIf(isWideScalarTruncStore(0), changeTo(0, S32));
789*8bcb0991SDimitry Andric 
790*8bcb0991SDimitry Andric     // TODO: Need a bitcast lower option?
791*8bcb0991SDimitry Andric     Actions
792*8bcb0991SDimitry Andric         .legalIf([=](const LegalityQuery &Query) {
793*8bcb0991SDimitry Andric           const LLT Ty0 = Query.Types[0];
7940b57cec5SDimitry Andric           unsigned Size = Ty0.getSizeInBits();
7950b57cec5SDimitry Andric           unsigned MemSize = Query.MMODescrs[0].SizeInBits;
796*8bcb0991SDimitry Andric           unsigned Align = Query.MMODescrs[0].AlignInBits;
797*8bcb0991SDimitry Andric 
798*8bcb0991SDimitry Andric           // No extending vector loads.
799*8bcb0991SDimitry Andric           if (Size > MemSize && Ty0.isVector())
8000b57cec5SDimitry Andric             return false;
8010b57cec5SDimitry Andric 
802*8bcb0991SDimitry Andric           // FIXME: Widening store from alignment not valid.
803*8bcb0991SDimitry Andric           if (MemSize < Size)
804*8bcb0991SDimitry Andric             MemSize = std::max(MemSize, Align);
8050b57cec5SDimitry Andric 
8060b57cec5SDimitry Andric           switch (MemSize) {
8070b57cec5SDimitry Andric           case 8:
8080b57cec5SDimitry Andric           case 16:
8090b57cec5SDimitry Andric             return Size == 32;
8100b57cec5SDimitry Andric           case 32:
8110b57cec5SDimitry Andric           case 64:
8120b57cec5SDimitry Andric           case 128:
8130b57cec5SDimitry Andric             return true;
8140b57cec5SDimitry Andric           case 96:
8150b57cec5SDimitry Andric             return ST.hasDwordx3LoadStores();
8160b57cec5SDimitry Andric           case 256:
8170b57cec5SDimitry Andric           case 512:
818*8bcb0991SDimitry Andric             return true;
8190b57cec5SDimitry Andric           default:
8200b57cec5SDimitry Andric             return false;
8210b57cec5SDimitry Andric           }
8220b57cec5SDimitry Andric         })
823*8bcb0991SDimitry Andric         .widenScalarToNextPow2(0)
824*8bcb0991SDimitry Andric         // TODO: v3s32->v4s32 with alignment
825*8bcb0991SDimitry Andric         .moreElementsIf(vectorSmallerThan(0, 32), moreEltsToNext32Bit(0));
826*8bcb0991SDimitry Andric   }
8270b57cec5SDimitry Andric 
8280b57cec5SDimitry Andric   auto &ExtLoads = getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD})
829*8bcb0991SDimitry Andric                        .legalForTypesWithMemDesc({{S32, GlobalPtr, 8, 8},
830*8bcb0991SDimitry Andric                                                   {S32, GlobalPtr, 16, 2 * 8},
8310b57cec5SDimitry Andric                                                   {S32, LocalPtr, 8, 8},
832*8bcb0991SDimitry Andric                                                   {S32, LocalPtr, 16, 16},
8330b57cec5SDimitry Andric                                                   {S32, PrivatePtr, 8, 8},
834*8bcb0991SDimitry Andric                                                   {S32, PrivatePtr, 16, 16},
835*8bcb0991SDimitry Andric                                                   {S32, ConstantPtr, 8, 8},
836*8bcb0991SDimitry Andric                                                   {S32, ConstantPtr, 16, 2 * 8}});
8370b57cec5SDimitry Andric   if (ST.hasFlatAddressSpace()) {
838*8bcb0991SDimitry Andric     ExtLoads.legalForTypesWithMemDesc(
839*8bcb0991SDimitry Andric         {{S32, FlatPtr, 8, 8}, {S32, FlatPtr, 16, 16}});
8400b57cec5SDimitry Andric   }
8410b57cec5SDimitry Andric 
8420b57cec5SDimitry Andric   ExtLoads.clampScalar(0, S32, S32)
8430b57cec5SDimitry Andric           .widenScalarToNextPow2(0)
8440b57cec5SDimitry Andric           .unsupportedIfMemSizeNotPow2()
8450b57cec5SDimitry Andric           .lower();
8460b57cec5SDimitry Andric 
8470b57cec5SDimitry Andric   auto &Atomics = getActionDefinitionsBuilder(
8480b57cec5SDimitry Andric     {G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, G_ATOMICRMW_SUB,
8490b57cec5SDimitry Andric      G_ATOMICRMW_AND, G_ATOMICRMW_OR, G_ATOMICRMW_XOR,
8500b57cec5SDimitry Andric      G_ATOMICRMW_MAX, G_ATOMICRMW_MIN, G_ATOMICRMW_UMAX,
8510b57cec5SDimitry Andric      G_ATOMICRMW_UMIN, G_ATOMIC_CMPXCHG})
8520b57cec5SDimitry Andric     .legalFor({{S32, GlobalPtr}, {S32, LocalPtr},
8530b57cec5SDimitry Andric                {S64, GlobalPtr}, {S64, LocalPtr}});
8540b57cec5SDimitry Andric   if (ST.hasFlatAddressSpace()) {
8550b57cec5SDimitry Andric     Atomics.legalFor({{S32, FlatPtr}, {S64, FlatPtr}});
8560b57cec5SDimitry Andric   }
8570b57cec5SDimitry Andric 
858*8bcb0991SDimitry Andric   getActionDefinitionsBuilder(G_ATOMICRMW_FADD)
859*8bcb0991SDimitry Andric     .legalFor({{S32, LocalPtr}});
860*8bcb0991SDimitry Andric 
861*8bcb0991SDimitry Andric   getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
862*8bcb0991SDimitry Andric     .lower();
863*8bcb0991SDimitry Andric 
8640b57cec5SDimitry Andric   // TODO: Pointer types, any 32-bit or 64-bit vector
8650b57cec5SDimitry Andric   getActionDefinitionsBuilder(G_SELECT)
8660b57cec5SDimitry Andric     .legalForCartesianProduct({S32, S64, S16, V2S32, V2S16, V4S16,
8670b57cec5SDimitry Andric           GlobalPtr, LocalPtr, FlatPtr, PrivatePtr,
8680b57cec5SDimitry Andric           LLT::vector(2, LocalPtr), LLT::vector(2, PrivatePtr)}, {S1})
8690b57cec5SDimitry Andric     .clampScalar(0, S16, S64)
8700b57cec5SDimitry Andric     .moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
8710b57cec5SDimitry Andric     .fewerElementsIf(numElementsNotEven(0), scalarize(0))
8720b57cec5SDimitry Andric     .scalarize(1)
8730b57cec5SDimitry Andric     .clampMaxNumElements(0, S32, 2)
8740b57cec5SDimitry Andric     .clampMaxNumElements(0, LocalPtr, 2)
8750b57cec5SDimitry Andric     .clampMaxNumElements(0, PrivatePtr, 2)
8760b57cec5SDimitry Andric     .scalarize(0)
8770b57cec5SDimitry Andric     .widenScalarToNextPow2(0)
8780b57cec5SDimitry Andric     .legalIf(all(isPointer(0), typeIs(1, S1)));
8790b57cec5SDimitry Andric 
8800b57cec5SDimitry Andric   // TODO: Only the low 4/5/6 bits of the shift amount are observed, so we can
8810b57cec5SDimitry Andric   // be more flexible with the shift amount type.
8820b57cec5SDimitry Andric   auto &Shifts = getActionDefinitionsBuilder({G_SHL, G_LSHR, G_ASHR})
8830b57cec5SDimitry Andric     .legalFor({{S32, S32}, {S64, S32}});
8840b57cec5SDimitry Andric   if (ST.has16BitInsts()) {
8850b57cec5SDimitry Andric     if (ST.hasVOP3PInsts()) {
8860b57cec5SDimitry Andric       Shifts.legalFor({{S16, S32}, {S16, S16}, {V2S16, V2S16}})
8870b57cec5SDimitry Andric             .clampMaxNumElements(0, S16, 2);
8880b57cec5SDimitry Andric     } else
8890b57cec5SDimitry Andric       Shifts.legalFor({{S16, S32}, {S16, S16}});
8900b57cec5SDimitry Andric 
8910b57cec5SDimitry Andric     Shifts.clampScalar(1, S16, S32);
8920b57cec5SDimitry Andric     Shifts.clampScalar(0, S16, S64);
8930b57cec5SDimitry Andric     Shifts.widenScalarToNextPow2(0, 16);
8940b57cec5SDimitry Andric   } else {
8950b57cec5SDimitry Andric     // Make sure we legalize the shift amount type first, as the general
8960b57cec5SDimitry Andric     // expansion for the shifted type will produce much worse code if it hasn't
8970b57cec5SDimitry Andric     // been truncated already.
8980b57cec5SDimitry Andric     Shifts.clampScalar(1, S32, S32);
8990b57cec5SDimitry Andric     Shifts.clampScalar(0, S32, S64);
9000b57cec5SDimitry Andric     Shifts.widenScalarToNextPow2(0, 32);
9010b57cec5SDimitry Andric   }
9020b57cec5SDimitry Andric   Shifts.scalarize(0);
9030b57cec5SDimitry Andric 
9040b57cec5SDimitry Andric   for (unsigned Op : {G_EXTRACT_VECTOR_ELT, G_INSERT_VECTOR_ELT}) {
9050b57cec5SDimitry Andric     unsigned VecTypeIdx = Op == G_EXTRACT_VECTOR_ELT ? 1 : 0;
9060b57cec5SDimitry Andric     unsigned EltTypeIdx = Op == G_EXTRACT_VECTOR_ELT ? 0 : 1;
9070b57cec5SDimitry Andric     unsigned IdxTypeIdx = 2;
9080b57cec5SDimitry Andric 
9090b57cec5SDimitry Andric     getActionDefinitionsBuilder(Op)
9100b57cec5SDimitry Andric       .customIf([=](const LegalityQuery &Query) {
9110b57cec5SDimitry Andric           const LLT EltTy = Query.Types[EltTypeIdx];
9120b57cec5SDimitry Andric           const LLT VecTy = Query.Types[VecTypeIdx];
9130b57cec5SDimitry Andric           const LLT IdxTy = Query.Types[IdxTypeIdx];
9140b57cec5SDimitry Andric           return (EltTy.getSizeInBits() == 16 ||
9150b57cec5SDimitry Andric                   EltTy.getSizeInBits() % 32 == 0) &&
9160b57cec5SDimitry Andric                  VecTy.getSizeInBits() % 32 == 0 &&
917*8bcb0991SDimitry Andric                  VecTy.getSizeInBits() <= 1024 &&
9180b57cec5SDimitry Andric                  IdxTy.getSizeInBits() == 32;
9190b57cec5SDimitry Andric         })
9200b57cec5SDimitry Andric       .clampScalar(EltTypeIdx, S32, S64)
9210b57cec5SDimitry Andric       .clampScalar(VecTypeIdx, S32, S64)
9220b57cec5SDimitry Andric       .clampScalar(IdxTypeIdx, S32, S32);
9230b57cec5SDimitry Andric   }
9240b57cec5SDimitry Andric 
9250b57cec5SDimitry Andric   getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
9260b57cec5SDimitry Andric     .unsupportedIf([=](const LegalityQuery &Query) {
9270b57cec5SDimitry Andric         const LLT &EltTy = Query.Types[1].getElementType();
9280b57cec5SDimitry Andric         return Query.Types[0] != EltTy;
9290b57cec5SDimitry Andric       });
9300b57cec5SDimitry Andric 
9310b57cec5SDimitry Andric   for (unsigned Op : {G_EXTRACT, G_INSERT}) {
9320b57cec5SDimitry Andric     unsigned BigTyIdx = Op == G_EXTRACT ? 1 : 0;
9330b57cec5SDimitry Andric     unsigned LitTyIdx = Op == G_EXTRACT ? 0 : 1;
9340b57cec5SDimitry Andric 
9350b57cec5SDimitry Andric     // FIXME: Doesn't handle extract of illegal sizes.
9360b57cec5SDimitry Andric     getActionDefinitionsBuilder(Op)
937*8bcb0991SDimitry Andric       .lowerIf(all(typeIs(LitTyIdx, S16), sizeIs(BigTyIdx, 32)))
938*8bcb0991SDimitry Andric       // FIXME: Multiples of 16 should not be legal.
9390b57cec5SDimitry Andric       .legalIf([=](const LegalityQuery &Query) {
9400b57cec5SDimitry Andric           const LLT BigTy = Query.Types[BigTyIdx];
9410b57cec5SDimitry Andric           const LLT LitTy = Query.Types[LitTyIdx];
9420b57cec5SDimitry Andric           return (BigTy.getSizeInBits() % 32 == 0) &&
9430b57cec5SDimitry Andric                  (LitTy.getSizeInBits() % 16 == 0);
9440b57cec5SDimitry Andric         })
9450b57cec5SDimitry Andric       .widenScalarIf(
9460b57cec5SDimitry Andric         [=](const LegalityQuery &Query) {
9470b57cec5SDimitry Andric           const LLT BigTy = Query.Types[BigTyIdx];
9480b57cec5SDimitry Andric           return (BigTy.getScalarSizeInBits() < 16);
9490b57cec5SDimitry Andric         },
9500b57cec5SDimitry Andric         LegalizeMutations::widenScalarOrEltToNextPow2(BigTyIdx, 16))
9510b57cec5SDimitry Andric       .widenScalarIf(
9520b57cec5SDimitry Andric         [=](const LegalityQuery &Query) {
9530b57cec5SDimitry Andric           const LLT LitTy = Query.Types[LitTyIdx];
9540b57cec5SDimitry Andric           return (LitTy.getScalarSizeInBits() < 16);
9550b57cec5SDimitry Andric         },
9560b57cec5SDimitry Andric         LegalizeMutations::widenScalarOrEltToNextPow2(LitTyIdx, 16))
9570b57cec5SDimitry Andric       .moreElementsIf(isSmallOddVector(BigTyIdx), oneMoreElement(BigTyIdx))
9580b57cec5SDimitry Andric       .widenScalarToNextPow2(BigTyIdx, 32);
9590b57cec5SDimitry Andric 
9600b57cec5SDimitry Andric   }
9610b57cec5SDimitry Andric 
962*8bcb0991SDimitry Andric   auto &BuildVector = getActionDefinitionsBuilder(G_BUILD_VECTOR)
9630b57cec5SDimitry Andric     .legalForCartesianProduct(AllS32Vectors, {S32})
9640b57cec5SDimitry Andric     .legalForCartesianProduct(AllS64Vectors, {S64})
965*8bcb0991SDimitry Andric     .clampNumElements(0, V16S32, V32S32)
966*8bcb0991SDimitry Andric     .clampNumElements(0, V2S64, V16S64)
967*8bcb0991SDimitry Andric     .fewerElementsIf(isWideVec16(0), changeTo(0, V2S16));
968*8bcb0991SDimitry Andric 
969*8bcb0991SDimitry Andric   if (ST.hasScalarPackInsts())
970*8bcb0991SDimitry Andric     BuildVector.legalFor({V2S16, S32});
971*8bcb0991SDimitry Andric 
972*8bcb0991SDimitry Andric   BuildVector
9730b57cec5SDimitry Andric     .minScalarSameAs(1, 0)
9740b57cec5SDimitry Andric     .legalIf(isRegisterType(0))
9750b57cec5SDimitry Andric     .minScalarOrElt(0, S32);
9760b57cec5SDimitry Andric 
977*8bcb0991SDimitry Andric   if (ST.hasScalarPackInsts()) {
978*8bcb0991SDimitry Andric     getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC)
979*8bcb0991SDimitry Andric       .legalFor({V2S16, S32})
980*8bcb0991SDimitry Andric       .lower();
981*8bcb0991SDimitry Andric   } else {
982*8bcb0991SDimitry Andric     getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC)
983*8bcb0991SDimitry Andric       .lower();
984*8bcb0991SDimitry Andric   }
985*8bcb0991SDimitry Andric 
9860b57cec5SDimitry Andric   getActionDefinitionsBuilder(G_CONCAT_VECTORS)
9870b57cec5SDimitry Andric     .legalIf(isRegisterType(0));
9880b57cec5SDimitry Andric 
989*8bcb0991SDimitry Andric   // TODO: Don't fully scalarize v2s16 pieces
990*8bcb0991SDimitry Andric   getActionDefinitionsBuilder(G_SHUFFLE_VECTOR).lower();
991*8bcb0991SDimitry Andric 
9920b57cec5SDimitry Andric   // Merge/Unmerge
9930b57cec5SDimitry Andric   for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
9940b57cec5SDimitry Andric     unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
9950b57cec5SDimitry Andric     unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
9960b57cec5SDimitry Andric 
9970b57cec5SDimitry Andric     auto notValidElt = [=](const LegalityQuery &Query, unsigned TypeIdx) {
                                 // True when the type at TypeIdx is a vector whose
                                 // element size is below 8 bits, above 64 bits, or
                                 // not a power of two. Non-vector types always
                                 // yield false. Used below to decide which vectors
                                 // get scalarized.
9980b57cec5SDimitry Andric       const LLT &Ty = Query.Types[TypeIdx];
9990b57cec5SDimitry Andric       if (Ty.isVector()) {
10000b57cec5SDimitry Andric         const LLT &EltTy = Ty.getElementType();
10010b57cec5SDimitry Andric         if (EltTy.getSizeInBits() < 8 || EltTy.getSizeInBits() > 64)
10020b57cec5SDimitry Andric           return true;
10030b57cec5SDimitry Andric         if (!isPowerOf2_32(EltTy.getSizeInBits()))
10040b57cec5SDimitry Andric           return true;
10050b57cec5SDimitry Andric       }
10060b57cec5SDimitry Andric       return false;
10070b57cec5SDimitry Andric     };
10080b57cec5SDimitry Andric 
1009*8bcb0991SDimitry Andric     auto &Builder = getActionDefinitionsBuilder(Op)
10100b57cec5SDimitry Andric       .widenScalarToNextPow2(LitTyIdx, /*Min*/ 16)
10110b57cec5SDimitry Andric       // Clamp the little scalar to s16-s256 and make it a power of 2. It's not
10120b57cec5SDimitry Andric       // worth considering the multiples of 64 since 2*192 and 2*384 are not
10130b57cec5SDimitry Andric       // valid.
10140b57cec5SDimitry Andric       .clampScalar(LitTyIdx, S16, S256)
10150b57cec5SDimitry Andric       .widenScalarToNextPow2(LitTyIdx, /*Min*/ 32)
1016*8bcb0991SDimitry Andric       .moreElementsIf(isSmallOddVector(BigTyIdx), oneMoreElement(BigTyIdx))
1017*8bcb0991SDimitry Andric       .fewerElementsIf(all(typeIs(0, S16), vectorWiderThan(1, 32),
1018*8bcb0991SDimitry Andric                            elementTypeIs(1, S16)),
1019*8bcb0991SDimitry Andric                        changeTo(1, V2S16))
10200b57cec5SDimitry Andric       // Break up vectors with weird elements into scalars
10210b57cec5SDimitry Andric       .fewerElementsIf(
10220b57cec5SDimitry Andric         [=](const LegalityQuery &Query) { return notValidElt(Query, 0); },
10230b57cec5SDimitry Andric         scalarize(0))
10240b57cec5SDimitry Andric       .fewerElementsIf(
10250b57cec5SDimitry Andric         [=](const LegalityQuery &Query) { return notValidElt(Query, 1); },
10260b57cec5SDimitry Andric         scalarize(1))
1027*8bcb0991SDimitry Andric       .clampScalar(BigTyIdx, S32, S1024)
1028*8bcb0991SDimitry Andric       .lowerFor({{S16, V2S16}});
1029*8bcb0991SDimitry Andric 
1030*8bcb0991SDimitry Andric     if (Op == G_MERGE_VALUES) {
1031*8bcb0991SDimitry Andric       Builder.widenScalarIf(
1032*8bcb0991SDimitry Andric         // TODO: Use 16-bit shifts if legal for 8-bit values?
10330b57cec5SDimitry Andric         [=](const LegalityQuery &Query) {
1034*8bcb0991SDimitry Andric           const LLT Ty = Query.Types[LitTyIdx];
1035*8bcb0991SDimitry Andric           return Ty.getSizeInBits() < 32;
1036*8bcb0991SDimitry Andric         },
1037*8bcb0991SDimitry Andric         changeTo(LitTyIdx, S32));
1038*8bcb0991SDimitry Andric     }
1039*8bcb0991SDimitry Andric 
1040*8bcb0991SDimitry Andric     Builder.widenScalarIf(
1041*8bcb0991SDimitry Andric       [=](const LegalityQuery &Query) {
1042*8bcb0991SDimitry Andric         const LLT Ty = Query.Types[BigTyIdx];
10430b57cec5SDimitry Andric         return !isPowerOf2_32(Ty.getSizeInBits()) &&
10440b57cec5SDimitry Andric           Ty.getSizeInBits() % 16 != 0;
10450b57cec5SDimitry Andric       },
10460b57cec5SDimitry Andric       [=](const LegalityQuery &Query) {
10470b57cec5SDimitry Andric         // Pick the next power of 2, or a multiple of 64 over 128.
10480b57cec5SDimitry Andric         // Whichever is smaller.
10490b57cec5SDimitry Andric         const LLT &Ty = Query.Types[BigTyIdx];
10500b57cec5SDimitry Andric         unsigned NewSizeInBits = 1 << Log2_32_Ceil(Ty.getSizeInBits() + 1);
10510b57cec5SDimitry Andric         if (NewSizeInBits >= 256) {
10520b57cec5SDimitry Andric           unsigned RoundedTo = alignTo<64>(Ty.getSizeInBits() + 1);
10530b57cec5SDimitry Andric           if (RoundedTo < NewSizeInBits)
10540b57cec5SDimitry Andric             NewSizeInBits = RoundedTo;
10550b57cec5SDimitry Andric         }
10560b57cec5SDimitry Andric         return std::make_pair(BigTyIdx, LLT::scalar(NewSizeInBits));
10570b57cec5SDimitry Andric       })
10580b57cec5SDimitry Andric       .legalIf([=](const LegalityQuery &Query) {
10590b57cec5SDimitry Andric           const LLT &BigTy = Query.Types[BigTyIdx];
10600b57cec5SDimitry Andric           const LLT &LitTy = Query.Types[LitTyIdx];
10610b57cec5SDimitry Andric 
10620b57cec5SDimitry Andric           if (BigTy.isVector() && BigTy.getSizeInBits() < 32)
10630b57cec5SDimitry Andric             return false;
10640b57cec5SDimitry Andric           if (LitTy.isVector() && LitTy.getSizeInBits() < 32)
10650b57cec5SDimitry Andric             return false;
10660b57cec5SDimitry Andric 
10670b57cec5SDimitry Andric           return BigTy.getSizeInBits() % 16 == 0 &&
10680b57cec5SDimitry Andric                  LitTy.getSizeInBits() % 16 == 0 &&
1069*8bcb0991SDimitry Andric                  BigTy.getSizeInBits() <= 1024;
10700b57cec5SDimitry Andric         })
10710b57cec5SDimitry Andric       // Any vectors left are the wrong size. Scalarize them.
10720b57cec5SDimitry Andric       .scalarize(0)
10730b57cec5SDimitry Andric       .scalarize(1);
10740b57cec5SDimitry Andric   }
10750b57cec5SDimitry Andric 
1076*8bcb0991SDimitry Andric   getActionDefinitionsBuilder(G_SEXT_INREG).lower();
1077*8bcb0991SDimitry Andric 
10780b57cec5SDimitry Andric   computeTables();
10790b57cec5SDimitry Andric   verify(*ST.getInstrInfo());
10800b57cec5SDimitry Andric }
10810b57cec5SDimitry Andric 
10820b57cec5SDimitry Andric bool AMDGPULegalizerInfo::legalizeCustom(MachineInstr &MI,
10830b57cec5SDimitry Andric                                          MachineRegisterInfo &MRI,
1084*8bcb0991SDimitry Andric                                          MachineIRBuilder &B,
10850b57cec5SDimitry Andric                                          GISelChangeObserver &Observer) const {
10860b57cec5SDimitry Andric   switch (MI.getOpcode()) {
10870b57cec5SDimitry Andric   case TargetOpcode::G_ADDRSPACE_CAST:
1088*8bcb0991SDimitry Andric     return legalizeAddrSpaceCast(MI, MRI, B);
10890b57cec5SDimitry Andric   case TargetOpcode::G_FRINT:
1090*8bcb0991SDimitry Andric     return legalizeFrint(MI, MRI, B);
10910b57cec5SDimitry Andric   case TargetOpcode::G_FCEIL:
1092*8bcb0991SDimitry Andric     return legalizeFceil(MI, MRI, B);
10930b57cec5SDimitry Andric   case TargetOpcode::G_INTRINSIC_TRUNC:
1094*8bcb0991SDimitry Andric     return legalizeIntrinsicTrunc(MI, MRI, B);
10950b57cec5SDimitry Andric   case TargetOpcode::G_SITOFP:
1096*8bcb0991SDimitry Andric     return legalizeITOFP(MI, MRI, B, true);
10970b57cec5SDimitry Andric   case TargetOpcode::G_UITOFP:
1098*8bcb0991SDimitry Andric     return legalizeITOFP(MI, MRI, B, false);
10990b57cec5SDimitry Andric   case TargetOpcode::G_FMINNUM:
11000b57cec5SDimitry Andric   case TargetOpcode::G_FMAXNUM:
11010b57cec5SDimitry Andric   case TargetOpcode::G_FMINNUM_IEEE:
11020b57cec5SDimitry Andric   case TargetOpcode::G_FMAXNUM_IEEE:
1103*8bcb0991SDimitry Andric     return legalizeMinNumMaxNum(MI, MRI, B);
11040b57cec5SDimitry Andric   case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1105*8bcb0991SDimitry Andric     return legalizeExtractVectorElt(MI, MRI, B);
11060b57cec5SDimitry Andric   case TargetOpcode::G_INSERT_VECTOR_ELT:
1107*8bcb0991SDimitry Andric     return legalizeInsertVectorElt(MI, MRI, B);
1108*8bcb0991SDimitry Andric   case TargetOpcode::G_FSIN:
1109*8bcb0991SDimitry Andric   case TargetOpcode::G_FCOS:
1110*8bcb0991SDimitry Andric     return legalizeSinCos(MI, MRI, B);
1111*8bcb0991SDimitry Andric   case TargetOpcode::G_GLOBAL_VALUE:
1112*8bcb0991SDimitry Andric     return legalizeGlobalValue(MI, MRI, B);
1113*8bcb0991SDimitry Andric   case TargetOpcode::G_LOAD:
1114*8bcb0991SDimitry Andric     return legalizeLoad(MI, MRI, B, Observer);
1115*8bcb0991SDimitry Andric   case TargetOpcode::G_FMAD:
1116*8bcb0991SDimitry Andric     return legalizeFMad(MI, MRI, B);
1117*8bcb0991SDimitry Andric   case TargetOpcode::G_FDIV:
1118*8bcb0991SDimitry Andric     return legalizeFDIV(MI, MRI, B);
11190b57cec5SDimitry Andric   default:
11200b57cec5SDimitry Andric     return false;
11210b57cec5SDimitry Andric   }
11220b57cec5SDimitry Andric 
11230b57cec5SDimitry Andric   llvm_unreachable("expected switch to return");
11240b57cec5SDimitry Andric }
11250b57cec5SDimitry Andric 
11260b57cec5SDimitry Andric Register AMDGPULegalizerInfo::getSegmentAperture(
11270b57cec5SDimitry Andric   unsigned AS,
11280b57cec5SDimitry Andric   MachineRegisterInfo &MRI,
1129*8bcb0991SDimitry Andric   MachineIRBuilder &B) const {
1130*8bcb0991SDimitry Andric   MachineFunction &MF = B.getMF();
11310b57cec5SDimitry Andric   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
11320b57cec5SDimitry Andric   const LLT S32 = LLT::scalar(32);
11330b57cec5SDimitry Andric 
1134*8bcb0991SDimitry Andric   assert(AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS);
1135*8bcb0991SDimitry Andric 
11360b57cec5SDimitry Andric   if (ST.hasApertureRegs()) {
11370b57cec5SDimitry Andric     // FIXME: Use inline constants (src_{shared, private}_base) instead of
11380b57cec5SDimitry Andric     // getreg.
11390b57cec5SDimitry Andric     unsigned Offset = AS == AMDGPUAS::LOCAL_ADDRESS ?
11400b57cec5SDimitry Andric         AMDGPU::Hwreg::OFFSET_SRC_SHARED_BASE :
11410b57cec5SDimitry Andric         AMDGPU::Hwreg::OFFSET_SRC_PRIVATE_BASE;
11420b57cec5SDimitry Andric     unsigned WidthM1 = AS == AMDGPUAS::LOCAL_ADDRESS ?
11430b57cec5SDimitry Andric         AMDGPU::Hwreg::WIDTH_M1_SRC_SHARED_BASE :
11440b57cec5SDimitry Andric         AMDGPU::Hwreg::WIDTH_M1_SRC_PRIVATE_BASE;
11450b57cec5SDimitry Andric     unsigned Encoding =
11460b57cec5SDimitry Andric         AMDGPU::Hwreg::ID_MEM_BASES << AMDGPU::Hwreg::ID_SHIFT_ |
11470b57cec5SDimitry Andric         Offset << AMDGPU::Hwreg::OFFSET_SHIFT_ |
11480b57cec5SDimitry Andric         WidthM1 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_;
11490b57cec5SDimitry Andric 
11500b57cec5SDimitry Andric     Register ApertureReg = MRI.createGenericVirtualRegister(S32);
11510b57cec5SDimitry Andric     Register GetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
11520b57cec5SDimitry Andric 
1153*8bcb0991SDimitry Andric     B.buildInstr(AMDGPU::S_GETREG_B32)
11540b57cec5SDimitry Andric       .addDef(GetReg)
11550b57cec5SDimitry Andric       .addImm(Encoding);
11560b57cec5SDimitry Andric     MRI.setType(GetReg, S32);
11570b57cec5SDimitry Andric 
1158*8bcb0991SDimitry Andric     auto ShiftAmt = B.buildConstant(S32, WidthM1 + 1);
1159*8bcb0991SDimitry Andric     B.buildInstr(TargetOpcode::G_SHL)
11600b57cec5SDimitry Andric       .addDef(ApertureReg)
11610b57cec5SDimitry Andric       .addUse(GetReg)
11620b57cec5SDimitry Andric       .addUse(ShiftAmt.getReg(0));
11630b57cec5SDimitry Andric 
11640b57cec5SDimitry Andric     return ApertureReg;
11650b57cec5SDimitry Andric   }
11660b57cec5SDimitry Andric 
11670b57cec5SDimitry Andric   Register QueuePtr = MRI.createGenericVirtualRegister(
11680b57cec5SDimitry Andric     LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
11690b57cec5SDimitry Andric 
1170*8bcb0991SDimitry Andric   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1171*8bcb0991SDimitry Andric   if (!loadInputValue(QueuePtr, B, &MFI->getArgInfo().QueuePtr))
1172*8bcb0991SDimitry Andric     return Register();
11730b57cec5SDimitry Andric 
11740b57cec5SDimitry Andric   // Offset into amd_queue_t for group_segment_aperture_base_hi /
11750b57cec5SDimitry Andric   // private_segment_aperture_base_hi.
11760b57cec5SDimitry Andric   uint32_t StructOffset = (AS == AMDGPUAS::LOCAL_ADDRESS) ? 0x40 : 0x44;
11770b57cec5SDimitry Andric 
11780b57cec5SDimitry Andric   // FIXME: Don't use undef
11790b57cec5SDimitry Andric   Value *V = UndefValue::get(PointerType::get(
11800b57cec5SDimitry Andric                                Type::getInt8Ty(MF.getFunction().getContext()),
11810b57cec5SDimitry Andric                                AMDGPUAS::CONSTANT_ADDRESS));
11820b57cec5SDimitry Andric 
11830b57cec5SDimitry Andric   MachinePointerInfo PtrInfo(V, StructOffset);
11840b57cec5SDimitry Andric   MachineMemOperand *MMO = MF.getMachineMemOperand(
11850b57cec5SDimitry Andric     PtrInfo,
11860b57cec5SDimitry Andric     MachineMemOperand::MOLoad |
11870b57cec5SDimitry Andric     MachineMemOperand::MODereferenceable |
11880b57cec5SDimitry Andric     MachineMemOperand::MOInvariant,
11890b57cec5SDimitry Andric     4,
11900b57cec5SDimitry Andric     MinAlign(64, StructOffset));
11910b57cec5SDimitry Andric 
11920b57cec5SDimitry Andric   Register LoadResult = MRI.createGenericVirtualRegister(S32);
11930b57cec5SDimitry Andric   Register LoadAddr;
11940b57cec5SDimitry Andric 
1195*8bcb0991SDimitry Andric   B.materializeGEP(LoadAddr, QueuePtr, LLT::scalar(64), StructOffset);
1196*8bcb0991SDimitry Andric   B.buildLoad(LoadResult, LoadAddr, *MMO);
11970b57cec5SDimitry Andric   return LoadResult;
11980b57cec5SDimitry Andric }
11990b57cec5SDimitry Andric 
12000b57cec5SDimitry Andric bool AMDGPULegalizerInfo::legalizeAddrSpaceCast(
12010b57cec5SDimitry Andric   MachineInstr &MI, MachineRegisterInfo &MRI,
1202*8bcb0991SDimitry Andric   MachineIRBuilder &B) const {
1203*8bcb0991SDimitry Andric   MachineFunction &MF = B.getMF();
12040b57cec5SDimitry Andric 
1205*8bcb0991SDimitry Andric   B.setInstr(MI);
12060b57cec5SDimitry Andric 
1207*8bcb0991SDimitry Andric   const LLT S32 = LLT::scalar(32);
12080b57cec5SDimitry Andric   Register Dst = MI.getOperand(0).getReg();
12090b57cec5SDimitry Andric   Register Src = MI.getOperand(1).getReg();
12100b57cec5SDimitry Andric 
12110b57cec5SDimitry Andric   LLT DstTy = MRI.getType(Dst);
12120b57cec5SDimitry Andric   LLT SrcTy = MRI.getType(Src);
12130b57cec5SDimitry Andric   unsigned DestAS = DstTy.getAddressSpace();
12140b57cec5SDimitry Andric   unsigned SrcAS = SrcTy.getAddressSpace();
12150b57cec5SDimitry Andric 
12160b57cec5SDimitry Andric   // TODO: Avoid reloading from the queue ptr for each cast, or at least each
12170b57cec5SDimitry Andric   // vector element.
12180b57cec5SDimitry Andric   assert(!DstTy.isVector());
12190b57cec5SDimitry Andric 
12200b57cec5SDimitry Andric   const AMDGPUTargetMachine &TM
12210b57cec5SDimitry Andric     = static_cast<const AMDGPUTargetMachine &>(MF.getTarget());
12220b57cec5SDimitry Andric 
12230b57cec5SDimitry Andric   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
12240b57cec5SDimitry Andric   if (ST.getTargetLowering()->isNoopAddrSpaceCast(SrcAS, DestAS)) {
1225*8bcb0991SDimitry Andric     MI.setDesc(B.getTII().get(TargetOpcode::G_BITCAST));
1226*8bcb0991SDimitry Andric     return true;
1227*8bcb0991SDimitry Andric   }
1228*8bcb0991SDimitry Andric 
1229*8bcb0991SDimitry Andric   if (DestAS == AMDGPUAS::CONSTANT_ADDRESS_32BIT) {
1230*8bcb0991SDimitry Andric     // Truncate.
1231*8bcb0991SDimitry Andric     B.buildExtract(Dst, Src, 0);
1232*8bcb0991SDimitry Andric     MI.eraseFromParent();
1233*8bcb0991SDimitry Andric     return true;
1234*8bcb0991SDimitry Andric   }
1235*8bcb0991SDimitry Andric 
1236*8bcb0991SDimitry Andric   if (SrcAS == AMDGPUAS::CONSTANT_ADDRESS_32BIT) {
1237*8bcb0991SDimitry Andric     const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
1238*8bcb0991SDimitry Andric     uint32_t AddrHiVal = Info->get32BitAddressHighBits();
1239*8bcb0991SDimitry Andric 
1240*8bcb0991SDimitry Andric     // FIXME: This is a bit ugly due to creating a merge of 2 pointers to
1241*8bcb0991SDimitry Andric     // another. Merge operands are required to be the same type, but creating an
1242*8bcb0991SDimitry Andric     // extra ptrtoint would be kind of pointless.
1243*8bcb0991SDimitry Andric     auto HighAddr = B.buildConstant(
1244*8bcb0991SDimitry Andric       LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS_32BIT, 32), AddrHiVal);
1245*8bcb0991SDimitry Andric     B.buildMerge(Dst, {Src, HighAddr.getReg(0)});
1246*8bcb0991SDimitry Andric     MI.eraseFromParent();
12470b57cec5SDimitry Andric     return true;
12480b57cec5SDimitry Andric   }
12490b57cec5SDimitry Andric 
12500b57cec5SDimitry Andric   if (SrcAS == AMDGPUAS::FLAT_ADDRESS) {
12510b57cec5SDimitry Andric     assert(DestAS == AMDGPUAS::LOCAL_ADDRESS ||
12520b57cec5SDimitry Andric            DestAS == AMDGPUAS::PRIVATE_ADDRESS);
12530b57cec5SDimitry Andric     unsigned NullVal = TM.getNullPointerValue(DestAS);
12540b57cec5SDimitry Andric 
1255*8bcb0991SDimitry Andric     auto SegmentNull = B.buildConstant(DstTy, NullVal);
1256*8bcb0991SDimitry Andric     auto FlatNull = B.buildConstant(SrcTy, 0);
12570b57cec5SDimitry Andric 
12580b57cec5SDimitry Andric     Register PtrLo32 = MRI.createGenericVirtualRegister(DstTy);
12590b57cec5SDimitry Andric 
12600b57cec5SDimitry Andric     // Extract low 32-bits of the pointer.
1261*8bcb0991SDimitry Andric     B.buildExtract(PtrLo32, Src, 0);
12620b57cec5SDimitry Andric 
12630b57cec5SDimitry Andric     Register CmpRes = MRI.createGenericVirtualRegister(LLT::scalar(1));
1264*8bcb0991SDimitry Andric     B.buildICmp(CmpInst::ICMP_NE, CmpRes, Src, FlatNull.getReg(0));
1265*8bcb0991SDimitry Andric     B.buildSelect(Dst, CmpRes, PtrLo32, SegmentNull.getReg(0));
12660b57cec5SDimitry Andric 
12670b57cec5SDimitry Andric     MI.eraseFromParent();
12680b57cec5SDimitry Andric     return true;
12690b57cec5SDimitry Andric   }
12700b57cec5SDimitry Andric 
1271*8bcb0991SDimitry Andric   if (SrcAS != AMDGPUAS::LOCAL_ADDRESS && SrcAS != AMDGPUAS::PRIVATE_ADDRESS)
1272*8bcb0991SDimitry Andric     return false;
1273*8bcb0991SDimitry Andric 
1274*8bcb0991SDimitry Andric   if (!ST.hasFlatAddressSpace())
1275*8bcb0991SDimitry Andric     return false;
12760b57cec5SDimitry Andric 
12770b57cec5SDimitry Andric   auto SegmentNull =
1278*8bcb0991SDimitry Andric       B.buildConstant(SrcTy, TM.getNullPointerValue(SrcAS));
12790b57cec5SDimitry Andric   auto FlatNull =
1280*8bcb0991SDimitry Andric       B.buildConstant(DstTy, TM.getNullPointerValue(DestAS));
12810b57cec5SDimitry Andric 
1282*8bcb0991SDimitry Andric   Register ApertureReg = getSegmentAperture(SrcAS, MRI, B);
1283*8bcb0991SDimitry Andric   if (!ApertureReg.isValid())
1284*8bcb0991SDimitry Andric     return false;
12850b57cec5SDimitry Andric 
12860b57cec5SDimitry Andric   Register CmpRes = MRI.createGenericVirtualRegister(LLT::scalar(1));
1287*8bcb0991SDimitry Andric   B.buildICmp(CmpInst::ICMP_NE, CmpRes, Src, SegmentNull.getReg(0));
12880b57cec5SDimitry Andric 
12890b57cec5SDimitry Andric   Register BuildPtr = MRI.createGenericVirtualRegister(DstTy);
12900b57cec5SDimitry Andric 
12910b57cec5SDimitry Andric   // Coerce the type of the low half of the result so we can use merge_values.
1292*8bcb0991SDimitry Andric   Register SrcAsInt = MRI.createGenericVirtualRegister(S32);
1293*8bcb0991SDimitry Andric   B.buildInstr(TargetOpcode::G_PTRTOINT)
12940b57cec5SDimitry Andric     .addDef(SrcAsInt)
12950b57cec5SDimitry Andric     .addUse(Src);
12960b57cec5SDimitry Andric 
12970b57cec5SDimitry Andric   // TODO: Should we allow mismatched types but matching sizes in merges to
12980b57cec5SDimitry Andric   // avoid the ptrtoint?
1299*8bcb0991SDimitry Andric   B.buildMerge(BuildPtr, {SrcAsInt, ApertureReg});
1300*8bcb0991SDimitry Andric   B.buildSelect(Dst, CmpRes, BuildPtr, FlatNull.getReg(0));
13010b57cec5SDimitry Andric 
13020b57cec5SDimitry Andric   MI.eraseFromParent();
13030b57cec5SDimitry Andric   return true;
13040b57cec5SDimitry Andric }
13050b57cec5SDimitry Andric 
13060b57cec5SDimitry Andric bool AMDGPULegalizerInfo::legalizeFrint(
13070b57cec5SDimitry Andric   MachineInstr &MI, MachineRegisterInfo &MRI,
1308*8bcb0991SDimitry Andric   MachineIRBuilder &B) const {
1309*8bcb0991SDimitry Andric   B.setInstr(MI);
13100b57cec5SDimitry Andric 
13110b57cec5SDimitry Andric   Register Src = MI.getOperand(1).getReg();
13120b57cec5SDimitry Andric   LLT Ty = MRI.getType(Src);
13130b57cec5SDimitry Andric   assert(Ty.isScalar() && Ty.getSizeInBits() == 64);
13140b57cec5SDimitry Andric 
13150b57cec5SDimitry Andric   APFloat C1Val(APFloat::IEEEdouble(), "0x1.0p+52");
13160b57cec5SDimitry Andric   APFloat C2Val(APFloat::IEEEdouble(), "0x1.fffffffffffffp+51");
13170b57cec5SDimitry Andric 
1318*8bcb0991SDimitry Andric   auto C1 = B.buildFConstant(Ty, C1Val);
1319*8bcb0991SDimitry Andric   auto CopySign = B.buildFCopysign(Ty, C1, Src);
13200b57cec5SDimitry Andric 
13210b57cec5SDimitry Andric   // TODO: Should this propagate fast-math-flags?
1322*8bcb0991SDimitry Andric   auto Tmp1 = B.buildFAdd(Ty, Src, CopySign);
1323*8bcb0991SDimitry Andric   auto Tmp2 = B.buildFSub(Ty, Tmp1, CopySign);
13240b57cec5SDimitry Andric 
1325*8bcb0991SDimitry Andric   auto C2 = B.buildFConstant(Ty, C2Val);
1326*8bcb0991SDimitry Andric   auto Fabs = B.buildFAbs(Ty, Src);
13270b57cec5SDimitry Andric 
1328*8bcb0991SDimitry Andric   auto Cond = B.buildFCmp(CmpInst::FCMP_OGT, LLT::scalar(1), Fabs, C2);
1329*8bcb0991SDimitry Andric   B.buildSelect(MI.getOperand(0).getReg(), Cond, Src, Tmp2);
13300b57cec5SDimitry Andric   return true;
13310b57cec5SDimitry Andric }
13320b57cec5SDimitry Andric 
13330b57cec5SDimitry Andric bool AMDGPULegalizerInfo::legalizeFceil(
13340b57cec5SDimitry Andric   MachineInstr &MI, MachineRegisterInfo &MRI,
13350b57cec5SDimitry Andric   MachineIRBuilder &B) const {
13360b57cec5SDimitry Andric   B.setInstr(MI);
13370b57cec5SDimitry Andric 
13380b57cec5SDimitry Andric   const LLT S1 = LLT::scalar(1);
13390b57cec5SDimitry Andric   const LLT S64 = LLT::scalar(64);
13400b57cec5SDimitry Andric 
13410b57cec5SDimitry Andric   Register Src = MI.getOperand(1).getReg();
13420b57cec5SDimitry Andric   assert(MRI.getType(Src) == S64);
13430b57cec5SDimitry Andric 
13440b57cec5SDimitry Andric   // result = trunc(src)
13450b57cec5SDimitry Andric   // if (src > 0.0 && src != result)
13460b57cec5SDimitry Andric   //   result += 1.0
13470b57cec5SDimitry Andric 
13480b57cec5SDimitry Andric   auto Trunc = B.buildInstr(TargetOpcode::G_INTRINSIC_TRUNC, {S64}, {Src});
13490b57cec5SDimitry Andric 
13500b57cec5SDimitry Andric   const auto Zero = B.buildFConstant(S64, 0.0);
13510b57cec5SDimitry Andric   const auto One = B.buildFConstant(S64, 1.0);
13520b57cec5SDimitry Andric   auto Lt0 = B.buildFCmp(CmpInst::FCMP_OGT, S1, Src, Zero);
13530b57cec5SDimitry Andric   auto NeTrunc = B.buildFCmp(CmpInst::FCMP_ONE, S1, Src, Trunc);
13540b57cec5SDimitry Andric   auto And = B.buildAnd(S1, Lt0, NeTrunc);
13550b57cec5SDimitry Andric   auto Add = B.buildSelect(S64, And, One, Zero);
13560b57cec5SDimitry Andric 
13570b57cec5SDimitry Andric   // TODO: Should this propagate fast-math-flags?
13580b57cec5SDimitry Andric   B.buildFAdd(MI.getOperand(0).getReg(), Trunc, Add);
13590b57cec5SDimitry Andric   return true;
13600b57cec5SDimitry Andric }
13610b57cec5SDimitry Andric 
13620b57cec5SDimitry Andric static MachineInstrBuilder extractF64Exponent(unsigned Hi,
13630b57cec5SDimitry Andric                                               MachineIRBuilder &B) {
13640b57cec5SDimitry Andric   const unsigned FractBits = 52;
13650b57cec5SDimitry Andric   const unsigned ExpBits = 11;
13660b57cec5SDimitry Andric   LLT S32 = LLT::scalar(32);
13670b57cec5SDimitry Andric 
13680b57cec5SDimitry Andric   auto Const0 = B.buildConstant(S32, FractBits - 32);
13690b57cec5SDimitry Andric   auto Const1 = B.buildConstant(S32, ExpBits);
13700b57cec5SDimitry Andric 
13710b57cec5SDimitry Andric   auto ExpPart = B.buildIntrinsic(Intrinsic::amdgcn_ubfe, {S32}, false)
13720b57cec5SDimitry Andric     .addUse(Const0.getReg(0))
13730b57cec5SDimitry Andric     .addUse(Const1.getReg(0));
13740b57cec5SDimitry Andric 
13750b57cec5SDimitry Andric   return B.buildSub(S32, ExpPart, B.buildConstant(S32, 1023));
13760b57cec5SDimitry Andric }
13770b57cec5SDimitry Andric 
13780b57cec5SDimitry Andric bool AMDGPULegalizerInfo::legalizeIntrinsicTrunc(
13790b57cec5SDimitry Andric   MachineInstr &MI, MachineRegisterInfo &MRI,
13800b57cec5SDimitry Andric   MachineIRBuilder &B) const {
13810b57cec5SDimitry Andric   B.setInstr(MI);
13820b57cec5SDimitry Andric 
13830b57cec5SDimitry Andric   const LLT S1 = LLT::scalar(1);
13840b57cec5SDimitry Andric   const LLT S32 = LLT::scalar(32);
13850b57cec5SDimitry Andric   const LLT S64 = LLT::scalar(64);
13860b57cec5SDimitry Andric 
13870b57cec5SDimitry Andric   Register Src = MI.getOperand(1).getReg();
13880b57cec5SDimitry Andric   assert(MRI.getType(Src) == S64);
13890b57cec5SDimitry Andric 
13900b57cec5SDimitry Andric   // TODO: Should this use extract since the low half is unused?
13910b57cec5SDimitry Andric   auto Unmerge = B.buildUnmerge({S32, S32}, Src);
13920b57cec5SDimitry Andric   Register Hi = Unmerge.getReg(1);
13930b57cec5SDimitry Andric 
13940b57cec5SDimitry Andric   // Extract the upper half, since this is where we will find the sign and
13950b57cec5SDimitry Andric   // exponent.
13960b57cec5SDimitry Andric   auto Exp = extractF64Exponent(Hi, B);
13970b57cec5SDimitry Andric 
13980b57cec5SDimitry Andric   const unsigned FractBits = 52;
13990b57cec5SDimitry Andric 
14000b57cec5SDimitry Andric   // Extract the sign bit.
14010b57cec5SDimitry Andric   const auto SignBitMask = B.buildConstant(S32, UINT32_C(1) << 31);
14020b57cec5SDimitry Andric   auto SignBit = B.buildAnd(S32, Hi, SignBitMask);
14030b57cec5SDimitry Andric 
14040b57cec5SDimitry Andric   const auto FractMask = B.buildConstant(S64, (UINT64_C(1) << FractBits) - 1);
14050b57cec5SDimitry Andric 
14060b57cec5SDimitry Andric   const auto Zero32 = B.buildConstant(S32, 0);
14070b57cec5SDimitry Andric 
14080b57cec5SDimitry Andric   // Extend back to 64-bits.
14090b57cec5SDimitry Andric   auto SignBit64 = B.buildMerge(S64, {Zero32.getReg(0), SignBit.getReg(0)});
14100b57cec5SDimitry Andric 
14110b57cec5SDimitry Andric   auto Shr = B.buildAShr(S64, FractMask, Exp);
14120b57cec5SDimitry Andric   auto Not = B.buildNot(S64, Shr);
14130b57cec5SDimitry Andric   auto Tmp0 = B.buildAnd(S64, Src, Not);
14140b57cec5SDimitry Andric   auto FiftyOne = B.buildConstant(S32, FractBits - 1);
14150b57cec5SDimitry Andric 
14160b57cec5SDimitry Andric   auto ExpLt0 = B.buildICmp(CmpInst::ICMP_SLT, S1, Exp, Zero32);
14170b57cec5SDimitry Andric   auto ExpGt51 = B.buildICmp(CmpInst::ICMP_SGT, S1, Exp, FiftyOne);
14180b57cec5SDimitry Andric 
14190b57cec5SDimitry Andric   auto Tmp1 = B.buildSelect(S64, ExpLt0, SignBit64, Tmp0);
14200b57cec5SDimitry Andric   B.buildSelect(MI.getOperand(0).getReg(), ExpGt51, Src, Tmp1);
14210b57cec5SDimitry Andric   return true;
14220b57cec5SDimitry Andric }
14230b57cec5SDimitry Andric 
14240b57cec5SDimitry Andric bool AMDGPULegalizerInfo::legalizeITOFP(
14250b57cec5SDimitry Andric   MachineInstr &MI, MachineRegisterInfo &MRI,
14260b57cec5SDimitry Andric   MachineIRBuilder &B, bool Signed) const {
14270b57cec5SDimitry Andric   B.setInstr(MI);
14280b57cec5SDimitry Andric 
14290b57cec5SDimitry Andric   Register Dst = MI.getOperand(0).getReg();
14300b57cec5SDimitry Andric   Register Src = MI.getOperand(1).getReg();
14310b57cec5SDimitry Andric 
14320b57cec5SDimitry Andric   const LLT S64 = LLT::scalar(64);
14330b57cec5SDimitry Andric   const LLT S32 = LLT::scalar(32);
14340b57cec5SDimitry Andric 
14350b57cec5SDimitry Andric   assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S64);
14360b57cec5SDimitry Andric 
14370b57cec5SDimitry Andric   auto Unmerge = B.buildUnmerge({S32, S32}, Src);
14380b57cec5SDimitry Andric 
14390b57cec5SDimitry Andric   auto CvtHi = Signed ?
14400b57cec5SDimitry Andric     B.buildSITOFP(S64, Unmerge.getReg(1)) :
14410b57cec5SDimitry Andric     B.buildUITOFP(S64, Unmerge.getReg(1));
14420b57cec5SDimitry Andric 
14430b57cec5SDimitry Andric   auto CvtLo = B.buildUITOFP(S64, Unmerge.getReg(0));
14440b57cec5SDimitry Andric 
14450b57cec5SDimitry Andric   auto ThirtyTwo = B.buildConstant(S32, 32);
14460b57cec5SDimitry Andric   auto LdExp = B.buildIntrinsic(Intrinsic::amdgcn_ldexp, {S64}, false)
14470b57cec5SDimitry Andric     .addUse(CvtHi.getReg(0))
14480b57cec5SDimitry Andric     .addUse(ThirtyTwo.getReg(0));
14490b57cec5SDimitry Andric 
14500b57cec5SDimitry Andric   // TODO: Should this propagate fast-math-flags?
14510b57cec5SDimitry Andric   B.buildFAdd(Dst, LdExp, CvtLo);
14520b57cec5SDimitry Andric   MI.eraseFromParent();
14530b57cec5SDimitry Andric   return true;
14540b57cec5SDimitry Andric }
14550b57cec5SDimitry Andric 
14560b57cec5SDimitry Andric bool AMDGPULegalizerInfo::legalizeMinNumMaxNum(
14570b57cec5SDimitry Andric   MachineInstr &MI, MachineRegisterInfo &MRI,
14580b57cec5SDimitry Andric   MachineIRBuilder &B) const {
14590b57cec5SDimitry Andric   MachineFunction &MF = B.getMF();
14600b57cec5SDimitry Andric   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
14610b57cec5SDimitry Andric 
14620b57cec5SDimitry Andric   const bool IsIEEEOp = MI.getOpcode() == AMDGPU::G_FMINNUM_IEEE ||
14630b57cec5SDimitry Andric                         MI.getOpcode() == AMDGPU::G_FMAXNUM_IEEE;
14640b57cec5SDimitry Andric 
14650b57cec5SDimitry Andric   // With ieee_mode disabled, the instructions have the correct behavior
14660b57cec5SDimitry Andric   // already for G_FMINNUM/G_FMAXNUM
14670b57cec5SDimitry Andric   if (!MFI->getMode().IEEE)
14680b57cec5SDimitry Andric     return !IsIEEEOp;
14690b57cec5SDimitry Andric 
14700b57cec5SDimitry Andric   if (IsIEEEOp)
14710b57cec5SDimitry Andric     return true;
14720b57cec5SDimitry Andric 
14730b57cec5SDimitry Andric   MachineIRBuilder HelperBuilder(MI);
14740b57cec5SDimitry Andric   GISelObserverWrapper DummyObserver;
14750b57cec5SDimitry Andric   LegalizerHelper Helper(MF, DummyObserver, HelperBuilder);
1476*8bcb0991SDimitry Andric   HelperBuilder.setInstr(MI);
14770b57cec5SDimitry Andric   return Helper.lowerFMinNumMaxNum(MI) == LegalizerHelper::Legalized;
14780b57cec5SDimitry Andric }
14790b57cec5SDimitry Andric 
14800b57cec5SDimitry Andric bool AMDGPULegalizerInfo::legalizeExtractVectorElt(
14810b57cec5SDimitry Andric   MachineInstr &MI, MachineRegisterInfo &MRI,
14820b57cec5SDimitry Andric   MachineIRBuilder &B) const {
14830b57cec5SDimitry Andric   // TODO: Should move some of this into LegalizerHelper.
14840b57cec5SDimitry Andric 
14850b57cec5SDimitry Andric   // TODO: Promote dynamic indexing of s16 to s32
14860b57cec5SDimitry Andric   // TODO: Dynamic s64 indexing is only legal for SGPR.
14870b57cec5SDimitry Andric   Optional<int64_t> IdxVal = getConstantVRegVal(MI.getOperand(2).getReg(), MRI);
14880b57cec5SDimitry Andric   if (!IdxVal) // Dynamic case will be selected to register indexing.
14890b57cec5SDimitry Andric     return true;
14900b57cec5SDimitry Andric 
14910b57cec5SDimitry Andric   Register Dst = MI.getOperand(0).getReg();
14920b57cec5SDimitry Andric   Register Vec = MI.getOperand(1).getReg();
14930b57cec5SDimitry Andric 
14940b57cec5SDimitry Andric   LLT VecTy = MRI.getType(Vec);
14950b57cec5SDimitry Andric   LLT EltTy = VecTy.getElementType();
14960b57cec5SDimitry Andric   assert(EltTy == MRI.getType(Dst));
14970b57cec5SDimitry Andric 
14980b57cec5SDimitry Andric   B.setInstr(MI);
14990b57cec5SDimitry Andric 
15000b57cec5SDimitry Andric   if (IdxVal.getValue() < VecTy.getNumElements())
15010b57cec5SDimitry Andric     B.buildExtract(Dst, Vec, IdxVal.getValue() * EltTy.getSizeInBits());
15020b57cec5SDimitry Andric   else
15030b57cec5SDimitry Andric     B.buildUndef(Dst);
15040b57cec5SDimitry Andric 
15050b57cec5SDimitry Andric   MI.eraseFromParent();
15060b57cec5SDimitry Andric   return true;
15070b57cec5SDimitry Andric }
15080b57cec5SDimitry Andric 
15090b57cec5SDimitry Andric bool AMDGPULegalizerInfo::legalizeInsertVectorElt(
15100b57cec5SDimitry Andric   MachineInstr &MI, MachineRegisterInfo &MRI,
15110b57cec5SDimitry Andric   MachineIRBuilder &B) const {
15120b57cec5SDimitry Andric   // TODO: Should move some of this into LegalizerHelper.
15130b57cec5SDimitry Andric 
15140b57cec5SDimitry Andric   // TODO: Promote dynamic indexing of s16 to s32
15150b57cec5SDimitry Andric   // TODO: Dynamic s64 indexing is only legal for SGPR.
15160b57cec5SDimitry Andric   Optional<int64_t> IdxVal = getConstantVRegVal(MI.getOperand(3).getReg(), MRI);
15170b57cec5SDimitry Andric   if (!IdxVal) // Dynamic case will be selected to register indexing.
15180b57cec5SDimitry Andric     return true;
15190b57cec5SDimitry Andric 
15200b57cec5SDimitry Andric   Register Dst = MI.getOperand(0).getReg();
15210b57cec5SDimitry Andric   Register Vec = MI.getOperand(1).getReg();
15220b57cec5SDimitry Andric   Register Ins = MI.getOperand(2).getReg();
15230b57cec5SDimitry Andric 
15240b57cec5SDimitry Andric   LLT VecTy = MRI.getType(Vec);
15250b57cec5SDimitry Andric   LLT EltTy = VecTy.getElementType();
15260b57cec5SDimitry Andric   assert(EltTy == MRI.getType(Ins));
15270b57cec5SDimitry Andric 
15280b57cec5SDimitry Andric   B.setInstr(MI);
15290b57cec5SDimitry Andric 
15300b57cec5SDimitry Andric   if (IdxVal.getValue() < VecTy.getNumElements())
15310b57cec5SDimitry Andric     B.buildInsert(Dst, Vec, Ins, IdxVal.getValue() * EltTy.getSizeInBits());
15320b57cec5SDimitry Andric   else
15330b57cec5SDimitry Andric     B.buildUndef(Dst);
15340b57cec5SDimitry Andric 
15350b57cec5SDimitry Andric   MI.eraseFromParent();
15360b57cec5SDimitry Andric   return true;
15370b57cec5SDimitry Andric }
15380b57cec5SDimitry Andric 
1539*8bcb0991SDimitry Andric bool AMDGPULegalizerInfo::legalizeSinCos(
1540*8bcb0991SDimitry Andric   MachineInstr &MI, MachineRegisterInfo &MRI,
1541*8bcb0991SDimitry Andric   MachineIRBuilder &B) const {
1542*8bcb0991SDimitry Andric   B.setInstr(MI);
1543*8bcb0991SDimitry Andric 
1544*8bcb0991SDimitry Andric   Register DstReg = MI.getOperand(0).getReg();
1545*8bcb0991SDimitry Andric   Register SrcReg = MI.getOperand(1).getReg();
1546*8bcb0991SDimitry Andric   LLT Ty = MRI.getType(DstReg);
1547*8bcb0991SDimitry Andric   unsigned Flags = MI.getFlags();
1548*8bcb0991SDimitry Andric 
1549*8bcb0991SDimitry Andric   Register TrigVal;
1550*8bcb0991SDimitry Andric   auto OneOver2Pi = B.buildFConstant(Ty, 0.5 / M_PI);
1551*8bcb0991SDimitry Andric   if (ST.hasTrigReducedRange()) {
1552*8bcb0991SDimitry Andric     auto MulVal = B.buildFMul(Ty, SrcReg, OneOver2Pi, Flags);
1553*8bcb0991SDimitry Andric     TrigVal = B.buildIntrinsic(Intrinsic::amdgcn_fract, {Ty}, false)
1554*8bcb0991SDimitry Andric       .addUse(MulVal.getReg(0))
1555*8bcb0991SDimitry Andric       .setMIFlags(Flags).getReg(0);
1556*8bcb0991SDimitry Andric   } else
1557*8bcb0991SDimitry Andric     TrigVal = B.buildFMul(Ty, SrcReg, OneOver2Pi, Flags).getReg(0);
1558*8bcb0991SDimitry Andric 
1559*8bcb0991SDimitry Andric   Intrinsic::ID TrigIntrin = MI.getOpcode() == AMDGPU::G_FSIN ?
1560*8bcb0991SDimitry Andric     Intrinsic::amdgcn_sin : Intrinsic::amdgcn_cos;
1561*8bcb0991SDimitry Andric   B.buildIntrinsic(TrigIntrin, makeArrayRef<Register>(DstReg), false)
1562*8bcb0991SDimitry Andric     .addUse(TrigVal)
1563*8bcb0991SDimitry Andric     .setMIFlags(Flags);
1564*8bcb0991SDimitry Andric   MI.eraseFromParent();
1565*8bcb0991SDimitry Andric   return true;
1566*8bcb0991SDimitry Andric }
1567*8bcb0991SDimitry Andric 
1568*8bcb0991SDimitry Andric bool AMDGPULegalizerInfo::buildPCRelGlobalAddress(
1569*8bcb0991SDimitry Andric   Register DstReg, LLT PtrTy,
1570*8bcb0991SDimitry Andric   MachineIRBuilder &B, const GlobalValue *GV,
1571*8bcb0991SDimitry Andric   unsigned Offset, unsigned GAFlags) const {
1572*8bcb0991SDimitry Andric   // In order to support pc-relative addressing, SI_PC_ADD_REL_OFFSET is lowered
1573*8bcb0991SDimitry Andric   // to the following code sequence:
1574*8bcb0991SDimitry Andric   //
1575*8bcb0991SDimitry Andric   // For constant address space:
1576*8bcb0991SDimitry Andric   //   s_getpc_b64 s[0:1]
1577*8bcb0991SDimitry Andric   //   s_add_u32 s0, s0, $symbol
1578*8bcb0991SDimitry Andric   //   s_addc_u32 s1, s1, 0
1579*8bcb0991SDimitry Andric   //
1580*8bcb0991SDimitry Andric   //   s_getpc_b64 returns the address of the s_add_u32 instruction and then
1581*8bcb0991SDimitry Andric   //   a fixup or relocation is emitted to replace $symbol with a literal
1582*8bcb0991SDimitry Andric   //   constant, which is a pc-relative offset from the encoding of the $symbol
1583*8bcb0991SDimitry Andric   //   operand to the global variable.
1584*8bcb0991SDimitry Andric   //
1585*8bcb0991SDimitry Andric   // For global address space:
1586*8bcb0991SDimitry Andric   //   s_getpc_b64 s[0:1]
1587*8bcb0991SDimitry Andric   //   s_add_u32 s0, s0, $symbol@{gotpc}rel32@lo
1588*8bcb0991SDimitry Andric   //   s_addc_u32 s1, s1, $symbol@{gotpc}rel32@hi
1589*8bcb0991SDimitry Andric   //
1590*8bcb0991SDimitry Andric   //   s_getpc_b64 returns the address of the s_add_u32 instruction and then
1591*8bcb0991SDimitry Andric   //   fixups or relocations are emitted to replace $symbol@*@lo and
1592*8bcb0991SDimitry Andric   //   $symbol@*@hi with lower 32 bits and higher 32 bits of a literal constant,
1593*8bcb0991SDimitry Andric   //   which is a 64-bit pc-relative offset from the encoding of the $symbol
1594*8bcb0991SDimitry Andric   //   operand to the global variable.
1595*8bcb0991SDimitry Andric   //
1596*8bcb0991SDimitry Andric   // What we want here is an offset from the value returned by s_getpc
1597*8bcb0991SDimitry Andric   // (which is the address of the s_add_u32 instruction) to the global
1598*8bcb0991SDimitry Andric   // variable, but since the encoding of $symbol starts 4 bytes after the start
1599*8bcb0991SDimitry Andric   // of the s_add_u32 instruction, we end up with an offset that is 4 bytes too
1600*8bcb0991SDimitry Andric   // small. This requires us to add 4 to the global variable offset in order to
1601*8bcb0991SDimitry Andric   // compute the correct address.
1602*8bcb0991SDimitry Andric 
1603*8bcb0991SDimitry Andric   LLT ConstPtrTy = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
1604*8bcb0991SDimitry Andric 
1605*8bcb0991SDimitry Andric   Register PCReg = PtrTy.getSizeInBits() != 32 ? DstReg :
1606*8bcb0991SDimitry Andric     B.getMRI()->createGenericVirtualRegister(ConstPtrTy);
1607*8bcb0991SDimitry Andric 
1608*8bcb0991SDimitry Andric   MachineInstrBuilder MIB = B.buildInstr(AMDGPU::SI_PC_ADD_REL_OFFSET)
1609*8bcb0991SDimitry Andric     .addDef(PCReg);
1610*8bcb0991SDimitry Andric 
1611*8bcb0991SDimitry Andric   MIB.addGlobalAddress(GV, Offset + 4, GAFlags);
1612*8bcb0991SDimitry Andric   if (GAFlags == SIInstrInfo::MO_NONE)
1613*8bcb0991SDimitry Andric     MIB.addImm(0);
1614*8bcb0991SDimitry Andric   else
1615*8bcb0991SDimitry Andric     MIB.addGlobalAddress(GV, Offset + 4, GAFlags + 1);
1616*8bcb0991SDimitry Andric 
1617*8bcb0991SDimitry Andric   B.getMRI()->setRegClass(PCReg, &AMDGPU::SReg_64RegClass);
1618*8bcb0991SDimitry Andric 
1619*8bcb0991SDimitry Andric   if (PtrTy.getSizeInBits() == 32)
1620*8bcb0991SDimitry Andric     B.buildExtract(DstReg, PCReg, 0);
1621*8bcb0991SDimitry Andric   return true;
1622*8bcb0991SDimitry Andric  }
1623*8bcb0991SDimitry Andric 
1624*8bcb0991SDimitry Andric bool AMDGPULegalizerInfo::legalizeGlobalValue(
1625*8bcb0991SDimitry Andric   MachineInstr &MI, MachineRegisterInfo &MRI,
1626*8bcb0991SDimitry Andric   MachineIRBuilder &B) const {
1627*8bcb0991SDimitry Andric   Register DstReg = MI.getOperand(0).getReg();
1628*8bcb0991SDimitry Andric   LLT Ty = MRI.getType(DstReg);
1629*8bcb0991SDimitry Andric   unsigned AS = Ty.getAddressSpace();
1630*8bcb0991SDimitry Andric 
1631*8bcb0991SDimitry Andric   const GlobalValue *GV = MI.getOperand(1).getGlobal();
1632*8bcb0991SDimitry Andric   MachineFunction &MF = B.getMF();
1633*8bcb0991SDimitry Andric   SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1634*8bcb0991SDimitry Andric   B.setInstr(MI);
1635*8bcb0991SDimitry Andric 
1636*8bcb0991SDimitry Andric   if (AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS) {
1637*8bcb0991SDimitry Andric     if (!MFI->isEntryFunction()) {
1638*8bcb0991SDimitry Andric       const Function &Fn = MF.getFunction();
1639*8bcb0991SDimitry Andric       DiagnosticInfoUnsupported BadLDSDecl(
1640*8bcb0991SDimitry Andric         Fn, "local memory global used by non-kernel function", MI.getDebugLoc());
1641*8bcb0991SDimitry Andric       Fn.getContext().diagnose(BadLDSDecl);
1642*8bcb0991SDimitry Andric     }
1643*8bcb0991SDimitry Andric 
1644*8bcb0991SDimitry Andric     // TODO: We could emit code to handle the initialization somewhere.
1645*8bcb0991SDimitry Andric     if (!AMDGPUTargetLowering::hasDefinedInitializer(GV)) {
1646*8bcb0991SDimitry Andric       B.buildConstant(DstReg, MFI->allocateLDSGlobal(B.getDataLayout(), *GV));
1647*8bcb0991SDimitry Andric       MI.eraseFromParent();
1648*8bcb0991SDimitry Andric       return true;
1649*8bcb0991SDimitry Andric     }
1650*8bcb0991SDimitry Andric 
1651*8bcb0991SDimitry Andric     const Function &Fn = MF.getFunction();
1652*8bcb0991SDimitry Andric     DiagnosticInfoUnsupported BadInit(
1653*8bcb0991SDimitry Andric       Fn, "unsupported initializer for address space", MI.getDebugLoc());
1654*8bcb0991SDimitry Andric     Fn.getContext().diagnose(BadInit);
1655*8bcb0991SDimitry Andric     return true;
1656*8bcb0991SDimitry Andric   }
1657*8bcb0991SDimitry Andric 
1658*8bcb0991SDimitry Andric   const SITargetLowering *TLI = ST.getTargetLowering();
1659*8bcb0991SDimitry Andric 
1660*8bcb0991SDimitry Andric   if (TLI->shouldEmitFixup(GV)) {
1661*8bcb0991SDimitry Andric     buildPCRelGlobalAddress(DstReg, Ty, B, GV, 0);
1662*8bcb0991SDimitry Andric     MI.eraseFromParent();
1663*8bcb0991SDimitry Andric     return true;
1664*8bcb0991SDimitry Andric   }
1665*8bcb0991SDimitry Andric 
1666*8bcb0991SDimitry Andric   if (TLI->shouldEmitPCReloc(GV)) {
1667*8bcb0991SDimitry Andric     buildPCRelGlobalAddress(DstReg, Ty, B, GV, 0, SIInstrInfo::MO_REL32);
1668*8bcb0991SDimitry Andric     MI.eraseFromParent();
1669*8bcb0991SDimitry Andric     return true;
1670*8bcb0991SDimitry Andric   }
1671*8bcb0991SDimitry Andric 
1672*8bcb0991SDimitry Andric   LLT PtrTy = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
1673*8bcb0991SDimitry Andric   Register GOTAddr = MRI.createGenericVirtualRegister(PtrTy);
1674*8bcb0991SDimitry Andric 
1675*8bcb0991SDimitry Andric   MachineMemOperand *GOTMMO = MF.getMachineMemOperand(
1676*8bcb0991SDimitry Andric     MachinePointerInfo::getGOT(MF),
1677*8bcb0991SDimitry Andric     MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
1678*8bcb0991SDimitry Andric     MachineMemOperand::MOInvariant,
1679*8bcb0991SDimitry Andric     8 /*Size*/, 8 /*Align*/);
1680*8bcb0991SDimitry Andric 
1681*8bcb0991SDimitry Andric   buildPCRelGlobalAddress(GOTAddr, PtrTy, B, GV, 0, SIInstrInfo::MO_GOTPCREL32);
1682*8bcb0991SDimitry Andric 
1683*8bcb0991SDimitry Andric   if (Ty.getSizeInBits() == 32) {
1684*8bcb0991SDimitry Andric     // Truncate if this is a 32-bit constant adrdess.
1685*8bcb0991SDimitry Andric     auto Load = B.buildLoad(PtrTy, GOTAddr, *GOTMMO);
1686*8bcb0991SDimitry Andric     B.buildExtract(DstReg, Load, 0);
1687*8bcb0991SDimitry Andric   } else
1688*8bcb0991SDimitry Andric     B.buildLoad(DstReg, GOTAddr, *GOTMMO);
1689*8bcb0991SDimitry Andric 
1690*8bcb0991SDimitry Andric   MI.eraseFromParent();
1691*8bcb0991SDimitry Andric   return true;
1692*8bcb0991SDimitry Andric }
1693*8bcb0991SDimitry Andric 
1694*8bcb0991SDimitry Andric bool AMDGPULegalizerInfo::legalizeLoad(
1695*8bcb0991SDimitry Andric   MachineInstr &MI, MachineRegisterInfo &MRI,
1696*8bcb0991SDimitry Andric   MachineIRBuilder &B, GISelChangeObserver &Observer) const {
1697*8bcb0991SDimitry Andric   B.setInstr(MI);
1698*8bcb0991SDimitry Andric   LLT ConstPtr = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
1699*8bcb0991SDimitry Andric   auto Cast = B.buildAddrSpaceCast(ConstPtr, MI.getOperand(1).getReg());
1700*8bcb0991SDimitry Andric   Observer.changingInstr(MI);
1701*8bcb0991SDimitry Andric   MI.getOperand(1).setReg(Cast.getReg(0));
1702*8bcb0991SDimitry Andric   Observer.changedInstr(MI);
1703*8bcb0991SDimitry Andric   return true;
1704*8bcb0991SDimitry Andric }
1705*8bcb0991SDimitry Andric 
1706*8bcb0991SDimitry Andric bool AMDGPULegalizerInfo::legalizeFMad(
1707*8bcb0991SDimitry Andric   MachineInstr &MI, MachineRegisterInfo &MRI,
1708*8bcb0991SDimitry Andric   MachineIRBuilder &B) const {
1709*8bcb0991SDimitry Andric   LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1710*8bcb0991SDimitry Andric   assert(Ty.isScalar());
1711*8bcb0991SDimitry Andric 
1712*8bcb0991SDimitry Andric   // TODO: Always legal with future ftz flag.
1713*8bcb0991SDimitry Andric   if (Ty == LLT::scalar(32) && !ST.hasFP32Denormals())
1714*8bcb0991SDimitry Andric     return true;
1715*8bcb0991SDimitry Andric   if (Ty == LLT::scalar(16) && !ST.hasFP16Denormals())
1716*8bcb0991SDimitry Andric     return true;
1717*8bcb0991SDimitry Andric 
1718*8bcb0991SDimitry Andric   MachineFunction &MF = B.getMF();
1719*8bcb0991SDimitry Andric 
1720*8bcb0991SDimitry Andric   MachineIRBuilder HelperBuilder(MI);
1721*8bcb0991SDimitry Andric   GISelObserverWrapper DummyObserver;
1722*8bcb0991SDimitry Andric   LegalizerHelper Helper(MF, DummyObserver, HelperBuilder);
1723*8bcb0991SDimitry Andric   HelperBuilder.setMBB(*MI.getParent());
1724*8bcb0991SDimitry Andric   return Helper.lowerFMad(MI) == LegalizerHelper::Legalized;
1725*8bcb0991SDimitry Andric }
1726*8bcb0991SDimitry Andric 
17270b57cec5SDimitry Andric // Return the use branch instruction, otherwise null if the usage is invalid.
17280b57cec5SDimitry Andric static MachineInstr *verifyCFIntrinsic(MachineInstr &MI,
17290b57cec5SDimitry Andric                                        MachineRegisterInfo &MRI) {
17300b57cec5SDimitry Andric   Register CondDef = MI.getOperand(0).getReg();
17310b57cec5SDimitry Andric   if (!MRI.hasOneNonDBGUse(CondDef))
17320b57cec5SDimitry Andric     return nullptr;
17330b57cec5SDimitry Andric 
17340b57cec5SDimitry Andric   MachineInstr &UseMI = *MRI.use_instr_nodbg_begin(CondDef);
17350b57cec5SDimitry Andric   return UseMI.getParent() == MI.getParent() &&
17360b57cec5SDimitry Andric     UseMI.getOpcode() == AMDGPU::G_BRCOND ? &UseMI : nullptr;
17370b57cec5SDimitry Andric }
17380b57cec5SDimitry Andric 
17390b57cec5SDimitry Andric Register AMDGPULegalizerInfo::getLiveInRegister(MachineRegisterInfo &MRI,
17400b57cec5SDimitry Andric                                                 Register Reg, LLT Ty) const {
17410b57cec5SDimitry Andric   Register LiveIn = MRI.getLiveInVirtReg(Reg);
17420b57cec5SDimitry Andric   if (LiveIn)
17430b57cec5SDimitry Andric     return LiveIn;
17440b57cec5SDimitry Andric 
17450b57cec5SDimitry Andric   Register NewReg = MRI.createGenericVirtualRegister(Ty);
17460b57cec5SDimitry Andric   MRI.addLiveIn(Reg, NewReg);
17470b57cec5SDimitry Andric   return NewReg;
17480b57cec5SDimitry Andric }
17490b57cec5SDimitry Andric 
17500b57cec5SDimitry Andric bool AMDGPULegalizerInfo::loadInputValue(Register DstReg, MachineIRBuilder &B,
17510b57cec5SDimitry Andric                                          const ArgDescriptor *Arg) const {
1752*8bcb0991SDimitry Andric   if (!Arg->isRegister() || !Arg->getRegister().isValid())
17530b57cec5SDimitry Andric     return false; // TODO: Handle these
17540b57cec5SDimitry Andric 
17550b57cec5SDimitry Andric   assert(Arg->getRegister().isPhysical());
17560b57cec5SDimitry Andric 
17570b57cec5SDimitry Andric   MachineRegisterInfo &MRI = *B.getMRI();
17580b57cec5SDimitry Andric 
17590b57cec5SDimitry Andric   LLT Ty = MRI.getType(DstReg);
17600b57cec5SDimitry Andric   Register LiveIn = getLiveInRegister(MRI, Arg->getRegister(), Ty);
17610b57cec5SDimitry Andric 
17620b57cec5SDimitry Andric   if (Arg->isMasked()) {
17630b57cec5SDimitry Andric     // TODO: Should we try to emit this once in the entry block?
17640b57cec5SDimitry Andric     const LLT S32 = LLT::scalar(32);
17650b57cec5SDimitry Andric     const unsigned Mask = Arg->getMask();
17660b57cec5SDimitry Andric     const unsigned Shift = countTrailingZeros<unsigned>(Mask);
17670b57cec5SDimitry Andric 
1768*8bcb0991SDimitry Andric     Register AndMaskSrc = LiveIn;
1769*8bcb0991SDimitry Andric 
1770*8bcb0991SDimitry Andric     if (Shift != 0) {
17710b57cec5SDimitry Andric       auto ShiftAmt = B.buildConstant(S32, Shift);
1772*8bcb0991SDimitry Andric       AndMaskSrc = B.buildLShr(S32, LiveIn, ShiftAmt).getReg(0);
1773*8bcb0991SDimitry Andric     }
1774*8bcb0991SDimitry Andric 
1775*8bcb0991SDimitry Andric     B.buildAnd(DstReg, AndMaskSrc, B.buildConstant(S32, Mask >> Shift));
17760b57cec5SDimitry Andric   } else
17770b57cec5SDimitry Andric     B.buildCopy(DstReg, LiveIn);
17780b57cec5SDimitry Andric 
17790b57cec5SDimitry Andric   // Insert the argument copy if it doens't already exist.
17800b57cec5SDimitry Andric   // FIXME: It seems EmitLiveInCopies isn't called anywhere?
17810b57cec5SDimitry Andric   if (!MRI.getVRegDef(LiveIn)) {
1782*8bcb0991SDimitry Andric     // FIXME: Should have scoped insert pt
1783*8bcb0991SDimitry Andric     MachineBasicBlock &OrigInsBB = B.getMBB();
1784*8bcb0991SDimitry Andric     auto OrigInsPt = B.getInsertPt();
1785*8bcb0991SDimitry Andric 
17860b57cec5SDimitry Andric     MachineBasicBlock &EntryMBB = B.getMF().front();
17870b57cec5SDimitry Andric     EntryMBB.addLiveIn(Arg->getRegister());
17880b57cec5SDimitry Andric     B.setInsertPt(EntryMBB, EntryMBB.begin());
17890b57cec5SDimitry Andric     B.buildCopy(LiveIn, Arg->getRegister());
1790*8bcb0991SDimitry Andric 
1791*8bcb0991SDimitry Andric     B.setInsertPt(OrigInsBB, OrigInsPt);
17920b57cec5SDimitry Andric   }
17930b57cec5SDimitry Andric 
17940b57cec5SDimitry Andric   return true;
17950b57cec5SDimitry Andric }
17960b57cec5SDimitry Andric 
17970b57cec5SDimitry Andric bool AMDGPULegalizerInfo::legalizePreloadedArgIntrin(
17980b57cec5SDimitry Andric   MachineInstr &MI,
17990b57cec5SDimitry Andric   MachineRegisterInfo &MRI,
18000b57cec5SDimitry Andric   MachineIRBuilder &B,
18010b57cec5SDimitry Andric   AMDGPUFunctionArgInfo::PreloadedValue ArgType) const {
18020b57cec5SDimitry Andric   B.setInstr(MI);
18030b57cec5SDimitry Andric 
18040b57cec5SDimitry Andric   const SIMachineFunctionInfo *MFI = B.getMF().getInfo<SIMachineFunctionInfo>();
18050b57cec5SDimitry Andric 
18060b57cec5SDimitry Andric   const ArgDescriptor *Arg;
18070b57cec5SDimitry Andric   const TargetRegisterClass *RC;
18080b57cec5SDimitry Andric   std::tie(Arg, RC) = MFI->getPreloadedValue(ArgType);
18090b57cec5SDimitry Andric   if (!Arg) {
18100b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "Required arg register missing\n");
18110b57cec5SDimitry Andric     return false;
18120b57cec5SDimitry Andric   }
18130b57cec5SDimitry Andric 
18140b57cec5SDimitry Andric   if (loadInputValue(MI.getOperand(0).getReg(), B, Arg)) {
18150b57cec5SDimitry Andric     MI.eraseFromParent();
18160b57cec5SDimitry Andric     return true;
18170b57cec5SDimitry Andric   }
18180b57cec5SDimitry Andric 
18190b57cec5SDimitry Andric   return false;
18200b57cec5SDimitry Andric }
18210b57cec5SDimitry Andric 
1822*8bcb0991SDimitry Andric bool AMDGPULegalizerInfo::legalizeFDIV(MachineInstr &MI,
1823*8bcb0991SDimitry Andric                                        MachineRegisterInfo &MRI,
1824*8bcb0991SDimitry Andric                                        MachineIRBuilder &B) const {
1825*8bcb0991SDimitry Andric   B.setInstr(MI);
1826*8bcb0991SDimitry Andric 
1827*8bcb0991SDimitry Andric   if (legalizeFastUnsafeFDIV(MI, MRI, B))
1828*8bcb0991SDimitry Andric     return true;
1829*8bcb0991SDimitry Andric 
1830*8bcb0991SDimitry Andric   return false;
1831*8bcb0991SDimitry Andric }
1832*8bcb0991SDimitry Andric 
1833*8bcb0991SDimitry Andric bool AMDGPULegalizerInfo::legalizeFastUnsafeFDIV(MachineInstr &MI,
1834*8bcb0991SDimitry Andric                                                  MachineRegisterInfo &MRI,
1835*8bcb0991SDimitry Andric                                                  MachineIRBuilder &B) const {
1836*8bcb0991SDimitry Andric   Register Res = MI.getOperand(0).getReg();
1837*8bcb0991SDimitry Andric   Register LHS = MI.getOperand(1).getReg();
1838*8bcb0991SDimitry Andric   Register RHS = MI.getOperand(2).getReg();
1839*8bcb0991SDimitry Andric 
1840*8bcb0991SDimitry Andric   uint16_t Flags = MI.getFlags();
1841*8bcb0991SDimitry Andric 
1842*8bcb0991SDimitry Andric   LLT ResTy = MRI.getType(Res);
1843*8bcb0991SDimitry Andric   LLT S32 = LLT::scalar(32);
1844*8bcb0991SDimitry Andric   LLT S64 = LLT::scalar(64);
1845*8bcb0991SDimitry Andric 
1846*8bcb0991SDimitry Andric   const MachineFunction &MF = B.getMF();
1847*8bcb0991SDimitry Andric   bool Unsafe =
1848*8bcb0991SDimitry Andric     MF.getTarget().Options.UnsafeFPMath || MI.getFlag(MachineInstr::FmArcp);
1849*8bcb0991SDimitry Andric 
1850*8bcb0991SDimitry Andric   if (!MF.getTarget().Options.UnsafeFPMath && ResTy == S64)
1851*8bcb0991SDimitry Andric     return false;
1852*8bcb0991SDimitry Andric 
1853*8bcb0991SDimitry Andric   if (!Unsafe && ResTy == S32 && ST.hasFP32Denormals())
1854*8bcb0991SDimitry Andric     return false;
1855*8bcb0991SDimitry Andric 
1856*8bcb0991SDimitry Andric   if (auto CLHS = getConstantFPVRegVal(LHS, MRI)) {
1857*8bcb0991SDimitry Andric     // 1 / x -> RCP(x)
1858*8bcb0991SDimitry Andric     if (CLHS->isExactlyValue(1.0)) {
1859*8bcb0991SDimitry Andric       B.buildIntrinsic(Intrinsic::amdgcn_rcp, Res, false)
1860*8bcb0991SDimitry Andric         .addUse(RHS)
1861*8bcb0991SDimitry Andric         .setMIFlags(Flags);
1862*8bcb0991SDimitry Andric 
1863*8bcb0991SDimitry Andric       MI.eraseFromParent();
1864*8bcb0991SDimitry Andric       return true;
1865*8bcb0991SDimitry Andric     }
1866*8bcb0991SDimitry Andric 
1867*8bcb0991SDimitry Andric     // -1 / x -> RCP( FNEG(x) )
1868*8bcb0991SDimitry Andric     if (CLHS->isExactlyValue(-1.0)) {
1869*8bcb0991SDimitry Andric       auto FNeg = B.buildFNeg(ResTy, RHS, Flags);
1870*8bcb0991SDimitry Andric       B.buildIntrinsic(Intrinsic::amdgcn_rcp, Res, false)
1871*8bcb0991SDimitry Andric         .addUse(FNeg.getReg(0))
1872*8bcb0991SDimitry Andric         .setMIFlags(Flags);
1873*8bcb0991SDimitry Andric 
1874*8bcb0991SDimitry Andric       MI.eraseFromParent();
1875*8bcb0991SDimitry Andric       return true;
1876*8bcb0991SDimitry Andric     }
1877*8bcb0991SDimitry Andric   }
1878*8bcb0991SDimitry Andric 
1879*8bcb0991SDimitry Andric   // x / y -> x * (1.0 / y)
1880*8bcb0991SDimitry Andric   if (Unsafe) {
1881*8bcb0991SDimitry Andric     auto RCP = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {ResTy}, false)
1882*8bcb0991SDimitry Andric       .addUse(RHS)
1883*8bcb0991SDimitry Andric       .setMIFlags(Flags);
1884*8bcb0991SDimitry Andric     B.buildFMul(Res, LHS, RCP, Flags);
1885*8bcb0991SDimitry Andric 
1886*8bcb0991SDimitry Andric     MI.eraseFromParent();
1887*8bcb0991SDimitry Andric     return true;
1888*8bcb0991SDimitry Andric   }
1889*8bcb0991SDimitry Andric 
1890*8bcb0991SDimitry Andric   return false;
1891*8bcb0991SDimitry Andric }
1892*8bcb0991SDimitry Andric 
1893*8bcb0991SDimitry Andric bool AMDGPULegalizerInfo::legalizeFDIVFastIntrin(MachineInstr &MI,
1894*8bcb0991SDimitry Andric                                                  MachineRegisterInfo &MRI,
1895*8bcb0991SDimitry Andric                                                  MachineIRBuilder &B) const {
1896*8bcb0991SDimitry Andric   B.setInstr(MI);
1897*8bcb0991SDimitry Andric   Register Res = MI.getOperand(0).getReg();
1898*8bcb0991SDimitry Andric   Register LHS = MI.getOperand(2).getReg();
1899*8bcb0991SDimitry Andric   Register RHS = MI.getOperand(3).getReg();
1900*8bcb0991SDimitry Andric   uint16_t Flags = MI.getFlags();
1901*8bcb0991SDimitry Andric 
1902*8bcb0991SDimitry Andric   LLT S32 = LLT::scalar(32);
1903*8bcb0991SDimitry Andric   LLT S1 = LLT::scalar(1);
1904*8bcb0991SDimitry Andric 
1905*8bcb0991SDimitry Andric   auto Abs = B.buildFAbs(S32, RHS, Flags);
1906*8bcb0991SDimitry Andric   const APFloat C0Val(1.0f);
1907*8bcb0991SDimitry Andric 
1908*8bcb0991SDimitry Andric   auto C0 = B.buildConstant(S32, 0x6f800000);
1909*8bcb0991SDimitry Andric   auto C1 = B.buildConstant(S32, 0x2f800000);
1910*8bcb0991SDimitry Andric   auto C2 = B.buildConstant(S32, FloatToBits(1.0f));
1911*8bcb0991SDimitry Andric 
1912*8bcb0991SDimitry Andric   auto CmpRes = B.buildFCmp(CmpInst::FCMP_OGT, S1, Abs, C0, Flags);
1913*8bcb0991SDimitry Andric   auto Sel = B.buildSelect(S32, CmpRes, C1, C2, Flags);
1914*8bcb0991SDimitry Andric 
1915*8bcb0991SDimitry Andric   auto Mul0 = B.buildFMul(S32, RHS, Sel, Flags);
1916*8bcb0991SDimitry Andric 
1917*8bcb0991SDimitry Andric   auto RCP = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {S32}, false)
1918*8bcb0991SDimitry Andric     .addUse(Mul0.getReg(0))
1919*8bcb0991SDimitry Andric     .setMIFlags(Flags);
1920*8bcb0991SDimitry Andric 
1921*8bcb0991SDimitry Andric   auto Mul1 = B.buildFMul(S32, LHS, RCP, Flags);
1922*8bcb0991SDimitry Andric 
1923*8bcb0991SDimitry Andric   B.buildFMul(Res, Sel, Mul1, Flags);
1924*8bcb0991SDimitry Andric 
1925*8bcb0991SDimitry Andric   MI.eraseFromParent();
1926*8bcb0991SDimitry Andric   return true;
1927*8bcb0991SDimitry Andric }
1928*8bcb0991SDimitry Andric 
19290b57cec5SDimitry Andric bool AMDGPULegalizerInfo::legalizeImplicitArgPtr(MachineInstr &MI,
19300b57cec5SDimitry Andric                                                  MachineRegisterInfo &MRI,
19310b57cec5SDimitry Andric                                                  MachineIRBuilder &B) const {
19320b57cec5SDimitry Andric   const SIMachineFunctionInfo *MFI = B.getMF().getInfo<SIMachineFunctionInfo>();
19330b57cec5SDimitry Andric   if (!MFI->isEntryFunction()) {
19340b57cec5SDimitry Andric     return legalizePreloadedArgIntrin(MI, MRI, B,
19350b57cec5SDimitry Andric                                       AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR);
19360b57cec5SDimitry Andric   }
19370b57cec5SDimitry Andric 
19380b57cec5SDimitry Andric   B.setInstr(MI);
19390b57cec5SDimitry Andric 
19400b57cec5SDimitry Andric   uint64_t Offset =
19410b57cec5SDimitry Andric     ST.getTargetLowering()->getImplicitParameterOffset(
19420b57cec5SDimitry Andric       B.getMF(), AMDGPUTargetLowering::FIRST_IMPLICIT);
19430b57cec5SDimitry Andric   Register DstReg = MI.getOperand(0).getReg();
19440b57cec5SDimitry Andric   LLT DstTy = MRI.getType(DstReg);
19450b57cec5SDimitry Andric   LLT IdxTy = LLT::scalar(DstTy.getSizeInBits());
19460b57cec5SDimitry Andric 
19470b57cec5SDimitry Andric   const ArgDescriptor *Arg;
19480b57cec5SDimitry Andric   const TargetRegisterClass *RC;
19490b57cec5SDimitry Andric   std::tie(Arg, RC)
19500b57cec5SDimitry Andric     = MFI->getPreloadedValue(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
19510b57cec5SDimitry Andric   if (!Arg)
19520b57cec5SDimitry Andric     return false;
19530b57cec5SDimitry Andric 
19540b57cec5SDimitry Andric   Register KernargPtrReg = MRI.createGenericVirtualRegister(DstTy);
19550b57cec5SDimitry Andric   if (!loadInputValue(KernargPtrReg, B, Arg))
19560b57cec5SDimitry Andric     return false;
19570b57cec5SDimitry Andric 
19580b57cec5SDimitry Andric   B.buildGEP(DstReg, KernargPtrReg, B.buildConstant(IdxTy, Offset).getReg(0));
19590b57cec5SDimitry Andric   MI.eraseFromParent();
19600b57cec5SDimitry Andric   return true;
19610b57cec5SDimitry Andric }
19620b57cec5SDimitry Andric 
1963*8bcb0991SDimitry Andric bool AMDGPULegalizerInfo::legalizeIsAddrSpace(MachineInstr &MI,
1964*8bcb0991SDimitry Andric                                               MachineRegisterInfo &MRI,
1965*8bcb0991SDimitry Andric                                               MachineIRBuilder &B,
1966*8bcb0991SDimitry Andric                                               unsigned AddrSpace) const {
1967*8bcb0991SDimitry Andric   B.setInstr(MI);
1968*8bcb0991SDimitry Andric   Register ApertureReg = getSegmentAperture(AddrSpace, MRI, B);
1969*8bcb0991SDimitry Andric   auto Hi32 = B.buildExtract(LLT::scalar(32), MI.getOperand(2).getReg(), 32);
1970*8bcb0991SDimitry Andric   B.buildICmp(ICmpInst::ICMP_EQ, MI.getOperand(0), Hi32, ApertureReg);
1971*8bcb0991SDimitry Andric   MI.eraseFromParent();
1972*8bcb0991SDimitry Andric   return true;
1973*8bcb0991SDimitry Andric }
1974*8bcb0991SDimitry Andric 
1975*8bcb0991SDimitry Andric /// Handle register layout difference for f16 images for some subtargets.
1976*8bcb0991SDimitry Andric Register AMDGPULegalizerInfo::handleD16VData(MachineIRBuilder &B,
1977*8bcb0991SDimitry Andric                                              MachineRegisterInfo &MRI,
1978*8bcb0991SDimitry Andric                                              Register Reg) const {
1979*8bcb0991SDimitry Andric   if (!ST.hasUnpackedD16VMem())
1980*8bcb0991SDimitry Andric     return Reg;
1981*8bcb0991SDimitry Andric 
1982*8bcb0991SDimitry Andric   const LLT S16 = LLT::scalar(16);
1983*8bcb0991SDimitry Andric   const LLT S32 = LLT::scalar(32);
1984*8bcb0991SDimitry Andric   LLT StoreVT = MRI.getType(Reg);
1985*8bcb0991SDimitry Andric   assert(StoreVT.isVector() && StoreVT.getElementType() == S16);
1986*8bcb0991SDimitry Andric 
1987*8bcb0991SDimitry Andric   auto Unmerge = B.buildUnmerge(S16, Reg);
1988*8bcb0991SDimitry Andric 
1989*8bcb0991SDimitry Andric   SmallVector<Register, 4> WideRegs;
1990*8bcb0991SDimitry Andric   for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
1991*8bcb0991SDimitry Andric     WideRegs.push_back(B.buildAnyExt(S32, Unmerge.getReg(I)).getReg(0));
1992*8bcb0991SDimitry Andric 
1993*8bcb0991SDimitry Andric   int NumElts = StoreVT.getNumElements();
1994*8bcb0991SDimitry Andric 
1995*8bcb0991SDimitry Andric   return B.buildBuildVector(LLT::vector(NumElts, S32), WideRegs).getReg(0);
1996*8bcb0991SDimitry Andric }
1997*8bcb0991SDimitry Andric 
1998*8bcb0991SDimitry Andric bool AMDGPULegalizerInfo::legalizeRawBufferStore(MachineInstr &MI,
1999*8bcb0991SDimitry Andric                                                  MachineRegisterInfo &MRI,
2000*8bcb0991SDimitry Andric                                                  MachineIRBuilder &B,
2001*8bcb0991SDimitry Andric                                                  bool IsFormat) const {
2002*8bcb0991SDimitry Andric   // TODO: Reject f16 format on targets where unsupported.
2003*8bcb0991SDimitry Andric   Register VData = MI.getOperand(1).getReg();
2004*8bcb0991SDimitry Andric   LLT Ty = MRI.getType(VData);
2005*8bcb0991SDimitry Andric 
2006*8bcb0991SDimitry Andric   B.setInstr(MI);
2007*8bcb0991SDimitry Andric 
2008*8bcb0991SDimitry Andric   const LLT S32 = LLT::scalar(32);
2009*8bcb0991SDimitry Andric   const LLT S16 = LLT::scalar(16);
2010*8bcb0991SDimitry Andric 
2011*8bcb0991SDimitry Andric   // Fixup illegal register types for i8 stores.
2012*8bcb0991SDimitry Andric   if (Ty == LLT::scalar(8) || Ty == S16) {
2013*8bcb0991SDimitry Andric     Register AnyExt = B.buildAnyExt(LLT::scalar(32), VData).getReg(0);
2014*8bcb0991SDimitry Andric     MI.getOperand(1).setReg(AnyExt);
2015*8bcb0991SDimitry Andric     return true;
2016*8bcb0991SDimitry Andric   }
2017*8bcb0991SDimitry Andric 
2018*8bcb0991SDimitry Andric   if (Ty.isVector()) {
2019*8bcb0991SDimitry Andric     if (Ty.getElementType() == S16 && Ty.getNumElements() <= 4) {
2020*8bcb0991SDimitry Andric       if (IsFormat)
2021*8bcb0991SDimitry Andric         MI.getOperand(1).setReg(handleD16VData(B, MRI, VData));
2022*8bcb0991SDimitry Andric       return true;
2023*8bcb0991SDimitry Andric     }
2024*8bcb0991SDimitry Andric 
2025*8bcb0991SDimitry Andric     return Ty.getElementType() == S32 && Ty.getNumElements() <= 4;
2026*8bcb0991SDimitry Andric   }
2027*8bcb0991SDimitry Andric 
2028*8bcb0991SDimitry Andric   return Ty == S32;
2029*8bcb0991SDimitry Andric }
2030*8bcb0991SDimitry Andric 
20310b57cec5SDimitry Andric bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
20320b57cec5SDimitry Andric                                             MachineRegisterInfo &MRI,
20330b57cec5SDimitry Andric                                             MachineIRBuilder &B) const {
20340b57cec5SDimitry Andric   // Replace the use G_BRCOND with the exec manipulate and branch pseudos.
2035*8bcb0991SDimitry Andric   switch (MI.getIntrinsicID()) {
20360b57cec5SDimitry Andric   case Intrinsic::amdgcn_if: {
20370b57cec5SDimitry Andric     if (MachineInstr *BrCond = verifyCFIntrinsic(MI, MRI)) {
20380b57cec5SDimitry Andric       const SIRegisterInfo *TRI
20390b57cec5SDimitry Andric         = static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
20400b57cec5SDimitry Andric 
20410b57cec5SDimitry Andric       B.setInstr(*BrCond);
20420b57cec5SDimitry Andric       Register Def = MI.getOperand(1).getReg();
20430b57cec5SDimitry Andric       Register Use = MI.getOperand(3).getReg();
20440b57cec5SDimitry Andric       B.buildInstr(AMDGPU::SI_IF)
20450b57cec5SDimitry Andric         .addDef(Def)
20460b57cec5SDimitry Andric         .addUse(Use)
20470b57cec5SDimitry Andric         .addMBB(BrCond->getOperand(1).getMBB());
20480b57cec5SDimitry Andric 
20490b57cec5SDimitry Andric       MRI.setRegClass(Def, TRI->getWaveMaskRegClass());
20500b57cec5SDimitry Andric       MRI.setRegClass(Use, TRI->getWaveMaskRegClass());
20510b57cec5SDimitry Andric       MI.eraseFromParent();
20520b57cec5SDimitry Andric       BrCond->eraseFromParent();
20530b57cec5SDimitry Andric       return true;
20540b57cec5SDimitry Andric     }
20550b57cec5SDimitry Andric 
20560b57cec5SDimitry Andric     return false;
20570b57cec5SDimitry Andric   }
20580b57cec5SDimitry Andric   case Intrinsic::amdgcn_loop: {
20590b57cec5SDimitry Andric     if (MachineInstr *BrCond = verifyCFIntrinsic(MI, MRI)) {
20600b57cec5SDimitry Andric       const SIRegisterInfo *TRI
20610b57cec5SDimitry Andric         = static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
20620b57cec5SDimitry Andric 
20630b57cec5SDimitry Andric       B.setInstr(*BrCond);
20640b57cec5SDimitry Andric       Register Reg = MI.getOperand(2).getReg();
20650b57cec5SDimitry Andric       B.buildInstr(AMDGPU::SI_LOOP)
20660b57cec5SDimitry Andric         .addUse(Reg)
20670b57cec5SDimitry Andric         .addMBB(BrCond->getOperand(1).getMBB());
20680b57cec5SDimitry Andric       MI.eraseFromParent();
20690b57cec5SDimitry Andric       BrCond->eraseFromParent();
20700b57cec5SDimitry Andric       MRI.setRegClass(Reg, TRI->getWaveMaskRegClass());
20710b57cec5SDimitry Andric       return true;
20720b57cec5SDimitry Andric     }
20730b57cec5SDimitry Andric 
20740b57cec5SDimitry Andric     return false;
20750b57cec5SDimitry Andric   }
20760b57cec5SDimitry Andric   case Intrinsic::amdgcn_kernarg_segment_ptr:
20770b57cec5SDimitry Andric     return legalizePreloadedArgIntrin(
20780b57cec5SDimitry Andric       MI, MRI, B, AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
20790b57cec5SDimitry Andric   case Intrinsic::amdgcn_implicitarg_ptr:
20800b57cec5SDimitry Andric     return legalizeImplicitArgPtr(MI, MRI, B);
20810b57cec5SDimitry Andric   case Intrinsic::amdgcn_workitem_id_x:
20820b57cec5SDimitry Andric     return legalizePreloadedArgIntrin(MI, MRI, B,
20830b57cec5SDimitry Andric                                       AMDGPUFunctionArgInfo::WORKITEM_ID_X);
20840b57cec5SDimitry Andric   case Intrinsic::amdgcn_workitem_id_y:
20850b57cec5SDimitry Andric     return legalizePreloadedArgIntrin(MI, MRI, B,
20860b57cec5SDimitry Andric                                       AMDGPUFunctionArgInfo::WORKITEM_ID_Y);
20870b57cec5SDimitry Andric   case Intrinsic::amdgcn_workitem_id_z:
20880b57cec5SDimitry Andric     return legalizePreloadedArgIntrin(MI, MRI, B,
20890b57cec5SDimitry Andric                                       AMDGPUFunctionArgInfo::WORKITEM_ID_Z);
20900b57cec5SDimitry Andric   case Intrinsic::amdgcn_workgroup_id_x:
20910b57cec5SDimitry Andric     return legalizePreloadedArgIntrin(MI, MRI, B,
20920b57cec5SDimitry Andric                                       AMDGPUFunctionArgInfo::WORKGROUP_ID_X);
20930b57cec5SDimitry Andric   case Intrinsic::amdgcn_workgroup_id_y:
20940b57cec5SDimitry Andric     return legalizePreloadedArgIntrin(MI, MRI, B,
20950b57cec5SDimitry Andric                                       AMDGPUFunctionArgInfo::WORKGROUP_ID_Y);
20960b57cec5SDimitry Andric   case Intrinsic::amdgcn_workgroup_id_z:
20970b57cec5SDimitry Andric     return legalizePreloadedArgIntrin(MI, MRI, B,
20980b57cec5SDimitry Andric                                       AMDGPUFunctionArgInfo::WORKGROUP_ID_Z);
20990b57cec5SDimitry Andric   case Intrinsic::amdgcn_dispatch_ptr:
21000b57cec5SDimitry Andric     return legalizePreloadedArgIntrin(MI, MRI, B,
21010b57cec5SDimitry Andric                                       AMDGPUFunctionArgInfo::DISPATCH_PTR);
21020b57cec5SDimitry Andric   case Intrinsic::amdgcn_queue_ptr:
21030b57cec5SDimitry Andric     return legalizePreloadedArgIntrin(MI, MRI, B,
21040b57cec5SDimitry Andric                                       AMDGPUFunctionArgInfo::QUEUE_PTR);
21050b57cec5SDimitry Andric   case Intrinsic::amdgcn_implicit_buffer_ptr:
21060b57cec5SDimitry Andric     return legalizePreloadedArgIntrin(
21070b57cec5SDimitry Andric       MI, MRI, B, AMDGPUFunctionArgInfo::IMPLICIT_BUFFER_PTR);
21080b57cec5SDimitry Andric   case Intrinsic::amdgcn_dispatch_id:
21090b57cec5SDimitry Andric     return legalizePreloadedArgIntrin(MI, MRI, B,
21100b57cec5SDimitry Andric                                       AMDGPUFunctionArgInfo::DISPATCH_ID);
2111*8bcb0991SDimitry Andric   case Intrinsic::amdgcn_fdiv_fast:
2112*8bcb0991SDimitry Andric     return legalizeFDIVFastIntrin(MI, MRI, B);
2113*8bcb0991SDimitry Andric   case Intrinsic::amdgcn_is_shared:
2114*8bcb0991SDimitry Andric     return legalizeIsAddrSpace(MI, MRI, B, AMDGPUAS::LOCAL_ADDRESS);
2115*8bcb0991SDimitry Andric   case Intrinsic::amdgcn_is_private:
2116*8bcb0991SDimitry Andric     return legalizeIsAddrSpace(MI, MRI, B, AMDGPUAS::PRIVATE_ADDRESS);
2117*8bcb0991SDimitry Andric   case Intrinsic::amdgcn_wavefrontsize: {
2118*8bcb0991SDimitry Andric     B.setInstr(MI);
2119*8bcb0991SDimitry Andric     B.buildConstant(MI.getOperand(0), ST.getWavefrontSize());
2120*8bcb0991SDimitry Andric     MI.eraseFromParent();
2121*8bcb0991SDimitry Andric     return true;
2122*8bcb0991SDimitry Andric   }
2123*8bcb0991SDimitry Andric   case Intrinsic::amdgcn_raw_buffer_store:
2124*8bcb0991SDimitry Andric     return legalizeRawBufferStore(MI, MRI, B, false);
2125*8bcb0991SDimitry Andric   case Intrinsic::amdgcn_raw_buffer_store_format:
2126*8bcb0991SDimitry Andric     return legalizeRawBufferStore(MI, MRI, B, true);
21270b57cec5SDimitry Andric   default:
21280b57cec5SDimitry Andric     return true;
21290b57cec5SDimitry Andric   }
21300b57cec5SDimitry Andric 
21310b57cec5SDimitry Andric   return true;
21320b57cec5SDimitry Andric }
2133