xref: /freebsd/contrib/llvm-project/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp (revision 700637cbb5e582861067a11aaca4d053546871d2)
1*700637cbSDimitry Andric //===------- AMDGPU.cpp - Emit LLVM Code for builtins ---------------------===//
2*700637cbSDimitry Andric //
3*700637cbSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*700637cbSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5*700637cbSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*700637cbSDimitry Andric //
7*700637cbSDimitry Andric //===----------------------------------------------------------------------===//
8*700637cbSDimitry Andric //
9*700637cbSDimitry Andric // This contains code to emit Builtin calls as LLVM code.
10*700637cbSDimitry Andric //
11*700637cbSDimitry Andric //===----------------------------------------------------------------------===//
12*700637cbSDimitry Andric 
13*700637cbSDimitry Andric #include "CGBuiltin.h"
14*700637cbSDimitry Andric #include "clang/Basic/TargetBuiltins.h"
15*700637cbSDimitry Andric #include "llvm/Analysis/ValueTracking.h"
16*700637cbSDimitry Andric #include "llvm/IR/IntrinsicsAMDGPU.h"
17*700637cbSDimitry Andric #include "llvm/IR/IntrinsicsR600.h"
18*700637cbSDimitry Andric #include "llvm/IR/MemoryModelRelaxationAnnotations.h"
19*700637cbSDimitry Andric #include "llvm/Support/AMDGPUAddrSpace.h"
20*700637cbSDimitry Andric 
21*700637cbSDimitry Andric using namespace clang;
22*700637cbSDimitry Andric using namespace CodeGen;
23*700637cbSDimitry Andric using namespace llvm;
24*700637cbSDimitry Andric 
25*700637cbSDimitry Andric namespace {
26*700637cbSDimitry Andric 
27*700637cbSDimitry Andric // Has second type mangled argument.
28*700637cbSDimitry Andric static Value *
emitBinaryExpMaybeConstrainedFPBuiltin(CodeGenFunction & CGF,const CallExpr * E,Intrinsic::ID IntrinsicID,Intrinsic::ID ConstrainedIntrinsicID)29*700637cbSDimitry Andric emitBinaryExpMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E,
30*700637cbSDimitry Andric                                        Intrinsic::ID IntrinsicID,
31*700637cbSDimitry Andric                                        Intrinsic::ID ConstrainedIntrinsicID) {
32*700637cbSDimitry Andric   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
33*700637cbSDimitry Andric   llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
34*700637cbSDimitry Andric 
35*700637cbSDimitry Andric   CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
36*700637cbSDimitry Andric   if (CGF.Builder.getIsFPConstrained()) {
37*700637cbSDimitry Andric     Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID,
38*700637cbSDimitry Andric                                        {Src0->getType(), Src1->getType()});
39*700637cbSDimitry Andric     return CGF.Builder.CreateConstrainedFPCall(F, {Src0, Src1});
40*700637cbSDimitry Andric   }
41*700637cbSDimitry Andric 
42*700637cbSDimitry Andric   Function *F =
43*700637cbSDimitry Andric       CGF.CGM.getIntrinsic(IntrinsicID, {Src0->getType(), Src1->getType()});
44*700637cbSDimitry Andric   return CGF.Builder.CreateCall(F, {Src0, Src1});
45*700637cbSDimitry Andric }
46*700637cbSDimitry Andric 
47*700637cbSDimitry Andric // If \p E is not null pointer, insert address space cast to match return
48*700637cbSDimitry Andric // type of \p E if necessary.
EmitAMDGPUDispatchPtr(CodeGenFunction & CGF,const CallExpr * E=nullptr)49*700637cbSDimitry Andric Value *EmitAMDGPUDispatchPtr(CodeGenFunction &CGF,
50*700637cbSDimitry Andric                              const CallExpr *E = nullptr) {
51*700637cbSDimitry Andric   auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_dispatch_ptr);
52*700637cbSDimitry Andric   auto *Call = CGF.Builder.CreateCall(F);
53*700637cbSDimitry Andric   Call->addRetAttr(
54*700637cbSDimitry Andric       Attribute::getWithDereferenceableBytes(Call->getContext(), 64));
55*700637cbSDimitry Andric   Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(4)));
56*700637cbSDimitry Andric   if (!E)
57*700637cbSDimitry Andric     return Call;
58*700637cbSDimitry Andric   QualType BuiltinRetType = E->getType();
59*700637cbSDimitry Andric   auto *RetTy = cast<llvm::PointerType>(CGF.ConvertType(BuiltinRetType));
60*700637cbSDimitry Andric   if (RetTy == Call->getType())
61*700637cbSDimitry Andric     return Call;
62*700637cbSDimitry Andric   return CGF.Builder.CreateAddrSpaceCast(Call, RetTy);
63*700637cbSDimitry Andric }
64*700637cbSDimitry Andric 
EmitAMDGPUImplicitArgPtr(CodeGenFunction & CGF)65*700637cbSDimitry Andric Value *EmitAMDGPUImplicitArgPtr(CodeGenFunction &CGF) {
66*700637cbSDimitry Andric   auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_implicitarg_ptr);
67*700637cbSDimitry Andric   auto *Call = CGF.Builder.CreateCall(F);
68*700637cbSDimitry Andric   Call->addRetAttr(
69*700637cbSDimitry Andric       Attribute::getWithDereferenceableBytes(Call->getContext(), 256));
70*700637cbSDimitry Andric   Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(8)));
71*700637cbSDimitry Andric   return Call;
72*700637cbSDimitry Andric }
73*700637cbSDimitry Andric 
74*700637cbSDimitry Andric // \p Index is 0, 1, and 2 for x, y, and z dimension, respectively.
75*700637cbSDimitry Andric /// Emit code based on Code Object ABI version.
76*700637cbSDimitry Andric /// COV_4    : Emit code to use dispatch ptr
77*700637cbSDimitry Andric /// COV_5+   : Emit code to use implicitarg ptr
78*700637cbSDimitry Andric /// COV_NONE : Emit code to load a global variable "__oclc_ABI_version"
79*700637cbSDimitry Andric ///            and use its value for COV_4 or COV_5+ approach. It is used for
80*700637cbSDimitry Andric ///            compiling device libraries in an ABI-agnostic way.
EmitAMDGPUWorkGroupSize(CodeGenFunction & CGF,unsigned Index)81*700637cbSDimitry Andric Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) {
82*700637cbSDimitry Andric   llvm::LoadInst *LD;
83*700637cbSDimitry Andric 
84*700637cbSDimitry Andric   auto Cov = CGF.getTarget().getTargetOpts().CodeObjectVersion;
85*700637cbSDimitry Andric 
86*700637cbSDimitry Andric   if (Cov == CodeObjectVersionKind::COV_None) {
87*700637cbSDimitry Andric     StringRef Name = "__oclc_ABI_version";
88*700637cbSDimitry Andric     auto *ABIVersionC = CGF.CGM.getModule().getNamedGlobal(Name);
89*700637cbSDimitry Andric     if (!ABIVersionC)
90*700637cbSDimitry Andric       ABIVersionC = new llvm::GlobalVariable(
91*700637cbSDimitry Andric           CGF.CGM.getModule(), CGF.Int32Ty, false,
92*700637cbSDimitry Andric           llvm::GlobalValue::ExternalLinkage, nullptr, Name, nullptr,
93*700637cbSDimitry Andric           llvm::GlobalVariable::NotThreadLocal,
94*700637cbSDimitry Andric           CGF.CGM.getContext().getTargetAddressSpace(LangAS::opencl_constant));
95*700637cbSDimitry Andric 
96*700637cbSDimitry Andric     // This load will be eliminated by the IPSCCP because it is constant
97*700637cbSDimitry Andric     // weak_odr without externally_initialized. Either changing it to weak or
98*700637cbSDimitry Andric     // adding externally_initialized will keep the load.
99*700637cbSDimitry Andric     Value *ABIVersion = CGF.Builder.CreateAlignedLoad(CGF.Int32Ty, ABIVersionC,
100*700637cbSDimitry Andric                                                       CGF.CGM.getIntAlign());
101*700637cbSDimitry Andric 
102*700637cbSDimitry Andric     Value *IsCOV5 = CGF.Builder.CreateICmpSGE(
103*700637cbSDimitry Andric         ABIVersion,
104*700637cbSDimitry Andric         llvm::ConstantInt::get(CGF.Int32Ty, CodeObjectVersionKind::COV_5));
105*700637cbSDimitry Andric 
106*700637cbSDimitry Andric     // Indexing the implicit kernarg segment.
107*700637cbSDimitry Andric     Value *ImplicitGEP = CGF.Builder.CreateConstGEP1_32(
108*700637cbSDimitry Andric         CGF.Int8Ty, EmitAMDGPUImplicitArgPtr(CGF), 12 + Index * 2);
109*700637cbSDimitry Andric 
110*700637cbSDimitry Andric     // Indexing the HSA kernel_dispatch_packet struct.
111*700637cbSDimitry Andric     Value *DispatchGEP = CGF.Builder.CreateConstGEP1_32(
112*700637cbSDimitry Andric         CGF.Int8Ty, EmitAMDGPUDispatchPtr(CGF), 4 + Index * 2);
113*700637cbSDimitry Andric 
114*700637cbSDimitry Andric     auto Result = CGF.Builder.CreateSelect(IsCOV5, ImplicitGEP, DispatchGEP);
115*700637cbSDimitry Andric     LD = CGF.Builder.CreateLoad(
116*700637cbSDimitry Andric         Address(Result, CGF.Int16Ty, CharUnits::fromQuantity(2)));
117*700637cbSDimitry Andric   } else {
118*700637cbSDimitry Andric     Value *GEP = nullptr;
119*700637cbSDimitry Andric     if (Cov >= CodeObjectVersionKind::COV_5) {
120*700637cbSDimitry Andric       // Indexing the implicit kernarg segment.
121*700637cbSDimitry Andric       GEP = CGF.Builder.CreateConstGEP1_32(
122*700637cbSDimitry Andric           CGF.Int8Ty, EmitAMDGPUImplicitArgPtr(CGF), 12 + Index * 2);
123*700637cbSDimitry Andric     } else {
124*700637cbSDimitry Andric       // Indexing the HSA kernel_dispatch_packet struct.
125*700637cbSDimitry Andric       GEP = CGF.Builder.CreateConstGEP1_32(
126*700637cbSDimitry Andric           CGF.Int8Ty, EmitAMDGPUDispatchPtr(CGF), 4 + Index * 2);
127*700637cbSDimitry Andric     }
128*700637cbSDimitry Andric     LD = CGF.Builder.CreateLoad(
129*700637cbSDimitry Andric         Address(GEP, CGF.Int16Ty, CharUnits::fromQuantity(2)));
130*700637cbSDimitry Andric   }
131*700637cbSDimitry Andric 
132*700637cbSDimitry Andric   llvm::MDBuilder MDHelper(CGF.getLLVMContext());
133*700637cbSDimitry Andric   llvm::MDNode *RNode = MDHelper.createRange(APInt(16, 1),
134*700637cbSDimitry Andric       APInt(16, CGF.getTarget().getMaxOpenCLWorkGroupSize() + 1));
135*700637cbSDimitry Andric   LD->setMetadata(llvm::LLVMContext::MD_range, RNode);
136*700637cbSDimitry Andric   LD->setMetadata(llvm::LLVMContext::MD_noundef,
137*700637cbSDimitry Andric                   llvm::MDNode::get(CGF.getLLVMContext(), {}));
138*700637cbSDimitry Andric   LD->setMetadata(llvm::LLVMContext::MD_invariant_load,
139*700637cbSDimitry Andric                   llvm::MDNode::get(CGF.getLLVMContext(), {}));
140*700637cbSDimitry Andric   return LD;
141*700637cbSDimitry Andric }
142*700637cbSDimitry Andric 
143*700637cbSDimitry Andric // \p Index is 0, 1, and 2 for x, y, and z dimension, respectively.
EmitAMDGPUGridSize(CodeGenFunction & CGF,unsigned Index)144*700637cbSDimitry Andric Value *EmitAMDGPUGridSize(CodeGenFunction &CGF, unsigned Index) {
145*700637cbSDimitry Andric   const unsigned XOffset = 12;
146*700637cbSDimitry Andric   auto *DP = EmitAMDGPUDispatchPtr(CGF);
147*700637cbSDimitry Andric   // Indexing the HSA kernel_dispatch_packet struct.
148*700637cbSDimitry Andric   auto *Offset = llvm::ConstantInt::get(CGF.Int32Ty, XOffset + Index * 4);
149*700637cbSDimitry Andric   auto *GEP = CGF.Builder.CreateGEP(CGF.Int8Ty, DP, Offset);
150*700637cbSDimitry Andric   auto *LD = CGF.Builder.CreateLoad(
151*700637cbSDimitry Andric       Address(GEP, CGF.Int32Ty, CharUnits::fromQuantity(4)));
152*700637cbSDimitry Andric 
153*700637cbSDimitry Andric   llvm::MDBuilder MDB(CGF.getLLVMContext());
154*700637cbSDimitry Andric 
155*700637cbSDimitry Andric   // Known non-zero.
156*700637cbSDimitry Andric   LD->setMetadata(llvm::LLVMContext::MD_range,
157*700637cbSDimitry Andric                   MDB.createRange(APInt(32, 1), APInt::getZero(32)));
158*700637cbSDimitry Andric   LD->setMetadata(llvm::LLVMContext::MD_invariant_load,
159*700637cbSDimitry Andric                   llvm::MDNode::get(CGF.getLLVMContext(), {}));
160*700637cbSDimitry Andric   return LD;
161*700637cbSDimitry Andric }
162*700637cbSDimitry Andric } // namespace
163*700637cbSDimitry Andric 
164*700637cbSDimitry Andric // Generates the IR for __builtin_read_exec_*.
165*700637cbSDimitry Andric // Lowers the builtin to amdgcn_ballot intrinsic.
EmitAMDGCNBallotForExec(CodeGenFunction & CGF,const CallExpr * E,llvm::Type * RegisterType,llvm::Type * ValueType,bool isExecHi)166*700637cbSDimitry Andric static Value *EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E,
167*700637cbSDimitry Andric                                       llvm::Type *RegisterType,
168*700637cbSDimitry Andric                                       llvm::Type *ValueType, bool isExecHi) {
169*700637cbSDimitry Andric   CodeGen::CGBuilderTy &Builder = CGF.Builder;
170*700637cbSDimitry Andric   CodeGen::CodeGenModule &CGM = CGF.CGM;
171*700637cbSDimitry Andric 
172*700637cbSDimitry Andric   Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {RegisterType});
173*700637cbSDimitry Andric   llvm::Value *Call = Builder.CreateCall(F, {Builder.getInt1(true)});
174*700637cbSDimitry Andric 
175*700637cbSDimitry Andric   if (isExecHi) {
176*700637cbSDimitry Andric     Value *Rt2 = Builder.CreateLShr(Call, 32);
177*700637cbSDimitry Andric     Rt2 = Builder.CreateTrunc(Rt2, CGF.Int32Ty);
178*700637cbSDimitry Andric     return Rt2;
179*700637cbSDimitry Andric   }
180*700637cbSDimitry Andric 
181*700637cbSDimitry Andric   return Call;
182*700637cbSDimitry Andric }
183*700637cbSDimitry Andric 
184*700637cbSDimitry Andric // Emit an intrinsic that has 1 float or double operand, and 1 integer.
emitFPIntBuiltin(CodeGenFunction & CGF,const CallExpr * E,unsigned IntrinsicID)185*700637cbSDimitry Andric static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
186*700637cbSDimitry Andric                                const CallExpr *E,
187*700637cbSDimitry Andric                                unsigned IntrinsicID) {
188*700637cbSDimitry Andric   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
189*700637cbSDimitry Andric   llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
190*700637cbSDimitry Andric 
191*700637cbSDimitry Andric   Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
192*700637cbSDimitry Andric   return CGF.Builder.CreateCall(F, {Src0, Src1});
193*700637cbSDimitry Andric }
194*700637cbSDimitry Andric 
195*700637cbSDimitry Andric // For processing memory ordering and memory scope arguments of various
196*700637cbSDimitry Andric // amdgcn builtins.
197*700637cbSDimitry Andric // \p Order takes a C++11 comptabile memory-ordering specifier and converts
198*700637cbSDimitry Andric // it into LLVM's memory ordering specifier using atomic C ABI, and writes
199*700637cbSDimitry Andric // to \p AO. \p Scope takes a const char * and converts it into AMDGCN
200*700637cbSDimitry Andric // specific SyncScopeID and writes it to \p SSID.
ProcessOrderScopeAMDGCN(Value * Order,Value * Scope,llvm::AtomicOrdering & AO,llvm::SyncScope::ID & SSID)201*700637cbSDimitry Andric void CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope,
202*700637cbSDimitry Andric                                               llvm::AtomicOrdering &AO,
203*700637cbSDimitry Andric                                               llvm::SyncScope::ID &SSID) {
204*700637cbSDimitry Andric   int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
205*700637cbSDimitry Andric 
206*700637cbSDimitry Andric   // Map C11/C++11 memory ordering to LLVM memory ordering
207*700637cbSDimitry Andric   assert(llvm::isValidAtomicOrderingCABI(ord));
208*700637cbSDimitry Andric   switch (static_cast<llvm::AtomicOrderingCABI>(ord)) {
209*700637cbSDimitry Andric   case llvm::AtomicOrderingCABI::acquire:
210*700637cbSDimitry Andric   case llvm::AtomicOrderingCABI::consume:
211*700637cbSDimitry Andric     AO = llvm::AtomicOrdering::Acquire;
212*700637cbSDimitry Andric     break;
213*700637cbSDimitry Andric   case llvm::AtomicOrderingCABI::release:
214*700637cbSDimitry Andric     AO = llvm::AtomicOrdering::Release;
215*700637cbSDimitry Andric     break;
216*700637cbSDimitry Andric   case llvm::AtomicOrderingCABI::acq_rel:
217*700637cbSDimitry Andric     AO = llvm::AtomicOrdering::AcquireRelease;
218*700637cbSDimitry Andric     break;
219*700637cbSDimitry Andric   case llvm::AtomicOrderingCABI::seq_cst:
220*700637cbSDimitry Andric     AO = llvm::AtomicOrdering::SequentiallyConsistent;
221*700637cbSDimitry Andric     break;
222*700637cbSDimitry Andric   case llvm::AtomicOrderingCABI::relaxed:
223*700637cbSDimitry Andric     AO = llvm::AtomicOrdering::Monotonic;
224*700637cbSDimitry Andric     break;
225*700637cbSDimitry Andric   }
226*700637cbSDimitry Andric 
227*700637cbSDimitry Andric   // Some of the atomic builtins take the scope as a string name.
228*700637cbSDimitry Andric   StringRef scp;
229*700637cbSDimitry Andric   if (llvm::getConstantStringInfo(Scope, scp)) {
230*700637cbSDimitry Andric     SSID = getLLVMContext().getOrInsertSyncScopeID(scp);
231*700637cbSDimitry Andric     return;
232*700637cbSDimitry Andric   }
233*700637cbSDimitry Andric 
234*700637cbSDimitry Andric   // Older builtins had an enum argument for the memory scope.
235*700637cbSDimitry Andric   int scope = cast<llvm::ConstantInt>(Scope)->getZExtValue();
236*700637cbSDimitry Andric   switch (scope) {
237*700637cbSDimitry Andric   case 0: // __MEMORY_SCOPE_SYSTEM
238*700637cbSDimitry Andric     SSID = llvm::SyncScope::System;
239*700637cbSDimitry Andric     break;
240*700637cbSDimitry Andric   case 1: // __MEMORY_SCOPE_DEVICE
241*700637cbSDimitry Andric     SSID = getLLVMContext().getOrInsertSyncScopeID("agent");
242*700637cbSDimitry Andric     break;
243*700637cbSDimitry Andric   case 2: // __MEMORY_SCOPE_WRKGRP
244*700637cbSDimitry Andric     SSID = getLLVMContext().getOrInsertSyncScopeID("workgroup");
245*700637cbSDimitry Andric     break;
246*700637cbSDimitry Andric   case 3: // __MEMORY_SCOPE_WVFRNT
247*700637cbSDimitry Andric     SSID = getLLVMContext().getOrInsertSyncScopeID("wavefront");
248*700637cbSDimitry Andric     break;
249*700637cbSDimitry Andric   case 4: // __MEMORY_SCOPE_SINGLE
250*700637cbSDimitry Andric     SSID = llvm::SyncScope::SingleThread;
251*700637cbSDimitry Andric     break;
252*700637cbSDimitry Andric   default:
253*700637cbSDimitry Andric     SSID = llvm::SyncScope::System;
254*700637cbSDimitry Andric     break;
255*700637cbSDimitry Andric   }
256*700637cbSDimitry Andric }
257*700637cbSDimitry Andric 
EmitScalarOrConstFoldImmArg(unsigned ICEArguments,unsigned Idx,const CallExpr * E)258*700637cbSDimitry Andric llvm::Value *CodeGenFunction::EmitScalarOrConstFoldImmArg(unsigned ICEArguments,
259*700637cbSDimitry Andric                                                           unsigned Idx,
260*700637cbSDimitry Andric                                                           const CallExpr *E) {
261*700637cbSDimitry Andric   llvm::Value *Arg = nullptr;
262*700637cbSDimitry Andric   if ((ICEArguments & (1 << Idx)) == 0) {
263*700637cbSDimitry Andric     Arg = EmitScalarExpr(E->getArg(Idx));
264*700637cbSDimitry Andric   } else {
265*700637cbSDimitry Andric     // If this is required to be a constant, constant fold it so that we
266*700637cbSDimitry Andric     // know that the generated intrinsic gets a ConstantInt.
267*700637cbSDimitry Andric     std::optional<llvm::APSInt> Result =
268*700637cbSDimitry Andric         E->getArg(Idx)->getIntegerConstantExpr(getContext());
269*700637cbSDimitry Andric     assert(Result && "Expected argument to be a constant");
270*700637cbSDimitry Andric     Arg = llvm::ConstantInt::get(getLLVMContext(), *Result);
271*700637cbSDimitry Andric   }
272*700637cbSDimitry Andric   return Arg;
273*700637cbSDimitry Andric }
274*700637cbSDimitry Andric 
AddAMDGPUFenceAddressSpaceMMRA(llvm::Instruction * Inst,const CallExpr * E)275*700637cbSDimitry Andric void CodeGenFunction::AddAMDGPUFenceAddressSpaceMMRA(llvm::Instruction *Inst,
276*700637cbSDimitry Andric                                                      const CallExpr *E) {
277*700637cbSDimitry Andric   constexpr const char *Tag = "amdgpu-as";
278*700637cbSDimitry Andric 
279*700637cbSDimitry Andric   LLVMContext &Ctx = Inst->getContext();
280*700637cbSDimitry Andric   SmallVector<MMRAMetadata::TagT, 3> MMRAs;
281*700637cbSDimitry Andric   for (unsigned K = 2; K < E->getNumArgs(); ++K) {
282*700637cbSDimitry Andric     llvm::Value *V = EmitScalarExpr(E->getArg(K));
283*700637cbSDimitry Andric     StringRef AS;
284*700637cbSDimitry Andric     if (llvm::getConstantStringInfo(V, AS)) {
285*700637cbSDimitry Andric       MMRAs.push_back({Tag, AS});
286*700637cbSDimitry Andric       // TODO: Delete the resulting unused constant?
287*700637cbSDimitry Andric       continue;
288*700637cbSDimitry Andric     }
289*700637cbSDimitry Andric     CGM.Error(E->getExprLoc(),
290*700637cbSDimitry Andric               "expected an address space name as a string literal");
291*700637cbSDimitry Andric   }
292*700637cbSDimitry Andric 
293*700637cbSDimitry Andric   llvm::sort(MMRAs);
294*700637cbSDimitry Andric   MMRAs.erase(llvm::unique(MMRAs), MMRAs.end());
295*700637cbSDimitry Andric   Inst->setMetadata(LLVMContext::MD_mmra, MMRAMetadata::getMD(Ctx, MMRAs));
296*700637cbSDimitry Andric }
297*700637cbSDimitry Andric 
EmitAMDGPUBuiltinExpr(unsigned BuiltinID,const CallExpr * E)298*700637cbSDimitry Andric Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
299*700637cbSDimitry Andric                                               const CallExpr *E) {
300*700637cbSDimitry Andric   llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
301*700637cbSDimitry Andric   llvm::SyncScope::ID SSID;
302*700637cbSDimitry Andric   switch (BuiltinID) {
303*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_div_scale:
304*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_div_scalef: {
305*700637cbSDimitry Andric     // Translate from the intrinsics's struct return to the builtin's out
306*700637cbSDimitry Andric     // argument.
307*700637cbSDimitry Andric 
308*700637cbSDimitry Andric     Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3));
309*700637cbSDimitry Andric 
310*700637cbSDimitry Andric     llvm::Value *X = EmitScalarExpr(E->getArg(0));
311*700637cbSDimitry Andric     llvm::Value *Y = EmitScalarExpr(E->getArg(1));
312*700637cbSDimitry Andric     llvm::Value *Z = EmitScalarExpr(E->getArg(2));
313*700637cbSDimitry Andric 
314*700637cbSDimitry Andric     llvm::Function *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale,
315*700637cbSDimitry Andric                                            X->getType());
316*700637cbSDimitry Andric 
317*700637cbSDimitry Andric     llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z});
318*700637cbSDimitry Andric 
319*700637cbSDimitry Andric     llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0);
320*700637cbSDimitry Andric     llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1);
321*700637cbSDimitry Andric 
322*700637cbSDimitry Andric     llvm::Type *RealFlagType = FlagOutPtr.getElementType();
323*700637cbSDimitry Andric 
324*700637cbSDimitry Andric     llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType);
325*700637cbSDimitry Andric     Builder.CreateStore(FlagExt, FlagOutPtr);
326*700637cbSDimitry Andric     return Result;
327*700637cbSDimitry Andric   }
328*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_div_fmas:
329*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
330*700637cbSDimitry Andric     llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
331*700637cbSDimitry Andric     llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
332*700637cbSDimitry Andric     llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
333*700637cbSDimitry Andric     llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));
334*700637cbSDimitry Andric 
335*700637cbSDimitry Andric     llvm::Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas,
336*700637cbSDimitry Andric                                       Src0->getType());
337*700637cbSDimitry Andric     llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);
338*700637cbSDimitry Andric     return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
339*700637cbSDimitry Andric   }
340*700637cbSDimitry Andric 
341*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
342*700637cbSDimitry Andric     return emitBuiltinWithOneOverloadedType<2>(*this, E,
343*700637cbSDimitry Andric                                                Intrinsic::amdgcn_ds_swizzle);
344*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_mov_dpp8:
345*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_mov_dpp:
346*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_update_dpp: {
347*700637cbSDimitry Andric     llvm::SmallVector<llvm::Value *, 6> Args;
348*700637cbSDimitry Andric     // Find out if any arguments are required to be integer constant
349*700637cbSDimitry Andric     // expressions.
350*700637cbSDimitry Andric     unsigned ICEArguments = 0;
351*700637cbSDimitry Andric     ASTContext::GetBuiltinTypeError Error;
352*700637cbSDimitry Andric     getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
353*700637cbSDimitry Andric     assert(Error == ASTContext::GE_None && "Should not codegen an error");
354*700637cbSDimitry Andric     llvm::Type *DataTy = ConvertType(E->getArg(0)->getType());
355*700637cbSDimitry Andric     unsigned Size = DataTy->getPrimitiveSizeInBits();
356*700637cbSDimitry Andric     llvm::Type *IntTy =
357*700637cbSDimitry Andric         llvm::IntegerType::get(Builder.getContext(), std::max(Size, 32u));
358*700637cbSDimitry Andric     Function *F =
359*700637cbSDimitry Andric         CGM.getIntrinsic(BuiltinID == AMDGPU::BI__builtin_amdgcn_mov_dpp8
360*700637cbSDimitry Andric                              ? Intrinsic::amdgcn_mov_dpp8
361*700637cbSDimitry Andric                              : Intrinsic::amdgcn_update_dpp,
362*700637cbSDimitry Andric                          IntTy);
363*700637cbSDimitry Andric     assert(E->getNumArgs() == 5 || E->getNumArgs() == 6 ||
364*700637cbSDimitry Andric            E->getNumArgs() == 2);
365*700637cbSDimitry Andric     bool InsertOld = BuiltinID == AMDGPU::BI__builtin_amdgcn_mov_dpp;
366*700637cbSDimitry Andric     if (InsertOld)
367*700637cbSDimitry Andric       Args.push_back(llvm::PoisonValue::get(IntTy));
368*700637cbSDimitry Andric     for (unsigned I = 0; I != E->getNumArgs(); ++I) {
369*700637cbSDimitry Andric       llvm::Value *V = EmitScalarOrConstFoldImmArg(ICEArguments, I, E);
370*700637cbSDimitry Andric       if (I < (BuiltinID == AMDGPU::BI__builtin_amdgcn_update_dpp ? 2u : 1u) &&
371*700637cbSDimitry Andric           Size < 32) {
372*700637cbSDimitry Andric         if (!DataTy->isIntegerTy())
373*700637cbSDimitry Andric           V = Builder.CreateBitCast(
374*700637cbSDimitry Andric               V, llvm::IntegerType::get(Builder.getContext(), Size));
375*700637cbSDimitry Andric         V = Builder.CreateZExtOrBitCast(V, IntTy);
376*700637cbSDimitry Andric       }
377*700637cbSDimitry Andric       llvm::Type *ExpTy =
378*700637cbSDimitry Andric           F->getFunctionType()->getFunctionParamType(I + InsertOld);
379*700637cbSDimitry Andric       Args.push_back(Builder.CreateTruncOrBitCast(V, ExpTy));
380*700637cbSDimitry Andric     }
381*700637cbSDimitry Andric     Value *V = Builder.CreateCall(F, Args);
382*700637cbSDimitry Andric     if (Size < 32 && !DataTy->isIntegerTy())
383*700637cbSDimitry Andric       V = Builder.CreateTrunc(
384*700637cbSDimitry Andric           V, llvm::IntegerType::get(Builder.getContext(), Size));
385*700637cbSDimitry Andric     return Builder.CreateTruncOrBitCast(V, DataTy);
386*700637cbSDimitry Andric   }
387*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_permlane16:
388*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_permlanex16:
389*700637cbSDimitry Andric     return emitBuiltinWithOneOverloadedType<6>(
390*700637cbSDimitry Andric         *this, E,
391*700637cbSDimitry Andric         BuiltinID == AMDGPU::BI__builtin_amdgcn_permlane16
392*700637cbSDimitry Andric             ? Intrinsic::amdgcn_permlane16
393*700637cbSDimitry Andric             : Intrinsic::amdgcn_permlanex16);
394*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_permlane64:
395*700637cbSDimitry Andric     return emitBuiltinWithOneOverloadedType<1>(*this, E,
396*700637cbSDimitry Andric                                                Intrinsic::amdgcn_permlane64);
397*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_readlane:
398*700637cbSDimitry Andric     return emitBuiltinWithOneOverloadedType<2>(*this, E,
399*700637cbSDimitry Andric                                                Intrinsic::amdgcn_readlane);
400*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_readfirstlane:
401*700637cbSDimitry Andric     return emitBuiltinWithOneOverloadedType<1>(*this, E,
402*700637cbSDimitry Andric                                                Intrinsic::amdgcn_readfirstlane);
403*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_div_fixup:
404*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_div_fixupf:
405*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_div_fixuph:
406*700637cbSDimitry Andric     return emitBuiltinWithOneOverloadedType<3>(*this, E,
407*700637cbSDimitry Andric                                                Intrinsic::amdgcn_div_fixup);
408*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_trig_preop:
409*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_trig_preopf:
410*700637cbSDimitry Andric     return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop);
411*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_rcp:
412*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_rcpf:
413*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_rcph:
414*700637cbSDimitry Andric     return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_rcp);
415*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_sqrt:
416*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_sqrtf:
417*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_sqrth:
418*700637cbSDimitry Andric     return emitBuiltinWithOneOverloadedType<1>(*this, E,
419*700637cbSDimitry Andric                                                Intrinsic::amdgcn_sqrt);
420*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_rsq:
421*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_rsqf:
422*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_rsqh:
423*700637cbSDimitry Andric     return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_rsq);
424*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_rsq_clamp:
425*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_rsq_clampf:
426*700637cbSDimitry Andric     return emitBuiltinWithOneOverloadedType<1>(*this, E,
427*700637cbSDimitry Andric                                                Intrinsic::amdgcn_rsq_clamp);
428*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_sinf:
429*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_sinh:
430*700637cbSDimitry Andric     return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_sin);
431*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_cosf:
432*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_cosh:
433*700637cbSDimitry Andric     return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_cos);
434*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_dispatch_ptr:
435*700637cbSDimitry Andric     return EmitAMDGPUDispatchPtr(*this, E);
436*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_logf:
437*700637cbSDimitry Andric     return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_log);
438*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_exp2f:
439*700637cbSDimitry Andric     return emitBuiltinWithOneOverloadedType<1>(*this, E,
440*700637cbSDimitry Andric                                                Intrinsic::amdgcn_exp2);
441*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_log_clampf:
442*700637cbSDimitry Andric     return emitBuiltinWithOneOverloadedType<1>(*this, E,
443*700637cbSDimitry Andric                                                Intrinsic::amdgcn_log_clamp);
  // ldexp lowers to the generic llvm.ldexp intrinsic, which is overloaded on
  // both the FP value type and the exponent's integer type.
  case AMDGPU::BI__builtin_amdgcn_ldexp:
  case AMDGPU::BI__builtin_amdgcn_ldexpf: {
    llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
    llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
    llvm::Function *F =
        CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Src1->getType()});
    return Builder.CreateCall(F, {Src0, Src1});
  }
  case AMDGPU::BI__builtin_amdgcn_ldexph: {
    // The raw instruction has a different behavior for out of bounds exponent
    // values (implicit truncation instead of saturate to short_min/short_max).
    // Hence the builtin's int exponent is explicitly truncated to i16 before
    // the call rather than using the i32 exponent overload.
    llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
    llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
    llvm::Function *F =
        CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Int16Ty});
    return Builder.CreateCall(F, {Src0, Builder.CreateTrunc(Src1, Int16Ty)});
  }
  case AMDGPU::BI__builtin_amdgcn_frexp_mant:
  case AMDGPU::BI__builtin_amdgcn_frexp_mantf:
  case AMDGPU::BI__builtin_amdgcn_frexp_manth:
    return emitBuiltinWithOneOverloadedType<1>(*this, E,
                                               Intrinsic::amdgcn_frexp_mant);
  case AMDGPU::BI__builtin_amdgcn_frexp_exp:
  case AMDGPU::BI__builtin_amdgcn_frexp_expf: {
    // llvm.amdgcn.frexp.exp is overloaded on two types: the integer result
    // (i32 here) and the FP source operand.
    Value *Src0 = EmitScalarExpr(E->getArg(0));
    Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
                                { Builder.getInt32Ty(), Src0->getType() });
    return Builder.CreateCall(F, Src0);
  }
  case AMDGPU::BI__builtin_amdgcn_frexp_exph: {
    // Half variant: same intrinsic, but with an i16 exponent result.
    Value *Src0 = EmitScalarExpr(E->getArg(0));
    Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
                                { Builder.getInt16Ty(), Src0->getType() });
    return Builder.CreateCall(F, Src0);
  }
  // More direct mappings; lerp/ubfe/sbfe are three-operand builtins, hence
  // the <3> operand count passed to the helper.
  case AMDGPU::BI__builtin_amdgcn_fract:
  case AMDGPU::BI__builtin_amdgcn_fractf:
  case AMDGPU::BI__builtin_amdgcn_fracth:
    return emitBuiltinWithOneOverloadedType<1>(*this, E,
                                               Intrinsic::amdgcn_fract);
  case AMDGPU::BI__builtin_amdgcn_lerp:
    return emitBuiltinWithOneOverloadedType<3>(*this, E,
                                               Intrinsic::amdgcn_lerp);
  case AMDGPU::BI__builtin_amdgcn_ubfe:
    return emitBuiltinWithOneOverloadedType<3>(*this, E,
                                               Intrinsic::amdgcn_ubfe);
  case AMDGPU::BI__builtin_amdgcn_sbfe:
    return emitBuiltinWithOneOverloadedType<3>(*this, E,
                                               Intrinsic::amdgcn_sbfe);
  case AMDGPU::BI__builtin_amdgcn_ballot_w32:
  case AMDGPU::BI__builtin_amdgcn_ballot_w64: {
    // llvm.amdgcn.ballot is overloaded on its result type; the builtin's
    // declared return type (per the _w32/_w64 suffix) selects the overload.
    llvm::Type *ResultType = ConvertType(E->getType());
    llvm::Value *Src = EmitScalarExpr(E->getArg(0));
    Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, { ResultType });
    return Builder.CreateCall(F, { Src });
  }
  case AMDGPU::BI__builtin_amdgcn_tanh_bf16:
    return emitBuiltinWithOneOverloadedType<1>(*this, E,
                                               Intrinsic::amdgcn_tanh);
  case AMDGPU::BI__builtin_amdgcn_uicmp:
  case AMDGPU::BI__builtin_amdgcn_uicmpl:
  case AMDGPU::BI__builtin_amdgcn_sicmp:
  case AMDGPU::BI__builtin_amdgcn_sicmpl: {
    // Signed and unsigned variants all share llvm.amdgcn.icmp; all three
    // builtin operands are forwarded unchanged (the third is presumably the
    // comparison code -- confirm against the intrinsic definition).
    llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
    llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
    llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));

    // FIXME-GFX10: How should 32 bit mask be handled?
    // The intrinsic is overloaded on the i64 lane-mask result and the
    // compared operand type.
    Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_icmp,
      { Builder.getInt64Ty(), Src0->getType() });
    return Builder.CreateCall(F, { Src0, Src1, Src2 });
  }
  case AMDGPU::BI__builtin_amdgcn_fcmp:
  case AMDGPU::BI__builtin_amdgcn_fcmpf: {
    // Floating-point analogue of the icmp case above, same operand layout.
    llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
    llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
    llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));

    // FIXME-GFX10: How should 32 bit mask be handled?
    Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_fcmp,
      { Builder.getInt64Ty(), Src0->getType() });
    return Builder.CreateCall(F, { Src0, Src1, Src2 });
  }
  case AMDGPU::BI__builtin_amdgcn_class:
  case AMDGPU::BI__builtin_amdgcn_classf:
  case AMDGPU::BI__builtin_amdgcn_classh:
    // Mixed float/int operands; delegated to the emitFPIntBuiltin helper.
    return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class);
  case AMDGPU::BI__builtin_amdgcn_fmed3f:
  case AMDGPU::BI__builtin_amdgcn_fmed3h:
    return emitBuiltinWithOneOverloadedType<3>(*this, E,
                                               Intrinsic::amdgcn_fmed3);
  case AMDGPU::BI__builtin_amdgcn_ds_append:
  case AMDGPU::BI__builtin_amdgcn_ds_consume: {
    Intrinsic::ID Intrin = BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_append ?
      Intrinsic::amdgcn_ds_append : Intrinsic::amdgcn_ds_consume;
    Value *Src0 = EmitScalarExpr(E->getArg(0));
    Function *F = CGM.getIntrinsic(Intrin, { Src0->getType() });
    // The intrinsic takes one boolean operand beyond the builtin's pointer
    // argument, hardcoded false here (presumably the GDS flag -- confirm
    // against the intrinsic definition).
    return Builder.CreateCall(F, { Src0, Builder.getFalse() });
  }
  // Load-transpose family: many builtins (differing in element type and
  // vector width) funnel into a handful of intrinsics overloaded on the
  // loaded type. The outer case labels enumerate the builtins; the inner
  // switch picks the intrinsic ID. Every outer label is covered by the inner
  // switch, so IID is always assigned before use.
  case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
  case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:
  case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:
  case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4f16:
  case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4bf16:
  case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8i16:
  case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8f16:
  case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8bf16:
  case AMDGPU::BI__builtin_amdgcn_global_load_tr4_b64_v2i32:
  case AMDGPU::BI__builtin_amdgcn_global_load_tr8_b64_v2i32:
  case AMDGPU::BI__builtin_amdgcn_global_load_tr6_b96_v3i32:
  case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8i16:
  case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8f16:
  case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8bf16:
  case AMDGPU::BI__builtin_amdgcn_ds_load_tr4_b64_v2i32:
  case AMDGPU::BI__builtin_amdgcn_ds_load_tr8_b64_v2i32:
  case AMDGPU::BI__builtin_amdgcn_ds_load_tr6_b96_v3i32:
  case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8i16:
  case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8f16:
  case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8bf16:
  case AMDGPU::BI__builtin_amdgcn_ds_read_tr4_b64_v2i32:
  case AMDGPU::BI__builtin_amdgcn_ds_read_tr8_b64_v2i32:
  case AMDGPU::BI__builtin_amdgcn_ds_read_tr6_b96_v3i32:
  case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4f16:
  case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4bf16:
  case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4i16: {
    Intrinsic::ID IID;
    switch (BuiltinID) {
    case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
    case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:
    case AMDGPU::BI__builtin_amdgcn_global_load_tr8_b64_v2i32:
      IID = Intrinsic::amdgcn_global_load_tr_b64;
      break;
    case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:
    case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4f16:
    case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4bf16:
    case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8i16:
    case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8f16:
    case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8bf16:
    case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8i16:
    case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8f16:
    case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8bf16:
      IID = Intrinsic::amdgcn_global_load_tr_b128;
      break;
    case AMDGPU::BI__builtin_amdgcn_global_load_tr4_b64_v2i32:
      IID = Intrinsic::amdgcn_global_load_tr4_b64;
      break;
    case AMDGPU::BI__builtin_amdgcn_global_load_tr6_b96_v3i32:
      IID = Intrinsic::amdgcn_global_load_tr6_b96;
      break;
    case AMDGPU::BI__builtin_amdgcn_ds_load_tr4_b64_v2i32:
      IID = Intrinsic::amdgcn_ds_load_tr4_b64;
      break;
    case AMDGPU::BI__builtin_amdgcn_ds_load_tr6_b96_v3i32:
      IID = Intrinsic::amdgcn_ds_load_tr6_b96;
      break;
    case AMDGPU::BI__builtin_amdgcn_ds_load_tr8_b64_v2i32:
      IID = Intrinsic::amdgcn_ds_load_tr8_b64;
      break;
    case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8i16:
    case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8f16:
    case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8bf16:
      IID = Intrinsic::amdgcn_ds_load_tr16_b128;
      break;
    case AMDGPU::BI__builtin_amdgcn_ds_read_tr4_b64_v2i32:
      IID = Intrinsic::amdgcn_ds_read_tr4_b64;
      break;
    case AMDGPU::BI__builtin_amdgcn_ds_read_tr8_b64_v2i32:
      IID = Intrinsic::amdgcn_ds_read_tr8_b64;
      break;
    case AMDGPU::BI__builtin_amdgcn_ds_read_tr6_b96_v3i32:
      IID = Intrinsic::amdgcn_ds_read_tr6_b96;
      break;
    case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4i16:
    case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4f16:
    case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4bf16:
      IID = Intrinsic::amdgcn_ds_read_tr16_b64;
      break;
    }
    // The intrinsic is overloaded on the loaded (result) type and takes the
    // builtin's single pointer argument.
    llvm::Type *LoadTy = ConvertType(E->getType());
    llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
    llvm::Function *F = CGM.getIntrinsic(IID, {LoadTy});
    return Builder.CreateCall(F, {Addr});
  }
  case AMDGPU::BI__builtin_amdgcn_load_to_lds: {
    // Should this have asan instrumentation?
    return emitBuiltinWithOneOverloadedType<5>(*this, E,
                                               Intrinsic::amdgcn_load_to_lds);
  }
  case AMDGPU::BI__builtin_amdgcn_get_fpenv: {
    // The FP environment is represented as an i64 for the generic
    // llvm.get.fpenv intrinsic; get takes no operands.
    Function *F = CGM.getIntrinsic(Intrinsic::get_fpenv,
                                   {llvm::Type::getInt64Ty(getLLVMContext())});
    return Builder.CreateCall(F);
  }
  case AMDGPU::BI__builtin_amdgcn_set_fpenv: {
    // Counterpart of get_fpenv: writes the i64 environment value back.
    Function *F = CGM.getIntrinsic(Intrinsic::set_fpenv,
                                   {llvm::Type::getInt64Ty(getLLVMContext())});
    llvm::Value *Env = EmitScalarExpr(E->getArg(0));
    return Builder.CreateCall(F, {Env});
  }
  // Exec-mask reads are lowered via EmitAMDGCNBallotForExec; the trailing
  // bool is true only for read_exec_hi (presumably selecting the high half
  // of the 64-bit mask -- confirm in the helper's definition).
  case AMDGPU::BI__builtin_amdgcn_read_exec:
    return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, false);
  case AMDGPU::BI__builtin_amdgcn_read_exec_lo:
    return EmitAMDGCNBallotForExec(*this, E, Int32Ty, Int32Ty, false);
  case AMDGPU::BI__builtin_amdgcn_read_exec_hi:
    return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, true);
  case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray:
  case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_h:
  case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_l:
  case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_lh: {
    llvm::Value *NodePtr = EmitScalarExpr(E->getArg(0));
    llvm::Value *RayExtent = EmitScalarExpr(E->getArg(1));
    llvm::Value *RayOrigin = EmitScalarExpr(E->getArg(2));
    llvm::Value *RayDir = EmitScalarExpr(E->getArg(3));
    llvm::Value *RayInverseDir = EmitScalarExpr(E->getArg(4));
    llvm::Value *TextureDescr = EmitScalarExpr(E->getArg(5));

    // The builtins take these arguments as vec4 where the last element is
    // ignored. The intrinsic takes them as vec3, so shuffle down to the
    // first three lanes.
    RayOrigin = Builder.CreateShuffleVector(RayOrigin, RayOrigin,
                                            {0, 1, 2});
    RayDir =
        Builder.CreateShuffleVector(RayDir, RayDir, {0, 1, 2});
    RayInverseDir = Builder.CreateShuffleVector(RayInverseDir, RayInverseDir,
                                                {0, 1, 2});

    // The intrinsic is overloaded on the node-pointer type and the ray
    // direction vector type (the _h/_l/_lh builtin variants differ in those
    // argument types).
    Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_image_bvh_intersect_ray,
                                   {NodePtr->getType(), RayDir->getType()});
    return Builder.CreateCall(F, {NodePtr, RayExtent, RayOrigin, RayDir,
                                  RayInverseDir, TextureDescr});
  }
  case AMDGPU::BI__builtin_amdgcn_image_bvh8_intersect_ray:
  case AMDGPU::BI__builtin_amdgcn_image_bvh_dual_intersect_ray: {
    Intrinsic::ID IID;
    switch (BuiltinID) {
    case AMDGPU::BI__builtin_amdgcn_image_bvh8_intersect_ray:
      IID = Intrinsic::amdgcn_image_bvh8_intersect_ray;
      break;
    case AMDGPU::BI__builtin_amdgcn_image_bvh_dual_intersect_ray:
      IID = Intrinsic::amdgcn_image_bvh_dual_intersect_ray;
      break;
    }
    llvm::Value *NodePtr = EmitScalarExpr(E->getArg(0));
    llvm::Value *RayExtent = EmitScalarExpr(E->getArg(1));
    llvm::Value *InstanceMask = EmitScalarExpr(E->getArg(2));
    llvm::Value *RayOrigin = EmitScalarExpr(E->getArg(3));
    llvm::Value *RayDir = EmitScalarExpr(E->getArg(4));
    llvm::Value *Offset = EmitScalarExpr(E->getArg(5));
    llvm::Value *TextureDescr = EmitScalarExpr(E->getArg(6));

    // Args 7 and 8 are out-pointers through which the updated ray origin and
    // direction are returned to the caller.
    Address RetRayOriginPtr = EmitPointerWithAlignment(E->getArg(7));
    Address RetRayDirPtr = EmitPointerWithAlignment(E->getArg(8));

    llvm::Function *IntrinsicFunc = CGM.getIntrinsic(IID);

    llvm::CallInst *CI = Builder.CreateCall(
        IntrinsicFunc, {NodePtr, RayExtent, InstanceMask, RayOrigin, RayDir,
                        Offset, TextureDescr});

    // The intrinsic returns a three-element struct: the intersection result
    // plus the updated origin/direction, which are stored through the
    // out-pointers; only the first element is the builtin's return value.
    llvm::Value *RetVData = Builder.CreateExtractValue(CI, 0);
    llvm::Value *RetRayOrigin = Builder.CreateExtractValue(CI, 1);
    llvm::Value *RetRayDir = Builder.CreateExtractValue(CI, 2);

    Builder.CreateStore(RetRayOrigin, RetRayOriginPtr);
    Builder.CreateStore(RetRayDir, RetRayDirPtr);

    return RetVData;
  }
711*700637cbSDimitry Andric 
  // BVH traversal-stack operations. Each intrinsic returns two values as a
  // struct; the builtin packs them into a two-element vector instead.
  case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_rtn:
  case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push4_pop1_rtn:
  case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push8_pop1_rtn:
  case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push8_pop2_rtn: {
    Intrinsic::ID IID;
    switch (BuiltinID) {
    case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_rtn:
      IID = Intrinsic::amdgcn_ds_bvh_stack_rtn;
      break;
    case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push4_pop1_rtn:
      IID = Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn;
      break;
    case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push8_pop1_rtn:
      IID = Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn;
      break;
    case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push8_pop2_rtn:
      IID = Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn;
      break;
    }

    // All builtin operands are forwarded to the intrinsic unchanged, in
    // source order.
    SmallVector<Value *, 4> Args;
    for (int i = 0, e = E->getNumArgs(); i != e; ++i)
      Args.push_back(EmitScalarExpr(E->getArg(i)));

    Function *F = CGM.getIntrinsic(IID);
    Value *Call = Builder.CreateCall(F, Args);
    Value *Rtn = Builder.CreateExtractValue(Call, 0);
    Value *A = Builder.CreateExtractValue(Call, 1);
    llvm::Type *RetTy = ConvertType(E->getType());
    Value *I0 = Builder.CreateInsertElement(PoisonValue::get(RetTy), Rtn,
                                            (uint64_t)0);
    // ds_bvh_stack_push8_pop2_rtn returns {i64, i32} but the builtin returns
    // <2 x i64>, zext the second value.
    if (A->getType()->getPrimitiveSizeInBits() <
        RetTy->getScalarType()->getPrimitiveSizeInBits())
      A = Builder.CreateZExt(A, RetTy->getScalarType());

    return Builder.CreateInsertElement(I0, A, 1);
  }
  case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
  case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {
    // Both overloaded type slots of the intrinsic are fixed to <8 x i32>
    // here; only the intrinsic ID differs between the two tile shapes.
    llvm::FixedVectorType *VT = FixedVectorType::get(Builder.getInt32Ty(), 8);
    Function *F = CGM.getIntrinsic(
        BuiltinID == AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4
            ? Intrinsic::amdgcn_mfma_scale_f32_32x32x64_f8f6f4
            : Intrinsic::amdgcn_mfma_scale_f32_16x16x128_f8f6f4,
        {VT, VT});

    // All builtin operands are forwarded to the intrinsic unchanged, in
    // source order.
    SmallVector<Value *, 9> Args;
    for (unsigned I = 0, N = E->getNumArgs(); I != N; ++I)
      Args.push_back(EmitScalarExpr(E->getArg(I)));
    return Builder.CreateCall(F, Args);
  }
765*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
766*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:
767*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
768*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:
769*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
770*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:
771*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
772*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:
773*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
774*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
775*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
776*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
777*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
778*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
779*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
780*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
781*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:
782*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12:
783*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:
784*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12:
785*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12:
786*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12:
787*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12:
788*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12:
789*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12:
790*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12:
791*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12:
792*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12:
793*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12:
794*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12:
795*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12:
796*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12:
797*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12:
798*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12:
799*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12:
800*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12:
801*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12:
802*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12:
803*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32:
804*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64:
805*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32:
806*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64:
807*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32:
808*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64:
809*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32:
810*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64:
811*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32:
812*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64:
813*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32:
814*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64:
815*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32:
816*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64:
817*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32:
818*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64:
819*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32:
820*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64:
821*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:
822*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:
823*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:
824*700637cbSDimitry Andric   case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64: {
825*700637cbSDimitry Andric 
    // These operations perform a matrix multiplication and accumulation of
    // the form:
    //             D = A * B + C
    // We need to specify one type for matrices AB and one for matrices CD.
    // Sparse matrix operations can have different types for A and B as well as
    // an additional type for sparsity index.
    // Destination type should be put before types used for source operands.
    SmallVector<unsigned, 2> ArgsForMatchingMatrixTypes;
    // On GFX12, the intrinsics with 16-bit accumulator use a packed layout.
    // There is no need for the variable opsel argument, so always set it to
    // "false".
    bool AppendFalseForOpselArg = false;
    unsigned BuiltinWMMAOp;

    // Map each builtin to its target intrinsic and record which call-argument
    // indices supply the types used to mangle the intrinsic's overload.
    switch (BuiltinID) {
    case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
    case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
    case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12:
    case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12:
      ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_f16;
      break;
    case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
    case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
    case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12:
    case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12:
      ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf16;
      break;
    case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:
    case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12:
      AppendFalseForOpselArg = true;
      [[fallthrough]];
    case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
    case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
      ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16;
      break;
    case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:
    case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12:
      AppendFalseForOpselArg = true;
      [[fallthrough]];
    case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
    case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
      ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16;
      break;
    case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:
    case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:
      ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16_tied;
      break;
    case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:
    case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:
      ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16_tied;
      break;
    // The integer variants carry extra sign-control operands, so the type
    // arguments sit at later indices (4 = C/D, 1 = A/B).
    case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
    case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
    case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12:
    case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12:
      ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB
      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu8;
      break;
    case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
    case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
    case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12:
    case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12:
      ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB
      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu4;
      break;
    case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12:
    case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12:
      ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_fp8;
      break;
    case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12:
    case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12:
      ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_bf8;
      break;
    case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12:
    case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12:
      ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_fp8;
      break;
    case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12:
    case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12:
      ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_bf8;
      break;
    case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12:
    case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12:
      ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB
      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x32_iu4;
      break;
    // Sparse (SWMMAC) variants: A and B may differ in type and there is an
    // additional sparsity-index operand, so four types are mangled.
    case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32:
    case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64:
      ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
      BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_f16;
      break;
    case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32:
    case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64:
      ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
      BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf16;
      break;
    case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32:
    case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64:
      ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
      BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x32_f16;
      break;
    case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32:
    case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64:
      ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
      BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_bf16_16x16x32_bf16;
      break;
    case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32:
    case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64:
      ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index
      BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu8;
      break;
    case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32:
    case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64:
      ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index
      BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu4;
      break;
    case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32:
    case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64:
      ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index
      BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x64_iu4;
      break;
    case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32:
    case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64:
      ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
      BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_fp8;
      break;
    case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32:
    case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64:
      ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
      BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_bf8;
      break;
    case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:
    case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:
      ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
      BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_fp8;
      break;
    case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:
    case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64:
      ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
      BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_bf8;
      break;
    }

    // Emit all source-level arguments, then append the implicit opsel=false
    // operand where the GFX12 16-bit-accumulator intrinsics require it.
    SmallVector<Value *, 6> Args;
    for (int i = 0, e = E->getNumArgs(); i != e; ++i)
      Args.push_back(EmitScalarExpr(E->getArg(i)));
    if (AppendFalseForOpselArg)
      Args.push_back(Builder.getFalse());

    // Gather the overload types, in the order recorded above, to select the
    // correctly-mangled intrinsic declaration.
    SmallVector<llvm::Type *, 6> ArgTypes;
    for (auto ArgIdx : ArgsForMatchingMatrixTypes)
      ArgTypes.push_back(Args[ArgIdx]->getType());

    Function *F = CGM.getIntrinsic(BuiltinWMMAOp, ArgTypes);
    return Builder.CreateCall(F, Args);
  }
  // amdgcn workgroup size: dimension index 0/1/2 selects x/y/z.
  case AMDGPU::BI__builtin_amdgcn_workgroup_size_x:
    return EmitAMDGPUWorkGroupSize(*this, 0);
  case AMDGPU::BI__builtin_amdgcn_workgroup_size_y:
    return EmitAMDGPUWorkGroupSize(*this, 1);
  case AMDGPU::BI__builtin_amdgcn_workgroup_size_z:
    return EmitAMDGPUWorkGroupSize(*this, 2);

  // amdgcn grid size: same dimension-index convention as above.
  case AMDGPU::BI__builtin_amdgcn_grid_size_x:
    return EmitAMDGPUGridSize(*this, 0);
  case AMDGPU::BI__builtin_amdgcn_grid_size_y:
    return EmitAMDGPUGridSize(*this, 1);
  case AMDGPU::BI__builtin_amdgcn_grid_size_z:
    return EmitAMDGPUGridSize(*this, 2);

  // r600 intrinsics: float and double builtins share one overloaded
  // intrinsic, mangled on the single source operand's type.
  case AMDGPU::BI__builtin_r600_recipsqrt_ieee:
  case AMDGPU::BI__builtin_r600_recipsqrt_ieeef:
    return emitBuiltinWithOneOverloadedType<1>(*this, E,
                                               Intrinsic::r600_recipsqrt_ieee);
  case AMDGPU::BI__builtin_amdgcn_alignbit: {
    // alignbit is expressed with the generic funnel-shift-right intrinsic,
    // overloaded on the first operand's type.
    llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
    llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
    llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
    Function *F = CGM.getIntrinsic(Intrinsic::fshr, Src0->getType());
    return Builder.CreateCall(F, { Src0, Src1, Src2 });
  }
  case AMDGPU::BI__builtin_amdgcn_fence: {
    // Args 0 and 1 are the memory ordering and synchronization scope.
    ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(0)),
                            EmitScalarExpr(E->getArg(1)), AO, SSID);
    FenceInst *Fence = Builder.CreateFence(AO, SSID);
    // Any additional arguments select address spaces; they are attached to
    // the fence as MMRA (memory model relaxation annotation) metadata.
    if (E->getNumArgs() > 2)
      AddAMDGPUFenceAddressSpaceMMRA(Fence, E);
    return Fence;
  }
  // All of the following builtins lower to a single atomicrmw instruction;
  // they differ only in the binop, the pointee type, and how ordering/scope
  // and volatility are obtained.
  case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
  case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
  case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
  case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
  case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
  case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
  case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:
  case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16:
  case AMDGPU::BI__builtin_amdgcn_ds_faddf:
  case AMDGPU::BI__builtin_amdgcn_ds_fminf:
  case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
  case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
  case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
  case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
  case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
  case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
  case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: {
    // Pick the atomicrmw operation for this builtin.
    llvm::AtomicRMWInst::BinOp BinOp;
    switch (BuiltinID) {
    case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
    case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
      BinOp = llvm::AtomicRMWInst::UIncWrap;
      break;
    case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
    case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
      BinOp = llvm::AtomicRMWInst::UDecWrap;
      break;
    case AMDGPU::BI__builtin_amdgcn_ds_faddf:
    case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
    case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
    case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:
    case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16:
    case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
    case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
    case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
    case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
    case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
    case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
    case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
    case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
      BinOp = llvm::AtomicRMWInst::FAdd;
      break;
    case AMDGPU::BI__builtin_amdgcn_ds_fminf:
    case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
    case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
      BinOp = llvm::AtomicRMWInst::FMin;
      break;
    case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
    case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
    case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
      BinOp = llvm::AtomicRMWInst::FMax;
      break;
    }

    // Arg 0 is the pointer (alignment-checked), arg 1 the operand value.
    Address Ptr = CheckAtomicAlignment(*this, E);
    Value *Val = EmitScalarExpr(E->getArg(1));
    // Remember the source-level type so the result can be cast back after a
    // possible bitcast of Val below.
    llvm::Type *OrigTy = Val->getType();
    QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();

    bool Volatile;

    if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_faddf ||
        BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_fminf ||
        BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_fmaxf) {
      // __builtin_amdgcn_ds_faddf/fminf/fmaxf has an explicit volatile argument
      Volatile =
          cast<ConstantInt>(EmitScalarExpr(E->getArg(4)))->getZExtValue();
    } else {
      // Infer volatile from the passed type.
      Volatile =
          PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
    }

    if (E->getNumArgs() >= 4) {
      // Some of the builtins have explicit ordering and scope arguments.
      ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(2)),
                              EmitScalarExpr(E->getArg(3)), AO, SSID);
    } else {
      // Most of the builtins do not have syncscope/order arguments. For DS
      // atomics the scope doesn't really matter, as they implicitly operate at
      // workgroup scope.
      //
      // The global/flat cases need to use agent scope to consistently produce
      // the native instruction instead of a cmpxchg expansion.
      SSID = getLLVMContext().getOrInsertSyncScopeID("agent");
      AO = AtomicOrdering::Monotonic;

      // The v2bf16 builtin uses i16 instead of a natural bfloat type.
      if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16 ||
          BuiltinID == AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16 ||
          BuiltinID == AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16) {
        llvm::Type *V2BF16Ty = FixedVectorType::get(
            llvm::Type::getBFloatTy(Builder.getContext()), 2);
        Val = Builder.CreateBitCast(Val, V2BF16Ty);
      }
    }

    llvm::AtomicRMWInst *RMW =
        Builder.CreateAtomicRMW(BinOp, Ptr, Val, AO, SSID);
    if (Volatile)
      RMW->setVolatile(true);

    unsigned AddrSpace = Ptr.getType()->getAddressSpace();
    if (AddrSpace != llvm::AMDGPUAS::LOCAL_ADDRESS) {
      // Most targets require "amdgpu.no.fine.grained.memory" to emit the native
      // instruction for flat and global operations.
      llvm::MDTuple *EmptyMD = MDNode::get(getLLVMContext(), {});
      RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD);

      // Most targets require "amdgpu.ignore.denormal.mode" to emit the native
      // instruction, but this only matters for float fadd.
      if (BinOp == llvm::AtomicRMWInst::FAdd && Val->getType()->isFloatTy())
        RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD);
    }

    // Cast the result back to the builtin's declared type (undoes the v2bf16
    // bitcast when it was applied; a no-op otherwise).
    return Builder.CreateBitCast(RMW, OrigTy);
  }
  case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtn:
  case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtnl: {
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    llvm::Type *ResultType = ConvertType(E->getType());
    // s_sendmsg_rtn is mangled using return type only.
    Function *F =
        CGM.getIntrinsic(Intrinsic::amdgcn_s_sendmsg_rtn, {ResultType});
    return Builder.CreateCall(F, {Arg});
  }
  case AMDGPU::BI__builtin_amdgcn_permlane16_swap:
  case AMDGPU::BI__builtin_amdgcn_permlane32_swap: {
    // Because builtin types are limited, and the intrinsic uses a struct/pair
    // output, marshal the pair-of-i32 to <2 x i32>.
    Value *VDstOld = EmitScalarExpr(E->getArg(0));
    Value *VSrcOld = EmitScalarExpr(E->getArg(1));
    Value *FI = EmitScalarExpr(E->getArg(2));
    Value *BoundCtrl = EmitScalarExpr(E->getArg(3));
    Function *F =
        CGM.getIntrinsic(BuiltinID == AMDGPU::BI__builtin_amdgcn_permlane16_swap
                             ? Intrinsic::amdgcn_permlane16_swap
                             : Intrinsic::amdgcn_permlane32_swap);
    llvm::CallInst *Call =
        Builder.CreateCall(F, {VDstOld, VSrcOld, FI, BoundCtrl});

    // Unpack the two struct members returned by the intrinsic.
    llvm::Value *Elt0 = Builder.CreateExtractValue(Call, 0);
    llvm::Value *Elt1 = Builder.CreateExtractValue(Call, 1);

    llvm::Type *ResultType = ConvertType(E->getType());

    // Rebuild them as elements 0 and 1 of the builtin's vector result.
    llvm::Value *Insert0 = Builder.CreateInsertElement(
        llvm::PoisonValue::get(ResultType), Elt0, UINT64_C(0));
    llvm::Value *AsVector =
        Builder.CreateInsertElement(Insert0, Elt1, UINT64_C(1));
    return AsVector;
  }
  // Both the 32- and 16-bit variants share one intrinsic, overloaded on the
  // types of the builtin's four operands.
  case AMDGPU::BI__builtin_amdgcn_bitop3_b32:
  case AMDGPU::BI__builtin_amdgcn_bitop3_b16:
    return emitBuiltinWithOneOverloadedType<4>(*this, E,
                                               Intrinsic::amdgcn_bitop3);
  case AMDGPU::BI__builtin_amdgcn_make_buffer_rsrc: {
    // TODO: LLVM has this overloaded to allow for fat pointers, but since
    // those haven't been plumbed through to Clang yet, default to creating the
    // resource type.
    SmallVector<Value *, 4> Args;
    for (unsigned I = 0; I < 4; ++I)
      Args.push_back(EmitScalarExpr(E->getArg(I)));
    // Result is a pointer in the buffer-resource address space; the intrinsic
    // is mangled on both the result and the base-pointer (arg 0) types.
    llvm::PointerType *RetTy = llvm::PointerType::get(
        Builder.getContext(), llvm::AMDGPUAS::BUFFER_RESOURCE);
    Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_make_buffer_rsrc,
                                   {RetTy, Args[0]->getType()});
    return Builder.CreateCall(F, Args);
  }
  // All store widths map to one overloaded intrinsic, mangled on the types of
  // the builtin's five operands.
  case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b8:
  case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b16:
  case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b32:
  case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b64:
  case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b96:
  case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b128:
    return emitBuiltinWithOneOverloadedType<5>(
        *this, E, Intrinsic::amdgcn_raw_ptr_buffer_store);
  case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8:
  case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16:
  case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32:
  case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64:
  case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96:
  case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128: {
    // Select the IR result type matching the builtin's bit width: scalar ints
    // up to 32 bits, <N x i32> vectors beyond that.
    llvm::Type *RetTy = nullptr;
    switch (BuiltinID) {
    case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8:
      RetTy = Int8Ty;
      break;
    case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16:
      RetTy = Int16Ty;
      break;
    case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32:
      RetTy = Int32Ty;
      break;
    case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64:
      RetTy = llvm::FixedVectorType::get(Int32Ty, /*NumElements=*/2);
      break;
    case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96:
      RetTy = llvm::FixedVectorType::get(Int32Ty, /*NumElements=*/3);
      break;
    case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128:
      RetTy = llvm::FixedVectorType::get(Int32Ty, /*NumElements=*/4);
      break;
    }
    // The load intrinsic is mangled on its result type only; the builtin's
    // four arguments are passed through unchanged.
    Function *F =
        CGM.getIntrinsic(Intrinsic::amdgcn_raw_ptr_buffer_load, RetTy);
    return Builder.CreateCall(
        F, {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)),
            EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3))});
  }
  case AMDGPU::BI__builtin_amdgcn_s_prefetch_data:
    return emitBuiltinWithOneOverloadedType<2>(
        *this, E, Intrinsic::amdgcn_s_prefetch_data);
  case Builtin::BIlogbf:
  case Builtin::BI__builtin_logbf: {
    // Open-code logbf via llvm.frexp: for finite nonzero x the result is the
    // frexp exponent minus one. The selects below patch up the edge cases:
    // |x| == +inf returns +inf, x == +/-0 returns -inf, and NaN propagates
    // through fabs (the ordered compares are false for NaN).
    Value *Src0 = EmitScalarExpr(E->getArg(0));
    Function *FrExpFunc = CGM.getIntrinsic(
        Intrinsic::frexp, {Src0->getType(), Builder.getInt32Ty()});
    CallInst *FrExp = Builder.CreateCall(FrExpFunc, Src0);
    Value *Exp = Builder.CreateExtractValue(FrExp, 1);
    // exp - 1, with no-signed-wrap since frexp's exponent is small.
    Value *Add = Builder.CreateAdd(
        Exp, ConstantInt::getSigned(Exp->getType(), -1), "", false, true);
    Value *SIToFP = Builder.CreateSIToFP(Add, Builder.getFloatTy());
    Value *Fabs =
        emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::fabs);
    Value *FCmpONE = Builder.CreateFCmpONE(
        Fabs, ConstantFP::getInfinity(Builder.getFloatTy()));
    Value *Sel1 = Builder.CreateSelect(FCmpONE, SIToFP, Fabs);
    Value *FCmpOEQ =
        Builder.CreateFCmpOEQ(Src0, ConstantFP::getZero(Builder.getFloatTy()));
    Value *Sel2 = Builder.CreateSelect(
        FCmpOEQ,
        ConstantFP::getInfinity(Builder.getFloatTy(), /*Negative=*/true), Sel1);
    return Sel2;
  }
  case Builtin::BIlogb:
  case Builtin::BI__builtin_logb: {
    // Double-precision twin of the logbf expansion above.
    Value *Src0 = EmitScalarExpr(E->getArg(0));
    Function *FrExpFunc = CGM.getIntrinsic(
        Intrinsic::frexp, {Src0->getType(), Builder.getInt32Ty()});
    CallInst *FrExp = Builder.CreateCall(FrExpFunc, Src0);
    Value *Exp = Builder.CreateExtractValue(FrExp, 1);
    Value *Add = Builder.CreateAdd(
        Exp, ConstantInt::getSigned(Exp->getType(), -1), "", false, true);
    Value *SIToFP = Builder.CreateSIToFP(Add, Builder.getDoubleTy());
    Value *Fabs =
        emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::fabs);
    Value *FCmpONE = Builder.CreateFCmpONE(
        Fabs, ConstantFP::getInfinity(Builder.getDoubleTy()));
    Value *Sel1 = Builder.CreateSelect(FCmpONE, SIToFP, Fabs);
    Value *FCmpOEQ =
        Builder.CreateFCmpOEQ(Src0, ConstantFP::getZero(Builder.getDoubleTy()));
    Value *Sel2 = Builder.CreateSelect(
        FCmpOEQ,
        ConstantFP::getInfinity(Builder.getDoubleTy(), /*Negative=*/true),
        Sel1);
    return Sel2;
  }
1292*700637cbSDimitry Andric   case Builtin::BIscalbnf:
1293*700637cbSDimitry Andric   case Builtin::BI__builtin_scalbnf:
1294*700637cbSDimitry Andric   case Builtin::BIscalbn:
1295*700637cbSDimitry Andric   case Builtin::BI__builtin_scalbn:
1296*700637cbSDimitry Andric     return emitBinaryExpMaybeConstrainedFPBuiltin(
1297*700637cbSDimitry Andric         *this, E, Intrinsic::ldexp, Intrinsic::experimental_constrained_ldexp);
1298*700637cbSDimitry Andric   default:
1299*700637cbSDimitry Andric     return nullptr;
1300*700637cbSDimitry Andric   }
1301*700637cbSDimitry Andric }
1302