xref: /freebsd/contrib/llvm-project/clang/lib/CodeGen/TargetBuiltins/NVPTX.cpp (revision 700637cbb5e582861067a11aaca4d053546871d2)
1*700637cbSDimitry Andric //===-------- NVPTX.cpp - Emit LLVM Code for builtins ---------------------===//
2*700637cbSDimitry Andric //
3*700637cbSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*700637cbSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5*700637cbSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*700637cbSDimitry Andric //
7*700637cbSDimitry Andric //===----------------------------------------------------------------------===//
8*700637cbSDimitry Andric //
9*700637cbSDimitry Andric // This contains code to emit Builtin calls as LLVM code.
10*700637cbSDimitry Andric //
11*700637cbSDimitry Andric //===----------------------------------------------------------------------===//
12*700637cbSDimitry Andric 
13*700637cbSDimitry Andric #include "CGBuiltin.h"
14*700637cbSDimitry Andric #include "clang/Basic/TargetBuiltins.h"
15*700637cbSDimitry Andric #include "llvm/IR/IntrinsicsNVPTX.h"
16*700637cbSDimitry Andric 
17*700637cbSDimitry Andric using namespace clang;
18*700637cbSDimitry Andric using namespace CodeGen;
19*700637cbSDimitry Andric using namespace llvm;
20*700637cbSDimitry Andric 
21*700637cbSDimitry Andric namespace {
// Lookup record that ties one MMA load/store builtin to the LLVM intrinsic
// variants implementing it.
struct NVPTXMmaLdstInfo {
  // Count of elements that the builtin loads or stores.
  unsigned NumResults;
  // Intrinsic ID of the 'col' layout variant; 0 when that layout is
  // unsupported.
  unsigned IID_col;
  // Intrinsic ID of the 'row' layout variant; 0 when that layout is
  // unsupported.
  unsigned IID_row;
};
29*700637cbSDimitry Andric 
30*700637cbSDimitry Andric #define MMA_INTR(geom_op_type, layout) \
31*700637cbSDimitry Andric   Intrinsic::nvvm_wmma_##geom_op_type##_##layout##_stride
32*700637cbSDimitry Andric #define MMA_LDST(n, geom_op_type)                                              \
33*700637cbSDimitry Andric   { n, MMA_INTR(geom_op_type, col), MMA_INTR(geom_op_type, row) }
34*700637cbSDimitry Andric 
getNVPTXMmaLdstInfo(unsigned BuiltinID)35*700637cbSDimitry Andric static NVPTXMmaLdstInfo getNVPTXMmaLdstInfo(unsigned BuiltinID) {
36*700637cbSDimitry Andric   switch (BuiltinID) {
37*700637cbSDimitry Andric   // FP MMA loads
38*700637cbSDimitry Andric   case NVPTX::BI__hmma_m16n16k16_ld_a:
39*700637cbSDimitry Andric     return MMA_LDST(8, m16n16k16_load_a_f16);
40*700637cbSDimitry Andric   case NVPTX::BI__hmma_m16n16k16_ld_b:
41*700637cbSDimitry Andric     return MMA_LDST(8, m16n16k16_load_b_f16);
42*700637cbSDimitry Andric   case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
43*700637cbSDimitry Andric     return MMA_LDST(4, m16n16k16_load_c_f16);
44*700637cbSDimitry Andric   case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
45*700637cbSDimitry Andric     return MMA_LDST(8, m16n16k16_load_c_f32);
46*700637cbSDimitry Andric   case NVPTX::BI__hmma_m32n8k16_ld_a:
47*700637cbSDimitry Andric     return MMA_LDST(8, m32n8k16_load_a_f16);
48*700637cbSDimitry Andric   case NVPTX::BI__hmma_m32n8k16_ld_b:
49*700637cbSDimitry Andric     return MMA_LDST(8, m32n8k16_load_b_f16);
50*700637cbSDimitry Andric   case NVPTX::BI__hmma_m32n8k16_ld_c_f16:
51*700637cbSDimitry Andric     return MMA_LDST(4, m32n8k16_load_c_f16);
52*700637cbSDimitry Andric   case NVPTX::BI__hmma_m32n8k16_ld_c_f32:
53*700637cbSDimitry Andric     return MMA_LDST(8, m32n8k16_load_c_f32);
54*700637cbSDimitry Andric   case NVPTX::BI__hmma_m8n32k16_ld_a:
55*700637cbSDimitry Andric     return MMA_LDST(8, m8n32k16_load_a_f16);
56*700637cbSDimitry Andric   case NVPTX::BI__hmma_m8n32k16_ld_b:
57*700637cbSDimitry Andric     return MMA_LDST(8, m8n32k16_load_b_f16);
58*700637cbSDimitry Andric   case NVPTX::BI__hmma_m8n32k16_ld_c_f16:
59*700637cbSDimitry Andric     return MMA_LDST(4, m8n32k16_load_c_f16);
60*700637cbSDimitry Andric   case NVPTX::BI__hmma_m8n32k16_ld_c_f32:
61*700637cbSDimitry Andric     return MMA_LDST(8, m8n32k16_load_c_f32);
62*700637cbSDimitry Andric 
63*700637cbSDimitry Andric   // Integer MMA loads
64*700637cbSDimitry Andric   case NVPTX::BI__imma_m16n16k16_ld_a_s8:
65*700637cbSDimitry Andric     return MMA_LDST(2, m16n16k16_load_a_s8);
66*700637cbSDimitry Andric   case NVPTX::BI__imma_m16n16k16_ld_a_u8:
67*700637cbSDimitry Andric     return MMA_LDST(2, m16n16k16_load_a_u8);
68*700637cbSDimitry Andric   case NVPTX::BI__imma_m16n16k16_ld_b_s8:
69*700637cbSDimitry Andric     return MMA_LDST(2, m16n16k16_load_b_s8);
70*700637cbSDimitry Andric   case NVPTX::BI__imma_m16n16k16_ld_b_u8:
71*700637cbSDimitry Andric     return MMA_LDST(2, m16n16k16_load_b_u8);
72*700637cbSDimitry Andric   case NVPTX::BI__imma_m16n16k16_ld_c:
73*700637cbSDimitry Andric     return MMA_LDST(8, m16n16k16_load_c_s32);
74*700637cbSDimitry Andric   case NVPTX::BI__imma_m32n8k16_ld_a_s8:
75*700637cbSDimitry Andric     return MMA_LDST(4, m32n8k16_load_a_s8);
76*700637cbSDimitry Andric   case NVPTX::BI__imma_m32n8k16_ld_a_u8:
77*700637cbSDimitry Andric     return MMA_LDST(4, m32n8k16_load_a_u8);
78*700637cbSDimitry Andric   case NVPTX::BI__imma_m32n8k16_ld_b_s8:
79*700637cbSDimitry Andric     return MMA_LDST(1, m32n8k16_load_b_s8);
80*700637cbSDimitry Andric   case NVPTX::BI__imma_m32n8k16_ld_b_u8:
81*700637cbSDimitry Andric     return MMA_LDST(1, m32n8k16_load_b_u8);
82*700637cbSDimitry Andric   case NVPTX::BI__imma_m32n8k16_ld_c:
83*700637cbSDimitry Andric     return MMA_LDST(8, m32n8k16_load_c_s32);
84*700637cbSDimitry Andric   case NVPTX::BI__imma_m8n32k16_ld_a_s8:
85*700637cbSDimitry Andric     return MMA_LDST(1, m8n32k16_load_a_s8);
86*700637cbSDimitry Andric   case NVPTX::BI__imma_m8n32k16_ld_a_u8:
87*700637cbSDimitry Andric     return MMA_LDST(1, m8n32k16_load_a_u8);
88*700637cbSDimitry Andric   case NVPTX::BI__imma_m8n32k16_ld_b_s8:
89*700637cbSDimitry Andric     return MMA_LDST(4, m8n32k16_load_b_s8);
90*700637cbSDimitry Andric   case NVPTX::BI__imma_m8n32k16_ld_b_u8:
91*700637cbSDimitry Andric     return MMA_LDST(4, m8n32k16_load_b_u8);
92*700637cbSDimitry Andric   case NVPTX::BI__imma_m8n32k16_ld_c:
93*700637cbSDimitry Andric     return MMA_LDST(8, m8n32k16_load_c_s32);
94*700637cbSDimitry Andric 
95*700637cbSDimitry Andric   // Sub-integer MMA loads.
96*700637cbSDimitry Andric   // Only row/col layout is supported by A/B fragments.
97*700637cbSDimitry Andric   case NVPTX::BI__imma_m8n8k32_ld_a_s4:
98*700637cbSDimitry Andric     return {1, 0, MMA_INTR(m8n8k32_load_a_s4, row)};
99*700637cbSDimitry Andric   case NVPTX::BI__imma_m8n8k32_ld_a_u4:
100*700637cbSDimitry Andric     return {1, 0, MMA_INTR(m8n8k32_load_a_u4, row)};
101*700637cbSDimitry Andric   case NVPTX::BI__imma_m8n8k32_ld_b_s4:
102*700637cbSDimitry Andric     return {1, MMA_INTR(m8n8k32_load_b_s4, col), 0};
103*700637cbSDimitry Andric   case NVPTX::BI__imma_m8n8k32_ld_b_u4:
104*700637cbSDimitry Andric     return {1, MMA_INTR(m8n8k32_load_b_u4, col), 0};
105*700637cbSDimitry Andric   case NVPTX::BI__imma_m8n8k32_ld_c:
106*700637cbSDimitry Andric     return MMA_LDST(2, m8n8k32_load_c_s32);
107*700637cbSDimitry Andric   case NVPTX::BI__bmma_m8n8k128_ld_a_b1:
108*700637cbSDimitry Andric     return {1, 0, MMA_INTR(m8n8k128_load_a_b1, row)};
109*700637cbSDimitry Andric   case NVPTX::BI__bmma_m8n8k128_ld_b_b1:
110*700637cbSDimitry Andric     return {1, MMA_INTR(m8n8k128_load_b_b1, col), 0};
111*700637cbSDimitry Andric   case NVPTX::BI__bmma_m8n8k128_ld_c:
112*700637cbSDimitry Andric     return MMA_LDST(2, m8n8k128_load_c_s32);
113*700637cbSDimitry Andric 
114*700637cbSDimitry Andric   // Double MMA loads
115*700637cbSDimitry Andric   case NVPTX::BI__dmma_m8n8k4_ld_a:
116*700637cbSDimitry Andric     return MMA_LDST(1, m8n8k4_load_a_f64);
117*700637cbSDimitry Andric   case NVPTX::BI__dmma_m8n8k4_ld_b:
118*700637cbSDimitry Andric     return MMA_LDST(1, m8n8k4_load_b_f64);
119*700637cbSDimitry Andric   case NVPTX::BI__dmma_m8n8k4_ld_c:
120*700637cbSDimitry Andric     return MMA_LDST(2, m8n8k4_load_c_f64);
121*700637cbSDimitry Andric 
122*700637cbSDimitry Andric   // Alternate float MMA loads
123*700637cbSDimitry Andric   case NVPTX::BI__mma_bf16_m16n16k16_ld_a:
124*700637cbSDimitry Andric     return MMA_LDST(4, m16n16k16_load_a_bf16);
125*700637cbSDimitry Andric   case NVPTX::BI__mma_bf16_m16n16k16_ld_b:
126*700637cbSDimitry Andric     return MMA_LDST(4, m16n16k16_load_b_bf16);
127*700637cbSDimitry Andric   case NVPTX::BI__mma_bf16_m8n32k16_ld_a:
128*700637cbSDimitry Andric     return MMA_LDST(2, m8n32k16_load_a_bf16);
129*700637cbSDimitry Andric   case NVPTX::BI__mma_bf16_m8n32k16_ld_b:
130*700637cbSDimitry Andric     return MMA_LDST(8, m8n32k16_load_b_bf16);
131*700637cbSDimitry Andric   case NVPTX::BI__mma_bf16_m32n8k16_ld_a:
132*700637cbSDimitry Andric     return MMA_LDST(8, m32n8k16_load_a_bf16);
133*700637cbSDimitry Andric   case NVPTX::BI__mma_bf16_m32n8k16_ld_b:
134*700637cbSDimitry Andric     return MMA_LDST(2, m32n8k16_load_b_bf16);
135*700637cbSDimitry Andric   case NVPTX::BI__mma_tf32_m16n16k8_ld_a:
136*700637cbSDimitry Andric     return MMA_LDST(4, m16n16k8_load_a_tf32);
137*700637cbSDimitry Andric   case NVPTX::BI__mma_tf32_m16n16k8_ld_b:
138*700637cbSDimitry Andric     return MMA_LDST(4, m16n16k8_load_b_tf32);
139*700637cbSDimitry Andric   case NVPTX::BI__mma_tf32_m16n16k8_ld_c:
140*700637cbSDimitry Andric     return MMA_LDST(8, m16n16k8_load_c_f32);
141*700637cbSDimitry Andric 
142*700637cbSDimitry Andric   // NOTE: We need to follow inconsitent naming scheme used by NVCC.  Unlike
143*700637cbSDimitry Andric   // PTX and LLVM IR where stores always use fragment D, NVCC builtins always
144*700637cbSDimitry Andric   // use fragment C for both loads and stores.
145*700637cbSDimitry Andric   // FP MMA stores.
146*700637cbSDimitry Andric   case NVPTX::BI__hmma_m16n16k16_st_c_f16:
147*700637cbSDimitry Andric     return MMA_LDST(4, m16n16k16_store_d_f16);
148*700637cbSDimitry Andric   case NVPTX::BI__hmma_m16n16k16_st_c_f32:
149*700637cbSDimitry Andric     return MMA_LDST(8, m16n16k16_store_d_f32);
150*700637cbSDimitry Andric   case NVPTX::BI__hmma_m32n8k16_st_c_f16:
151*700637cbSDimitry Andric     return MMA_LDST(4, m32n8k16_store_d_f16);
152*700637cbSDimitry Andric   case NVPTX::BI__hmma_m32n8k16_st_c_f32:
153*700637cbSDimitry Andric     return MMA_LDST(8, m32n8k16_store_d_f32);
154*700637cbSDimitry Andric   case NVPTX::BI__hmma_m8n32k16_st_c_f16:
155*700637cbSDimitry Andric     return MMA_LDST(4, m8n32k16_store_d_f16);
156*700637cbSDimitry Andric   case NVPTX::BI__hmma_m8n32k16_st_c_f32:
157*700637cbSDimitry Andric     return MMA_LDST(8, m8n32k16_store_d_f32);
158*700637cbSDimitry Andric 
159*700637cbSDimitry Andric   // Integer and sub-integer MMA stores.
160*700637cbSDimitry Andric   // Another naming quirk. Unlike other MMA builtins that use PTX types in the
161*700637cbSDimitry Andric   // name, integer loads/stores use LLVM's i32.
162*700637cbSDimitry Andric   case NVPTX::BI__imma_m16n16k16_st_c_i32:
163*700637cbSDimitry Andric     return MMA_LDST(8, m16n16k16_store_d_s32);
164*700637cbSDimitry Andric   case NVPTX::BI__imma_m32n8k16_st_c_i32:
165*700637cbSDimitry Andric     return MMA_LDST(8, m32n8k16_store_d_s32);
166*700637cbSDimitry Andric   case NVPTX::BI__imma_m8n32k16_st_c_i32:
167*700637cbSDimitry Andric     return MMA_LDST(8, m8n32k16_store_d_s32);
168*700637cbSDimitry Andric   case NVPTX::BI__imma_m8n8k32_st_c_i32:
169*700637cbSDimitry Andric     return MMA_LDST(2, m8n8k32_store_d_s32);
170*700637cbSDimitry Andric   case NVPTX::BI__bmma_m8n8k128_st_c_i32:
171*700637cbSDimitry Andric     return MMA_LDST(2, m8n8k128_store_d_s32);
172*700637cbSDimitry Andric 
173*700637cbSDimitry Andric   // Double MMA store
174*700637cbSDimitry Andric   case NVPTX::BI__dmma_m8n8k4_st_c_f64:
175*700637cbSDimitry Andric     return MMA_LDST(2, m8n8k4_store_d_f64);
176*700637cbSDimitry Andric 
177*700637cbSDimitry Andric   // Alternate float MMA store
178*700637cbSDimitry Andric   case NVPTX::BI__mma_m16n16k8_st_c_f32:
179*700637cbSDimitry Andric     return MMA_LDST(8, m16n16k8_store_d_f32);
180*700637cbSDimitry Andric 
181*700637cbSDimitry Andric   default:
182*700637cbSDimitry Andric     llvm_unreachable("Unknown MMA builtin");
183*700637cbSDimitry Andric   }
184*700637cbSDimitry Andric }
185*700637cbSDimitry Andric #undef MMA_LDST
186*700637cbSDimitry Andric #undef MMA_INTR
187*700637cbSDimitry Andric 
188*700637cbSDimitry Andric 
// Lookup record that ties one MMA builtin to its fragment sizes and to the
// LLVM intrinsic variants implementing it.
struct NVPTXMmaInfo {
  // Element counts of the A, B, C and D fragments.
  unsigned NumEltsA;
  unsigned NumEltsB;
  unsigned NumEltsC;
  unsigned NumEltsD;

  // Variants are ordered by layout-A/layout-B/satf, where 'row' has priority
  // over 'col' for layout. The index of non-satf variants is expected to match
  // the undocumented layout constants used by CUDA's mma.hpp. The first half
  // of the table holds the non-satf variants, the second half the satf ones;
  // a 0 entry marks a combination with no intrinsic.
  std::array<unsigned, 8> Variants;

  // Returns the intrinsic ID stored for the given Layout and Satf flag, or 0
  // when Layout is not a valid layout index or the combination has no
  // intrinsic.
  //
  // Layout is validated on its own before it is combined with Satf. Checking
  // only the combined index would let an out-of-range Layout alias an entry in
  // the other half of the table (e.g. Layout 5 without satf would silently
  // select the satf variant of layout 1).
  unsigned getMMAIntrinsic(int Layout, bool Satf) const {
    const unsigned NumLayouts = Variants.size() / 2;
    if (Layout < 0 || static_cast<unsigned>(Layout) >= NumLayouts)
      return 0;
    const unsigned Index =
        static_cast<unsigned>(Layout) + (Satf ? NumLayouts : 0u);
    return Variants[Index];
  }
};
207*700637cbSDimitry Andric 
208*700637cbSDimitry Andric   // Returns an intrinsic that matches Layout and Satf for valid combinations of
209*700637cbSDimitry Andric   // Layout and Satf, 0 otherwise.
getNVPTXMmaInfo(unsigned BuiltinID)210*700637cbSDimitry Andric static NVPTXMmaInfo getNVPTXMmaInfo(unsigned BuiltinID) {
211*700637cbSDimitry Andric   // clang-format off
212*700637cbSDimitry Andric #define MMA_VARIANTS(geom, type)                                    \
213*700637cbSDimitry Andric       Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type,             \
214*700637cbSDimitry Andric       Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type,             \
215*700637cbSDimitry Andric       Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type,             \
216*700637cbSDimitry Andric       Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type
217*700637cbSDimitry Andric #define MMA_SATF_VARIANTS(geom, type)                               \
218*700637cbSDimitry Andric       MMA_VARIANTS(geom, type),                                     \
219*700637cbSDimitry Andric       Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type##_satfinite, \
220*700637cbSDimitry Andric       Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \
221*700637cbSDimitry Andric       Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type##_satfinite, \
222*700637cbSDimitry Andric       Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type##_satfinite
223*700637cbSDimitry Andric // Sub-integer MMA only supports row.col layout.
224*700637cbSDimitry Andric #define MMA_VARIANTS_I4(geom, type) \
225*700637cbSDimitry Andric       0, \
226*700637cbSDimitry Andric       Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type,             \
227*700637cbSDimitry Andric       0, \
228*700637cbSDimitry Andric       0, \
229*700637cbSDimitry Andric       0, \
230*700637cbSDimitry Andric       Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \
231*700637cbSDimitry Andric       0, \
232*700637cbSDimitry Andric       0
233*700637cbSDimitry Andric // b1 MMA does not support .satfinite.
234*700637cbSDimitry Andric #define MMA_VARIANTS_B1_XOR(geom, type) \
235*700637cbSDimitry Andric       0, \
236*700637cbSDimitry Andric       Intrinsic::nvvm_wmma_##geom##_mma_xor_popc_row_col_##type,             \
237*700637cbSDimitry Andric       0, \
238*700637cbSDimitry Andric       0, \
239*700637cbSDimitry Andric       0, \
240*700637cbSDimitry Andric       0, \
241*700637cbSDimitry Andric       0, \
242*700637cbSDimitry Andric       0
243*700637cbSDimitry Andric #define MMA_VARIANTS_B1_AND(geom, type) \
244*700637cbSDimitry Andric       0, \
245*700637cbSDimitry Andric       Intrinsic::nvvm_wmma_##geom##_mma_and_popc_row_col_##type,             \
246*700637cbSDimitry Andric       0, \
247*700637cbSDimitry Andric       0, \
248*700637cbSDimitry Andric       0, \
249*700637cbSDimitry Andric       0, \
250*700637cbSDimitry Andric       0, \
251*700637cbSDimitry Andric       0
252*700637cbSDimitry Andric   // clang-format on
253*700637cbSDimitry Andric   switch (BuiltinID) {
254*700637cbSDimitry Andric   // FP MMA
255*700637cbSDimitry Andric   // Note that 'type' argument of MMA_SATF_VARIANTS uses D_C notation, while
256*700637cbSDimitry Andric   // NumEltsN of return value are ordered as A,B,C,D.
257*700637cbSDimitry Andric   case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
258*700637cbSDimitry Andric     return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m16n16k16, f16_f16)}}};
259*700637cbSDimitry Andric   case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
260*700637cbSDimitry Andric     return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m16n16k16, f32_f16)}}};
261*700637cbSDimitry Andric   case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
262*700637cbSDimitry Andric     return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m16n16k16, f16_f32)}}};
263*700637cbSDimitry Andric   case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
264*700637cbSDimitry Andric     return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, f32_f32)}}};
265*700637cbSDimitry Andric   case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
266*700637cbSDimitry Andric     return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m32n8k16, f16_f16)}}};
267*700637cbSDimitry Andric   case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
268*700637cbSDimitry Andric     return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m32n8k16, f32_f16)}}};
269*700637cbSDimitry Andric   case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
270*700637cbSDimitry Andric     return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m32n8k16, f16_f32)}}};
271*700637cbSDimitry Andric   case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
272*700637cbSDimitry Andric     return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, f32_f32)}}};
273*700637cbSDimitry Andric   case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
274*700637cbSDimitry Andric     return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m8n32k16, f16_f16)}}};
275*700637cbSDimitry Andric   case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
276*700637cbSDimitry Andric     return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m8n32k16, f32_f16)}}};
277*700637cbSDimitry Andric   case NVPTX::BI__hmma_m8n32k16_mma_f16f32:
278*700637cbSDimitry Andric     return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m8n32k16, f16_f32)}}};
279*700637cbSDimitry Andric   case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
280*700637cbSDimitry Andric     return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, f32_f32)}}};
281*700637cbSDimitry Andric 
282*700637cbSDimitry Andric   // Integer MMA
283*700637cbSDimitry Andric   case NVPTX::BI__imma_m16n16k16_mma_s8:
284*700637cbSDimitry Andric     return {2, 2, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, s8)}}};
285*700637cbSDimitry Andric   case NVPTX::BI__imma_m16n16k16_mma_u8:
286*700637cbSDimitry Andric     return {2, 2, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, u8)}}};
287*700637cbSDimitry Andric   case NVPTX::BI__imma_m32n8k16_mma_s8:
288*700637cbSDimitry Andric     return {4, 1, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, s8)}}};
289*700637cbSDimitry Andric   case NVPTX::BI__imma_m32n8k16_mma_u8:
290*700637cbSDimitry Andric     return {4, 1, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, u8)}}};
291*700637cbSDimitry Andric   case NVPTX::BI__imma_m8n32k16_mma_s8:
292*700637cbSDimitry Andric     return {1, 4, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, s8)}}};
293*700637cbSDimitry Andric   case NVPTX::BI__imma_m8n32k16_mma_u8:
294*700637cbSDimitry Andric     return {1, 4, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, u8)}}};
295*700637cbSDimitry Andric 
296*700637cbSDimitry Andric   // Sub-integer MMA
297*700637cbSDimitry Andric   case NVPTX::BI__imma_m8n8k32_mma_s4:
298*700637cbSDimitry Andric     return {1, 1, 2, 2, {{MMA_VARIANTS_I4(m8n8k32, s4)}}};
299*700637cbSDimitry Andric   case NVPTX::BI__imma_m8n8k32_mma_u4:
300*700637cbSDimitry Andric     return {1, 1, 2, 2, {{MMA_VARIANTS_I4(m8n8k32, u4)}}};
301*700637cbSDimitry Andric   case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1:
302*700637cbSDimitry Andric     return {1, 1, 2, 2, {{MMA_VARIANTS_B1_XOR(m8n8k128, b1)}}};
303*700637cbSDimitry Andric   case NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1:
304*700637cbSDimitry Andric     return {1, 1, 2, 2, {{MMA_VARIANTS_B1_AND(m8n8k128, b1)}}};
305*700637cbSDimitry Andric 
306*700637cbSDimitry Andric   // Double MMA
307*700637cbSDimitry Andric   case NVPTX::BI__dmma_m8n8k4_mma_f64:
308*700637cbSDimitry Andric     return {1, 1, 2, 2, {{MMA_VARIANTS(m8n8k4, f64)}}};
309*700637cbSDimitry Andric 
310*700637cbSDimitry Andric   // Alternate FP MMA
311*700637cbSDimitry Andric   case NVPTX::BI__mma_bf16_m16n16k16_mma_f32:
312*700637cbSDimitry Andric     return {4, 4, 8, 8, {{MMA_VARIANTS(m16n16k16, bf16)}}};
313*700637cbSDimitry Andric   case NVPTX::BI__mma_bf16_m8n32k16_mma_f32:
314*700637cbSDimitry Andric     return {2, 8, 8, 8, {{MMA_VARIANTS(m8n32k16, bf16)}}};
315*700637cbSDimitry Andric   case NVPTX::BI__mma_bf16_m32n8k16_mma_f32:
316*700637cbSDimitry Andric     return {8, 2, 8, 8, {{MMA_VARIANTS(m32n8k16, bf16)}}};
317*700637cbSDimitry Andric   case NVPTX::BI__mma_tf32_m16n16k8_mma_f32:
318*700637cbSDimitry Andric     return {4, 4, 8, 8, {{MMA_VARIANTS(m16n16k8, tf32)}}};
319*700637cbSDimitry Andric   default:
320*700637cbSDimitry Andric     llvm_unreachable("Unexpected builtin ID.");
321*700637cbSDimitry Andric   }
322*700637cbSDimitry Andric #undef MMA_VARIANTS
323*700637cbSDimitry Andric #undef MMA_SATF_VARIANTS
324*700637cbSDimitry Andric #undef MMA_VARIANTS_I4
325*700637cbSDimitry Andric #undef MMA_VARIANTS_B1_AND
326*700637cbSDimitry Andric #undef MMA_VARIANTS_B1_XOR
327*700637cbSDimitry Andric }
328*700637cbSDimitry Andric 
MakeLdu(unsigned IntrinsicID,CodeGenFunction & CGF,const CallExpr * E)329*700637cbSDimitry Andric static Value *MakeLdu(unsigned IntrinsicID, CodeGenFunction &CGF,
330*700637cbSDimitry Andric                       const CallExpr *E) {
331*700637cbSDimitry Andric   Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
332*700637cbSDimitry Andric   QualType ArgType = E->getArg(0)->getType();
333*700637cbSDimitry Andric   clang::CharUnits Align = CGF.CGM.getNaturalPointeeTypeAlignment(ArgType);
334*700637cbSDimitry Andric   llvm::Type *ElemTy = CGF.ConvertTypeForMem(ArgType->getPointeeType());
335*700637cbSDimitry Andric   return CGF.Builder.CreateCall(
336*700637cbSDimitry Andric       CGF.CGM.getIntrinsic(IntrinsicID, {ElemTy, Ptr->getType()}),
337*700637cbSDimitry Andric       {Ptr, ConstantInt::get(CGF.Builder.getInt32Ty(), Align.getQuantity())});
338*700637cbSDimitry Andric }
339*700637cbSDimitry Andric 
MakeLdg(CodeGenFunction & CGF,const CallExpr * E)340*700637cbSDimitry Andric static Value *MakeLdg(CodeGenFunction &CGF, const CallExpr *E) {
341*700637cbSDimitry Andric   Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
342*700637cbSDimitry Andric   QualType ArgType = E->getArg(0)->getType();
343*700637cbSDimitry Andric   clang::CharUnits AlignV = CGF.CGM.getNaturalPointeeTypeAlignment(ArgType);
344*700637cbSDimitry Andric   llvm::Type *ElemTy = CGF.ConvertTypeForMem(ArgType->getPointeeType());
345*700637cbSDimitry Andric 
346*700637cbSDimitry Andric   // Use addrspace(1) for NVPTX ADDRESS_SPACE_GLOBAL
347*700637cbSDimitry Andric   auto *ASC = CGF.Builder.CreateAddrSpaceCast(Ptr, CGF.Builder.getPtrTy(1));
348*700637cbSDimitry Andric   auto *LD = CGF.Builder.CreateAlignedLoad(ElemTy, ASC, AlignV.getAsAlign());
349*700637cbSDimitry Andric   MDNode *MD = MDNode::get(CGF.Builder.getContext(), {});
350*700637cbSDimitry Andric   LD->setMetadata(LLVMContext::MD_invariant_load, MD);
351*700637cbSDimitry Andric 
352*700637cbSDimitry Andric   return LD;
353*700637cbSDimitry Andric }
354*700637cbSDimitry Andric 
MakeScopedAtomic(unsigned IntrinsicID,CodeGenFunction & CGF,const CallExpr * E)355*700637cbSDimitry Andric static Value *MakeScopedAtomic(unsigned IntrinsicID, CodeGenFunction &CGF,
356*700637cbSDimitry Andric                                const CallExpr *E) {
357*700637cbSDimitry Andric   Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
358*700637cbSDimitry Andric   llvm::Type *ElemTy =
359*700637cbSDimitry Andric       CGF.ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType());
360*700637cbSDimitry Andric   return CGF.Builder.CreateCall(
361*700637cbSDimitry Andric       CGF.CGM.getIntrinsic(IntrinsicID, {ElemTy, Ptr->getType()}),
362*700637cbSDimitry Andric       {Ptr, CGF.EmitScalarExpr(E->getArg(1))});
363*700637cbSDimitry Andric }
364*700637cbSDimitry Andric 
MakeCpAsync(unsigned IntrinsicID,unsigned IntrinsicIDS,CodeGenFunction & CGF,const CallExpr * E,int SrcSize)365*700637cbSDimitry Andric static Value *MakeCpAsync(unsigned IntrinsicID, unsigned IntrinsicIDS,
366*700637cbSDimitry Andric                           CodeGenFunction &CGF, const CallExpr *E,
367*700637cbSDimitry Andric                           int SrcSize) {
368*700637cbSDimitry Andric   return E->getNumArgs() == 3
369*700637cbSDimitry Andric              ? CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IntrinsicIDS),
370*700637cbSDimitry Andric                                       {CGF.EmitScalarExpr(E->getArg(0)),
371*700637cbSDimitry Andric                                        CGF.EmitScalarExpr(E->getArg(1)),
372*700637cbSDimitry Andric                                        CGF.EmitScalarExpr(E->getArg(2))})
373*700637cbSDimitry Andric              : CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IntrinsicID),
374*700637cbSDimitry Andric                                       {CGF.EmitScalarExpr(E->getArg(0)),
375*700637cbSDimitry Andric                                        CGF.EmitScalarExpr(E->getArg(1))});
376*700637cbSDimitry Andric }
377*700637cbSDimitry Andric 
MakeHalfType(unsigned IntrinsicID,unsigned BuiltinID,const CallExpr * E,CodeGenFunction & CGF)378*700637cbSDimitry Andric static Value *MakeHalfType(unsigned IntrinsicID, unsigned BuiltinID,
379*700637cbSDimitry Andric                            const CallExpr *E, CodeGenFunction &CGF) {
380*700637cbSDimitry Andric   auto &C = CGF.CGM.getContext();
381*700637cbSDimitry Andric   if (!(C.getLangOpts().NativeHalfType ||
382*700637cbSDimitry Andric         !C.getTargetInfo().useFP16ConversionIntrinsics())) {
383*700637cbSDimitry Andric     CGF.CGM.Error(E->getExprLoc(), C.BuiltinInfo.getQuotedName(BuiltinID) +
384*700637cbSDimitry Andric                                        " requires native half type support.");
385*700637cbSDimitry Andric     return nullptr;
386*700637cbSDimitry Andric   }
387*700637cbSDimitry Andric 
388*700637cbSDimitry Andric   if (BuiltinID == NVPTX::BI__nvvm_ldg_h || BuiltinID == NVPTX::BI__nvvm_ldg_h2)
389*700637cbSDimitry Andric     return MakeLdg(CGF, E);
390*700637cbSDimitry Andric 
391*700637cbSDimitry Andric   if (IntrinsicID == Intrinsic::nvvm_ldu_global_f)
392*700637cbSDimitry Andric     return MakeLdu(IntrinsicID, CGF, E);
393*700637cbSDimitry Andric 
394*700637cbSDimitry Andric   SmallVector<Value *, 16> Args;
395*700637cbSDimitry Andric   auto *F = CGF.CGM.getIntrinsic(IntrinsicID);
396*700637cbSDimitry Andric   auto *FTy = F->getFunctionType();
397*700637cbSDimitry Andric   unsigned ICEArguments = 0;
398*700637cbSDimitry Andric   ASTContext::GetBuiltinTypeError Error;
399*700637cbSDimitry Andric   C.GetBuiltinType(BuiltinID, Error, &ICEArguments);
400*700637cbSDimitry Andric   assert(Error == ASTContext::GE_None && "Should not codegen an error");
401*700637cbSDimitry Andric   for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
402*700637cbSDimitry Andric     assert((ICEArguments & (1 << i)) == 0);
403*700637cbSDimitry Andric     auto *ArgValue = CGF.EmitScalarExpr(E->getArg(i));
404*700637cbSDimitry Andric     auto *PTy = FTy->getParamType(i);
405*700637cbSDimitry Andric     if (PTy != ArgValue->getType())
406*700637cbSDimitry Andric       ArgValue = CGF.Builder.CreateBitCast(ArgValue, PTy);
407*700637cbSDimitry Andric     Args.push_back(ArgValue);
408*700637cbSDimitry Andric   }
409*700637cbSDimitry Andric 
410*700637cbSDimitry Andric   return CGF.Builder.CreateCall(F, Args);
411*700637cbSDimitry Andric }
412*700637cbSDimitry Andric } // namespace
413*700637cbSDimitry Andric 
EmitNVPTXBuiltinExpr(unsigned BuiltinID,const CallExpr * E)414*700637cbSDimitry Andric Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
415*700637cbSDimitry Andric                                              const CallExpr *E) {
416*700637cbSDimitry Andric   switch (BuiltinID) {
417*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_add_gen_i:
418*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_add_gen_l:
419*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_add_gen_ll:
420*700637cbSDimitry Andric     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add, E);
421*700637cbSDimitry Andric 
422*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_sub_gen_i:
423*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_sub_gen_l:
424*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_sub_gen_ll:
425*700637cbSDimitry Andric     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub, E);
426*700637cbSDimitry Andric 
427*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_and_gen_i:
428*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_and_gen_l:
429*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_and_gen_ll:
430*700637cbSDimitry Andric     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::And, E);
431*700637cbSDimitry Andric 
432*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_or_gen_i:
433*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_or_gen_l:
434*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_or_gen_ll:
435*700637cbSDimitry Andric     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or, E);
436*700637cbSDimitry Andric 
437*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_xor_gen_i:
438*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_xor_gen_l:
439*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_xor_gen_ll:
440*700637cbSDimitry Andric     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor, E);
441*700637cbSDimitry Andric 
442*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_xchg_gen_i:
443*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_xchg_gen_l:
444*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_xchg_gen_ll:
445*700637cbSDimitry Andric     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg, E);
446*700637cbSDimitry Andric 
447*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_max_gen_i:
448*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_max_gen_l:
449*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_max_gen_ll:
450*700637cbSDimitry Andric     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E);
451*700637cbSDimitry Andric 
452*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_max_gen_ui:
453*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_max_gen_ul:
454*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_max_gen_ull:
455*700637cbSDimitry Andric     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMax, E);
456*700637cbSDimitry Andric 
457*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_min_gen_i:
458*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_min_gen_l:
459*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_min_gen_ll:
460*700637cbSDimitry Andric     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E);
461*700637cbSDimitry Andric 
462*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_min_gen_ui:
463*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_min_gen_ul:
464*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_min_gen_ull:
465*700637cbSDimitry Andric     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E);
466*700637cbSDimitry Andric 
467*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_cas_gen_us:
468*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_cas_gen_i:
469*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_cas_gen_l:
470*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_cas_gen_ll:
471*700637cbSDimitry Andric     // __nvvm_atom_cas_gen_* should return the old value rather than the
472*700637cbSDimitry Andric     // success flag.
473*700637cbSDimitry Andric     return MakeAtomicCmpXchgValue(*this, E, /*ReturnBool=*/false);
474*700637cbSDimitry Andric 
475*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_add_gen_f:
476*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_add_gen_d: {
477*700637cbSDimitry Andric     Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
478*700637cbSDimitry Andric     Value *Val = EmitScalarExpr(E->getArg(1));
479*700637cbSDimitry Andric 
480*700637cbSDimitry Andric     return Builder.CreateAtomicRMW(llvm::AtomicRMWInst::FAdd, DestAddr, Val,
481*700637cbSDimitry Andric                                    AtomicOrdering::SequentiallyConsistent);
482*700637cbSDimitry Andric   }
483*700637cbSDimitry Andric 
484*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_inc_gen_ui:
485*700637cbSDimitry Andric     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UIncWrap, E);
486*700637cbSDimitry Andric 
487*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_dec_gen_ui:
488*700637cbSDimitry Andric     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UDecWrap, E);
489*700637cbSDimitry Andric 
490*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ldg_c:
491*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ldg_sc:
492*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ldg_c2:
493*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ldg_sc2:
494*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ldg_c4:
495*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ldg_sc4:
496*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ldg_s:
497*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ldg_s2:
498*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ldg_s4:
499*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ldg_i:
500*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ldg_i2:
501*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ldg_i4:
502*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ldg_l:
503*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ldg_l2:
504*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ldg_ll:
505*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ldg_ll2:
506*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ldg_uc:
507*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ldg_uc2:
508*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ldg_uc4:
509*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ldg_us:
510*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ldg_us2:
511*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ldg_us4:
512*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ldg_ui:
513*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ldg_ui2:
514*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ldg_ui4:
515*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ldg_ul:
516*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ldg_ul2:
517*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ldg_ull:
518*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ldg_ull2:
519*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ldg_f:
520*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ldg_f2:
521*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ldg_f4:
522*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ldg_d:
523*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ldg_d2:
524*700637cbSDimitry Andric     // PTX Interoperability section 2.2: "For a vector with an even number of
525*700637cbSDimitry Andric     // elements, its alignment is set to number of elements times the alignment
526*700637cbSDimitry Andric     // of its member: n*alignof(t)."
527*700637cbSDimitry Andric     return MakeLdg(*this, E);
528*700637cbSDimitry Andric 
529*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ldu_c:
530*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ldu_sc:
531*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ldu_c2:
532*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ldu_sc2:
533*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ldu_c4:
534*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ldu_sc4:
535*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ldu_s:
536*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ldu_s2:
537*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ldu_s4:
538*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ldu_i:
539*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ldu_i2:
540*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ldu_i4:
541*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ldu_l:
542*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ldu_l2:
543*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ldu_ll:
544*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ldu_ll2:
545*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ldu_uc:
546*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ldu_uc2:
547*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ldu_uc4:
548*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ldu_us:
549*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ldu_us2:
550*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ldu_us4:
551*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ldu_ui:
552*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ldu_ui2:
553*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ldu_ui4:
554*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ldu_ul:
555*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ldu_ul2:
556*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ldu_ull:
557*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ldu_ull2:
558*700637cbSDimitry Andric     return MakeLdu(Intrinsic::nvvm_ldu_global_i, *this, E);
559*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ldu_f:
560*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ldu_f2:
561*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ldu_f4:
562*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ldu_d:
563*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ldu_d2:
564*700637cbSDimitry Andric     return MakeLdu(Intrinsic::nvvm_ldu_global_f, *this, E);
565*700637cbSDimitry Andric 
566*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_cta_add_gen_i:
567*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_cta_add_gen_l:
568*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_cta_add_gen_ll:
569*700637cbSDimitry Andric     return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_cta, *this, E);
570*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_sys_add_gen_i:
571*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_sys_add_gen_l:
572*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_sys_add_gen_ll:
573*700637cbSDimitry Andric     return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_sys, *this, E);
574*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_cta_add_gen_f:
575*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_cta_add_gen_d:
576*700637cbSDimitry Andric     return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_cta, *this, E);
577*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_sys_add_gen_f:
578*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_sys_add_gen_d:
579*700637cbSDimitry Andric     return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_sys, *this, E);
580*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_cta_xchg_gen_i:
581*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_cta_xchg_gen_l:
582*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_cta_xchg_gen_ll:
583*700637cbSDimitry Andric     return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_cta, *this, E);
584*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_sys_xchg_gen_i:
585*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_sys_xchg_gen_l:
586*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_sys_xchg_gen_ll:
587*700637cbSDimitry Andric     return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_sys, *this, E);
588*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_cta_max_gen_i:
589*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_cta_max_gen_ui:
590*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_cta_max_gen_l:
591*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_cta_max_gen_ul:
592*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_cta_max_gen_ll:
593*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_cta_max_gen_ull:
594*700637cbSDimitry Andric     return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_cta, *this, E);
595*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_sys_max_gen_i:
596*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_sys_max_gen_ui:
597*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_sys_max_gen_l:
598*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_sys_max_gen_ul:
599*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_sys_max_gen_ll:
600*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_sys_max_gen_ull:
601*700637cbSDimitry Andric     return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_sys, *this, E);
602*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_cta_min_gen_i:
603*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_cta_min_gen_ui:
604*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_cta_min_gen_l:
605*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_cta_min_gen_ul:
606*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_cta_min_gen_ll:
607*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_cta_min_gen_ull:
608*700637cbSDimitry Andric     return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_cta, *this, E);
609*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_sys_min_gen_i:
610*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_sys_min_gen_ui:
611*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_sys_min_gen_l:
612*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_sys_min_gen_ul:
613*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_sys_min_gen_ll:
614*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_sys_min_gen_ull:
615*700637cbSDimitry Andric     return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_sys, *this, E);
616*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_cta_inc_gen_ui:
617*700637cbSDimitry Andric     return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_cta, *this, E);
618*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_cta_dec_gen_ui:
619*700637cbSDimitry Andric     return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_cta, *this, E);
620*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_sys_inc_gen_ui:
621*700637cbSDimitry Andric     return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_sys, *this, E);
622*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_sys_dec_gen_ui:
623*700637cbSDimitry Andric     return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_sys, *this, E);
624*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_cta_and_gen_i:
625*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_cta_and_gen_l:
626*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_cta_and_gen_ll:
627*700637cbSDimitry Andric     return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_cta, *this, E);
628*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_sys_and_gen_i:
629*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_sys_and_gen_l:
630*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_sys_and_gen_ll:
631*700637cbSDimitry Andric     return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_sys, *this, E);
632*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_cta_or_gen_i:
633*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_cta_or_gen_l:
634*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_cta_or_gen_ll:
635*700637cbSDimitry Andric     return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_cta, *this, E);
636*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_sys_or_gen_i:
637*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_sys_or_gen_l:
638*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_sys_or_gen_ll:
639*700637cbSDimitry Andric     return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_sys, *this, E);
640*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_cta_xor_gen_i:
641*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_cta_xor_gen_l:
642*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_cta_xor_gen_ll:
643*700637cbSDimitry Andric     return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_cta, *this, E);
644*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_sys_xor_gen_i:
645*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_sys_xor_gen_l:
646*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_sys_xor_gen_ll:
647*700637cbSDimitry Andric     return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_sys, *this, E);
648*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_cta_cas_gen_us:
649*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_cta_cas_gen_i:
650*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_cta_cas_gen_l:
651*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_cta_cas_gen_ll: {
652*700637cbSDimitry Andric     Value *Ptr = EmitScalarExpr(E->getArg(0));
653*700637cbSDimitry Andric     llvm::Type *ElemTy =
654*700637cbSDimitry Andric         ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType());
655*700637cbSDimitry Andric     return Builder.CreateCall(
656*700637cbSDimitry Andric         CGM.getIntrinsic(
657*700637cbSDimitry Andric             Intrinsic::nvvm_atomic_cas_gen_i_cta, {ElemTy, Ptr->getType()}),
658*700637cbSDimitry Andric         {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
659*700637cbSDimitry Andric   }
660*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_sys_cas_gen_us:
661*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_sys_cas_gen_i:
662*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_sys_cas_gen_l:
663*700637cbSDimitry Andric   case NVPTX::BI__nvvm_atom_sys_cas_gen_ll: {
664*700637cbSDimitry Andric     Value *Ptr = EmitScalarExpr(E->getArg(0));
665*700637cbSDimitry Andric     llvm::Type *ElemTy =
666*700637cbSDimitry Andric         ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType());
667*700637cbSDimitry Andric     return Builder.CreateCall(
668*700637cbSDimitry Andric         CGM.getIntrinsic(
669*700637cbSDimitry Andric             Intrinsic::nvvm_atomic_cas_gen_i_sys, {ElemTy, Ptr->getType()}),
670*700637cbSDimitry Andric         {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
671*700637cbSDimitry Andric   }
672*700637cbSDimitry Andric   case NVPTX::BI__nvvm_match_all_sync_i32p:
673*700637cbSDimitry Andric   case NVPTX::BI__nvvm_match_all_sync_i64p: {
674*700637cbSDimitry Andric     Value *Mask = EmitScalarExpr(E->getArg(0));
675*700637cbSDimitry Andric     Value *Val = EmitScalarExpr(E->getArg(1));
676*700637cbSDimitry Andric     Address PredOutPtr = EmitPointerWithAlignment(E->getArg(2));
677*700637cbSDimitry Andric     Value *ResultPair = Builder.CreateCall(
678*700637cbSDimitry Andric         CGM.getIntrinsic(BuiltinID == NVPTX::BI__nvvm_match_all_sync_i32p
679*700637cbSDimitry Andric                              ? Intrinsic::nvvm_match_all_sync_i32p
680*700637cbSDimitry Andric                              : Intrinsic::nvvm_match_all_sync_i64p),
681*700637cbSDimitry Andric         {Mask, Val});
682*700637cbSDimitry Andric     Value *Pred = Builder.CreateZExt(Builder.CreateExtractValue(ResultPair, 1),
683*700637cbSDimitry Andric                                      PredOutPtr.getElementType());
684*700637cbSDimitry Andric     Builder.CreateStore(Pred, PredOutPtr);
685*700637cbSDimitry Andric     return Builder.CreateExtractValue(ResultPair, 0);
686*700637cbSDimitry Andric   }
687*700637cbSDimitry Andric 
688*700637cbSDimitry Andric   // FP MMA loads
689*700637cbSDimitry Andric   case NVPTX::BI__hmma_m16n16k16_ld_a:
690*700637cbSDimitry Andric   case NVPTX::BI__hmma_m16n16k16_ld_b:
691*700637cbSDimitry Andric   case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
692*700637cbSDimitry Andric   case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
693*700637cbSDimitry Andric   case NVPTX::BI__hmma_m32n8k16_ld_a:
694*700637cbSDimitry Andric   case NVPTX::BI__hmma_m32n8k16_ld_b:
695*700637cbSDimitry Andric   case NVPTX::BI__hmma_m32n8k16_ld_c_f16:
696*700637cbSDimitry Andric   case NVPTX::BI__hmma_m32n8k16_ld_c_f32:
697*700637cbSDimitry Andric   case NVPTX::BI__hmma_m8n32k16_ld_a:
698*700637cbSDimitry Andric   case NVPTX::BI__hmma_m8n32k16_ld_b:
699*700637cbSDimitry Andric   case NVPTX::BI__hmma_m8n32k16_ld_c_f16:
700*700637cbSDimitry Andric   case NVPTX::BI__hmma_m8n32k16_ld_c_f32:
701*700637cbSDimitry Andric   // Integer MMA loads.
702*700637cbSDimitry Andric   case NVPTX::BI__imma_m16n16k16_ld_a_s8:
703*700637cbSDimitry Andric   case NVPTX::BI__imma_m16n16k16_ld_a_u8:
704*700637cbSDimitry Andric   case NVPTX::BI__imma_m16n16k16_ld_b_s8:
705*700637cbSDimitry Andric   case NVPTX::BI__imma_m16n16k16_ld_b_u8:
706*700637cbSDimitry Andric   case NVPTX::BI__imma_m16n16k16_ld_c:
707*700637cbSDimitry Andric   case NVPTX::BI__imma_m32n8k16_ld_a_s8:
708*700637cbSDimitry Andric   case NVPTX::BI__imma_m32n8k16_ld_a_u8:
709*700637cbSDimitry Andric   case NVPTX::BI__imma_m32n8k16_ld_b_s8:
710*700637cbSDimitry Andric   case NVPTX::BI__imma_m32n8k16_ld_b_u8:
711*700637cbSDimitry Andric   case NVPTX::BI__imma_m32n8k16_ld_c:
712*700637cbSDimitry Andric   case NVPTX::BI__imma_m8n32k16_ld_a_s8:
713*700637cbSDimitry Andric   case NVPTX::BI__imma_m8n32k16_ld_a_u8:
714*700637cbSDimitry Andric   case NVPTX::BI__imma_m8n32k16_ld_b_s8:
715*700637cbSDimitry Andric   case NVPTX::BI__imma_m8n32k16_ld_b_u8:
716*700637cbSDimitry Andric   case NVPTX::BI__imma_m8n32k16_ld_c:
717*700637cbSDimitry Andric   // Sub-integer MMA loads.
718*700637cbSDimitry Andric   case NVPTX::BI__imma_m8n8k32_ld_a_s4:
719*700637cbSDimitry Andric   case NVPTX::BI__imma_m8n8k32_ld_a_u4:
720*700637cbSDimitry Andric   case NVPTX::BI__imma_m8n8k32_ld_b_s4:
721*700637cbSDimitry Andric   case NVPTX::BI__imma_m8n8k32_ld_b_u4:
722*700637cbSDimitry Andric   case NVPTX::BI__imma_m8n8k32_ld_c:
723*700637cbSDimitry Andric   case NVPTX::BI__bmma_m8n8k128_ld_a_b1:
724*700637cbSDimitry Andric   case NVPTX::BI__bmma_m8n8k128_ld_b_b1:
725*700637cbSDimitry Andric   case NVPTX::BI__bmma_m8n8k128_ld_c:
726*700637cbSDimitry Andric   // Double MMA loads.
727*700637cbSDimitry Andric   case NVPTX::BI__dmma_m8n8k4_ld_a:
728*700637cbSDimitry Andric   case NVPTX::BI__dmma_m8n8k4_ld_b:
729*700637cbSDimitry Andric   case NVPTX::BI__dmma_m8n8k4_ld_c:
730*700637cbSDimitry Andric   // Alternate float MMA loads.
731*700637cbSDimitry Andric   case NVPTX::BI__mma_bf16_m16n16k16_ld_a:
732*700637cbSDimitry Andric   case NVPTX::BI__mma_bf16_m16n16k16_ld_b:
733*700637cbSDimitry Andric   case NVPTX::BI__mma_bf16_m8n32k16_ld_a:
734*700637cbSDimitry Andric   case NVPTX::BI__mma_bf16_m8n32k16_ld_b:
735*700637cbSDimitry Andric   case NVPTX::BI__mma_bf16_m32n8k16_ld_a:
736*700637cbSDimitry Andric   case NVPTX::BI__mma_bf16_m32n8k16_ld_b:
737*700637cbSDimitry Andric   case NVPTX::BI__mma_tf32_m16n16k8_ld_a:
738*700637cbSDimitry Andric   case NVPTX::BI__mma_tf32_m16n16k8_ld_b:
739*700637cbSDimitry Andric   case NVPTX::BI__mma_tf32_m16n16k8_ld_c: {
740*700637cbSDimitry Andric     Address Dst = EmitPointerWithAlignment(E->getArg(0));
741*700637cbSDimitry Andric     Value *Src = EmitScalarExpr(E->getArg(1));
742*700637cbSDimitry Andric     Value *Ldm = EmitScalarExpr(E->getArg(2));
743*700637cbSDimitry Andric     std::optional<llvm::APSInt> isColMajorArg =
744*700637cbSDimitry Andric         E->getArg(3)->getIntegerConstantExpr(getContext());
745*700637cbSDimitry Andric     if (!isColMajorArg)
746*700637cbSDimitry Andric       return nullptr;
747*700637cbSDimitry Andric     bool isColMajor = isColMajorArg->getSExtValue();
748*700637cbSDimitry Andric     NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID);
749*700637cbSDimitry Andric     unsigned IID = isColMajor ? II.IID_col : II.IID_row;
750*700637cbSDimitry Andric     if (IID == 0)
751*700637cbSDimitry Andric       return nullptr;
752*700637cbSDimitry Andric 
753*700637cbSDimitry Andric     Value *Result =
754*700637cbSDimitry Andric         Builder.CreateCall(CGM.getIntrinsic(IID, Src->getType()), {Src, Ldm});
755*700637cbSDimitry Andric 
756*700637cbSDimitry Andric     // Save returned values.
757*700637cbSDimitry Andric     assert(II.NumResults);
758*700637cbSDimitry Andric     if (II.NumResults == 1) {
759*700637cbSDimitry Andric       Builder.CreateAlignedStore(Result, Dst.emitRawPointer(*this),
760*700637cbSDimitry Andric                                  CharUnits::fromQuantity(4));
761*700637cbSDimitry Andric     } else {
762*700637cbSDimitry Andric       for (unsigned i = 0; i < II.NumResults; ++i) {
763*700637cbSDimitry Andric         Builder.CreateAlignedStore(
764*700637cbSDimitry Andric             Builder.CreateBitCast(Builder.CreateExtractValue(Result, i),
765*700637cbSDimitry Andric                                   Dst.getElementType()),
766*700637cbSDimitry Andric             Builder.CreateGEP(Dst.getElementType(), Dst.emitRawPointer(*this),
767*700637cbSDimitry Andric                               llvm::ConstantInt::get(IntTy, i)),
768*700637cbSDimitry Andric             CharUnits::fromQuantity(4));
769*700637cbSDimitry Andric       }
770*700637cbSDimitry Andric     }
771*700637cbSDimitry Andric     return Result;
772*700637cbSDimitry Andric   }
773*700637cbSDimitry Andric 
774*700637cbSDimitry Andric   case NVPTX::BI__hmma_m16n16k16_st_c_f16:
775*700637cbSDimitry Andric   case NVPTX::BI__hmma_m16n16k16_st_c_f32:
776*700637cbSDimitry Andric   case NVPTX::BI__hmma_m32n8k16_st_c_f16:
777*700637cbSDimitry Andric   case NVPTX::BI__hmma_m32n8k16_st_c_f32:
778*700637cbSDimitry Andric   case NVPTX::BI__hmma_m8n32k16_st_c_f16:
779*700637cbSDimitry Andric   case NVPTX::BI__hmma_m8n32k16_st_c_f32:
780*700637cbSDimitry Andric   case NVPTX::BI__imma_m16n16k16_st_c_i32:
781*700637cbSDimitry Andric   case NVPTX::BI__imma_m32n8k16_st_c_i32:
782*700637cbSDimitry Andric   case NVPTX::BI__imma_m8n32k16_st_c_i32:
783*700637cbSDimitry Andric   case NVPTX::BI__imma_m8n8k32_st_c_i32:
784*700637cbSDimitry Andric   case NVPTX::BI__bmma_m8n8k128_st_c_i32:
785*700637cbSDimitry Andric   case NVPTX::BI__dmma_m8n8k4_st_c_f64:
786*700637cbSDimitry Andric   case NVPTX::BI__mma_m16n16k8_st_c_f32: {
787*700637cbSDimitry Andric     Value *Dst = EmitScalarExpr(E->getArg(0));
788*700637cbSDimitry Andric     Address Src = EmitPointerWithAlignment(E->getArg(1));
789*700637cbSDimitry Andric     Value *Ldm = EmitScalarExpr(E->getArg(2));
790*700637cbSDimitry Andric     std::optional<llvm::APSInt> isColMajorArg =
791*700637cbSDimitry Andric         E->getArg(3)->getIntegerConstantExpr(getContext());
792*700637cbSDimitry Andric     if (!isColMajorArg)
793*700637cbSDimitry Andric       return nullptr;
794*700637cbSDimitry Andric     bool isColMajor = isColMajorArg->getSExtValue();
795*700637cbSDimitry Andric     NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID);
796*700637cbSDimitry Andric     unsigned IID = isColMajor ? II.IID_col : II.IID_row;
797*700637cbSDimitry Andric     if (IID == 0)
798*700637cbSDimitry Andric       return nullptr;
799*700637cbSDimitry Andric     Function *Intrinsic =
800*700637cbSDimitry Andric         CGM.getIntrinsic(IID, Dst->getType());
801*700637cbSDimitry Andric     llvm::Type *ParamType = Intrinsic->getFunctionType()->getParamType(1);
802*700637cbSDimitry Andric     SmallVector<Value *, 10> Values = {Dst};
803*700637cbSDimitry Andric     for (unsigned i = 0; i < II.NumResults; ++i) {
804*700637cbSDimitry Andric       Value *V = Builder.CreateAlignedLoad(
805*700637cbSDimitry Andric           Src.getElementType(),
806*700637cbSDimitry Andric           Builder.CreateGEP(Src.getElementType(), Src.emitRawPointer(*this),
807*700637cbSDimitry Andric                             llvm::ConstantInt::get(IntTy, i)),
808*700637cbSDimitry Andric           CharUnits::fromQuantity(4));
809*700637cbSDimitry Andric       Values.push_back(Builder.CreateBitCast(V, ParamType));
810*700637cbSDimitry Andric     }
811*700637cbSDimitry Andric     Values.push_back(Ldm);
812*700637cbSDimitry Andric     Value *Result = Builder.CreateCall(Intrinsic, Values);
813*700637cbSDimitry Andric     return Result;
814*700637cbSDimitry Andric   }
815*700637cbSDimitry Andric 
816*700637cbSDimitry Andric   // BI__hmma_m16n16k16_mma_<Dtype><CType>(d, a, b, c, layout, satf) -->
817*700637cbSDimitry Andric   // Intrinsic::nvvm_wmma_m16n16k16_mma_sync<layout A,B><DType><CType><Satf>
818*700637cbSDimitry Andric   case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
819*700637cbSDimitry Andric   case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
820*700637cbSDimitry Andric   case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
821*700637cbSDimitry Andric   case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
822*700637cbSDimitry Andric   case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
823*700637cbSDimitry Andric   case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
824*700637cbSDimitry Andric   case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
825*700637cbSDimitry Andric   case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
826*700637cbSDimitry Andric   case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
827*700637cbSDimitry Andric   case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
828*700637cbSDimitry Andric   case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
829*700637cbSDimitry Andric   case NVPTX::BI__hmma_m8n32k16_mma_f16f32:
830*700637cbSDimitry Andric   case NVPTX::BI__imma_m16n16k16_mma_s8:
831*700637cbSDimitry Andric   case NVPTX::BI__imma_m16n16k16_mma_u8:
832*700637cbSDimitry Andric   case NVPTX::BI__imma_m32n8k16_mma_s8:
833*700637cbSDimitry Andric   case NVPTX::BI__imma_m32n8k16_mma_u8:
834*700637cbSDimitry Andric   case NVPTX::BI__imma_m8n32k16_mma_s8:
835*700637cbSDimitry Andric   case NVPTX::BI__imma_m8n32k16_mma_u8:
836*700637cbSDimitry Andric   case NVPTX::BI__imma_m8n8k32_mma_s4:
837*700637cbSDimitry Andric   case NVPTX::BI__imma_m8n8k32_mma_u4:
838*700637cbSDimitry Andric   case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1:
839*700637cbSDimitry Andric   case NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1:
840*700637cbSDimitry Andric   case NVPTX::BI__dmma_m8n8k4_mma_f64:
841*700637cbSDimitry Andric   case NVPTX::BI__mma_bf16_m16n16k16_mma_f32:
842*700637cbSDimitry Andric   case NVPTX::BI__mma_bf16_m8n32k16_mma_f32:
843*700637cbSDimitry Andric   case NVPTX::BI__mma_bf16_m32n8k16_mma_f32:
844*700637cbSDimitry Andric   case NVPTX::BI__mma_tf32_m16n16k8_mma_f32: {
845*700637cbSDimitry Andric     Address Dst = EmitPointerWithAlignment(E->getArg(0));
846*700637cbSDimitry Andric     Address SrcA = EmitPointerWithAlignment(E->getArg(1));
847*700637cbSDimitry Andric     Address SrcB = EmitPointerWithAlignment(E->getArg(2));
848*700637cbSDimitry Andric     Address SrcC = EmitPointerWithAlignment(E->getArg(3));
849*700637cbSDimitry Andric     std::optional<llvm::APSInt> LayoutArg =
850*700637cbSDimitry Andric         E->getArg(4)->getIntegerConstantExpr(getContext());
851*700637cbSDimitry Andric     if (!LayoutArg)
852*700637cbSDimitry Andric       return nullptr;
853*700637cbSDimitry Andric     int Layout = LayoutArg->getSExtValue();
854*700637cbSDimitry Andric     if (Layout < 0 || Layout > 3)
855*700637cbSDimitry Andric       return nullptr;
856*700637cbSDimitry Andric     llvm::APSInt SatfArg;
857*700637cbSDimitry Andric     if (BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1 ||
858*700637cbSDimitry Andric         BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1)
859*700637cbSDimitry Andric       SatfArg = 0;  // .b1 does not have satf argument.
860*700637cbSDimitry Andric     else if (std::optional<llvm::APSInt> OptSatfArg =
861*700637cbSDimitry Andric                  E->getArg(5)->getIntegerConstantExpr(getContext()))
862*700637cbSDimitry Andric       SatfArg = *OptSatfArg;
863*700637cbSDimitry Andric     else
864*700637cbSDimitry Andric       return nullptr;
865*700637cbSDimitry Andric     bool Satf = SatfArg.getSExtValue();
866*700637cbSDimitry Andric     NVPTXMmaInfo MI = getNVPTXMmaInfo(BuiltinID);
867*700637cbSDimitry Andric     unsigned IID = MI.getMMAIntrinsic(Layout, Satf);
868*700637cbSDimitry Andric     if (IID == 0)  // Unsupported combination of Layout/Satf.
869*700637cbSDimitry Andric       return nullptr;
870*700637cbSDimitry Andric 
871*700637cbSDimitry Andric     SmallVector<Value *, 24> Values;
872*700637cbSDimitry Andric     Function *Intrinsic = CGM.getIntrinsic(IID);
873*700637cbSDimitry Andric     llvm::Type *AType = Intrinsic->getFunctionType()->getParamType(0);
874*700637cbSDimitry Andric     // Load A
875*700637cbSDimitry Andric     for (unsigned i = 0; i < MI.NumEltsA; ++i) {
876*700637cbSDimitry Andric       Value *V = Builder.CreateAlignedLoad(
877*700637cbSDimitry Andric           SrcA.getElementType(),
878*700637cbSDimitry Andric           Builder.CreateGEP(SrcA.getElementType(), SrcA.emitRawPointer(*this),
879*700637cbSDimitry Andric                             llvm::ConstantInt::get(IntTy, i)),
880*700637cbSDimitry Andric           CharUnits::fromQuantity(4));
881*700637cbSDimitry Andric       Values.push_back(Builder.CreateBitCast(V, AType));
882*700637cbSDimitry Andric     }
883*700637cbSDimitry Andric     // Load B
884*700637cbSDimitry Andric     llvm::Type *BType = Intrinsic->getFunctionType()->getParamType(MI.NumEltsA);
885*700637cbSDimitry Andric     for (unsigned i = 0; i < MI.NumEltsB; ++i) {
886*700637cbSDimitry Andric       Value *V = Builder.CreateAlignedLoad(
887*700637cbSDimitry Andric           SrcB.getElementType(),
888*700637cbSDimitry Andric           Builder.CreateGEP(SrcB.getElementType(), SrcB.emitRawPointer(*this),
889*700637cbSDimitry Andric                             llvm::ConstantInt::get(IntTy, i)),
890*700637cbSDimitry Andric           CharUnits::fromQuantity(4));
891*700637cbSDimitry Andric       Values.push_back(Builder.CreateBitCast(V, BType));
892*700637cbSDimitry Andric     }
893*700637cbSDimitry Andric     // Load C
894*700637cbSDimitry Andric     llvm::Type *CType =
895*700637cbSDimitry Andric         Intrinsic->getFunctionType()->getParamType(MI.NumEltsA + MI.NumEltsB);
896*700637cbSDimitry Andric     for (unsigned i = 0; i < MI.NumEltsC; ++i) {
897*700637cbSDimitry Andric       Value *V = Builder.CreateAlignedLoad(
898*700637cbSDimitry Andric           SrcC.getElementType(),
899*700637cbSDimitry Andric           Builder.CreateGEP(SrcC.getElementType(), SrcC.emitRawPointer(*this),
900*700637cbSDimitry Andric                             llvm::ConstantInt::get(IntTy, i)),
901*700637cbSDimitry Andric           CharUnits::fromQuantity(4));
902*700637cbSDimitry Andric       Values.push_back(Builder.CreateBitCast(V, CType));
903*700637cbSDimitry Andric     }
904*700637cbSDimitry Andric     Value *Result = Builder.CreateCall(Intrinsic, Values);
905*700637cbSDimitry Andric     llvm::Type *DType = Dst.getElementType();
906*700637cbSDimitry Andric     for (unsigned i = 0; i < MI.NumEltsD; ++i)
907*700637cbSDimitry Andric       Builder.CreateAlignedStore(
908*700637cbSDimitry Andric           Builder.CreateBitCast(Builder.CreateExtractValue(Result, i), DType),
909*700637cbSDimitry Andric           Builder.CreateGEP(Dst.getElementType(), Dst.emitRawPointer(*this),
910*700637cbSDimitry Andric                             llvm::ConstantInt::get(IntTy, i)),
911*700637cbSDimitry Andric           CharUnits::fromQuantity(4));
912*700637cbSDimitry Andric     return Result;
913*700637cbSDimitry Andric   }
914*700637cbSDimitry Andric   // The following builtins require half type support
915*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ex2_approx_f16:
916*700637cbSDimitry Andric     return MakeHalfType(Intrinsic::nvvm_ex2_approx_f16, BuiltinID, E, *this);
917*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ex2_approx_f16x2:
918*700637cbSDimitry Andric     return MakeHalfType(Intrinsic::nvvm_ex2_approx_f16x2, BuiltinID, E, *this);
919*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ff2f16x2_rn:
920*700637cbSDimitry Andric     return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rn, BuiltinID, E, *this);
921*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ff2f16x2_rn_relu:
922*700637cbSDimitry Andric     return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rn_relu, BuiltinID, E, *this);
923*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ff2f16x2_rz:
924*700637cbSDimitry Andric     return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rz, BuiltinID, E, *this);
925*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ff2f16x2_rz_relu:
926*700637cbSDimitry Andric     return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rz_relu, BuiltinID, E, *this);
927*700637cbSDimitry Andric   case NVPTX::BI__nvvm_fma_rn_f16:
928*700637cbSDimitry Andric     return MakeHalfType(Intrinsic::nvvm_fma_rn_f16, BuiltinID, E, *this);
929*700637cbSDimitry Andric   case NVPTX::BI__nvvm_fma_rn_f16x2:
930*700637cbSDimitry Andric     return MakeHalfType(Intrinsic::nvvm_fma_rn_f16x2, BuiltinID, E, *this);
931*700637cbSDimitry Andric   case NVPTX::BI__nvvm_fma_rn_ftz_f16:
932*700637cbSDimitry Andric     return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_f16, BuiltinID, E, *this);
933*700637cbSDimitry Andric   case NVPTX::BI__nvvm_fma_rn_ftz_f16x2:
934*700637cbSDimitry Andric     return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_f16x2, BuiltinID, E, *this);
935*700637cbSDimitry Andric   case NVPTX::BI__nvvm_fma_rn_ftz_relu_f16:
936*700637cbSDimitry Andric     return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_relu_f16, BuiltinID, E,
937*700637cbSDimitry Andric                         *this);
938*700637cbSDimitry Andric   case NVPTX::BI__nvvm_fma_rn_ftz_relu_f16x2:
939*700637cbSDimitry Andric     return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_relu_f16x2, BuiltinID, E,
940*700637cbSDimitry Andric                         *this);
941*700637cbSDimitry Andric   case NVPTX::BI__nvvm_fma_rn_ftz_sat_f16:
942*700637cbSDimitry Andric     return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_sat_f16, BuiltinID, E,
943*700637cbSDimitry Andric                         *this);
944*700637cbSDimitry Andric   case NVPTX::BI__nvvm_fma_rn_ftz_sat_f16x2:
945*700637cbSDimitry Andric     return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_sat_f16x2, BuiltinID, E,
946*700637cbSDimitry Andric                         *this);
947*700637cbSDimitry Andric   case NVPTX::BI__nvvm_fma_rn_relu_f16:
948*700637cbSDimitry Andric     return MakeHalfType(Intrinsic::nvvm_fma_rn_relu_f16, BuiltinID, E, *this);
949*700637cbSDimitry Andric   case NVPTX::BI__nvvm_fma_rn_relu_f16x2:
950*700637cbSDimitry Andric     return MakeHalfType(Intrinsic::nvvm_fma_rn_relu_f16x2, BuiltinID, E, *this);
951*700637cbSDimitry Andric   case NVPTX::BI__nvvm_fma_rn_sat_f16:
952*700637cbSDimitry Andric     return MakeHalfType(Intrinsic::nvvm_fma_rn_sat_f16, BuiltinID, E, *this);
953*700637cbSDimitry Andric   case NVPTX::BI__nvvm_fma_rn_sat_f16x2:
954*700637cbSDimitry Andric     return MakeHalfType(Intrinsic::nvvm_fma_rn_sat_f16x2, BuiltinID, E, *this);
955*700637cbSDimitry Andric   case NVPTX::BI__nvvm_fmax_f16:
956*700637cbSDimitry Andric     return MakeHalfType(Intrinsic::nvvm_fmax_f16, BuiltinID, E, *this);
957*700637cbSDimitry Andric   case NVPTX::BI__nvvm_fmax_f16x2:
958*700637cbSDimitry Andric     return MakeHalfType(Intrinsic::nvvm_fmax_f16x2, BuiltinID, E, *this);
959*700637cbSDimitry Andric   case NVPTX::BI__nvvm_fmax_ftz_f16:
960*700637cbSDimitry Andric     return MakeHalfType(Intrinsic::nvvm_fmax_ftz_f16, BuiltinID, E, *this);
961*700637cbSDimitry Andric   case NVPTX::BI__nvvm_fmax_ftz_f16x2:
962*700637cbSDimitry Andric     return MakeHalfType(Intrinsic::nvvm_fmax_ftz_f16x2, BuiltinID, E, *this);
963*700637cbSDimitry Andric   case NVPTX::BI__nvvm_fmax_ftz_nan_f16:
964*700637cbSDimitry Andric     return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_f16, BuiltinID, E, *this);
965*700637cbSDimitry Andric   case NVPTX::BI__nvvm_fmax_ftz_nan_f16x2:
966*700637cbSDimitry Andric     return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_f16x2, BuiltinID, E,
967*700637cbSDimitry Andric                         *this);
968*700637cbSDimitry Andric   case NVPTX::BI__nvvm_fmax_ftz_nan_xorsign_abs_f16:
969*700637cbSDimitry Andric     return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f16, BuiltinID,
970*700637cbSDimitry Andric                         E, *this);
971*700637cbSDimitry Andric   case NVPTX::BI__nvvm_fmax_ftz_nan_xorsign_abs_f16x2:
972*700637cbSDimitry Andric     return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f16x2,
973*700637cbSDimitry Andric                         BuiltinID, E, *this);
974*700637cbSDimitry Andric   case NVPTX::BI__nvvm_fmax_ftz_xorsign_abs_f16:
975*700637cbSDimitry Andric     return MakeHalfType(Intrinsic::nvvm_fmax_ftz_xorsign_abs_f16, BuiltinID, E,
976*700637cbSDimitry Andric                         *this);
977*700637cbSDimitry Andric   case NVPTX::BI__nvvm_fmax_ftz_xorsign_abs_f16x2:
978*700637cbSDimitry Andric     return MakeHalfType(Intrinsic::nvvm_fmax_ftz_xorsign_abs_f16x2, BuiltinID,
979*700637cbSDimitry Andric                         E, *this);
980*700637cbSDimitry Andric   case NVPTX::BI__nvvm_fmax_nan_f16:
981*700637cbSDimitry Andric     return MakeHalfType(Intrinsic::nvvm_fmax_nan_f16, BuiltinID, E, *this);
982*700637cbSDimitry Andric   case NVPTX::BI__nvvm_fmax_nan_f16x2:
983*700637cbSDimitry Andric     return MakeHalfType(Intrinsic::nvvm_fmax_nan_f16x2, BuiltinID, E, *this);
984*700637cbSDimitry Andric   case NVPTX::BI__nvvm_fmax_nan_xorsign_abs_f16:
985*700637cbSDimitry Andric     return MakeHalfType(Intrinsic::nvvm_fmax_nan_xorsign_abs_f16, BuiltinID, E,
986*700637cbSDimitry Andric                         *this);
987*700637cbSDimitry Andric   case NVPTX::BI__nvvm_fmax_nan_xorsign_abs_f16x2:
988*700637cbSDimitry Andric     return MakeHalfType(Intrinsic::nvvm_fmax_nan_xorsign_abs_f16x2, BuiltinID,
989*700637cbSDimitry Andric                         E, *this);
990*700637cbSDimitry Andric   case NVPTX::BI__nvvm_fmax_xorsign_abs_f16:
991*700637cbSDimitry Andric     return MakeHalfType(Intrinsic::nvvm_fmax_xorsign_abs_f16, BuiltinID, E,
992*700637cbSDimitry Andric                         *this);
993*700637cbSDimitry Andric   case NVPTX::BI__nvvm_fmax_xorsign_abs_f16x2:
994*700637cbSDimitry Andric     return MakeHalfType(Intrinsic::nvvm_fmax_xorsign_abs_f16x2, BuiltinID, E,
995*700637cbSDimitry Andric                         *this);
996*700637cbSDimitry Andric   case NVPTX::BI__nvvm_fmin_f16:
997*700637cbSDimitry Andric     return MakeHalfType(Intrinsic::nvvm_fmin_f16, BuiltinID, E, *this);
998*700637cbSDimitry Andric   case NVPTX::BI__nvvm_fmin_f16x2:
999*700637cbSDimitry Andric     return MakeHalfType(Intrinsic::nvvm_fmin_f16x2, BuiltinID, E, *this);
1000*700637cbSDimitry Andric   case NVPTX::BI__nvvm_fmin_ftz_f16:
1001*700637cbSDimitry Andric     return MakeHalfType(Intrinsic::nvvm_fmin_ftz_f16, BuiltinID, E, *this);
1002*700637cbSDimitry Andric   case NVPTX::BI__nvvm_fmin_ftz_f16x2:
1003*700637cbSDimitry Andric     return MakeHalfType(Intrinsic::nvvm_fmin_ftz_f16x2, BuiltinID, E, *this);
1004*700637cbSDimitry Andric   case NVPTX::BI__nvvm_fmin_ftz_nan_f16:
1005*700637cbSDimitry Andric     return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_f16, BuiltinID, E, *this);
1006*700637cbSDimitry Andric   case NVPTX::BI__nvvm_fmin_ftz_nan_f16x2:
1007*700637cbSDimitry Andric     return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_f16x2, BuiltinID, E,
1008*700637cbSDimitry Andric                         *this);
1009*700637cbSDimitry Andric   case NVPTX::BI__nvvm_fmin_ftz_nan_xorsign_abs_f16:
1010*700637cbSDimitry Andric     return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f16, BuiltinID,
1011*700637cbSDimitry Andric                         E, *this);
1012*700637cbSDimitry Andric   case NVPTX::BI__nvvm_fmin_ftz_nan_xorsign_abs_f16x2:
1013*700637cbSDimitry Andric     return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f16x2,
1014*700637cbSDimitry Andric                         BuiltinID, E, *this);
1015*700637cbSDimitry Andric   case NVPTX::BI__nvvm_fmin_ftz_xorsign_abs_f16:
1016*700637cbSDimitry Andric     return MakeHalfType(Intrinsic::nvvm_fmin_ftz_xorsign_abs_f16, BuiltinID, E,
1017*700637cbSDimitry Andric                         *this);
1018*700637cbSDimitry Andric   case NVPTX::BI__nvvm_fmin_ftz_xorsign_abs_f16x2:
1019*700637cbSDimitry Andric     return MakeHalfType(Intrinsic::nvvm_fmin_ftz_xorsign_abs_f16x2, BuiltinID,
1020*700637cbSDimitry Andric                         E, *this);
1021*700637cbSDimitry Andric   case NVPTX::BI__nvvm_fmin_nan_f16:
1022*700637cbSDimitry Andric     return MakeHalfType(Intrinsic::nvvm_fmin_nan_f16, BuiltinID, E, *this);
1023*700637cbSDimitry Andric   case NVPTX::BI__nvvm_fmin_nan_f16x2:
1024*700637cbSDimitry Andric     return MakeHalfType(Intrinsic::nvvm_fmin_nan_f16x2, BuiltinID, E, *this);
1025*700637cbSDimitry Andric   case NVPTX::BI__nvvm_fmin_nan_xorsign_abs_f16:
1026*700637cbSDimitry Andric     return MakeHalfType(Intrinsic::nvvm_fmin_nan_xorsign_abs_f16, BuiltinID, E,
1027*700637cbSDimitry Andric                         *this);
1028*700637cbSDimitry Andric   case NVPTX::BI__nvvm_fmin_nan_xorsign_abs_f16x2:
1029*700637cbSDimitry Andric     return MakeHalfType(Intrinsic::nvvm_fmin_nan_xorsign_abs_f16x2, BuiltinID,
1030*700637cbSDimitry Andric                         E, *this);
1031*700637cbSDimitry Andric   case NVPTX::BI__nvvm_fmin_xorsign_abs_f16:
1032*700637cbSDimitry Andric     return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16, BuiltinID, E,
1033*700637cbSDimitry Andric                         *this);
1034*700637cbSDimitry Andric   case NVPTX::BI__nvvm_fmin_xorsign_abs_f16x2:
1035*700637cbSDimitry Andric     return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16x2, BuiltinID, E,
1036*700637cbSDimitry Andric                         *this);
1037*700637cbSDimitry Andric   case NVPTX::BI__nvvm_fabs_f:
1038*700637cbSDimitry Andric   case NVPTX::BI__nvvm_abs_bf16:
1039*700637cbSDimitry Andric   case NVPTX::BI__nvvm_abs_bf16x2:
1040*700637cbSDimitry Andric   case NVPTX::BI__nvvm_fabs_f16:
1041*700637cbSDimitry Andric   case NVPTX::BI__nvvm_fabs_f16x2:
1042*700637cbSDimitry Andric     return Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_fabs,
1043*700637cbSDimitry Andric                                         EmitScalarExpr(E->getArg(0)));
1044*700637cbSDimitry Andric   case NVPTX::BI__nvvm_fabs_ftz_f:
1045*700637cbSDimitry Andric   case NVPTX::BI__nvvm_fabs_ftz_f16:
1046*700637cbSDimitry Andric   case NVPTX::BI__nvvm_fabs_ftz_f16x2:
1047*700637cbSDimitry Andric     return Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_fabs_ftz,
1048*700637cbSDimitry Andric                                         EmitScalarExpr(E->getArg(0)));
1049*700637cbSDimitry Andric   case NVPTX::BI__nvvm_fabs_d:
1050*700637cbSDimitry Andric     return Builder.CreateUnaryIntrinsic(Intrinsic::fabs,
1051*700637cbSDimitry Andric                                         EmitScalarExpr(E->getArg(0)));
1052*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ldg_h:
1053*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ldg_h2:
1054*700637cbSDimitry Andric     return MakeHalfType(Intrinsic::not_intrinsic, BuiltinID, E, *this);
1055*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ldu_h:
1056*700637cbSDimitry Andric   case NVPTX::BI__nvvm_ldu_h2:
1057*700637cbSDimitry Andric     return MakeHalfType(Intrinsic::nvvm_ldu_global_f, BuiltinID, E, *this);
1058*700637cbSDimitry Andric   case NVPTX::BI__nvvm_cp_async_ca_shared_global_4:
1059*700637cbSDimitry Andric     return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_4,
1060*700637cbSDimitry Andric                        Intrinsic::nvvm_cp_async_ca_shared_global_4_s, *this, E,
1061*700637cbSDimitry Andric                        4);
1062*700637cbSDimitry Andric   case NVPTX::BI__nvvm_cp_async_ca_shared_global_8:
1063*700637cbSDimitry Andric     return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_8,
1064*700637cbSDimitry Andric                        Intrinsic::nvvm_cp_async_ca_shared_global_8_s, *this, E,
1065*700637cbSDimitry Andric                        8);
1066*700637cbSDimitry Andric   case NVPTX::BI__nvvm_cp_async_ca_shared_global_16:
1067*700637cbSDimitry Andric     return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_16,
1068*700637cbSDimitry Andric                        Intrinsic::nvvm_cp_async_ca_shared_global_16_s, *this, E,
1069*700637cbSDimitry Andric                        16);
1070*700637cbSDimitry Andric   case NVPTX::BI__nvvm_cp_async_cg_shared_global_16:
1071*700637cbSDimitry Andric     return MakeCpAsync(Intrinsic::nvvm_cp_async_cg_shared_global_16,
1072*700637cbSDimitry Andric                        Intrinsic::nvvm_cp_async_cg_shared_global_16_s, *this, E,
1073*700637cbSDimitry Andric                        16);
1074*700637cbSDimitry Andric   case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_x:
1075*700637cbSDimitry Andric     return Builder.CreateCall(
1076*700637cbSDimitry Andric         CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_x));
1077*700637cbSDimitry Andric   case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_y:
1078*700637cbSDimitry Andric     return Builder.CreateCall(
1079*700637cbSDimitry Andric         CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_y));
1080*700637cbSDimitry Andric   case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_z:
1081*700637cbSDimitry Andric     return Builder.CreateCall(
1082*700637cbSDimitry Andric         CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_z));
1083*700637cbSDimitry Andric   case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_w:
1084*700637cbSDimitry Andric     return Builder.CreateCall(
1085*700637cbSDimitry Andric         CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_w));
1086*700637cbSDimitry Andric   case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_x:
1087*700637cbSDimitry Andric     return Builder.CreateCall(
1088*700637cbSDimitry Andric         CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_x));
1089*700637cbSDimitry Andric   case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_y:
1090*700637cbSDimitry Andric     return Builder.CreateCall(
1091*700637cbSDimitry Andric         CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_y));
1092*700637cbSDimitry Andric   case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_z:
1093*700637cbSDimitry Andric     return Builder.CreateCall(
1094*700637cbSDimitry Andric         CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_z));
1095*700637cbSDimitry Andric   case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_w:
1096*700637cbSDimitry Andric     return Builder.CreateCall(
1097*700637cbSDimitry Andric         CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_w));
1098*700637cbSDimitry Andric   case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_x:
1099*700637cbSDimitry Andric     return Builder.CreateCall(
1100*700637cbSDimitry Andric         CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_x));
1101*700637cbSDimitry Andric   case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_y:
1102*700637cbSDimitry Andric     return Builder.CreateCall(
1103*700637cbSDimitry Andric         CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_y));
1104*700637cbSDimitry Andric   case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_z:
1105*700637cbSDimitry Andric     return Builder.CreateCall(
1106*700637cbSDimitry Andric         CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_z));
1107*700637cbSDimitry Andric   case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_w:
1108*700637cbSDimitry Andric     return Builder.CreateCall(
1109*700637cbSDimitry Andric         CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_w));
1110*700637cbSDimitry Andric   case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_x:
1111*700637cbSDimitry Andric     return Builder.CreateCall(
1112*700637cbSDimitry Andric         CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_x));
1113*700637cbSDimitry Andric   case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_y:
1114*700637cbSDimitry Andric     return Builder.CreateCall(
1115*700637cbSDimitry Andric         CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_y));
1116*700637cbSDimitry Andric   case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_z:
1117*700637cbSDimitry Andric     return Builder.CreateCall(
1118*700637cbSDimitry Andric         CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_z));
1119*700637cbSDimitry Andric   case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_w:
1120*700637cbSDimitry Andric     return Builder.CreateCall(
1121*700637cbSDimitry Andric         CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_w));
1122*700637cbSDimitry Andric   case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctarank:
1123*700637cbSDimitry Andric     return Builder.CreateCall(
1124*700637cbSDimitry Andric         CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctarank));
1125*700637cbSDimitry Andric   case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctarank:
1126*700637cbSDimitry Andric     return Builder.CreateCall(
1127*700637cbSDimitry Andric         CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctarank));
1128*700637cbSDimitry Andric   case NVPTX::BI__nvvm_is_explicit_cluster:
1129*700637cbSDimitry Andric     return Builder.CreateCall(
1130*700637cbSDimitry Andric         CGM.getIntrinsic(Intrinsic::nvvm_is_explicit_cluster));
1131*700637cbSDimitry Andric   case NVPTX::BI__nvvm_isspacep_shared_cluster:
1132*700637cbSDimitry Andric     return Builder.CreateCall(
1133*700637cbSDimitry Andric         CGM.getIntrinsic(Intrinsic::nvvm_isspacep_shared_cluster),
1134*700637cbSDimitry Andric         EmitScalarExpr(E->getArg(0)));
1135*700637cbSDimitry Andric   case NVPTX::BI__nvvm_mapa:
1136*700637cbSDimitry Andric     return Builder.CreateCall(
1137*700637cbSDimitry Andric         CGM.getIntrinsic(Intrinsic::nvvm_mapa),
1138*700637cbSDimitry Andric         {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
1139*700637cbSDimitry Andric   case NVPTX::BI__nvvm_mapa_shared_cluster:
1140*700637cbSDimitry Andric     return Builder.CreateCall(
1141*700637cbSDimitry Andric         CGM.getIntrinsic(Intrinsic::nvvm_mapa_shared_cluster),
1142*700637cbSDimitry Andric         {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
1143*700637cbSDimitry Andric   case NVPTX::BI__nvvm_getctarank:
1144*700637cbSDimitry Andric     return Builder.CreateCall(
1145*700637cbSDimitry Andric         CGM.getIntrinsic(Intrinsic::nvvm_getctarank),
1146*700637cbSDimitry Andric         EmitScalarExpr(E->getArg(0)));
1147*700637cbSDimitry Andric   case NVPTX::BI__nvvm_getctarank_shared_cluster:
1148*700637cbSDimitry Andric     return Builder.CreateCall(
1149*700637cbSDimitry Andric         CGM.getIntrinsic(Intrinsic::nvvm_getctarank_shared_cluster),
1150*700637cbSDimitry Andric         EmitScalarExpr(E->getArg(0)));
1151*700637cbSDimitry Andric   case NVPTX::BI__nvvm_barrier_cluster_arrive:
1152*700637cbSDimitry Andric     return Builder.CreateCall(
1153*700637cbSDimitry Andric         CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_arrive));
1154*700637cbSDimitry Andric   case NVPTX::BI__nvvm_barrier_cluster_arrive_relaxed:
1155*700637cbSDimitry Andric     return Builder.CreateCall(
1156*700637cbSDimitry Andric         CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_arrive_relaxed));
1157*700637cbSDimitry Andric   case NVPTX::BI__nvvm_barrier_cluster_wait:
1158*700637cbSDimitry Andric     return Builder.CreateCall(
1159*700637cbSDimitry Andric         CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_wait));
1160*700637cbSDimitry Andric   case NVPTX::BI__nvvm_fence_sc_cluster:
1161*700637cbSDimitry Andric     return Builder.CreateCall(
1162*700637cbSDimitry Andric         CGM.getIntrinsic(Intrinsic::nvvm_fence_sc_cluster));
1163*700637cbSDimitry Andric   case NVPTX::BI__nvvm_bar_sync:
1164*700637cbSDimitry Andric     return Builder.CreateCall(
1165*700637cbSDimitry Andric         CGM.getIntrinsic(Intrinsic::nvvm_barrier_cta_sync_aligned_all),
1166*700637cbSDimitry Andric         EmitScalarExpr(E->getArg(0)));
1167*700637cbSDimitry Andric   case NVPTX::BI__syncthreads:
1168*700637cbSDimitry Andric     return Builder.CreateCall(
1169*700637cbSDimitry Andric         CGM.getIntrinsic(Intrinsic::nvvm_barrier_cta_sync_aligned_all),
1170*700637cbSDimitry Andric         Builder.getInt32(0));
1171*700637cbSDimitry Andric   case NVPTX::BI__nvvm_barrier_sync:
1172*700637cbSDimitry Andric     return Builder.CreateCall(
1173*700637cbSDimitry Andric         CGM.getIntrinsic(Intrinsic::nvvm_barrier_cta_sync_all),
1174*700637cbSDimitry Andric         EmitScalarExpr(E->getArg(0)));
1175*700637cbSDimitry Andric   case NVPTX::BI__nvvm_barrier_sync_cnt:
1176*700637cbSDimitry Andric     return Builder.CreateCall(
1177*700637cbSDimitry Andric         CGM.getIntrinsic(Intrinsic::nvvm_barrier_cta_sync_count),
1178*700637cbSDimitry Andric         {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
1179*700637cbSDimitry Andric   default:
1180*700637cbSDimitry Andric     return nullptr;
1181*700637cbSDimitry Andric   }
1182*700637cbSDimitry Andric }
1183