1 //===--- NVVMIntrinsicUtils.h -----------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This file contains the definitions of the enumerations and flags
11 /// associated with NVVM Intrinsics, along with some helper functions.
12 //
13 //===----------------------------------------------------------------------===//
14
15 #ifndef LLVM_IR_NVVMINTRINSICUTILS_H
16 #define LLVM_IR_NVVMINTRINSICUTILS_H
17
18 #include <stdint.h>
19
20 #include "llvm/ADT/APFloat.h"
21 #include "llvm/IR/Intrinsics.h"
22 #include "llvm/IR/IntrinsicsNVPTX.h"
23
24 namespace llvm {
25 namespace nvvm {
26
/// Reduction operations available when a TMA copy moves data from shared
/// to global memory, i.e. for the "cp.reduce.async.bulk.tensor.*" family
/// of PTX instructions.
///
/// Every enumerator has an explicitly assigned value.
/// NOTE(review): presumably these values are encoded as integer operands
/// elsewhere, so confirm all users before renumbering.
enum class TMAReductionOp : uint8_t {
  ADD = 0,
  MIN = 1,
  MAX = 2,
  INC = 3,
  DEC = 4,
  AND = 5,
  OR = 6,
  XOR = 7,
};
40
/// Selects between the cta_group::1 and cta_group::2 variants of the
/// TMA/TCGEN05 family of PTX instructions, or no cta_group modifier at all.
enum class CTAGroupKind : uint8_t {
  /// Default: no cta_group modifier.
  CG_NONE = 0,
  /// The cta_group::1 modifier.
  CG_1 = 1,
  /// The cta_group::2 modifier.
  CG_2 = 2,
};
49
FPToIntegerIntrinsicShouldFTZ(Intrinsic::ID IntrinsicID)50 inline bool FPToIntegerIntrinsicShouldFTZ(Intrinsic::ID IntrinsicID) {
51 switch (IntrinsicID) {
52 case Intrinsic::nvvm_f2i_rm_ftz:
53 case Intrinsic::nvvm_f2i_rn_ftz:
54 case Intrinsic::nvvm_f2i_rp_ftz:
55 case Intrinsic::nvvm_f2i_rz_ftz:
56
57 case Intrinsic::nvvm_f2ui_rm_ftz:
58 case Intrinsic::nvvm_f2ui_rn_ftz:
59 case Intrinsic::nvvm_f2ui_rp_ftz:
60 case Intrinsic::nvvm_f2ui_rz_ftz:
61
62 case Intrinsic::nvvm_f2ll_rm_ftz:
63 case Intrinsic::nvvm_f2ll_rn_ftz:
64 case Intrinsic::nvvm_f2ll_rp_ftz:
65 case Intrinsic::nvvm_f2ll_rz_ftz:
66
67 case Intrinsic::nvvm_f2ull_rm_ftz:
68 case Intrinsic::nvvm_f2ull_rn_ftz:
69 case Intrinsic::nvvm_f2ull_rp_ftz:
70 case Intrinsic::nvvm_f2ull_rz_ftz:
71 return true;
72
73 case Intrinsic::nvvm_f2i_rm:
74 case Intrinsic::nvvm_f2i_rn:
75 case Intrinsic::nvvm_f2i_rp:
76 case Intrinsic::nvvm_f2i_rz:
77
78 case Intrinsic::nvvm_f2ui_rm:
79 case Intrinsic::nvvm_f2ui_rn:
80 case Intrinsic::nvvm_f2ui_rp:
81 case Intrinsic::nvvm_f2ui_rz:
82
83 case Intrinsic::nvvm_d2i_rm:
84 case Intrinsic::nvvm_d2i_rn:
85 case Intrinsic::nvvm_d2i_rp:
86 case Intrinsic::nvvm_d2i_rz:
87
88 case Intrinsic::nvvm_d2ui_rm:
89 case Intrinsic::nvvm_d2ui_rn:
90 case Intrinsic::nvvm_d2ui_rp:
91 case Intrinsic::nvvm_d2ui_rz:
92
93 case Intrinsic::nvvm_f2ll_rm:
94 case Intrinsic::nvvm_f2ll_rn:
95 case Intrinsic::nvvm_f2ll_rp:
96 case Intrinsic::nvvm_f2ll_rz:
97
98 case Intrinsic::nvvm_f2ull_rm:
99 case Intrinsic::nvvm_f2ull_rn:
100 case Intrinsic::nvvm_f2ull_rp:
101 case Intrinsic::nvvm_f2ull_rz:
102
103 case Intrinsic::nvvm_d2ll_rm:
104 case Intrinsic::nvvm_d2ll_rn:
105 case Intrinsic::nvvm_d2ll_rp:
106 case Intrinsic::nvvm_d2ll_rz:
107
108 case Intrinsic::nvvm_d2ull_rm:
109 case Intrinsic::nvvm_d2ull_rn:
110 case Intrinsic::nvvm_d2ull_rp:
111 case Intrinsic::nvvm_d2ull_rz:
112 return false;
113 }
114 llvm_unreachable("Checking FTZ flag for invalid f2i/d2i intrinsic");
115 return false;
116 }
117
FPToIntegerIntrinsicResultIsSigned(Intrinsic::ID IntrinsicID)118 inline bool FPToIntegerIntrinsicResultIsSigned(Intrinsic::ID IntrinsicID) {
119 switch (IntrinsicID) {
120 // f2i
121 case Intrinsic::nvvm_f2i_rm:
122 case Intrinsic::nvvm_f2i_rm_ftz:
123 case Intrinsic::nvvm_f2i_rn:
124 case Intrinsic::nvvm_f2i_rn_ftz:
125 case Intrinsic::nvvm_f2i_rp:
126 case Intrinsic::nvvm_f2i_rp_ftz:
127 case Intrinsic::nvvm_f2i_rz:
128 case Intrinsic::nvvm_f2i_rz_ftz:
129 // d2i
130 case Intrinsic::nvvm_d2i_rm:
131 case Intrinsic::nvvm_d2i_rn:
132 case Intrinsic::nvvm_d2i_rp:
133 case Intrinsic::nvvm_d2i_rz:
134 // f2ll
135 case Intrinsic::nvvm_f2ll_rm:
136 case Intrinsic::nvvm_f2ll_rm_ftz:
137 case Intrinsic::nvvm_f2ll_rn:
138 case Intrinsic::nvvm_f2ll_rn_ftz:
139 case Intrinsic::nvvm_f2ll_rp:
140 case Intrinsic::nvvm_f2ll_rp_ftz:
141 case Intrinsic::nvvm_f2ll_rz:
142 case Intrinsic::nvvm_f2ll_rz_ftz:
143 // d2ll
144 case Intrinsic::nvvm_d2ll_rm:
145 case Intrinsic::nvvm_d2ll_rn:
146 case Intrinsic::nvvm_d2ll_rp:
147 case Intrinsic::nvvm_d2ll_rz:
148 return true;
149
150 // f2ui
151 case Intrinsic::nvvm_f2ui_rm:
152 case Intrinsic::nvvm_f2ui_rm_ftz:
153 case Intrinsic::nvvm_f2ui_rn:
154 case Intrinsic::nvvm_f2ui_rn_ftz:
155 case Intrinsic::nvvm_f2ui_rp:
156 case Intrinsic::nvvm_f2ui_rp_ftz:
157 case Intrinsic::nvvm_f2ui_rz:
158 case Intrinsic::nvvm_f2ui_rz_ftz:
159 // d2ui
160 case Intrinsic::nvvm_d2ui_rm:
161 case Intrinsic::nvvm_d2ui_rn:
162 case Intrinsic::nvvm_d2ui_rp:
163 case Intrinsic::nvvm_d2ui_rz:
164 // f2ull
165 case Intrinsic::nvvm_f2ull_rm:
166 case Intrinsic::nvvm_f2ull_rm_ftz:
167 case Intrinsic::nvvm_f2ull_rn:
168 case Intrinsic::nvvm_f2ull_rn_ftz:
169 case Intrinsic::nvvm_f2ull_rp:
170 case Intrinsic::nvvm_f2ull_rp_ftz:
171 case Intrinsic::nvvm_f2ull_rz:
172 case Intrinsic::nvvm_f2ull_rz_ftz:
173 // d2ull
174 case Intrinsic::nvvm_d2ull_rm:
175 case Intrinsic::nvvm_d2ull_rn:
176 case Intrinsic::nvvm_d2ull_rp:
177 case Intrinsic::nvvm_d2ull_rz:
178 return false;
179 }
180 llvm_unreachable(
181 "Checking invalid f2i/d2i intrinsic for signed int conversion");
182 return false;
183 }
184
185 inline APFloat::roundingMode
GetFPToIntegerRoundingMode(Intrinsic::ID IntrinsicID)186 GetFPToIntegerRoundingMode(Intrinsic::ID IntrinsicID) {
187 switch (IntrinsicID) {
188 // RM:
189 case Intrinsic::nvvm_f2i_rm:
190 case Intrinsic::nvvm_f2ui_rm:
191 case Intrinsic::nvvm_f2i_rm_ftz:
192 case Intrinsic::nvvm_f2ui_rm_ftz:
193 case Intrinsic::nvvm_d2i_rm:
194 case Intrinsic::nvvm_d2ui_rm:
195
196 case Intrinsic::nvvm_f2ll_rm:
197 case Intrinsic::nvvm_f2ull_rm:
198 case Intrinsic::nvvm_f2ll_rm_ftz:
199 case Intrinsic::nvvm_f2ull_rm_ftz:
200 case Intrinsic::nvvm_d2ll_rm:
201 case Intrinsic::nvvm_d2ull_rm:
202 return APFloat::rmTowardNegative;
203
204 // RN:
205 case Intrinsic::nvvm_f2i_rn:
206 case Intrinsic::nvvm_f2ui_rn:
207 case Intrinsic::nvvm_f2i_rn_ftz:
208 case Intrinsic::nvvm_f2ui_rn_ftz:
209 case Intrinsic::nvvm_d2i_rn:
210 case Intrinsic::nvvm_d2ui_rn:
211
212 case Intrinsic::nvvm_f2ll_rn:
213 case Intrinsic::nvvm_f2ull_rn:
214 case Intrinsic::nvvm_f2ll_rn_ftz:
215 case Intrinsic::nvvm_f2ull_rn_ftz:
216 case Intrinsic::nvvm_d2ll_rn:
217 case Intrinsic::nvvm_d2ull_rn:
218 return APFloat::rmNearestTiesToEven;
219
220 // RP:
221 case Intrinsic::nvvm_f2i_rp:
222 case Intrinsic::nvvm_f2ui_rp:
223 case Intrinsic::nvvm_f2i_rp_ftz:
224 case Intrinsic::nvvm_f2ui_rp_ftz:
225 case Intrinsic::nvvm_d2i_rp:
226 case Intrinsic::nvvm_d2ui_rp:
227
228 case Intrinsic::nvvm_f2ll_rp:
229 case Intrinsic::nvvm_f2ull_rp:
230 case Intrinsic::nvvm_f2ll_rp_ftz:
231 case Intrinsic::nvvm_f2ull_rp_ftz:
232 case Intrinsic::nvvm_d2ll_rp:
233 case Intrinsic::nvvm_d2ull_rp:
234 return APFloat::rmTowardPositive;
235
236 // RZ:
237 case Intrinsic::nvvm_f2i_rz:
238 case Intrinsic::nvvm_f2ui_rz:
239 case Intrinsic::nvvm_f2i_rz_ftz:
240 case Intrinsic::nvvm_f2ui_rz_ftz:
241 case Intrinsic::nvvm_d2i_rz:
242 case Intrinsic::nvvm_d2ui_rz:
243
244 case Intrinsic::nvvm_f2ll_rz:
245 case Intrinsic::nvvm_f2ull_rz:
246 case Intrinsic::nvvm_f2ll_rz_ftz:
247 case Intrinsic::nvvm_f2ull_rz_ftz:
248 case Intrinsic::nvvm_d2ll_rz:
249 case Intrinsic::nvvm_d2ull_rz:
250 return APFloat::rmTowardZero;
251 }
252 llvm_unreachable("Checking rounding mode for invalid f2i/d2i intrinsic");
253 return APFloat::roundingMode::Invalid;
254 }
255
FMinFMaxShouldFTZ(Intrinsic::ID IntrinsicID)256 inline bool FMinFMaxShouldFTZ(Intrinsic::ID IntrinsicID) {
257 switch (IntrinsicID) {
258 case Intrinsic::nvvm_fmax_ftz_f:
259 case Intrinsic::nvvm_fmax_ftz_nan_f:
260 case Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f:
261 case Intrinsic::nvvm_fmax_ftz_xorsign_abs_f:
262
263 case Intrinsic::nvvm_fmin_ftz_f:
264 case Intrinsic::nvvm_fmin_ftz_nan_f:
265 case Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f:
266 case Intrinsic::nvvm_fmin_ftz_xorsign_abs_f:
267 return true;
268
269 case Intrinsic::nvvm_fmax_d:
270 case Intrinsic::nvvm_fmax_f:
271 case Intrinsic::nvvm_fmax_nan_f:
272 case Intrinsic::nvvm_fmax_nan_xorsign_abs_f:
273 case Intrinsic::nvvm_fmax_xorsign_abs_f:
274
275 case Intrinsic::nvvm_fmin_d:
276 case Intrinsic::nvvm_fmin_f:
277 case Intrinsic::nvvm_fmin_nan_f:
278 case Intrinsic::nvvm_fmin_nan_xorsign_abs_f:
279 case Intrinsic::nvvm_fmin_xorsign_abs_f:
280 return false;
281 }
282 llvm_unreachable("Checking FTZ flag for invalid fmin/fmax intrinsic");
283 return false;
284 }
285
FMinFMaxPropagatesNaNs(Intrinsic::ID IntrinsicID)286 inline bool FMinFMaxPropagatesNaNs(Intrinsic::ID IntrinsicID) {
287 switch (IntrinsicID) {
288 case Intrinsic::nvvm_fmax_ftz_nan_f:
289 case Intrinsic::nvvm_fmax_nan_f:
290 case Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f:
291 case Intrinsic::nvvm_fmax_nan_xorsign_abs_f:
292
293 case Intrinsic::nvvm_fmin_ftz_nan_f:
294 case Intrinsic::nvvm_fmin_nan_f:
295 case Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f:
296 case Intrinsic::nvvm_fmin_nan_xorsign_abs_f:
297 return true;
298
299 case Intrinsic::nvvm_fmax_d:
300 case Intrinsic::nvvm_fmax_f:
301 case Intrinsic::nvvm_fmax_ftz_f:
302 case Intrinsic::nvvm_fmax_ftz_xorsign_abs_f:
303 case Intrinsic::nvvm_fmax_xorsign_abs_f:
304
305 case Intrinsic::nvvm_fmin_d:
306 case Intrinsic::nvvm_fmin_f:
307 case Intrinsic::nvvm_fmin_ftz_f:
308 case Intrinsic::nvvm_fmin_ftz_xorsign_abs_f:
309 case Intrinsic::nvvm_fmin_xorsign_abs_f:
310 return false;
311 }
312 llvm_unreachable("Checking NaN flag for invalid fmin/fmax intrinsic");
313 return false;
314 }
315
FMinFMaxIsXorSignAbs(Intrinsic::ID IntrinsicID)316 inline bool FMinFMaxIsXorSignAbs(Intrinsic::ID IntrinsicID) {
317 switch (IntrinsicID) {
318 case Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f:
319 case Intrinsic::nvvm_fmax_ftz_xorsign_abs_f:
320 case Intrinsic::nvvm_fmax_nan_xorsign_abs_f:
321 case Intrinsic::nvvm_fmax_xorsign_abs_f:
322
323 case Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f:
324 case Intrinsic::nvvm_fmin_ftz_xorsign_abs_f:
325 case Intrinsic::nvvm_fmin_nan_xorsign_abs_f:
326 case Intrinsic::nvvm_fmin_xorsign_abs_f:
327 return true;
328
329 case Intrinsic::nvvm_fmax_d:
330 case Intrinsic::nvvm_fmax_f:
331 case Intrinsic::nvvm_fmax_ftz_f:
332 case Intrinsic::nvvm_fmax_ftz_nan_f:
333 case Intrinsic::nvvm_fmax_nan_f:
334
335 case Intrinsic::nvvm_fmin_d:
336 case Intrinsic::nvvm_fmin_f:
337 case Intrinsic::nvvm_fmin_ftz_f:
338 case Intrinsic::nvvm_fmin_ftz_nan_f:
339 case Intrinsic::nvvm_fmin_nan_f:
340 return false;
341 }
342 llvm_unreachable("Checking XorSignAbs flag for invalid fmin/fmax intrinsic");
343 return false;
344 }
345
346 } // namespace nvvm
347 } // namespace llvm
348 #endif // LLVM_IR_NVVMINTRINSICUTILS_H
349