xref: /freebsd/contrib/llvm-project/llvm/include/llvm/IR/NVVMIntrinsicUtils.h (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===--- NVVMIntrinsicUtils.h -----------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This file contains the definitions of the enumerations and flags
11 /// associated with NVVM Intrinsics, along with some helper functions.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_IR_NVVMINTRINSICUTILS_H
16 #define LLVM_IR_NVVMINTRINSICUTILS_H
17 
18 #include <stdint.h>
19 
20 #include "llvm/ADT/APFloat.h"
21 #include "llvm/IR/Intrinsics.h"
22 #include "llvm/IR/IntrinsicsNVPTX.h"
23 
24 namespace llvm {
25 namespace nvvm {
26 
/// Reduction operations usable with the TMA copy from shared to global
/// memory, i.e. with the "cp.reduce.async.bulk.tensor.*" family of PTX
/// instructions.
///
/// Each enumerator is given an explicit numeric value and the underlying
/// type is uint8_t.
enum class TMAReductionOp : uint8_t {
  ADD = 0, // value 0
  MIN = 1, // value 1
  MAX = 2, // value 2
  INC = 3, // value 3
  DEC = 4, // value 4
  AND = 5, // value 5
  OR = 6,  // value 6
  XOR = 7, // value 7
};
40 
/// Selects between the cta_group::1 and cta_group::2 variants of the
/// TMA/TCGEN05 family of PTX instructions, with an extra value for the case
/// where no cta_group modifier is present.
enum class CTAGroupKind : uint8_t {
  /// Default: no cta_group modifier.
  CG_NONE = 0,
  /// The cta_group::1 modifier.
  CG_1 = 1,
  /// The cta_group::2 modifier.
  CG_2 = 2,
};
49 
FPToIntegerIntrinsicShouldFTZ(Intrinsic::ID IntrinsicID)50 inline bool FPToIntegerIntrinsicShouldFTZ(Intrinsic::ID IntrinsicID) {
51   switch (IntrinsicID) {
52   case Intrinsic::nvvm_f2i_rm_ftz:
53   case Intrinsic::nvvm_f2i_rn_ftz:
54   case Intrinsic::nvvm_f2i_rp_ftz:
55   case Intrinsic::nvvm_f2i_rz_ftz:
56 
57   case Intrinsic::nvvm_f2ui_rm_ftz:
58   case Intrinsic::nvvm_f2ui_rn_ftz:
59   case Intrinsic::nvvm_f2ui_rp_ftz:
60   case Intrinsic::nvvm_f2ui_rz_ftz:
61 
62   case Intrinsic::nvvm_f2ll_rm_ftz:
63   case Intrinsic::nvvm_f2ll_rn_ftz:
64   case Intrinsic::nvvm_f2ll_rp_ftz:
65   case Intrinsic::nvvm_f2ll_rz_ftz:
66 
67   case Intrinsic::nvvm_f2ull_rm_ftz:
68   case Intrinsic::nvvm_f2ull_rn_ftz:
69   case Intrinsic::nvvm_f2ull_rp_ftz:
70   case Intrinsic::nvvm_f2ull_rz_ftz:
71     return true;
72 
73   case Intrinsic::nvvm_f2i_rm:
74   case Intrinsic::nvvm_f2i_rn:
75   case Intrinsic::nvvm_f2i_rp:
76   case Intrinsic::nvvm_f2i_rz:
77 
78   case Intrinsic::nvvm_f2ui_rm:
79   case Intrinsic::nvvm_f2ui_rn:
80   case Intrinsic::nvvm_f2ui_rp:
81   case Intrinsic::nvvm_f2ui_rz:
82 
83   case Intrinsic::nvvm_d2i_rm:
84   case Intrinsic::nvvm_d2i_rn:
85   case Intrinsic::nvvm_d2i_rp:
86   case Intrinsic::nvvm_d2i_rz:
87 
88   case Intrinsic::nvvm_d2ui_rm:
89   case Intrinsic::nvvm_d2ui_rn:
90   case Intrinsic::nvvm_d2ui_rp:
91   case Intrinsic::nvvm_d2ui_rz:
92 
93   case Intrinsic::nvvm_f2ll_rm:
94   case Intrinsic::nvvm_f2ll_rn:
95   case Intrinsic::nvvm_f2ll_rp:
96   case Intrinsic::nvvm_f2ll_rz:
97 
98   case Intrinsic::nvvm_f2ull_rm:
99   case Intrinsic::nvvm_f2ull_rn:
100   case Intrinsic::nvvm_f2ull_rp:
101   case Intrinsic::nvvm_f2ull_rz:
102 
103   case Intrinsic::nvvm_d2ll_rm:
104   case Intrinsic::nvvm_d2ll_rn:
105   case Intrinsic::nvvm_d2ll_rp:
106   case Intrinsic::nvvm_d2ll_rz:
107 
108   case Intrinsic::nvvm_d2ull_rm:
109   case Intrinsic::nvvm_d2ull_rn:
110   case Intrinsic::nvvm_d2ull_rp:
111   case Intrinsic::nvvm_d2ull_rz:
112     return false;
113   }
114   llvm_unreachable("Checking FTZ flag for invalid f2i/d2i intrinsic");
115   return false;
116 }
117 
FPToIntegerIntrinsicResultIsSigned(Intrinsic::ID IntrinsicID)118 inline bool FPToIntegerIntrinsicResultIsSigned(Intrinsic::ID IntrinsicID) {
119   switch (IntrinsicID) {
120   // f2i
121   case Intrinsic::nvvm_f2i_rm:
122   case Intrinsic::nvvm_f2i_rm_ftz:
123   case Intrinsic::nvvm_f2i_rn:
124   case Intrinsic::nvvm_f2i_rn_ftz:
125   case Intrinsic::nvvm_f2i_rp:
126   case Intrinsic::nvvm_f2i_rp_ftz:
127   case Intrinsic::nvvm_f2i_rz:
128   case Intrinsic::nvvm_f2i_rz_ftz:
129   // d2i
130   case Intrinsic::nvvm_d2i_rm:
131   case Intrinsic::nvvm_d2i_rn:
132   case Intrinsic::nvvm_d2i_rp:
133   case Intrinsic::nvvm_d2i_rz:
134   // f2ll
135   case Intrinsic::nvvm_f2ll_rm:
136   case Intrinsic::nvvm_f2ll_rm_ftz:
137   case Intrinsic::nvvm_f2ll_rn:
138   case Intrinsic::nvvm_f2ll_rn_ftz:
139   case Intrinsic::nvvm_f2ll_rp:
140   case Intrinsic::nvvm_f2ll_rp_ftz:
141   case Intrinsic::nvvm_f2ll_rz:
142   case Intrinsic::nvvm_f2ll_rz_ftz:
143   // d2ll
144   case Intrinsic::nvvm_d2ll_rm:
145   case Intrinsic::nvvm_d2ll_rn:
146   case Intrinsic::nvvm_d2ll_rp:
147   case Intrinsic::nvvm_d2ll_rz:
148     return true;
149 
150   // f2ui
151   case Intrinsic::nvvm_f2ui_rm:
152   case Intrinsic::nvvm_f2ui_rm_ftz:
153   case Intrinsic::nvvm_f2ui_rn:
154   case Intrinsic::nvvm_f2ui_rn_ftz:
155   case Intrinsic::nvvm_f2ui_rp:
156   case Intrinsic::nvvm_f2ui_rp_ftz:
157   case Intrinsic::nvvm_f2ui_rz:
158   case Intrinsic::nvvm_f2ui_rz_ftz:
159   // d2ui
160   case Intrinsic::nvvm_d2ui_rm:
161   case Intrinsic::nvvm_d2ui_rn:
162   case Intrinsic::nvvm_d2ui_rp:
163   case Intrinsic::nvvm_d2ui_rz:
164   // f2ull
165   case Intrinsic::nvvm_f2ull_rm:
166   case Intrinsic::nvvm_f2ull_rm_ftz:
167   case Intrinsic::nvvm_f2ull_rn:
168   case Intrinsic::nvvm_f2ull_rn_ftz:
169   case Intrinsic::nvvm_f2ull_rp:
170   case Intrinsic::nvvm_f2ull_rp_ftz:
171   case Intrinsic::nvvm_f2ull_rz:
172   case Intrinsic::nvvm_f2ull_rz_ftz:
173   // d2ull
174   case Intrinsic::nvvm_d2ull_rm:
175   case Intrinsic::nvvm_d2ull_rn:
176   case Intrinsic::nvvm_d2ull_rp:
177   case Intrinsic::nvvm_d2ull_rz:
178     return false;
179   }
180   llvm_unreachable(
181       "Checking invalid f2i/d2i intrinsic for signed int conversion");
182   return false;
183 }
184 
185 inline APFloat::roundingMode
GetFPToIntegerRoundingMode(Intrinsic::ID IntrinsicID)186 GetFPToIntegerRoundingMode(Intrinsic::ID IntrinsicID) {
187   switch (IntrinsicID) {
188   // RM:
189   case Intrinsic::nvvm_f2i_rm:
190   case Intrinsic::nvvm_f2ui_rm:
191   case Intrinsic::nvvm_f2i_rm_ftz:
192   case Intrinsic::nvvm_f2ui_rm_ftz:
193   case Intrinsic::nvvm_d2i_rm:
194   case Intrinsic::nvvm_d2ui_rm:
195 
196   case Intrinsic::nvvm_f2ll_rm:
197   case Intrinsic::nvvm_f2ull_rm:
198   case Intrinsic::nvvm_f2ll_rm_ftz:
199   case Intrinsic::nvvm_f2ull_rm_ftz:
200   case Intrinsic::nvvm_d2ll_rm:
201   case Intrinsic::nvvm_d2ull_rm:
202     return APFloat::rmTowardNegative;
203 
204   // RN:
205   case Intrinsic::nvvm_f2i_rn:
206   case Intrinsic::nvvm_f2ui_rn:
207   case Intrinsic::nvvm_f2i_rn_ftz:
208   case Intrinsic::nvvm_f2ui_rn_ftz:
209   case Intrinsic::nvvm_d2i_rn:
210   case Intrinsic::nvvm_d2ui_rn:
211 
212   case Intrinsic::nvvm_f2ll_rn:
213   case Intrinsic::nvvm_f2ull_rn:
214   case Intrinsic::nvvm_f2ll_rn_ftz:
215   case Intrinsic::nvvm_f2ull_rn_ftz:
216   case Intrinsic::nvvm_d2ll_rn:
217   case Intrinsic::nvvm_d2ull_rn:
218     return APFloat::rmNearestTiesToEven;
219 
220   // RP:
221   case Intrinsic::nvvm_f2i_rp:
222   case Intrinsic::nvvm_f2ui_rp:
223   case Intrinsic::nvvm_f2i_rp_ftz:
224   case Intrinsic::nvvm_f2ui_rp_ftz:
225   case Intrinsic::nvvm_d2i_rp:
226   case Intrinsic::nvvm_d2ui_rp:
227 
228   case Intrinsic::nvvm_f2ll_rp:
229   case Intrinsic::nvvm_f2ull_rp:
230   case Intrinsic::nvvm_f2ll_rp_ftz:
231   case Intrinsic::nvvm_f2ull_rp_ftz:
232   case Intrinsic::nvvm_d2ll_rp:
233   case Intrinsic::nvvm_d2ull_rp:
234     return APFloat::rmTowardPositive;
235 
236   // RZ:
237   case Intrinsic::nvvm_f2i_rz:
238   case Intrinsic::nvvm_f2ui_rz:
239   case Intrinsic::nvvm_f2i_rz_ftz:
240   case Intrinsic::nvvm_f2ui_rz_ftz:
241   case Intrinsic::nvvm_d2i_rz:
242   case Intrinsic::nvvm_d2ui_rz:
243 
244   case Intrinsic::nvvm_f2ll_rz:
245   case Intrinsic::nvvm_f2ull_rz:
246   case Intrinsic::nvvm_f2ll_rz_ftz:
247   case Intrinsic::nvvm_f2ull_rz_ftz:
248   case Intrinsic::nvvm_d2ll_rz:
249   case Intrinsic::nvvm_d2ull_rz:
250     return APFloat::rmTowardZero;
251   }
252   llvm_unreachable("Checking rounding mode for invalid f2i/d2i intrinsic");
253   return APFloat::roundingMode::Invalid;
254 }
255 
FMinFMaxShouldFTZ(Intrinsic::ID IntrinsicID)256 inline bool FMinFMaxShouldFTZ(Intrinsic::ID IntrinsicID) {
257   switch (IntrinsicID) {
258   case Intrinsic::nvvm_fmax_ftz_f:
259   case Intrinsic::nvvm_fmax_ftz_nan_f:
260   case Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f:
261   case Intrinsic::nvvm_fmax_ftz_xorsign_abs_f:
262 
263   case Intrinsic::nvvm_fmin_ftz_f:
264   case Intrinsic::nvvm_fmin_ftz_nan_f:
265   case Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f:
266   case Intrinsic::nvvm_fmin_ftz_xorsign_abs_f:
267     return true;
268 
269   case Intrinsic::nvvm_fmax_d:
270   case Intrinsic::nvvm_fmax_f:
271   case Intrinsic::nvvm_fmax_nan_f:
272   case Intrinsic::nvvm_fmax_nan_xorsign_abs_f:
273   case Intrinsic::nvvm_fmax_xorsign_abs_f:
274 
275   case Intrinsic::nvvm_fmin_d:
276   case Intrinsic::nvvm_fmin_f:
277   case Intrinsic::nvvm_fmin_nan_f:
278   case Intrinsic::nvvm_fmin_nan_xorsign_abs_f:
279   case Intrinsic::nvvm_fmin_xorsign_abs_f:
280     return false;
281   }
282   llvm_unreachable("Checking FTZ flag for invalid fmin/fmax intrinsic");
283   return false;
284 }
285 
FMinFMaxPropagatesNaNs(Intrinsic::ID IntrinsicID)286 inline bool FMinFMaxPropagatesNaNs(Intrinsic::ID IntrinsicID) {
287   switch (IntrinsicID) {
288   case Intrinsic::nvvm_fmax_ftz_nan_f:
289   case Intrinsic::nvvm_fmax_nan_f:
290   case Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f:
291   case Intrinsic::nvvm_fmax_nan_xorsign_abs_f:
292 
293   case Intrinsic::nvvm_fmin_ftz_nan_f:
294   case Intrinsic::nvvm_fmin_nan_f:
295   case Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f:
296   case Intrinsic::nvvm_fmin_nan_xorsign_abs_f:
297     return true;
298 
299   case Intrinsic::nvvm_fmax_d:
300   case Intrinsic::nvvm_fmax_f:
301   case Intrinsic::nvvm_fmax_ftz_f:
302   case Intrinsic::nvvm_fmax_ftz_xorsign_abs_f:
303   case Intrinsic::nvvm_fmax_xorsign_abs_f:
304 
305   case Intrinsic::nvvm_fmin_d:
306   case Intrinsic::nvvm_fmin_f:
307   case Intrinsic::nvvm_fmin_ftz_f:
308   case Intrinsic::nvvm_fmin_ftz_xorsign_abs_f:
309   case Intrinsic::nvvm_fmin_xorsign_abs_f:
310     return false;
311   }
312   llvm_unreachable("Checking NaN flag for invalid fmin/fmax intrinsic");
313   return false;
314 }
315 
FMinFMaxIsXorSignAbs(Intrinsic::ID IntrinsicID)316 inline bool FMinFMaxIsXorSignAbs(Intrinsic::ID IntrinsicID) {
317   switch (IntrinsicID) {
318   case Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f:
319   case Intrinsic::nvvm_fmax_ftz_xorsign_abs_f:
320   case Intrinsic::nvvm_fmax_nan_xorsign_abs_f:
321   case Intrinsic::nvvm_fmax_xorsign_abs_f:
322 
323   case Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f:
324   case Intrinsic::nvvm_fmin_ftz_xorsign_abs_f:
325   case Intrinsic::nvvm_fmin_nan_xorsign_abs_f:
326   case Intrinsic::nvvm_fmin_xorsign_abs_f:
327     return true;
328 
329   case Intrinsic::nvvm_fmax_d:
330   case Intrinsic::nvvm_fmax_f:
331   case Intrinsic::nvvm_fmax_ftz_f:
332   case Intrinsic::nvvm_fmax_ftz_nan_f:
333   case Intrinsic::nvvm_fmax_nan_f:
334 
335   case Intrinsic::nvvm_fmin_d:
336   case Intrinsic::nvvm_fmin_f:
337   case Intrinsic::nvvm_fmin_ftz_f:
338   case Intrinsic::nvvm_fmin_ftz_nan_f:
339   case Intrinsic::nvvm_fmin_nan_f:
340     return false;
341   }
342   llvm_unreachable("Checking XorSignAbs flag for invalid fmin/fmax intrinsic");
343   return false;
344 }
345 
346 } // namespace nvvm
347 } // namespace llvm
348 #endif // LLVM_IR_NVVMINTRINSICUTILS_H
349