xref: /freebsd/contrib/llvm-project/clang/lib/Headers/__clang_cuda_cmath.h (revision e2eeea75eb8b6dd50c1298067a0655880d186734)
1 /*===---- __clang_cuda_cmath.h - Device-side CUDA cmath support ------------===
2  *
3  * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  * See https://llvm.org/LICENSE.txt for license information.
5  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  *
7  *===-----------------------------------------------------------------------===
8  */
9 #ifndef __CLANG_CUDA_CMATH_H__
10 #define __CLANG_CUDA_CMATH_H__
11 #ifndef __CUDA__
12 #error "This file is for CUDA compilation only."
13 #endif
14 
15 #ifndef __OPENMP_NVPTX__
16 #include <limits>
17 #endif
18 
19 // CUDA lets us use various std math functions on the device side.  This file
20 // works in concert with __clang_cuda_math_forward_declares.h to make this work.
21 //
22 // Specifically, the forward-declares header declares __device__ overloads for
23 // these functions in the global namespace, then pulls them into namespace std
24 // with 'using' statements.  Then this file implements those functions, after
25 // their implementations have been pulled in.
26 //
27 // It's important that we declare the functions in the global namespace and pull
28 // them into namespace std with using statements, as opposed to simply declaring
29 // these functions in namespace std, because our device functions need to
30 // overload the standard library functions, which may be declared in the global
31 // namespace or in std, depending on the degree of conformance of the stdlib
32 // implementation.  Declaring in the global namespace and pulling into namespace
33 // std covers all of the known knowns.
34 
35 #ifdef __OPENMP_NVPTX__
36 #define __DEVICE__ static constexpr __attribute__((always_inline, nothrow))
37 #else
38 #define __DEVICE__ static __device__ __inline__ __attribute__((always_inline))
39 #endif
40 
41 __DEVICE__ long long abs(long long __n) { return ::llabs(__n); }
42 __DEVICE__ long abs(long __n) { return ::labs(__n); }
43 __DEVICE__ float abs(float __x) { return ::fabsf(__x); }
44 __DEVICE__ double abs(double __x) { return ::fabs(__x); }
45 __DEVICE__ float acos(float __x) { return ::acosf(__x); }
46 __DEVICE__ float asin(float __x) { return ::asinf(__x); }
47 __DEVICE__ float atan(float __x) { return ::atanf(__x); }
48 __DEVICE__ float atan2(float __x, float __y) { return ::atan2f(__x, __y); }
49 __DEVICE__ float ceil(float __x) { return ::ceilf(__x); }
50 __DEVICE__ float cos(float __x) { return ::cosf(__x); }
51 __DEVICE__ float cosh(float __x) { return ::coshf(__x); }
52 __DEVICE__ float exp(float __x) { return ::expf(__x); }
53 __DEVICE__ float fabs(float __x) { return ::fabsf(__x); }
54 __DEVICE__ float floor(float __x) { return ::floorf(__x); }
55 __DEVICE__ float fmod(float __x, float __y) { return ::fmodf(__x, __y); }
56 __DEVICE__ int fpclassify(float __x) {
57   return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
58                               FP_ZERO, __x);
59 }
60 __DEVICE__ int fpclassify(double __x) {
61   return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
62                               FP_ZERO, __x);
63 }
64 __DEVICE__ float frexp(float __arg, int *__exp) {
65   return ::frexpf(__arg, __exp);
66 }
67 
68 // For inscrutable reasons, the CUDA headers define these functions for us on
69 // Windows. For OpenMP we omit these as some old system headers have
70 // non-conforming `isinf(float)` and `isnan(float)` implementations that return
71 // an `int`. The system versions of these functions should be fine anyway.
72 #if !defined(_MSC_VER) && !defined(__OPENMP_NVPTX__)
73 __DEVICE__ bool isinf(float __x) { return ::__isinff(__x); }
74 __DEVICE__ bool isinf(double __x) { return ::__isinf(__x); }
75 __DEVICE__ bool isfinite(float __x) { return ::__finitef(__x); }
76 // For inscrutable reasons, __finite(), the double-precision version of
77 // __finitef, does not exist when compiling for MacOS.  __isfinited is available
78 // everywhere and is just as good.
79 __DEVICE__ bool isfinite(double __x) { return ::__isfinited(__x); }
80 __DEVICE__ bool isnan(float __x) { return ::__isnanf(__x); }
81 __DEVICE__ bool isnan(double __x) { return ::__isnan(__x); }
82 #endif
83 
84 __DEVICE__ bool isgreater(float __x, float __y) {
85   return __builtin_isgreater(__x, __y);
86 }
87 __DEVICE__ bool isgreater(double __x, double __y) {
88   return __builtin_isgreater(__x, __y);
89 }
90 __DEVICE__ bool isgreaterequal(float __x, float __y) {
91   return __builtin_isgreaterequal(__x, __y);
92 }
93 __DEVICE__ bool isgreaterequal(double __x, double __y) {
94   return __builtin_isgreaterequal(__x, __y);
95 }
96 __DEVICE__ bool isless(float __x, float __y) {
97   return __builtin_isless(__x, __y);
98 }
99 __DEVICE__ bool isless(double __x, double __y) {
100   return __builtin_isless(__x, __y);
101 }
102 __DEVICE__ bool islessequal(float __x, float __y) {
103   return __builtin_islessequal(__x, __y);
104 }
105 __DEVICE__ bool islessequal(double __x, double __y) {
106   return __builtin_islessequal(__x, __y);
107 }
108 __DEVICE__ bool islessgreater(float __x, float __y) {
109   return __builtin_islessgreater(__x, __y);
110 }
111 __DEVICE__ bool islessgreater(double __x, double __y) {
112   return __builtin_islessgreater(__x, __y);
113 }
114 __DEVICE__ bool isnormal(float __x) { return __builtin_isnormal(__x); }
115 __DEVICE__ bool isnormal(double __x) { return __builtin_isnormal(__x); }
116 __DEVICE__ bool isunordered(float __x, float __y) {
117   return __builtin_isunordered(__x, __y);
118 }
119 __DEVICE__ bool isunordered(double __x, double __y) {
120   return __builtin_isunordered(__x, __y);
121 }
122 __DEVICE__ float ldexp(float __arg, int __exp) {
123   return ::ldexpf(__arg, __exp);
124 }
125 __DEVICE__ float log(float __x) { return ::logf(__x); }
126 __DEVICE__ float log10(float __x) { return ::log10f(__x); }
127 __DEVICE__ float modf(float __x, float *__iptr) { return ::modff(__x, __iptr); }
128 __DEVICE__ float pow(float __base, float __exp) {
129   return ::powf(__base, __exp);
130 }
131 __DEVICE__ float pow(float __base, int __iexp) {
132   return ::powif(__base, __iexp);
133 }
134 __DEVICE__ double pow(double __base, int __iexp) {
135   return ::powi(__base, __iexp);
136 }
137 __DEVICE__ bool signbit(float __x) { return ::__signbitf(__x); }
138 __DEVICE__ bool signbit(double __x) { return ::__signbitd(__x); }
139 __DEVICE__ float sin(float __x) { return ::sinf(__x); }
140 __DEVICE__ float sinh(float __x) { return ::sinhf(__x); }
141 __DEVICE__ float sqrt(float __x) { return ::sqrtf(__x); }
142 __DEVICE__ float tan(float __x) { return ::tanf(__x); }
143 __DEVICE__ float tanh(float __x) { return ::tanhf(__x); }
144 
145 // Notably missing above is nexttoward.  We omit it because
146 // libdevice doesn't provide an implementation, and we don't want to be in the
147 // business of implementing tricky libm functions in this header.
148 
149 #ifndef __OPENMP_NVPTX__
150 
151 // Now we've defined everything we promised we'd define in
152 // __clang_cuda_math_forward_declares.h.  We need to do two additional things to
153 // fix up our math functions.
154 //
155 // 1) Define __device__ overloads for e.g. sin(int).  The CUDA headers define
156 //    only sin(float) and sin(double), which means that e.g. sin(0) is
157 //    ambiguous.
158 //
159 // 2) Pull the __device__ overloads of "foobarf" math functions into namespace
160 //    std.  These are defined in the CUDA headers in the global namespace,
161 //    independent of everything else we've done here.
162 
163 // We can't use std::enable_if, because we want to be pre-C++11 compatible.  But
164 // we go ahead and unconditionally define functions that are only available when
165 // compiling for C++11 to match the behavior of the CUDA headers.
166 template<bool __B, class __T = void>
167 struct __clang_cuda_enable_if {};
168 
169 template <class __T> struct __clang_cuda_enable_if<true, __T> {
170   typedef __T type;
171 };
172 
173 // Defines an overload of __fn that accepts one integral argument, calls
174 // __fn((double)x), and returns __retty.
175 #define __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(__retty, __fn)                      \
176   template <typename __T>                                                      \
177   __DEVICE__                                                                   \
178       typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,    \
179                                       __retty>::type                           \
180       __fn(__T __x) {                                                          \
181     return ::__fn((double)__x);                                                \
182   }
183 
184 // Defines an overload of __fn that accepts one two arithmetic arguments, calls
185 // __fn((double)x, (double)y), and returns a double.
186 //
187 // Note this is different from OVERLOAD_1, which generates an overload that
188 // accepts only *integral* arguments.
189 #define __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(__retty, __fn)                      \
190   template <typename __T1, typename __T2>                                      \
191   __DEVICE__ typename __clang_cuda_enable_if<                                  \
192       std::numeric_limits<__T1>::is_specialized &&                             \
193           std::numeric_limits<__T2>::is_specialized,                           \
194       __retty>::type                                                           \
195   __fn(__T1 __x, __T2 __y) {                                                   \
196     return __fn((double)__x, (double)__y);                                     \
197   }
198 
199 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, acos)
200 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, acosh)
201 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, asin)
202 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, asinh)
203 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, atan)
204 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, atan2);
205 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, atanh)
206 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cbrt)
207 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, ceil)
208 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, copysign);
209 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cos)
210 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cosh)
211 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, erf)
212 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, erfc)
213 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, exp)
214 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, exp2)
215 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, expm1)
216 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, fabs)
217 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fdim);
218 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, floor)
219 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmax);
220 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmin);
221 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmod);
222 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(int, fpclassify)
223 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, hypot);
224 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(int, ilogb)
225 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isfinite)
226 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isgreater);
227 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isgreaterequal);
228 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isinf);
229 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isless);
230 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, islessequal);
231 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, islessgreater);
232 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isnan);
233 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isnormal)
234 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isunordered);
235 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, lgamma)
236 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log)
237 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log10)
238 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log1p)
239 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log2)
240 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, logb)
241 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long long, llrint)
242 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long long, llround)
243 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long, lrint)
244 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long, lround)
245 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, nearbyint);
246 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, nextafter);
247 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, pow);
248 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, remainder);
249 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, rint);
250 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, round);
251 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, signbit)
252 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sin)
253 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sinh)
254 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sqrt)
255 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tan)
256 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tanh)
257 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tgamma)
258 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, trunc);
259 
260 #undef __CUDA_CLANG_FN_INTEGER_OVERLOAD_1
261 #undef __CUDA_CLANG_FN_INTEGER_OVERLOAD_2
262 
263 // Overloads for functions that don't match the patterns expected by
264 // __CUDA_CLANG_FN_INTEGER_OVERLOAD_{1,2}.
265 template <typename __T1, typename __T2, typename __T3>
266 __DEVICE__ typename __clang_cuda_enable_if<
267     std::numeric_limits<__T1>::is_specialized &&
268         std::numeric_limits<__T2>::is_specialized &&
269         std::numeric_limits<__T3>::is_specialized,
270     double>::type
271 fma(__T1 __x, __T2 __y, __T3 __z) {
272   return std::fma((double)__x, (double)__y, (double)__z);
273 }
274 
275 template <typename __T>
276 __DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
277                                            double>::type
278 frexp(__T __x, int *__exp) {
279   return std::frexp((double)__x, __exp);
280 }
281 
282 template <typename __T>
283 __DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
284                                            double>::type
285 ldexp(__T __x, int __exp) {
286   return std::ldexp((double)__x, __exp);
287 }
288 
289 template <typename __T1, typename __T2>
290 __DEVICE__ typename __clang_cuda_enable_if<
291     std::numeric_limits<__T1>::is_specialized &&
292         std::numeric_limits<__T2>::is_specialized,
293     double>::type
294 remquo(__T1 __x, __T2 __y, int *__quo) {
295   return std::remquo((double)__x, (double)__y, __quo);
296 }
297 
298 template <typename __T>
299 __DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
300                                            double>::type
301 scalbln(__T __x, long __exp) {
302   return std::scalbln((double)__x, __exp);
303 }
304 
305 template <typename __T>
306 __DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
307                                            double>::type
308 scalbn(__T __x, int __exp) {
309   return std::scalbn((double)__x, __exp);
310 }
311 
312 // We need to define these overloads in exactly the namespace our standard
313 // library uses (including the right inline namespace), otherwise they won't be
314 // picked up by other functions in the standard library (e.g. functions in
315 // <complex>).  Thus the ugliness below.
316 #ifdef _LIBCPP_BEGIN_NAMESPACE_STD
317 _LIBCPP_BEGIN_NAMESPACE_STD
318 #else
319 namespace std {
320 #ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION
321 _GLIBCXX_BEGIN_NAMESPACE_VERSION
322 #endif
323 #endif
324 
325 // Pull the new overloads we defined above into namespace std.
326 using ::acos;
327 using ::acosh;
328 using ::asin;
329 using ::asinh;
330 using ::atan;
331 using ::atan2;
332 using ::atanh;
333 using ::cbrt;
334 using ::ceil;
335 using ::copysign;
336 using ::cos;
337 using ::cosh;
338 using ::erf;
339 using ::erfc;
340 using ::exp;
341 using ::exp2;
342 using ::expm1;
343 using ::fabs;
344 using ::fdim;
345 using ::floor;
346 using ::fma;
347 using ::fmax;
348 using ::fmin;
349 using ::fmod;
350 using ::fpclassify;
351 using ::frexp;
352 using ::hypot;
353 using ::ilogb;
354 using ::isfinite;
355 using ::isgreater;
356 using ::isgreaterequal;
357 using ::isless;
358 using ::islessequal;
359 using ::islessgreater;
360 using ::isnormal;
361 using ::isunordered;
362 using ::ldexp;
363 using ::lgamma;
364 using ::llrint;
365 using ::llround;
366 using ::log;
367 using ::log10;
368 using ::log1p;
369 using ::log2;
370 using ::logb;
371 using ::lrint;
372 using ::lround;
373 using ::nearbyint;
374 using ::nextafter;
375 using ::pow;
376 using ::remainder;
377 using ::remquo;
378 using ::rint;
379 using ::round;
380 using ::scalbln;
381 using ::scalbn;
382 using ::signbit;
383 using ::sin;
384 using ::sinh;
385 using ::sqrt;
386 using ::tan;
387 using ::tanh;
388 using ::tgamma;
389 using ::trunc;
390 
391 // Well this is fun: We need to pull these symbols in for libc++, but we can't
392 // pull them in with libstdc++, because its ::isinf and ::isnan are different
393 // than its std::isinf and std::isnan.
394 #ifndef __GLIBCXX__
395 using ::isinf;
396 using ::isnan;
397 #endif
398 
399 // Finally, pull the "foobarf" functions that CUDA defines in its headers into
400 // namespace std.
401 using ::acosf;
402 using ::acoshf;
403 using ::asinf;
404 using ::asinhf;
405 using ::atan2f;
406 using ::atanf;
407 using ::atanhf;
408 using ::cbrtf;
409 using ::ceilf;
410 using ::copysignf;
411 using ::cosf;
412 using ::coshf;
413 using ::erfcf;
414 using ::erff;
415 using ::exp2f;
416 using ::expf;
417 using ::expm1f;
418 using ::fabsf;
419 using ::fdimf;
420 using ::floorf;
421 using ::fmaf;
422 using ::fmaxf;
423 using ::fminf;
424 using ::fmodf;
425 using ::frexpf;
426 using ::hypotf;
427 using ::ilogbf;
428 using ::ldexpf;
429 using ::lgammaf;
430 using ::llrintf;
431 using ::llroundf;
432 using ::log10f;
433 using ::log1pf;
434 using ::log2f;
435 using ::logbf;
436 using ::logf;
437 using ::lrintf;
438 using ::lroundf;
439 using ::modff;
440 using ::nearbyintf;
441 using ::nextafterf;
442 using ::powf;
443 using ::remainderf;
444 using ::remquof;
445 using ::rintf;
446 using ::roundf;
447 using ::scalblnf;
448 using ::scalbnf;
449 using ::sinf;
450 using ::sinhf;
451 using ::sqrtf;
452 using ::tanf;
453 using ::tanhf;
454 using ::tgammaf;
455 using ::truncf;
456 
457 #ifdef _LIBCPP_END_NAMESPACE_STD
458 _LIBCPP_END_NAMESPACE_STD
459 #else
460 #ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION
461 _GLIBCXX_END_NAMESPACE_VERSION
462 #endif
463 } // namespace std
464 #endif
465 
466 #endif // __OPENMP_NVPTX__
467 
468 #undef __DEVICE__
469 
470 #endif
471