/*===---- complex - CUDA wrapper for <complex> ------------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */

#ifndef __CLANG_CUDA_WRAPPERS_COMPLEX
#define __CLANG_CUDA_WRAPPERS_COMPLEX

// Wrapper around <complex> that forces its functions to be __host__
// __device__.

// First, include host-only headers we think are likely to be included by
// <complex>, so that the pragma below only applies to <complex> itself.
#if __cplusplus >= 201103L
#include <type_traits>
#endif
#include <stdexcept>
#include <cmath>
#include <sstream>

// Next, include our <algorithm> wrapper, to ensure that device overloads of
// std::min/max are available.
#include <algorithm>

#pragma clang force_cuda_host_device begin

// When compiling for device, ask libstdc++ to use its own implementations of
// complex functions, rather than calling builtins (which resolve to library
// functions that don't exist when compiling CUDA device code).
//
// This is a little dicey, because it causes libstdc++ to define a different
// set of overloads on host and device.
//
//   // Present only when compiling for host.
//   __host__ __device__ complex<float> sin(const complex<float>& x) {
//     return __builtin_csinf(x);
//   }
//
//   // Present when compiling for host and for device.
//   template <typename T>
//   __host__ __device__ complex<T> sin(const complex<T>& x) {
//     return complex<T>(sin(x.real()) * cosh(x.imag()),
//                       cos(x.real()) * sinh(x.imag()));
//   }
//
// This is safe because when compiling for device, all function calls in
// __host__ code to sin() will still resolve to *something*, even if they don't
// resolve to the same function as they resolve to when compiling for host. We
// don't care that they don't resolve to the right function because we won't
// codegen this host code when compiling for device.

// Save the current definitions of the two libstdc++ configuration macros; they
// are restored by the matching pop_macro pragmas after <complex> is included.
#pragma push_macro("_GLIBCXX_USE_C99_COMPLEX")
#pragma push_macro("_GLIBCXX_USE_C99_COMPLEX_TR1")
// The macros may already be defined (with a different value) by one of the
// headers included above. Drop any such definition first so that the #defines
// below are not macro redefinitions; the saved definitions are unaffected.
#undef _GLIBCXX_USE_C99_COMPLEX
#undef _GLIBCXX_USE_C99_COMPLEX_TR1
#define _GLIBCXX_USE_C99_COMPLEX 0
#define _GLIBCXX_USE_C99_COMPLEX_TR1 0

// Work around a compatibility issue with libstdc++ 11.1.0
// https://bugs.llvm.org/show_bug.cgi?id=50383
#pragma push_macro("__failed_assertion")
#if _GLIBCXX_RELEASE == 11
#define __failed_assertion __cuda_failed_assertion
#endif

#include_next <complex>

// Restore the macro state in reverse order of the pushes above.
#pragma pop_macro("__failed_assertion")
#pragma pop_macro("_GLIBCXX_USE_C99_COMPLEX_TR1")
#pragma pop_macro("_GLIBCXX_USE_C99_COMPLEX")

#pragma clang force_cuda_host_device end

#endif // include guard