1*7a6dacacSDimitry Andric //===- OffloadWrapper.cpp ---------------------------------------*- C++ -*-===// 2*7a6dacacSDimitry Andric // 3*7a6dacacSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*7a6dacacSDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5*7a6dacacSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*7a6dacacSDimitry Andric // 7*7a6dacacSDimitry Andric //===----------------------------------------------------------------------===// 8*7a6dacacSDimitry Andric 9*7a6dacacSDimitry Andric #include "llvm/Frontend/Offloading/OffloadWrapper.h" 10*7a6dacacSDimitry Andric #include "llvm/ADT/ArrayRef.h" 11*7a6dacacSDimitry Andric #include "llvm/BinaryFormat/Magic.h" 12*7a6dacacSDimitry Andric #include "llvm/Frontend/Offloading/Utility.h" 13*7a6dacacSDimitry Andric #include "llvm/IR/Constants.h" 14*7a6dacacSDimitry Andric #include "llvm/IR/GlobalVariable.h" 15*7a6dacacSDimitry Andric #include "llvm/IR/IRBuilder.h" 16*7a6dacacSDimitry Andric #include "llvm/IR/LLVMContext.h" 17*7a6dacacSDimitry Andric #include "llvm/IR/Module.h" 18*7a6dacacSDimitry Andric #include "llvm/Object/OffloadBinary.h" 19*7a6dacacSDimitry Andric #include "llvm/Support/Error.h" 20*7a6dacacSDimitry Andric #include "llvm/TargetParser/Triple.h" 21*7a6dacacSDimitry Andric #include "llvm/Transforms/Utils/ModuleUtils.h" 22*7a6dacacSDimitry Andric 23*7a6dacacSDimitry Andric using namespace llvm; 24*7a6dacacSDimitry Andric using namespace llvm::offloading; 25*7a6dacacSDimitry Andric 26*7a6dacacSDimitry Andric namespace { 27*7a6dacacSDimitry Andric /// Magic number that begins the section containing the CUDA fatbinary. 28*7a6dacacSDimitry Andric constexpr unsigned CudaFatMagic = 0x466243b1; 29*7a6dacacSDimitry Andric constexpr unsigned HIPFatMagic = 0x48495046; 30*7a6dacacSDimitry Andric 31*7a6dacacSDimitry Andric IntegerType *getSizeTTy(Module &M) { 32*7a6dacacSDimitry Andric return M.getDataLayout().getIntPtrType(M.getContext()); 33*7a6dacacSDimitry Andric } 34*7a6dacacSDimitry Andric 35*7a6dacacSDimitry Andric // struct __tgt_device_image { 36*7a6dacacSDimitry Andric // void *ImageStart; 37*7a6dacacSDimitry Andric // void *ImageEnd; 38*7a6dacacSDimitry Andric // __tgt_offload_entry *EntriesBegin; 39*7a6dacacSDimitry Andric // __tgt_offload_entry *EntriesEnd; 40*7a6dacacSDimitry Andric // }; 41*7a6dacacSDimitry Andric StructType *getDeviceImageTy(Module &M) { 42*7a6dacacSDimitry Andric LLVMContext &C = M.getContext(); 43*7a6dacacSDimitry Andric StructType *ImageTy = StructType::getTypeByName(C, "__tgt_device_image"); 44*7a6dacacSDimitry Andric if (!ImageTy) 45*7a6dacacSDimitry Andric ImageTy = 46*7a6dacacSDimitry Andric StructType::create("__tgt_device_image", PointerType::getUnqual(C), 47*7a6dacacSDimitry Andric PointerType::getUnqual(C), PointerType::getUnqual(C), 48*7a6dacacSDimitry Andric PointerType::getUnqual(C)); 49*7a6dacacSDimitry Andric return ImageTy; 50*7a6dacacSDimitry Andric } 51*7a6dacacSDimitry Andric 52*7a6dacacSDimitry Andric PointerType *getDeviceImagePtrTy(Module &M) { 53*7a6dacacSDimitry Andric return PointerType::getUnqual(getDeviceImageTy(M)); 54*7a6dacacSDimitry Andric } 55*7a6dacacSDimitry Andric 56*7a6dacacSDimitry Andric // struct __tgt_bin_desc { 57*7a6dacacSDimitry Andric // int32_t NumDeviceImages; 58*7a6dacacSDimitry Andric // __tgt_device_image *DeviceImages; 59*7a6dacacSDimitry Andric // __tgt_offload_entry *HostEntriesBegin; 60*7a6dacacSDimitry Andric // __tgt_offload_entry *HostEntriesEnd; 61*7a6dacacSDimitry Andric // }; 62*7a6dacacSDimitry Andric StructType *getBinDescTy(Module &M) { 63*7a6dacacSDimitry Andric LLVMContext &C = M.getContext(); 64*7a6dacacSDimitry Andric StructType *DescTy = StructType::getTypeByName(C, "__tgt_bin_desc"); 65*7a6dacacSDimitry Andric if (!DescTy) 66*7a6dacacSDimitry Andric DescTy = StructType::create( 67*7a6dacacSDimitry Andric "__tgt_bin_desc", Type::getInt32Ty(C), getDeviceImagePtrTy(M), 68*7a6dacacSDimitry Andric PointerType::getUnqual(C), PointerType::getUnqual(C)); 69*7a6dacacSDimitry Andric return DescTy; 70*7a6dacacSDimitry Andric } 71*7a6dacacSDimitry Andric 72*7a6dacacSDimitry Andric PointerType *getBinDescPtrTy(Module &M) { 73*7a6dacacSDimitry Andric return PointerType::getUnqual(getBinDescTy(M)); 74*7a6dacacSDimitry Andric } 75*7a6dacacSDimitry Andric 76*7a6dacacSDimitry Andric /// Creates binary descriptor for the given device images. Binary descriptor 77*7a6dacacSDimitry Andric /// is an object that is passed to the offloading runtime at program startup 78*7a6dacacSDimitry Andric /// and it describes all device images available in the executable or shared 79*7a6dacacSDimitry Andric /// library. It is defined as follows 80*7a6dacacSDimitry Andric /// 81*7a6dacacSDimitry Andric /// __attribute__((visibility("hidden"))) 82*7a6dacacSDimitry Andric /// extern __tgt_offload_entry *__start_omp_offloading_entries; 83*7a6dacacSDimitry Andric /// __attribute__((visibility("hidden"))) 84*7a6dacacSDimitry Andric /// extern __tgt_offload_entry *__stop_omp_offloading_entries; 85*7a6dacacSDimitry Andric /// 86*7a6dacacSDimitry Andric /// static const char Image0[] = { <Bufs.front() contents> }; 87*7a6dacacSDimitry Andric /// ... 88*7a6dacacSDimitry Andric /// static const char ImageN[] = { <Bufs.back() contents> }; 89*7a6dacacSDimitry Andric /// 90*7a6dacacSDimitry Andric /// static const __tgt_device_image Images[] = { 91*7a6dacacSDimitry Andric /// { 92*7a6dacacSDimitry Andric /// Image0, /*ImageStart*/ 93*7a6dacacSDimitry Andric /// Image0 + sizeof(Image0), /*ImageEnd*/ 94*7a6dacacSDimitry Andric /// __start_omp_offloading_entries, /*EntriesBegin*/ 95*7a6dacacSDimitry Andric /// __stop_omp_offloading_entries /*EntriesEnd*/ 96*7a6dacacSDimitry Andric /// }, 97*7a6dacacSDimitry Andric /// ... 98*7a6dacacSDimitry Andric /// { 99*7a6dacacSDimitry Andric /// ImageN, /*ImageStart*/ 100*7a6dacacSDimitry Andric /// ImageN + sizeof(ImageN), /*ImageEnd*/ 101*7a6dacacSDimitry Andric /// __start_omp_offloading_entries, /*EntriesBegin*/ 102*7a6dacacSDimitry Andric /// __stop_omp_offloading_entries /*EntriesEnd*/ 103*7a6dacacSDimitry Andric /// } 104*7a6dacacSDimitry Andric /// }; 105*7a6dacacSDimitry Andric /// 106*7a6dacacSDimitry Andric /// static const __tgt_bin_desc BinDesc = { 107*7a6dacacSDimitry Andric /// sizeof(Images) / sizeof(Images[0]), /*NumDeviceImages*/ 108*7a6dacacSDimitry Andric /// Images, /*DeviceImages*/ 109*7a6dacacSDimitry Andric /// __start_omp_offloading_entries, /*HostEntriesBegin*/ 110*7a6dacacSDimitry Andric /// __stop_omp_offloading_entries /*HostEntriesEnd*/ 111*7a6dacacSDimitry Andric /// }; 112*7a6dacacSDimitry Andric /// 113*7a6dacacSDimitry Andric /// Global variable that represents BinDesc is returned. 114*7a6dacacSDimitry Andric GlobalVariable *createBinDesc(Module &M, ArrayRef<ArrayRef<char>> Bufs, 115*7a6dacacSDimitry Andric EntryArrayTy EntryArray, StringRef Suffix) { 116*7a6dacacSDimitry Andric LLVMContext &C = M.getContext(); 117*7a6dacacSDimitry Andric auto [EntriesB, EntriesE] = EntryArray; 118*7a6dacacSDimitry Andric 119*7a6dacacSDimitry Andric auto *Zero = ConstantInt::get(getSizeTTy(M), 0u); 120*7a6dacacSDimitry Andric Constant *ZeroZero[] = {Zero, Zero}; 121*7a6dacacSDimitry Andric 122*7a6dacacSDimitry Andric // Create initializer for the images array. 123*7a6dacacSDimitry Andric SmallVector<Constant *, 4u> ImagesInits; 124*7a6dacacSDimitry Andric ImagesInits.reserve(Bufs.size()); 125*7a6dacacSDimitry Andric for (ArrayRef<char> Buf : Bufs) { 126*7a6dacacSDimitry Andric // We embed the full offloading entry so the binary utilities can parse it. 127*7a6dacacSDimitry Andric auto *Data = ConstantDataArray::get(C, Buf); 128*7a6dacacSDimitry Andric auto *Image = new GlobalVariable(M, Data->getType(), /*isConstant=*/true, 129*7a6dacacSDimitry Andric GlobalVariable::InternalLinkage, Data, 130*7a6dacacSDimitry Andric ".omp_offloading.device_image" + Suffix); 131*7a6dacacSDimitry Andric Image->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); 132*7a6dacacSDimitry Andric Image->setSection(".llvm.offloading"); 133*7a6dacacSDimitry Andric Image->setAlignment(Align(object::OffloadBinary::getAlignment())); 134*7a6dacacSDimitry Andric 135*7a6dacacSDimitry Andric StringRef Binary(Buf.data(), Buf.size()); 136*7a6dacacSDimitry Andric assert(identify_magic(Binary) == file_magic::offload_binary && 137*7a6dacacSDimitry Andric "Invalid binary format"); 138*7a6dacacSDimitry Andric 139*7a6dacacSDimitry Andric // The device image struct contains the pointer to the beginning and end of 140*7a6dacacSDimitry Andric // the image stored inside of the offload binary. There should only be one 141*7a6dacacSDimitry Andric // of these for each buffer so we parse it out manually. 142*7a6dacacSDimitry Andric const auto *Header = 143*7a6dacacSDimitry Andric reinterpret_cast<const object::OffloadBinary::Header *>( 144*7a6dacacSDimitry Andric Binary.bytes_begin()); 145*7a6dacacSDimitry Andric const auto *Entry = reinterpret_cast<const object::OffloadBinary::Entry *>( 146*7a6dacacSDimitry Andric Binary.bytes_begin() + Header->EntryOffset); 147*7a6dacacSDimitry Andric 148*7a6dacacSDimitry Andric auto *Begin = ConstantInt::get(getSizeTTy(M), Entry->ImageOffset); 149*7a6dacacSDimitry Andric auto *Size = 150*7a6dacacSDimitry Andric ConstantInt::get(getSizeTTy(M), Entry->ImageOffset + Entry->ImageSize); 151*7a6dacacSDimitry Andric Constant *ZeroBegin[] = {Zero, Begin}; 152*7a6dacacSDimitry Andric Constant *ZeroSize[] = {Zero, Size}; 153*7a6dacacSDimitry Andric 154*7a6dacacSDimitry Andric auto *ImageB = 155*7a6dacacSDimitry Andric ConstantExpr::getGetElementPtr(Image->getValueType(), Image, ZeroBegin); 156*7a6dacacSDimitry Andric auto *ImageE = 157*7a6dacacSDimitry Andric ConstantExpr::getGetElementPtr(Image->getValueType(), Image, ZeroSize); 158*7a6dacacSDimitry Andric 159*7a6dacacSDimitry Andric ImagesInits.push_back(ConstantStruct::get(getDeviceImageTy(M), ImageB, 160*7a6dacacSDimitry Andric ImageE, EntriesB, EntriesE)); 161*7a6dacacSDimitry Andric } 162*7a6dacacSDimitry Andric 163*7a6dacacSDimitry Andric // Then create images array. 164*7a6dacacSDimitry Andric auto *ImagesData = ConstantArray::get( 165*7a6dacacSDimitry Andric ArrayType::get(getDeviceImageTy(M), ImagesInits.size()), ImagesInits); 166*7a6dacacSDimitry Andric 167*7a6dacacSDimitry Andric auto *Images = 168*7a6dacacSDimitry Andric new GlobalVariable(M, ImagesData->getType(), /*isConstant*/ true, 169*7a6dacacSDimitry Andric GlobalValue::InternalLinkage, ImagesData, 170*7a6dacacSDimitry Andric ".omp_offloading.device_images" + Suffix); 171*7a6dacacSDimitry Andric Images->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); 172*7a6dacacSDimitry Andric 173*7a6dacacSDimitry Andric auto *ImagesB = 174*7a6dacacSDimitry Andric ConstantExpr::getGetElementPtr(Images->getValueType(), Images, ZeroZero); 175*7a6dacacSDimitry Andric 176*7a6dacacSDimitry Andric // And finally create the binary descriptor object. 177*7a6dacacSDimitry Andric auto *DescInit = ConstantStruct::get( 178*7a6dacacSDimitry Andric getBinDescTy(M), 179*7a6dacacSDimitry Andric ConstantInt::get(Type::getInt32Ty(C), ImagesInits.size()), ImagesB, 180*7a6dacacSDimitry Andric EntriesB, EntriesE); 181*7a6dacacSDimitry Andric 182*7a6dacacSDimitry Andric return new GlobalVariable(M, DescInit->getType(), /*isConstant*/ true, 183*7a6dacacSDimitry Andric GlobalValue::InternalLinkage, DescInit, 184*7a6dacacSDimitry Andric ".omp_offloading.descriptor" + Suffix); 185*7a6dacacSDimitry Andric } 186*7a6dacacSDimitry Andric 187*7a6dacacSDimitry Andric void createRegisterFunction(Module &M, GlobalVariable *BinDesc, 188*7a6dacacSDimitry Andric StringRef Suffix) { 189*7a6dacacSDimitry Andric LLVMContext &C = M.getContext(); 190*7a6dacacSDimitry Andric auto *FuncTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false); 191*7a6dacacSDimitry Andric auto *Func = Function::Create(FuncTy, GlobalValue::InternalLinkage, 192*7a6dacacSDimitry Andric ".omp_offloading.descriptor_reg" + Suffix, &M); 193*7a6dacacSDimitry Andric Func->setSection(".text.startup"); 194*7a6dacacSDimitry Andric 195*7a6dacacSDimitry Andric // Get __tgt_register_lib function declaration. 196*7a6dacacSDimitry Andric auto *RegFuncTy = FunctionType::get(Type::getVoidTy(C), getBinDescPtrTy(M), 197*7a6dacacSDimitry Andric /*isVarArg*/ false); 198*7a6dacacSDimitry Andric FunctionCallee RegFuncC = 199*7a6dacacSDimitry Andric M.getOrInsertFunction("__tgt_register_lib", RegFuncTy); 200*7a6dacacSDimitry Andric 201*7a6dacacSDimitry Andric // Construct function body 202*7a6dacacSDimitry Andric IRBuilder<> Builder(BasicBlock::Create(C, "entry", Func)); 203*7a6dacacSDimitry Andric Builder.CreateCall(RegFuncC, BinDesc); 204*7a6dacacSDimitry Andric Builder.CreateRetVoid(); 205*7a6dacacSDimitry Andric 206*7a6dacacSDimitry Andric // Add this function to constructors. 207*7a6dacacSDimitry Andric // Set priority to 1 so that __tgt_register_lib is executed AFTER 208*7a6dacacSDimitry Andric // __tgt_register_requires (we want to know what requirements have been 209*7a6dacacSDimitry Andric // asked for before we load a libomptarget plugin so that by the time the 210*7a6dacacSDimitry Andric // plugin is loaded it can report how many devices there are which can 211*7a6dacacSDimitry Andric // satisfy these requirements). 212*7a6dacacSDimitry Andric appendToGlobalCtors(M, Func, /*Priority*/ 1); 213*7a6dacacSDimitry Andric } 214*7a6dacacSDimitry Andric 215*7a6dacacSDimitry Andric void createUnregisterFunction(Module &M, GlobalVariable *BinDesc, 216*7a6dacacSDimitry Andric StringRef Suffix) { 217*7a6dacacSDimitry Andric LLVMContext &C = M.getContext(); 218*7a6dacacSDimitry Andric auto *FuncTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false); 219*7a6dacacSDimitry Andric auto *Func = 220*7a6dacacSDimitry Andric Function::Create(FuncTy, GlobalValue::InternalLinkage, 221*7a6dacacSDimitry Andric ".omp_offloading.descriptor_unreg" + Suffix, &M); 222*7a6dacacSDimitry Andric Func->setSection(".text.startup"); 223*7a6dacacSDimitry Andric 224*7a6dacacSDimitry Andric // Get __tgt_unregister_lib function declaration. 225*7a6dacacSDimitry Andric auto *UnRegFuncTy = FunctionType::get(Type::getVoidTy(C), getBinDescPtrTy(M), 226*7a6dacacSDimitry Andric /*isVarArg*/ false); 227*7a6dacacSDimitry Andric FunctionCallee UnRegFuncC = 228*7a6dacacSDimitry Andric M.getOrInsertFunction("__tgt_unregister_lib", UnRegFuncTy); 229*7a6dacacSDimitry Andric 230*7a6dacacSDimitry Andric // Construct function body 231*7a6dacacSDimitry Andric IRBuilder<> Builder(BasicBlock::Create(C, "entry", Func)); 232*7a6dacacSDimitry Andric Builder.CreateCall(UnRegFuncC, BinDesc); 233*7a6dacacSDimitry Andric Builder.CreateRetVoid(); 234*7a6dacacSDimitry Andric 235*7a6dacacSDimitry Andric // Add this function to global destructors. 236*7a6dacacSDimitry Andric // Match priority of __tgt_register_lib 237*7a6dacacSDimitry Andric appendToGlobalDtors(M, Func, /*Priority*/ 1); 238*7a6dacacSDimitry Andric } 239*7a6dacacSDimitry Andric 240*7a6dacacSDimitry Andric // struct fatbin_wrapper { 241*7a6dacacSDimitry Andric // int32_t magic; 242*7a6dacacSDimitry Andric // int32_t version; 243*7a6dacacSDimitry Andric // void *image; 244*7a6dacacSDimitry Andric // void *reserved; 245*7a6dacacSDimitry Andric //}; 246*7a6dacacSDimitry Andric StructType *getFatbinWrapperTy(Module &M) { 247*7a6dacacSDimitry Andric LLVMContext &C = M.getContext(); 248*7a6dacacSDimitry Andric StructType *FatbinTy = StructType::getTypeByName(C, "fatbin_wrapper"); 249*7a6dacacSDimitry Andric if (!FatbinTy) 250*7a6dacacSDimitry Andric FatbinTy = StructType::create( 251*7a6dacacSDimitry Andric "fatbin_wrapper", Type::getInt32Ty(C), Type::getInt32Ty(C), 252*7a6dacacSDimitry Andric PointerType::getUnqual(C), PointerType::getUnqual(C)); 253*7a6dacacSDimitry Andric return FatbinTy; 254*7a6dacacSDimitry Andric } 255*7a6dacacSDimitry Andric 256*7a6dacacSDimitry Andric /// Embed the image \p Image into the module \p M so it can be found by the 257*7a6dacacSDimitry Andric /// runtime. 258*7a6dacacSDimitry Andric GlobalVariable *createFatbinDesc(Module &M, ArrayRef<char> Image, bool IsHIP, 259*7a6dacacSDimitry Andric StringRef Suffix) { 260*7a6dacacSDimitry Andric LLVMContext &C = M.getContext(); 261*7a6dacacSDimitry Andric llvm::Type *Int8PtrTy = PointerType::getUnqual(C); 262*7a6dacacSDimitry Andric llvm::Triple Triple = llvm::Triple(M.getTargetTriple()); 263*7a6dacacSDimitry Andric 264*7a6dacacSDimitry Andric // Create the global string containing the fatbinary. 265*7a6dacacSDimitry Andric StringRef FatbinConstantSection = 266*7a6dacacSDimitry Andric IsHIP ? ".hip_fatbin" 267*7a6dacacSDimitry Andric : (Triple.isMacOSX() ? "__NV_CUDA,__nv_fatbin" : ".nv_fatbin"); 268*7a6dacacSDimitry Andric auto *Data = ConstantDataArray::get(C, Image); 269*7a6dacacSDimitry Andric auto *Fatbin = new GlobalVariable(M, Data->getType(), /*isConstant*/ true, 270*7a6dacacSDimitry Andric GlobalVariable::InternalLinkage, Data, 271*7a6dacacSDimitry Andric ".fatbin_image" + Suffix); 272*7a6dacacSDimitry Andric Fatbin->setSection(FatbinConstantSection); 273*7a6dacacSDimitry Andric 274*7a6dacacSDimitry Andric // Create the fatbinary wrapper 275*7a6dacacSDimitry Andric StringRef FatbinWrapperSection = IsHIP ? ".hipFatBinSegment" 276*7a6dacacSDimitry Andric : Triple.isMacOSX() ? "__NV_CUDA,__fatbin" 277*7a6dacacSDimitry Andric : ".nvFatBinSegment"; 278*7a6dacacSDimitry Andric Constant *FatbinWrapper[] = { 279*7a6dacacSDimitry Andric ConstantInt::get(Type::getInt32Ty(C), IsHIP ? HIPFatMagic : CudaFatMagic), 280*7a6dacacSDimitry Andric ConstantInt::get(Type::getInt32Ty(C), 1), 281*7a6dacacSDimitry Andric ConstantExpr::getPointerBitCastOrAddrSpaceCast(Fatbin, Int8PtrTy), 282*7a6dacacSDimitry Andric ConstantPointerNull::get(PointerType::getUnqual(C))}; 283*7a6dacacSDimitry Andric 284*7a6dacacSDimitry Andric Constant *FatbinInitializer = 285*7a6dacacSDimitry Andric ConstantStruct::get(getFatbinWrapperTy(M), FatbinWrapper); 286*7a6dacacSDimitry Andric 287*7a6dacacSDimitry Andric auto *FatbinDesc = 288*7a6dacacSDimitry Andric new GlobalVariable(M, getFatbinWrapperTy(M), 289*7a6dacacSDimitry Andric /*isConstant*/ true, GlobalValue::InternalLinkage, 290*7a6dacacSDimitry Andric FatbinInitializer, ".fatbin_wrapper" + Suffix); 291*7a6dacacSDimitry Andric FatbinDesc->setSection(FatbinWrapperSection); 292*7a6dacacSDimitry Andric FatbinDesc->setAlignment(Align(8)); 293*7a6dacacSDimitry Andric 294*7a6dacacSDimitry Andric return FatbinDesc; 295*7a6dacacSDimitry Andric } 296*7a6dacacSDimitry Andric 297*7a6dacacSDimitry Andric /// Create the register globals function. We will iterate all of the offloading 298*7a6dacacSDimitry Andric /// entries stored at the begin / end symbols and register them according to 299*7a6dacacSDimitry Andric /// their type. This creates the following function in IR: 300*7a6dacacSDimitry Andric /// 301*7a6dacacSDimitry Andric /// extern struct __tgt_offload_entry __start_cuda_offloading_entries; 302*7a6dacacSDimitry Andric /// extern struct __tgt_offload_entry __stop_cuda_offloading_entries; 303*7a6dacacSDimitry Andric /// 304*7a6dacacSDimitry Andric /// extern void __cudaRegisterFunction(void **, void *, void *, void *, int, 305*7a6dacacSDimitry Andric /// void *, void *, void *, void *, int *); 306*7a6dacacSDimitry Andric /// extern void __cudaRegisterVar(void **, void *, void *, void *, int32_t, 307*7a6dacacSDimitry Andric /// int64_t, int32_t, int32_t); 308*7a6dacacSDimitry Andric /// 309*7a6dacacSDimitry Andric /// void __cudaRegisterTest(void **fatbinHandle) { 310*7a6dacacSDimitry Andric /// for (struct __tgt_offload_entry *entry = &__start_cuda_offloading_entries; 311*7a6dacacSDimitry Andric /// entry != &__stop_cuda_offloading_entries; ++entry) { 312*7a6dacacSDimitry Andric /// if (!entry->size) 313*7a6dacacSDimitry Andric /// __cudaRegisterFunction(fatbinHandle, entry->addr, entry->name, 314*7a6dacacSDimitry Andric /// entry->name, -1, 0, 0, 0, 0, 0); 315*7a6dacacSDimitry Andric /// else 316*7a6dacacSDimitry Andric /// __cudaRegisterVar(fatbinHandle, entry->addr, entry->name, entry->name, 317*7a6dacacSDimitry Andric /// 0, entry->size, 0, 0); 318*7a6dacacSDimitry Andric /// } 319*7a6dacacSDimitry Andric /// } 320*7a6dacacSDimitry Andric Function *createRegisterGlobalsFunction(Module &M, bool IsHIP, 321*7a6dacacSDimitry Andric EntryArrayTy EntryArray, 322*7a6dacacSDimitry Andric StringRef Suffix, 323*7a6dacacSDimitry Andric bool EmitSurfacesAndTextures) { 324*7a6dacacSDimitry Andric LLVMContext &C = M.getContext(); 325*7a6dacacSDimitry Andric auto [EntriesB, EntriesE] = EntryArray; 326*7a6dacacSDimitry Andric 327*7a6dacacSDimitry Andric // Get the __cudaRegisterFunction function declaration. 328*7a6dacacSDimitry Andric PointerType *Int8PtrTy = PointerType::get(C, 0); 329*7a6dacacSDimitry Andric PointerType *Int8PtrPtrTy = PointerType::get(C, 0); 330*7a6dacacSDimitry Andric PointerType *Int32PtrTy = PointerType::get(C, 0); 331*7a6dacacSDimitry Andric auto *RegFuncTy = FunctionType::get( 332*7a6dacacSDimitry Andric Type::getInt32Ty(C), 333*7a6dacacSDimitry Andric {Int8PtrPtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy, Type::getInt32Ty(C), 334*7a6dacacSDimitry Andric Int8PtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy, Int32PtrTy}, 335*7a6dacacSDimitry Andric /*isVarArg*/ false); 336*7a6dacacSDimitry Andric FunctionCallee RegFunc = M.getOrInsertFunction( 337*7a6dacacSDimitry Andric IsHIP ? "__hipRegisterFunction" : "__cudaRegisterFunction", RegFuncTy); 338*7a6dacacSDimitry Andric 339*7a6dacacSDimitry Andric // Get the __cudaRegisterVar function declaration. 340*7a6dacacSDimitry Andric auto *RegVarTy = FunctionType::get( 341*7a6dacacSDimitry Andric Type::getVoidTy(C), 342*7a6dacacSDimitry Andric {Int8PtrPtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy, Type::getInt32Ty(C), 343*7a6dacacSDimitry Andric getSizeTTy(M), Type::getInt32Ty(C), Type::getInt32Ty(C)}, 344*7a6dacacSDimitry Andric /*isVarArg*/ false); 345*7a6dacacSDimitry Andric FunctionCallee RegVar = M.getOrInsertFunction( 346*7a6dacacSDimitry Andric IsHIP ? "__hipRegisterVar" : "__cudaRegisterVar", RegVarTy); 347*7a6dacacSDimitry Andric 348*7a6dacacSDimitry Andric // Get the __cudaRegisterSurface function declaration. 349*7a6dacacSDimitry Andric FunctionType *RegSurfaceTy = 350*7a6dacacSDimitry Andric FunctionType::get(Type::getVoidTy(C), 351*7a6dacacSDimitry Andric {Int8PtrPtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy, 352*7a6dacacSDimitry Andric Type::getInt32Ty(C), Type::getInt32Ty(C)}, 353*7a6dacacSDimitry Andric /*isVarArg=*/false); 354*7a6dacacSDimitry Andric FunctionCallee RegSurface = M.getOrInsertFunction( 355*7a6dacacSDimitry Andric IsHIP ? "__hipRegisterSurface" : "__cudaRegisterSurface", RegSurfaceTy); 356*7a6dacacSDimitry Andric 357*7a6dacacSDimitry Andric // Get the __cudaRegisterTexture function declaration. 358*7a6dacacSDimitry Andric FunctionType *RegTextureTy = FunctionType::get( 359*7a6dacacSDimitry Andric Type::getVoidTy(C), 360*7a6dacacSDimitry Andric {Int8PtrPtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy, Type::getInt32Ty(C), 361*7a6dacacSDimitry Andric Type::getInt32Ty(C), Type::getInt32Ty(C)}, 362*7a6dacacSDimitry Andric /*isVarArg=*/false); 363*7a6dacacSDimitry Andric FunctionCallee RegTexture = M.getOrInsertFunction( 364*7a6dacacSDimitry Andric IsHIP ? "__hipRegisterTexture" : "__cudaRegisterTexture", RegTextureTy); 365*7a6dacacSDimitry Andric 366*7a6dacacSDimitry Andric auto *RegGlobalsTy = FunctionType::get(Type::getVoidTy(C), Int8PtrPtrTy, 367*7a6dacacSDimitry Andric /*isVarArg*/ false); 368*7a6dacacSDimitry Andric auto *RegGlobalsFn = 369*7a6dacacSDimitry Andric Function::Create(RegGlobalsTy, GlobalValue::InternalLinkage, 370*7a6dacacSDimitry Andric IsHIP ? ".hip.globals_reg" : ".cuda.globals_reg", &M); 371*7a6dacacSDimitry Andric RegGlobalsFn->setSection(".text.startup"); 372*7a6dacacSDimitry Andric 373*7a6dacacSDimitry Andric // Create the loop to register all the entries. 374*7a6dacacSDimitry Andric IRBuilder<> Builder(BasicBlock::Create(C, "entry", RegGlobalsFn)); 375*7a6dacacSDimitry Andric auto *EntryBB = BasicBlock::Create(C, "while.entry", RegGlobalsFn); 376*7a6dacacSDimitry Andric auto *IfThenBB = BasicBlock::Create(C, "if.then", RegGlobalsFn); 377*7a6dacacSDimitry Andric auto *IfElseBB = BasicBlock::Create(C, "if.else", RegGlobalsFn); 378*7a6dacacSDimitry Andric auto *SwGlobalBB = BasicBlock::Create(C, "sw.global", RegGlobalsFn); 379*7a6dacacSDimitry Andric auto *SwManagedBB = BasicBlock::Create(C, "sw.managed", RegGlobalsFn); 380*7a6dacacSDimitry Andric auto *SwSurfaceBB = BasicBlock::Create(C, "sw.surface", RegGlobalsFn); 381*7a6dacacSDimitry Andric auto *SwTextureBB = BasicBlock::Create(C, "sw.texture", RegGlobalsFn); 382*7a6dacacSDimitry Andric auto *IfEndBB = BasicBlock::Create(C, "if.end", RegGlobalsFn); 383*7a6dacacSDimitry Andric auto *ExitBB = BasicBlock::Create(C, "while.end", RegGlobalsFn); 384*7a6dacacSDimitry Andric 385*7a6dacacSDimitry Andric auto *EntryCmp = Builder.CreateICmpNE(EntriesB, EntriesE); 386*7a6dacacSDimitry Andric Builder.CreateCondBr(EntryCmp, EntryBB, ExitBB); 387*7a6dacacSDimitry Andric Builder.SetInsertPoint(EntryBB); 388*7a6dacacSDimitry Andric auto *Entry = Builder.CreatePHI(PointerType::getUnqual(C), 2, "entry"); 389*7a6dacacSDimitry Andric auto *AddrPtr = 390*7a6dacacSDimitry Andric Builder.CreateInBoundsGEP(offloading::getEntryTy(M), Entry, 391*7a6dacacSDimitry Andric {ConstantInt::get(getSizeTTy(M), 0), 392*7a6dacacSDimitry Andric ConstantInt::get(Type::getInt32Ty(C), 0)}); 393*7a6dacacSDimitry Andric auto *Addr = Builder.CreateLoad(Int8PtrTy, AddrPtr, "addr"); 394*7a6dacacSDimitry Andric auto *NamePtr = 395*7a6dacacSDimitry Andric Builder.CreateInBoundsGEP(offloading::getEntryTy(M), Entry, 396*7a6dacacSDimitry Andric {ConstantInt::get(getSizeTTy(M), 0), 397*7a6dacacSDimitry Andric ConstantInt::get(Type::getInt32Ty(C), 1)}); 398*7a6dacacSDimitry Andric auto *Name = Builder.CreateLoad(Int8PtrTy, NamePtr, "name"); 399*7a6dacacSDimitry Andric auto *SizePtr = 400*7a6dacacSDimitry Andric Builder.CreateInBoundsGEP(offloading::getEntryTy(M), Entry, 401*7a6dacacSDimitry Andric {ConstantInt::get(getSizeTTy(M), 0), 402*7a6dacacSDimitry Andric ConstantInt::get(Type::getInt32Ty(C), 2)}); 403*7a6dacacSDimitry Andric auto *Size = Builder.CreateLoad(getSizeTTy(M), SizePtr, "size"); 404*7a6dacacSDimitry Andric auto *FlagsPtr = 405*7a6dacacSDimitry Andric Builder.CreateInBoundsGEP(offloading::getEntryTy(M), Entry, 406*7a6dacacSDimitry Andric {ConstantInt::get(getSizeTTy(M), 0), 407*7a6dacacSDimitry Andric ConstantInt::get(Type::getInt32Ty(C), 3)}); 408*7a6dacacSDimitry Andric auto *Flags = Builder.CreateLoad(Type::getInt32Ty(C), FlagsPtr, "flags"); 409*7a6dacacSDimitry Andric auto *DataPtr = 410*7a6dacacSDimitry Andric Builder.CreateInBoundsGEP(offloading::getEntryTy(M), Entry, 411*7a6dacacSDimitry Andric {ConstantInt::get(getSizeTTy(M), 0), 412*7a6dacacSDimitry Andric ConstantInt::get(Type::getInt32Ty(C), 4)}); 413*7a6dacacSDimitry Andric auto *Data = Builder.CreateLoad(Type::getInt32Ty(C), DataPtr, "textype"); 414*7a6dacacSDimitry Andric auto *Kind = Builder.CreateAnd( 415*7a6dacacSDimitry Andric Flags, ConstantInt::get(Type::getInt32Ty(C), 0x7), "type"); 416*7a6dacacSDimitry Andric 417*7a6dacacSDimitry Andric // Extract the flags stored in the bit-field and convert them to C booleans. 418*7a6dacacSDimitry Andric auto *ExternBit = Builder.CreateAnd( 419*7a6dacacSDimitry Andric Flags, ConstantInt::get(Type::getInt32Ty(C), 420*7a6dacacSDimitry Andric llvm::offloading::OffloadGlobalExtern)); 421*7a6dacacSDimitry Andric auto *Extern = Builder.CreateLShr( 422*7a6dacacSDimitry Andric ExternBit, ConstantInt::get(Type::getInt32Ty(C), 3), "extern"); 423*7a6dacacSDimitry Andric auto *ConstantBit = Builder.CreateAnd( 424*7a6dacacSDimitry Andric Flags, ConstantInt::get(Type::getInt32Ty(C), 425*7a6dacacSDimitry Andric llvm::offloading::OffloadGlobalConstant)); 426*7a6dacacSDimitry Andric auto *Const = Builder.CreateLShr( 427*7a6dacacSDimitry Andric ConstantBit, ConstantInt::get(Type::getInt32Ty(C), 4), "constant"); 428*7a6dacacSDimitry Andric auto *NormalizedBit = Builder.CreateAnd( 429*7a6dacacSDimitry Andric Flags, ConstantInt::get(Type::getInt32Ty(C), 430*7a6dacacSDimitry Andric llvm::offloading::OffloadGlobalNormalized)); 431*7a6dacacSDimitry Andric auto *Normalized = Builder.CreateLShr( 432*7a6dacacSDimitry Andric NormalizedBit, ConstantInt::get(Type::getInt32Ty(C), 5), "normalized"); 433*7a6dacacSDimitry Andric auto *FnCond = 434*7a6dacacSDimitry Andric Builder.CreateICmpEQ(Size, ConstantInt::getNullValue(getSizeTTy(M))); 435*7a6dacacSDimitry Andric Builder.CreateCondBr(FnCond, IfThenBB, IfElseBB); 436*7a6dacacSDimitry Andric 437*7a6dacacSDimitry Andric // Create kernel registration code. 438*7a6dacacSDimitry Andric Builder.SetInsertPoint(IfThenBB); 439*7a6dacacSDimitry Andric Builder.CreateCall(RegFunc, {RegGlobalsFn->arg_begin(), Addr, Name, Name, 440*7a6dacacSDimitry Andric ConstantInt::get(Type::getInt32Ty(C), -1), 441*7a6dacacSDimitry Andric ConstantPointerNull::get(Int8PtrTy), 442*7a6dacacSDimitry Andric ConstantPointerNull::get(Int8PtrTy), 443*7a6dacacSDimitry Andric ConstantPointerNull::get(Int8PtrTy), 444*7a6dacacSDimitry Andric ConstantPointerNull::get(Int8PtrTy), 445*7a6dacacSDimitry Andric ConstantPointerNull::get(Int32PtrTy)}); 446*7a6dacacSDimitry Andric Builder.CreateBr(IfEndBB); 447*7a6dacacSDimitry Andric Builder.SetInsertPoint(IfElseBB); 448*7a6dacacSDimitry Andric 449*7a6dacacSDimitry Andric auto *Switch = Builder.CreateSwitch(Kind, IfEndBB); 450*7a6dacacSDimitry Andric // Create global variable registration code. 451*7a6dacacSDimitry Andric Builder.SetInsertPoint(SwGlobalBB); 452*7a6dacacSDimitry Andric Builder.CreateCall(RegVar, 453*7a6dacacSDimitry Andric {RegGlobalsFn->arg_begin(), Addr, Name, Name, Extern, Size, 454*7a6dacacSDimitry Andric Const, ConstantInt::get(Type::getInt32Ty(C), 0)}); 455*7a6dacacSDimitry Andric Builder.CreateBr(IfEndBB); 456*7a6dacacSDimitry Andric Switch->addCase(Builder.getInt32(llvm::offloading::OffloadGlobalEntry), 457*7a6dacacSDimitry Andric SwGlobalBB); 458*7a6dacacSDimitry Andric 459*7a6dacacSDimitry Andric // Create managed variable registration code. 460*7a6dacacSDimitry Andric Builder.SetInsertPoint(SwManagedBB); 461*7a6dacacSDimitry Andric Builder.CreateBr(IfEndBB); 462*7a6dacacSDimitry Andric Switch->addCase(Builder.getInt32(llvm::offloading::OffloadGlobalManagedEntry), 463*7a6dacacSDimitry Andric SwManagedBB); 464*7a6dacacSDimitry Andric // Create surface variable registration code. 465*7a6dacacSDimitry Andric Builder.SetInsertPoint(SwSurfaceBB); 466*7a6dacacSDimitry Andric if (EmitSurfacesAndTextures) 467*7a6dacacSDimitry Andric Builder.CreateCall(RegSurface, {RegGlobalsFn->arg_begin(), Addr, Name, Name, 468*7a6dacacSDimitry Andric Data, Extern}); 469*7a6dacacSDimitry Andric Builder.CreateBr(IfEndBB); 470*7a6dacacSDimitry Andric Switch->addCase(Builder.getInt32(llvm::offloading::OffloadGlobalSurfaceEntry), 471*7a6dacacSDimitry Andric SwSurfaceBB); 472*7a6dacacSDimitry Andric 473*7a6dacacSDimitry Andric // Create texture variable registration code. 474*7a6dacacSDimitry Andric Builder.SetInsertPoint(SwTextureBB); 475*7a6dacacSDimitry Andric if (EmitSurfacesAndTextures) 476*7a6dacacSDimitry Andric Builder.CreateCall(RegTexture, {RegGlobalsFn->arg_begin(), Addr, Name, Name, 477*7a6dacacSDimitry Andric Data, Normalized, Extern}); 478*7a6dacacSDimitry Andric Builder.CreateBr(IfEndBB); 479*7a6dacacSDimitry Andric Switch->addCase(Builder.getInt32(llvm::offloading::OffloadGlobalTextureEntry), 480*7a6dacacSDimitry Andric SwTextureBB); 481*7a6dacacSDimitry Andric 482*7a6dacacSDimitry Andric Builder.SetInsertPoint(IfEndBB); 483*7a6dacacSDimitry Andric auto *NewEntry = Builder.CreateInBoundsGEP( 484*7a6dacacSDimitry Andric offloading::getEntryTy(M), Entry, ConstantInt::get(getSizeTTy(M), 1)); 485*7a6dacacSDimitry Andric auto *Cmp = Builder.CreateICmpEQ( 486*7a6dacacSDimitry Andric NewEntry, 487*7a6dacacSDimitry Andric ConstantExpr::getInBoundsGetElementPtr( 488*7a6dacacSDimitry Andric ArrayType::get(offloading::getEntryTy(M), 0), EntriesE, 489*7a6dacacSDimitry Andric ArrayRef<Constant *>({ConstantInt::get(getSizeTTy(M), 0), 490*7a6dacacSDimitry Andric ConstantInt::get(getSizeTTy(M), 0)}))); 491*7a6dacacSDimitry Andric Entry->addIncoming( 492*7a6dacacSDimitry Andric ConstantExpr::getInBoundsGetElementPtr( 493*7a6dacacSDimitry Andric ArrayType::get(offloading::getEntryTy(M), 0), EntriesB, 494*7a6dacacSDimitry Andric ArrayRef<Constant *>({ConstantInt::get(getSizeTTy(M), 0), 495*7a6dacacSDimitry Andric ConstantInt::get(getSizeTTy(M), 0)})), 496*7a6dacacSDimitry Andric &RegGlobalsFn->getEntryBlock()); 497*7a6dacacSDimitry Andric Entry->addIncoming(NewEntry, IfEndBB); 498*7a6dacacSDimitry Andric Builder.CreateCondBr(Cmp, ExitBB, EntryBB); 499*7a6dacacSDimitry Andric Builder.SetInsertPoint(ExitBB); 500*7a6dacacSDimitry Andric Builder.CreateRetVoid(); 501*7a6dacacSDimitry Andric 502*7a6dacacSDimitry Andric return RegGlobalsFn; 503*7a6dacacSDimitry Andric } 504*7a6dacacSDimitry Andric 505*7a6dacacSDimitry Andric // Create the constructor and destructor to register the fatbinary with the CUDA 506*7a6dacacSDimitry Andric // runtime. 507*7a6dacacSDimitry Andric void createRegisterFatbinFunction(Module &M, GlobalVariable *FatbinDesc, 508*7a6dacacSDimitry Andric bool IsHIP, EntryArrayTy EntryArray, 509*7a6dacacSDimitry Andric StringRef Suffix, 510*7a6dacacSDimitry Andric bool EmitSurfacesAndTextures) { 511*7a6dacacSDimitry Andric LLVMContext &C = M.getContext(); 512*7a6dacacSDimitry Andric auto *CtorFuncTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false); 513*7a6dacacSDimitry Andric auto *CtorFunc = Function::Create( 514*7a6dacacSDimitry Andric CtorFuncTy, GlobalValue::InternalLinkage, 515*7a6dacacSDimitry Andric (IsHIP ? ".hip.fatbin_reg" : ".cuda.fatbin_reg") + Suffix, &M); 516*7a6dacacSDimitry Andric CtorFunc->setSection(".text.startup"); 517*7a6dacacSDimitry Andric 518*7a6dacacSDimitry Andric auto *DtorFuncTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false); 519*7a6dacacSDimitry Andric auto *DtorFunc = Function::Create( 520*7a6dacacSDimitry Andric DtorFuncTy, GlobalValue::InternalLinkage, 521*7a6dacacSDimitry Andric (IsHIP ? ".hip.fatbin_unreg" : ".cuda.fatbin_unreg") + Suffix, &M); 522*7a6dacacSDimitry Andric DtorFunc->setSection(".text.startup"); 523*7a6dacacSDimitry Andric 524*7a6dacacSDimitry Andric auto *PtrTy = PointerType::getUnqual(C); 525*7a6dacacSDimitry Andric 526*7a6dacacSDimitry Andric // Get the __cudaRegisterFatBinary function declaration. 527*7a6dacacSDimitry Andric auto *RegFatTy = FunctionType::get(PtrTy, PtrTy, /*isVarArg=*/false); 528*7a6dacacSDimitry Andric FunctionCallee RegFatbin = M.getOrInsertFunction( 529*7a6dacacSDimitry Andric IsHIP ? "__hipRegisterFatBinary" : "__cudaRegisterFatBinary", RegFatTy); 530*7a6dacacSDimitry Andric // Get the __cudaRegisterFatBinaryEnd function declaration. 531*7a6dacacSDimitry Andric auto *RegFatEndTy = 532*7a6dacacSDimitry Andric FunctionType::get(Type::getVoidTy(C), PtrTy, /*isVarArg=*/false); 533*7a6dacacSDimitry Andric FunctionCallee RegFatbinEnd = 534*7a6dacacSDimitry Andric M.getOrInsertFunction("__cudaRegisterFatBinaryEnd", RegFatEndTy); 535*7a6dacacSDimitry Andric // Get the __cudaUnregisterFatBinary function declaration. 536*7a6dacacSDimitry Andric auto *UnregFatTy = 537*7a6dacacSDimitry Andric FunctionType::get(Type::getVoidTy(C), PtrTy, /*isVarArg=*/false); 538*7a6dacacSDimitry Andric FunctionCallee UnregFatbin = M.getOrInsertFunction( 539*7a6dacacSDimitry Andric IsHIP ? "__hipUnregisterFatBinary" : "__cudaUnregisterFatBinary", 540*7a6dacacSDimitry Andric UnregFatTy); 541*7a6dacacSDimitry Andric 542*7a6dacacSDimitry Andric auto *AtExitTy = 543*7a6dacacSDimitry Andric FunctionType::get(Type::getInt32Ty(C), PtrTy, /*isVarArg=*/false); 544*7a6dacacSDimitry Andric FunctionCallee AtExit = M.getOrInsertFunction("atexit", AtExitTy); 545*7a6dacacSDimitry Andric 546*7a6dacacSDimitry Andric auto *BinaryHandleGlobal = new llvm::GlobalVariable( 547*7a6dacacSDimitry Andric M, PtrTy, false, llvm::GlobalValue::InternalLinkage, 548*7a6dacacSDimitry Andric llvm::ConstantPointerNull::get(PtrTy), 549*7a6dacacSDimitry Andric (IsHIP ? ".hip.binary_handle" : ".cuda.binary_handle") + Suffix); 550*7a6dacacSDimitry Andric 551*7a6dacacSDimitry Andric // Create the constructor to register this image with the runtime. 552*7a6dacacSDimitry Andric IRBuilder<> CtorBuilder(BasicBlock::Create(C, "entry", CtorFunc)); 553*7a6dacacSDimitry Andric CallInst *Handle = CtorBuilder.CreateCall( 554*7a6dacacSDimitry Andric RegFatbin, 555*7a6dacacSDimitry Andric ConstantExpr::getPointerBitCastOrAddrSpaceCast(FatbinDesc, PtrTy)); 556*7a6dacacSDimitry Andric CtorBuilder.CreateAlignedStore( 557*7a6dacacSDimitry Andric Handle, BinaryHandleGlobal, 558*7a6dacacSDimitry Andric Align(M.getDataLayout().getPointerTypeSize(PtrTy))); 559*7a6dacacSDimitry Andric CtorBuilder.CreateCall(createRegisterGlobalsFunction(M, IsHIP, EntryArray, 560*7a6dacacSDimitry Andric Suffix, 561*7a6dacacSDimitry Andric EmitSurfacesAndTextures), 562*7a6dacacSDimitry Andric Handle); 563*7a6dacacSDimitry Andric if (!IsHIP) 564*7a6dacacSDimitry Andric CtorBuilder.CreateCall(RegFatbinEnd, Handle); 565*7a6dacacSDimitry Andric CtorBuilder.CreateCall(AtExit, DtorFunc); 566*7a6dacacSDimitry Andric CtorBuilder.CreateRetVoid(); 567*7a6dacacSDimitry Andric 568*7a6dacacSDimitry Andric // Create the destructor to unregister the image with the runtime. We cannot 569*7a6dacacSDimitry Andric // use a standard global destructor after CUDA 9.2 so this must be called by 570*7a6dacacSDimitry Andric // `atexit()` intead. 571*7a6dacacSDimitry Andric IRBuilder<> DtorBuilder(BasicBlock::Create(C, "entry", DtorFunc)); 572*7a6dacacSDimitry Andric LoadInst *BinaryHandle = DtorBuilder.CreateAlignedLoad( 573*7a6dacacSDimitry Andric PtrTy, BinaryHandleGlobal, 574*7a6dacacSDimitry Andric Align(M.getDataLayout().getPointerTypeSize(PtrTy))); 575*7a6dacacSDimitry Andric DtorBuilder.CreateCall(UnregFatbin, BinaryHandle); 576*7a6dacacSDimitry Andric DtorBuilder.CreateRetVoid(); 577*7a6dacacSDimitry Andric 578*7a6dacacSDimitry Andric // Add this function to constructors. 579*7a6dacacSDimitry Andric appendToGlobalCtors(M, CtorFunc, /*Priority*/ 1); 580*7a6dacacSDimitry Andric } 581*7a6dacacSDimitry Andric } // namespace 582*7a6dacacSDimitry Andric 583*7a6dacacSDimitry Andric Error offloading::wrapOpenMPBinaries(Module &M, ArrayRef<ArrayRef<char>> Images, 584*7a6dacacSDimitry Andric EntryArrayTy EntryArray, 585*7a6dacacSDimitry Andric llvm::StringRef Suffix) { 586*7a6dacacSDimitry Andric GlobalVariable *Desc = createBinDesc(M, Images, EntryArray, Suffix); 587*7a6dacacSDimitry Andric if (!Desc) 588*7a6dacacSDimitry Andric return createStringError(inconvertibleErrorCode(), 589*7a6dacacSDimitry Andric "No binary descriptors created."); 590*7a6dacacSDimitry Andric createRegisterFunction(M, Desc, Suffix); 591*7a6dacacSDimitry Andric createUnregisterFunction(M, Desc, Suffix); 592*7a6dacacSDimitry Andric return Error::success(); 593*7a6dacacSDimitry Andric } 594*7a6dacacSDimitry Andric 595*7a6dacacSDimitry Andric Error offloading::wrapCudaBinary(Module &M, ArrayRef<char> Image, 596*7a6dacacSDimitry Andric EntryArrayTy EntryArray, 597*7a6dacacSDimitry Andric llvm::StringRef Suffix, 598*7a6dacacSDimitry Andric bool EmitSurfacesAndTextures) { 599*7a6dacacSDimitry Andric GlobalVariable *Desc = createFatbinDesc(M, Image, /*IsHip=*/false, Suffix); 600*7a6dacacSDimitry Andric if (!Desc) 601*7a6dacacSDimitry Andric return createStringError(inconvertibleErrorCode(), 602*7a6dacacSDimitry Andric "No fatbin section created."); 603*7a6dacacSDimitry Andric 604*7a6dacacSDimitry Andric createRegisterFatbinFunction(M, Desc, /*IsHip=*/false, EntryArray, Suffix, 605*7a6dacacSDimitry Andric EmitSurfacesAndTextures); 606*7a6dacacSDimitry Andric return Error::success(); 607*7a6dacacSDimitry Andric } 608*7a6dacacSDimitry Andric 609*7a6dacacSDimitry Andric Error offloading::wrapHIPBinary(Module &M, ArrayRef<char> Image, 610*7a6dacacSDimitry Andric EntryArrayTy EntryArray, llvm::StringRef Suffix, 611*7a6dacacSDimitry Andric bool EmitSurfacesAndTextures) { 612*7a6dacacSDimitry Andric GlobalVariable *Desc = createFatbinDesc(M, Image, /*IsHip=*/true, Suffix); 613*7a6dacacSDimitry Andric if (!Desc) 614*7a6dacacSDimitry Andric return createStringError(inconvertibleErrorCode(), 615*7a6dacacSDimitry Andric "No fatbin section created."); 616*7a6dacacSDimitry Andric 617*7a6dacacSDimitry Andric createRegisterFatbinFunction(M, Desc, /*IsHip=*/true, EntryArray, Suffix, 618*7a6dacacSDimitry Andric EmitSurfacesAndTextures); 619*7a6dacacSDimitry Andric return Error::success(); 620*7a6dacacSDimitry Andric } 621