1 //===---------- DeviceOffload.cpp - Device Offloading------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements offloading to CUDA devices. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "DeviceOffload.h" 14 15 #include "clang/Basic/TargetOptions.h" 16 #include "clang/CodeGen/ModuleBuilder.h" 17 #include "clang/Frontend/CompilerInstance.h" 18 19 #include "llvm/IR/LegacyPassManager.h" 20 #include "llvm/MC/TargetRegistry.h" 21 #include "llvm/Target/TargetMachine.h" 22 23 namespace clang { 24 25 IncrementalCUDADeviceParser::IncrementalCUDADeviceParser( 26 Interpreter &Interp, std::unique_ptr<CompilerInstance> Instance, 27 IncrementalParser &HostParser, llvm::LLVMContext &LLVMCtx, 28 llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS, 29 llvm::Error &Err) 30 : IncrementalParser(Interp, std::move(Instance), LLVMCtx, Err), 31 HostParser(HostParser), VFS(FS) { 32 if (Err) 33 return; 34 StringRef Arch = CI->getTargetOpts().CPU; 35 if (!Arch.starts_with("sm_") || Arch.substr(3).getAsInteger(10, SMVersion)) { 36 Err = llvm::joinErrors(std::move(Err), llvm::make_error<llvm::StringError>( 37 "Invalid CUDA architecture", 38 llvm::inconvertibleErrorCode())); 39 return; 40 } 41 } 42 43 llvm::Expected<PartialTranslationUnit &> 44 IncrementalCUDADeviceParser::Parse(llvm::StringRef Input) { 45 auto PTU = IncrementalParser::Parse(Input); 46 if (!PTU) 47 return PTU.takeError(); 48 49 auto PTX = GeneratePTX(); 50 if (!PTX) 51 return PTX.takeError(); 52 53 auto Err = GenerateFatbinary(); 54 if (Err) 55 return std::move(Err); 56 57 std::string FatbinFileName = 58 "/incr_module_" + std::to_string(PTUs.size()) + ".fatbin"; 59 VFS->addFile(FatbinFileName, 0, 60 llvm::MemoryBuffer::getMemBuffer( 61 llvm::StringRef(FatbinContent.data(), FatbinContent.size()), 62 "", false)); 63 64 HostParser.getCI()->getCodeGenOpts().CudaGpuBinaryFileName = FatbinFileName; 65 66 FatbinContent.clear(); 67 68 return PTU; 69 } 70 71 llvm::Expected<llvm::StringRef> IncrementalCUDADeviceParser::GeneratePTX() { 72 auto &PTU = PTUs.back(); 73 std::string Error; 74 75 const llvm::Target *Target = llvm::TargetRegistry::lookupTarget( 76 PTU.TheModule->getTargetTriple(), Error); 77 if (!Target) 78 return llvm::make_error<llvm::StringError>(std::move(Error), 79 std::error_code()); 80 llvm::TargetOptions TO = llvm::TargetOptions(); 81 llvm::TargetMachine *TargetMachine = Target->createTargetMachine( 82 PTU.TheModule->getTargetTriple(), getCI()->getTargetOpts().CPU, "", TO, 83 llvm::Reloc::Model::PIC_); 84 PTU.TheModule->setDataLayout(TargetMachine->createDataLayout()); 85 86 PTXCode.clear(); 87 llvm::raw_svector_ostream dest(PTXCode); 88 89 llvm::legacy::PassManager PM; 90 if (TargetMachine->addPassesToEmitFile(PM, dest, nullptr, 91 llvm::CGFT_AssemblyFile)) { 92 return llvm::make_error<llvm::StringError>( 93 "NVPTX backend cannot produce PTX code.", 94 llvm::inconvertibleErrorCode()); 95 } 96 97 if (!PM.run(*PTU.TheModule)) 98 return llvm::make_error<llvm::StringError>("Failed to emit PTX code.", 99 llvm::inconvertibleErrorCode()); 100 101 PTXCode += '\0'; 102 while (PTXCode.size() % 8) 103 PTXCode += '\0'; 104 return PTXCode.str(); 105 } 106 107 llvm::Error IncrementalCUDADeviceParser::GenerateFatbinary() { 108 enum FatBinFlags { 109 AddressSize64 = 0x01, 110 HasDebugInfo = 0x02, 111 ProducerCuda = 0x04, 112 HostLinux = 0x10, 113 HostMac = 0x20, 114 HostWindows = 0x40 115 }; 116 117 struct FatBinInnerHeader { 118 uint16_t Kind; // 0x00 119 uint16_t unknown02; // 0x02 120 uint32_t HeaderSize; // 0x04 121 uint32_t DataSize; // 0x08 122 uint32_t unknown0c; // 0x0c 123 uint32_t CompressedSize; // 0x10 124 uint32_t SubHeaderSize; // 0x14 125 uint16_t VersionMinor; // 0x18 126 uint16_t VersionMajor; // 0x1a 127 uint32_t CudaArch; // 0x1c 128 uint32_t unknown20; // 0x20 129 uint32_t unknown24; // 0x24 130 uint32_t Flags; // 0x28 131 uint32_t unknown2c; // 0x2c 132 uint32_t unknown30; // 0x30 133 uint32_t unknown34; // 0x34 134 uint32_t UncompressedSize; // 0x38 135 uint32_t unknown3c; // 0x3c 136 uint32_t unknown40; // 0x40 137 uint32_t unknown44; // 0x44 138 FatBinInnerHeader(uint32_t DataSize, uint32_t CudaArch, uint32_t Flags) 139 : Kind(1 /*PTX*/), unknown02(0x0101), HeaderSize(sizeof(*this)), 140 DataSize(DataSize), unknown0c(0), CompressedSize(0), 141 SubHeaderSize(HeaderSize - 8), VersionMinor(2), VersionMajor(4), 142 CudaArch(CudaArch), unknown20(0), unknown24(0), Flags(Flags), 143 unknown2c(0), unknown30(0), unknown34(0), UncompressedSize(0), 144 unknown3c(0), unknown40(0), unknown44(0) {} 145 }; 146 147 struct FatBinHeader { 148 uint32_t Magic; // 0x00 149 uint16_t Version; // 0x04 150 uint16_t HeaderSize; // 0x06 151 uint32_t DataSize; // 0x08 152 uint32_t unknown0c; // 0x0c 153 public: 154 FatBinHeader(uint32_t DataSize) 155 : Magic(0xba55ed50), Version(1), HeaderSize(sizeof(*this)), 156 DataSize(DataSize), unknown0c(0) {} 157 }; 158 159 FatBinHeader OuterHeader(sizeof(FatBinInnerHeader) + PTXCode.size()); 160 FatbinContent.append((char *)&OuterHeader, 161 ((char *)&OuterHeader) + OuterHeader.HeaderSize); 162 163 FatBinInnerHeader InnerHeader(PTXCode.size(), SMVersion, 164 FatBinFlags::AddressSize64 | 165 FatBinFlags::HostLinux); 166 FatbinContent.append((char *)&InnerHeader, 167 ((char *)&InnerHeader) + InnerHeader.HeaderSize); 168 169 FatbinContent.append(PTXCode.begin(), PTXCode.end()); 170 171 return llvm::Error::success(); 172 } 173 174 IncrementalCUDADeviceParser::~IncrementalCUDADeviceParser() {} 175 176 } // namespace clang 177