1 //===---------- DeviceOffload.cpp - Device Offloading------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements offloading to CUDA devices. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "DeviceOffload.h" 14 15 #include "clang/Basic/TargetOptions.h" 16 #include "clang/CodeGen/ModuleBuilder.h" 17 #include "clang/Frontend/CompilerInstance.h" 18 19 #include "llvm/IR/LegacyPassManager.h" 20 #include "llvm/IR/Module.h" 21 #include "llvm/MC/TargetRegistry.h" 22 #include "llvm/Target/TargetMachine.h" 23 24 namespace clang { 25 26 IncrementalCUDADeviceParser::IncrementalCUDADeviceParser( 27 Interpreter &Interp, std::unique_ptr<CompilerInstance> Instance, 28 IncrementalParser &HostParser, llvm::LLVMContext &LLVMCtx, 29 llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS, 30 llvm::Error &Err) 31 : IncrementalParser(Interp, std::move(Instance), LLVMCtx, Err), 32 HostParser(HostParser), VFS(FS) { 33 if (Err) 34 return; 35 StringRef Arch = CI->getTargetOpts().CPU; 36 if (!Arch.starts_with("sm_") || Arch.substr(3).getAsInteger(10, SMVersion)) { 37 Err = llvm::joinErrors(std::move(Err), llvm::make_error<llvm::StringError>( 38 "Invalid CUDA architecture", 39 llvm::inconvertibleErrorCode())); 40 return; 41 } 42 } 43 44 llvm::Expected<PartialTranslationUnit &> 45 IncrementalCUDADeviceParser::Parse(llvm::StringRef Input) { 46 auto PTU = IncrementalParser::Parse(Input); 47 if (!PTU) 48 return PTU.takeError(); 49 50 auto PTX = GeneratePTX(); 51 if (!PTX) 52 return PTX.takeError(); 53 54 auto Err = GenerateFatbinary(); 55 if (Err) 56 return std::move(Err); 57 58 std::string FatbinFileName = 59 "/incr_module_" + std::to_string(PTUs.size()) + ".fatbin"; 60 VFS->addFile(FatbinFileName, 0, 61 llvm::MemoryBuffer::getMemBuffer( 62 llvm::StringRef(FatbinContent.data(), FatbinContent.size()), 63 "", false)); 64 65 HostParser.getCI()->getCodeGenOpts().CudaGpuBinaryFileName = FatbinFileName; 66 67 FatbinContent.clear(); 68 69 return PTU; 70 } 71 72 llvm::Expected<llvm::StringRef> IncrementalCUDADeviceParser::GeneratePTX() { 73 auto &PTU = PTUs.back(); 74 std::string Error; 75 76 const llvm::Target *Target = llvm::TargetRegistry::lookupTarget( 77 PTU.TheModule->getTargetTriple(), Error); 78 if (!Target) 79 return llvm::make_error<llvm::StringError>(std::move(Error), 80 std::error_code()); 81 llvm::TargetOptions TO = llvm::TargetOptions(); 82 llvm::TargetMachine *TargetMachine = Target->createTargetMachine( 83 PTU.TheModule->getTargetTriple(), getCI()->getTargetOpts().CPU, "", TO, 84 llvm::Reloc::Model::PIC_); 85 PTU.TheModule->setDataLayout(TargetMachine->createDataLayout()); 86 87 PTXCode.clear(); 88 llvm::raw_svector_ostream dest(PTXCode); 89 90 llvm::legacy::PassManager PM; 91 if (TargetMachine->addPassesToEmitFile(PM, dest, nullptr, 92 llvm::CodeGenFileType::AssemblyFile)) { 93 return llvm::make_error<llvm::StringError>( 94 "NVPTX backend cannot produce PTX code.", 95 llvm::inconvertibleErrorCode()); 96 } 97 98 if (!PM.run(*PTU.TheModule)) 99 return llvm::make_error<llvm::StringError>("Failed to emit PTX code.", 100 llvm::inconvertibleErrorCode()); 101 102 PTXCode += '\0'; 103 while (PTXCode.size() % 8) 104 PTXCode += '\0'; 105 return PTXCode.str(); 106 } 107 108 llvm::Error IncrementalCUDADeviceParser::GenerateFatbinary() { 109 enum FatBinFlags { 110 AddressSize64 = 0x01, 111 HasDebugInfo = 0x02, 112 ProducerCuda = 0x04, 113 HostLinux = 0x10, 114 HostMac = 0x20, 115 HostWindows = 0x40 116 }; 117 118 struct FatBinInnerHeader { 119 uint16_t Kind; // 0x00 120 uint16_t unknown02; // 0x02 121 uint32_t HeaderSize; // 0x04 122 uint32_t DataSize; // 0x08 123 uint32_t unknown0c; // 0x0c 124 uint32_t CompressedSize; // 0x10 125 uint32_t SubHeaderSize; // 0x14 126 uint16_t VersionMinor; // 0x18 127 uint16_t VersionMajor; // 0x1a 128 uint32_t CudaArch; // 0x1c 129 uint32_t unknown20; // 0x20 130 uint32_t unknown24; // 0x24 131 uint32_t Flags; // 0x28 132 uint32_t unknown2c; // 0x2c 133 uint32_t unknown30; // 0x30 134 uint32_t unknown34; // 0x34 135 uint32_t UncompressedSize; // 0x38 136 uint32_t unknown3c; // 0x3c 137 uint32_t unknown40; // 0x40 138 uint32_t unknown44; // 0x44 139 FatBinInnerHeader(uint32_t DataSize, uint32_t CudaArch, uint32_t Flags) 140 : Kind(1 /*PTX*/), unknown02(0x0101), HeaderSize(sizeof(*this)), 141 DataSize(DataSize), unknown0c(0), CompressedSize(0), 142 SubHeaderSize(HeaderSize - 8), VersionMinor(2), VersionMajor(4), 143 CudaArch(CudaArch), unknown20(0), unknown24(0), Flags(Flags), 144 unknown2c(0), unknown30(0), unknown34(0), UncompressedSize(0), 145 unknown3c(0), unknown40(0), unknown44(0) {} 146 }; 147 148 struct FatBinHeader { 149 uint32_t Magic; // 0x00 150 uint16_t Version; // 0x04 151 uint16_t HeaderSize; // 0x06 152 uint32_t DataSize; // 0x08 153 uint32_t unknown0c; // 0x0c 154 public: 155 FatBinHeader(uint32_t DataSize) 156 : Magic(0xba55ed50), Version(1), HeaderSize(sizeof(*this)), 157 DataSize(DataSize), unknown0c(0) {} 158 }; 159 160 FatBinHeader OuterHeader(sizeof(FatBinInnerHeader) + PTXCode.size()); 161 FatbinContent.append((char *)&OuterHeader, 162 ((char *)&OuterHeader) + OuterHeader.HeaderSize); 163 164 FatBinInnerHeader InnerHeader(PTXCode.size(), SMVersion, 165 FatBinFlags::AddressSize64 | 166 FatBinFlags::HostLinux); 167 FatbinContent.append((char *)&InnerHeader, 168 ((char *)&InnerHeader) + InnerHeader.HeaderSize); 169 170 FatbinContent.append(PTXCode.begin(), PTXCode.end()); 171 172 return llvm::Error::success(); 173 } 174 175 IncrementalCUDADeviceParser::~IncrementalCUDADeviceParser() {} 176 177 } // namespace clang 178