1 //===---------- DeviceOffload.cpp - Device Offloading------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements offloading to CUDA devices. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "DeviceOffload.h" 14 15 #include "clang/Basic/TargetOptions.h" 16 #include "clang/CodeGen/ModuleBuilder.h" 17 #include "clang/Frontend/CompilerInstance.h" 18 #include "clang/Interpreter/PartialTranslationUnit.h" 19 20 #include "llvm/IR/LegacyPassManager.h" 21 #include "llvm/IR/Module.h" 22 #include "llvm/MC/TargetRegistry.h" 23 #include "llvm/Target/TargetMachine.h" 24 25 namespace clang { 26 27 IncrementalCUDADeviceParser::IncrementalCUDADeviceParser( 28 CompilerInstance &DeviceInstance, CompilerInstance &HostInstance, 29 llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS, 30 llvm::Error &Err, const std::list<PartialTranslationUnit> &PTUs) 31 : IncrementalParser(DeviceInstance, Err), PTUs(PTUs), VFS(FS), 32 CodeGenOpts(HostInstance.getCodeGenOpts()), 33 TargetOpts(DeviceInstance.getTargetOpts()) { 34 if (Err) 35 return; 36 StringRef Arch = TargetOpts.CPU; 37 if (!Arch.starts_with("sm_") || Arch.substr(3).getAsInteger(10, SMVersion)) { 38 Err = llvm::joinErrors(std::move(Err), llvm::make_error<llvm::StringError>( 39 "Invalid CUDA architecture", 40 llvm::inconvertibleErrorCode())); 41 return; 42 } 43 } 44 45 llvm::Expected<llvm::StringRef> IncrementalCUDADeviceParser::GeneratePTX() { 46 auto &PTU = PTUs.back(); 47 std::string Error; 48 49 const llvm::Target *Target = llvm::TargetRegistry::lookupTarget( 50 PTU.TheModule->getTargetTriple(), Error); 51 if (!Target) 52 return llvm::make_error<llvm::StringError>(std::move(Error), 53 std::error_code()); 54 llvm::TargetOptions TO = llvm::TargetOptions(); 55 llvm::TargetMachine *TargetMachine = Target->createTargetMachine( 56 PTU.TheModule->getTargetTriple(), TargetOpts.CPU, "", TO, 57 llvm::Reloc::Model::PIC_); 58 PTU.TheModule->setDataLayout(TargetMachine->createDataLayout()); 59 60 PTXCode.clear(); 61 llvm::raw_svector_ostream dest(PTXCode); 62 63 llvm::legacy::PassManager PM; 64 if (TargetMachine->addPassesToEmitFile(PM, dest, nullptr, 65 llvm::CodeGenFileType::AssemblyFile)) { 66 return llvm::make_error<llvm::StringError>( 67 "NVPTX backend cannot produce PTX code.", 68 llvm::inconvertibleErrorCode()); 69 } 70 71 if (!PM.run(*PTU.TheModule)) 72 return llvm::make_error<llvm::StringError>("Failed to emit PTX code.", 73 llvm::inconvertibleErrorCode()); 74 75 PTXCode += '\0'; 76 while (PTXCode.size() % 8) 77 PTXCode += '\0'; 78 return PTXCode.str(); 79 } 80 81 llvm::Error IncrementalCUDADeviceParser::GenerateFatbinary() { 82 enum FatBinFlags { 83 AddressSize64 = 0x01, 84 HasDebugInfo = 0x02, 85 ProducerCuda = 0x04, 86 HostLinux = 0x10, 87 HostMac = 0x20, 88 HostWindows = 0x40 89 }; 90 91 struct FatBinInnerHeader { 92 uint16_t Kind; // 0x00 93 uint16_t unknown02; // 0x02 94 uint32_t HeaderSize; // 0x04 95 uint32_t DataSize; // 0x08 96 uint32_t unknown0c; // 0x0c 97 uint32_t CompressedSize; // 0x10 98 uint32_t SubHeaderSize; // 0x14 99 uint16_t VersionMinor; // 0x18 100 uint16_t VersionMajor; // 0x1a 101 uint32_t CudaArch; // 0x1c 102 uint32_t unknown20; // 0x20 103 uint32_t unknown24; // 0x24 104 uint32_t Flags; // 0x28 105 uint32_t unknown2c; // 0x2c 106 uint32_t unknown30; // 0x30 107 uint32_t unknown34; // 0x34 108 uint32_t UncompressedSize; // 0x38 109 uint32_t unknown3c; // 0x3c 110 uint32_t unknown40; // 0x40 111 uint32_t unknown44; // 0x44 112 FatBinInnerHeader(uint32_t DataSize, uint32_t CudaArch, uint32_t Flags) 113 : Kind(1 /*PTX*/), unknown02(0x0101), HeaderSize(sizeof(*this)), 114 DataSize(DataSize), unknown0c(0), CompressedSize(0), 115 SubHeaderSize(HeaderSize - 8), VersionMinor(2), VersionMajor(4), 116 CudaArch(CudaArch), unknown20(0), unknown24(0), Flags(Flags), 117 unknown2c(0), unknown30(0), unknown34(0), UncompressedSize(0), 118 unknown3c(0), unknown40(0), unknown44(0) {} 119 }; 120 121 struct FatBinHeader { 122 uint32_t Magic; // 0x00 123 uint16_t Version; // 0x04 124 uint16_t HeaderSize; // 0x06 125 uint32_t DataSize; // 0x08 126 uint32_t unknown0c; // 0x0c 127 public: 128 FatBinHeader(uint32_t DataSize) 129 : Magic(0xba55ed50), Version(1), HeaderSize(sizeof(*this)), 130 DataSize(DataSize), unknown0c(0) {} 131 }; 132 133 FatBinHeader OuterHeader(sizeof(FatBinInnerHeader) + PTXCode.size()); 134 FatbinContent.append((char *)&OuterHeader, 135 ((char *)&OuterHeader) + OuterHeader.HeaderSize); 136 137 FatBinInnerHeader InnerHeader(PTXCode.size(), SMVersion, 138 FatBinFlags::AddressSize64 | 139 FatBinFlags::HostLinux); 140 FatbinContent.append((char *)&InnerHeader, 141 ((char *)&InnerHeader) + InnerHeader.HeaderSize); 142 143 FatbinContent.append(PTXCode.begin(), PTXCode.end()); 144 145 const PartialTranslationUnit &PTU = PTUs.back(); 146 147 std::string FatbinFileName = "/" + PTU.TheModule->getName().str() + ".fatbin"; 148 149 VFS->addFile(FatbinFileName, 0, 150 llvm::MemoryBuffer::getMemBuffer( 151 llvm::StringRef(FatbinContent.data(), FatbinContent.size()), 152 "", false)); 153 154 CodeGenOpts.CudaGpuBinaryFileName = std::move(FatbinFileName); 155 156 FatbinContent.clear(); 157 158 return llvm::Error::success(); 159 } 160 161 IncrementalCUDADeviceParser::~IncrementalCUDADeviceParser() {} 162 163 } // namespace clang 164