1 //===---------- DeviceOffload.cpp - Device Offloading------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements offloading to CUDA devices.
10 //
11 //===----------------------------------------------------------------------===//
12
#include "DeviceOffload.h"

#include "clang/Basic/TargetOptions.h"
#include "clang/CodeGen/ModuleBuilder.h"
#include "clang/Frontend/CompilerInstance.h"
#include "clang/Interpreter/PartialTranslationUnit.h"

#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Target/TargetMachine.h"

#include <memory>
24
25 namespace clang {
26
IncrementalCUDADeviceParser(CompilerInstance & DeviceInstance,CompilerInstance & HostInstance,llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS,llvm::Error & Err,const std::list<PartialTranslationUnit> & PTUs)27 IncrementalCUDADeviceParser::IncrementalCUDADeviceParser(
28 CompilerInstance &DeviceInstance, CompilerInstance &HostInstance,
29 llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS,
30 llvm::Error &Err, const std::list<PartialTranslationUnit> &PTUs)
31 : IncrementalParser(DeviceInstance, Err), PTUs(PTUs), VFS(FS),
32 CodeGenOpts(HostInstance.getCodeGenOpts()),
33 TargetOpts(DeviceInstance.getTargetOpts()) {
34 if (Err)
35 return;
36 StringRef Arch = TargetOpts.CPU;
37 if (!Arch.starts_with("sm_") || Arch.substr(3).getAsInteger(10, SMVersion)) {
38 Err = llvm::joinErrors(std::move(Err), llvm::make_error<llvm::StringError>(
39 "Invalid CUDA architecture",
40 llvm::inconvertibleErrorCode()));
41 return;
42 }
43 }
44
GeneratePTX()45 llvm::Expected<llvm::StringRef> IncrementalCUDADeviceParser::GeneratePTX() {
46 auto &PTU = PTUs.back();
47 std::string Error;
48
49 const llvm::Target *Target = llvm::TargetRegistry::lookupTarget(
50 PTU.TheModule->getTargetTriple(), Error);
51 if (!Target)
52 return llvm::make_error<llvm::StringError>(std::move(Error),
53 std::error_code());
54 llvm::TargetOptions TO = llvm::TargetOptions();
55 llvm::TargetMachine *TargetMachine = Target->createTargetMachine(
56 PTU.TheModule->getTargetTriple(), TargetOpts.CPU, "", TO,
57 llvm::Reloc::Model::PIC_);
58 PTU.TheModule->setDataLayout(TargetMachine->createDataLayout());
59
60 PTXCode.clear();
61 llvm::raw_svector_ostream dest(PTXCode);
62
63 llvm::legacy::PassManager PM;
64 if (TargetMachine->addPassesToEmitFile(PM, dest, nullptr,
65 llvm::CodeGenFileType::AssemblyFile)) {
66 return llvm::make_error<llvm::StringError>(
67 "NVPTX backend cannot produce PTX code.",
68 llvm::inconvertibleErrorCode());
69 }
70
71 if (!PM.run(*PTU.TheModule))
72 return llvm::make_error<llvm::StringError>("Failed to emit PTX code.",
73 llvm::inconvertibleErrorCode());
74
75 PTXCode += '\0';
76 while (PTXCode.size() % 8)
77 PTXCode += '\0';
78 return PTXCode.str();
79 }
80
GenerateFatbinary()81 llvm::Error IncrementalCUDADeviceParser::GenerateFatbinary() {
82 enum FatBinFlags {
83 AddressSize64 = 0x01,
84 HasDebugInfo = 0x02,
85 ProducerCuda = 0x04,
86 HostLinux = 0x10,
87 HostMac = 0x20,
88 HostWindows = 0x40
89 };
90
91 struct FatBinInnerHeader {
92 uint16_t Kind; // 0x00
93 uint16_t unknown02; // 0x02
94 uint32_t HeaderSize; // 0x04
95 uint32_t DataSize; // 0x08
96 uint32_t unknown0c; // 0x0c
97 uint32_t CompressedSize; // 0x10
98 uint32_t SubHeaderSize; // 0x14
99 uint16_t VersionMinor; // 0x18
100 uint16_t VersionMajor; // 0x1a
101 uint32_t CudaArch; // 0x1c
102 uint32_t unknown20; // 0x20
103 uint32_t unknown24; // 0x24
104 uint32_t Flags; // 0x28
105 uint32_t unknown2c; // 0x2c
106 uint32_t unknown30; // 0x30
107 uint32_t unknown34; // 0x34
108 uint32_t UncompressedSize; // 0x38
109 uint32_t unknown3c; // 0x3c
110 uint32_t unknown40; // 0x40
111 uint32_t unknown44; // 0x44
112 FatBinInnerHeader(uint32_t DataSize, uint32_t CudaArch, uint32_t Flags)
113 : Kind(1 /*PTX*/), unknown02(0x0101), HeaderSize(sizeof(*this)),
114 DataSize(DataSize), unknown0c(0), CompressedSize(0),
115 SubHeaderSize(HeaderSize - 8), VersionMinor(2), VersionMajor(4),
116 CudaArch(CudaArch), unknown20(0), unknown24(0), Flags(Flags),
117 unknown2c(0), unknown30(0), unknown34(0), UncompressedSize(0),
118 unknown3c(0), unknown40(0), unknown44(0) {}
119 };
120
121 struct FatBinHeader {
122 uint32_t Magic; // 0x00
123 uint16_t Version; // 0x04
124 uint16_t HeaderSize; // 0x06
125 uint32_t DataSize; // 0x08
126 uint32_t unknown0c; // 0x0c
127 public:
128 FatBinHeader(uint32_t DataSize)
129 : Magic(0xba55ed50), Version(1), HeaderSize(sizeof(*this)),
130 DataSize(DataSize), unknown0c(0) {}
131 };
132
133 FatBinHeader OuterHeader(sizeof(FatBinInnerHeader) + PTXCode.size());
134 FatbinContent.append((char *)&OuterHeader,
135 ((char *)&OuterHeader) + OuterHeader.HeaderSize);
136
137 FatBinInnerHeader InnerHeader(PTXCode.size(), SMVersion,
138 FatBinFlags::AddressSize64 |
139 FatBinFlags::HostLinux);
140 FatbinContent.append((char *)&InnerHeader,
141 ((char *)&InnerHeader) + InnerHeader.HeaderSize);
142
143 FatbinContent.append(PTXCode.begin(), PTXCode.end());
144
145 const PartialTranslationUnit &PTU = PTUs.back();
146
147 std::string FatbinFileName = "/" + PTU.TheModule->getName().str() + ".fatbin";
148
149 VFS->addFile(FatbinFileName, 0,
150 llvm::MemoryBuffer::getMemBuffer(
151 llvm::StringRef(FatbinContent.data(), FatbinContent.size()),
152 "", false));
153
154 CodeGenOpts.CudaGpuBinaryFileName = std::move(FatbinFileName);
155
156 FatbinContent.clear();
157
158 return llvm::Error::success();
159 }
160
~IncrementalCUDADeviceParser()161 IncrementalCUDADeviceParser::~IncrementalCUDADeviceParser() {}
162
163 } // namespace clang
164