xref: /freebsd/contrib/llvm-project/clang/lib/Interpreter/DeviceOffload.cpp (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===---------- DeviceOffload.cpp - Device Offloading------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements offloading to CUDA devices.
10 //
11 //===----------------------------------------------------------------------===//
12 
#include "DeviceOffload.h"

#include "clang/Basic/TargetOptions.h"
#include "clang/CodeGen/ModuleBuilder.h"
#include "clang/Frontend/CompilerInstance.h"
#include "clang/Interpreter/PartialTranslationUnit.h"

#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Target/TargetMachine.h"

#include <memory>
24 
25 namespace clang {
26 
IncrementalCUDADeviceParser(CompilerInstance & DeviceInstance,CompilerInstance & HostInstance,llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS,llvm::Error & Err,const std::list<PartialTranslationUnit> & PTUs)27 IncrementalCUDADeviceParser::IncrementalCUDADeviceParser(
28     CompilerInstance &DeviceInstance, CompilerInstance &HostInstance,
29     llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS,
30     llvm::Error &Err, const std::list<PartialTranslationUnit> &PTUs)
31     : IncrementalParser(DeviceInstance, Err), PTUs(PTUs), VFS(FS),
32       CodeGenOpts(HostInstance.getCodeGenOpts()),
33       TargetOpts(DeviceInstance.getTargetOpts()) {
34   if (Err)
35     return;
36   StringRef Arch = TargetOpts.CPU;
37   if (!Arch.starts_with("sm_") || Arch.substr(3).getAsInteger(10, SMVersion)) {
38     Err = llvm::joinErrors(std::move(Err), llvm::make_error<llvm::StringError>(
39                                                "Invalid CUDA architecture",
40                                                llvm::inconvertibleErrorCode()));
41     return;
42   }
43 }
44 
GeneratePTX()45 llvm::Expected<llvm::StringRef> IncrementalCUDADeviceParser::GeneratePTX() {
46   auto &PTU = PTUs.back();
47   std::string Error;
48 
49   const llvm::Target *Target = llvm::TargetRegistry::lookupTarget(
50       PTU.TheModule->getTargetTriple(), Error);
51   if (!Target)
52     return llvm::make_error<llvm::StringError>(std::move(Error),
53                                                std::error_code());
54   llvm::TargetOptions TO = llvm::TargetOptions();
55   llvm::TargetMachine *TargetMachine = Target->createTargetMachine(
56       PTU.TheModule->getTargetTriple(), TargetOpts.CPU, "", TO,
57       llvm::Reloc::Model::PIC_);
58   PTU.TheModule->setDataLayout(TargetMachine->createDataLayout());
59 
60   PTXCode.clear();
61   llvm::raw_svector_ostream dest(PTXCode);
62 
63   llvm::legacy::PassManager PM;
64   if (TargetMachine->addPassesToEmitFile(PM, dest, nullptr,
65                                          llvm::CodeGenFileType::AssemblyFile)) {
66     return llvm::make_error<llvm::StringError>(
67         "NVPTX backend cannot produce PTX code.",
68         llvm::inconvertibleErrorCode());
69   }
70 
71   if (!PM.run(*PTU.TheModule))
72     return llvm::make_error<llvm::StringError>("Failed to emit PTX code.",
73                                                llvm::inconvertibleErrorCode());
74 
75   PTXCode += '\0';
76   while (PTXCode.size() % 8)
77     PTXCode += '\0';
78   return PTXCode.str();
79 }
80 
GenerateFatbinary()81 llvm::Error IncrementalCUDADeviceParser::GenerateFatbinary() {
82   enum FatBinFlags {
83     AddressSize64 = 0x01,
84     HasDebugInfo = 0x02,
85     ProducerCuda = 0x04,
86     HostLinux = 0x10,
87     HostMac = 0x20,
88     HostWindows = 0x40
89   };
90 
91   struct FatBinInnerHeader {
92     uint16_t Kind;             // 0x00
93     uint16_t unknown02;        // 0x02
94     uint32_t HeaderSize;       // 0x04
95     uint32_t DataSize;         // 0x08
96     uint32_t unknown0c;        // 0x0c
97     uint32_t CompressedSize;   // 0x10
98     uint32_t SubHeaderSize;    // 0x14
99     uint16_t VersionMinor;     // 0x18
100     uint16_t VersionMajor;     // 0x1a
101     uint32_t CudaArch;         // 0x1c
102     uint32_t unknown20;        // 0x20
103     uint32_t unknown24;        // 0x24
104     uint32_t Flags;            // 0x28
105     uint32_t unknown2c;        // 0x2c
106     uint32_t unknown30;        // 0x30
107     uint32_t unknown34;        // 0x34
108     uint32_t UncompressedSize; // 0x38
109     uint32_t unknown3c;        // 0x3c
110     uint32_t unknown40;        // 0x40
111     uint32_t unknown44;        // 0x44
112     FatBinInnerHeader(uint32_t DataSize, uint32_t CudaArch, uint32_t Flags)
113         : Kind(1 /*PTX*/), unknown02(0x0101), HeaderSize(sizeof(*this)),
114           DataSize(DataSize), unknown0c(0), CompressedSize(0),
115           SubHeaderSize(HeaderSize - 8), VersionMinor(2), VersionMajor(4),
116           CudaArch(CudaArch), unknown20(0), unknown24(0), Flags(Flags),
117           unknown2c(0), unknown30(0), unknown34(0), UncompressedSize(0),
118           unknown3c(0), unknown40(0), unknown44(0) {}
119   };
120 
121   struct FatBinHeader {
122     uint32_t Magic;      // 0x00
123     uint16_t Version;    // 0x04
124     uint16_t HeaderSize; // 0x06
125     uint32_t DataSize;   // 0x08
126     uint32_t unknown0c;  // 0x0c
127   public:
128     FatBinHeader(uint32_t DataSize)
129         : Magic(0xba55ed50), Version(1), HeaderSize(sizeof(*this)),
130           DataSize(DataSize), unknown0c(0) {}
131   };
132 
133   FatBinHeader OuterHeader(sizeof(FatBinInnerHeader) + PTXCode.size());
134   FatbinContent.append((char *)&OuterHeader,
135                        ((char *)&OuterHeader) + OuterHeader.HeaderSize);
136 
137   FatBinInnerHeader InnerHeader(PTXCode.size(), SMVersion,
138                                 FatBinFlags::AddressSize64 |
139                                     FatBinFlags::HostLinux);
140   FatbinContent.append((char *)&InnerHeader,
141                        ((char *)&InnerHeader) + InnerHeader.HeaderSize);
142 
143   FatbinContent.append(PTXCode.begin(), PTXCode.end());
144 
145   const PartialTranslationUnit &PTU = PTUs.back();
146 
147   std::string FatbinFileName = "/" + PTU.TheModule->getName().str() + ".fatbin";
148 
149   VFS->addFile(FatbinFileName, 0,
150                llvm::MemoryBuffer::getMemBuffer(
151                    llvm::StringRef(FatbinContent.data(), FatbinContent.size()),
152                    "", false));
153 
154   CodeGenOpts.CudaGpuBinaryFileName = std::move(FatbinFileName);
155 
156   FatbinContent.clear();
157 
158   return llvm::Error::success();
159 }
160 
~IncrementalCUDADeviceParser()161 IncrementalCUDADeviceParser::~IncrementalCUDADeviceParser() {}
162 
163 } // namespace clang
164