1 //===- CodeGenData.h --------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains support for codegen data that has stable summary which
10 // can be used to optimize the code in the subsequent codegen.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #ifndef LLVM_CGDATA_CODEGENDATA_H
15 #define LLVM_CGDATA_CODEGENDATA_H
16
17 #include "llvm/ADT/BitmaskEnum.h"
18 #include "llvm/ADT/StableHashing.h"
19 #include "llvm/Bitcode/BitcodeReader.h"
20 #include "llvm/CGData/OutlinedHashTree.h"
21 #include "llvm/CGData/OutlinedHashTreeRecord.h"
22 #include "llvm/CGData/StableFunctionMapRecord.h"
23 #include "llvm/IR/Module.h"
24 #include "llvm/Object/ObjectFile.h"
25 #include "llvm/Support/Caching.h"
26 #include "llvm/Support/Compiler.h"
27 #include "llvm/Support/ErrorHandling.h"
28 #include "llvm/TargetParser/Triple.h"
29 #include <mutex>
30
31 namespace llvm {
32
/// Section kinds for codegen data. The enumerators are not spelled out here:
/// every CG_DATA_SECT_ENTRY(...) line pulled in from CodeGenData.inc expands to
/// its first argument (Kind) followed by a comma; the other three macro
/// arguments are ignored by this particular expansion.
/// NOTE(review): CG_DATA_SECT_ENTRY is not #undef'd in this header; presumably
/// CodeGenData.inc does that itself -- confirm.
enum CGDataSectKind {
#define CG_DATA_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) Kind,
#include "llvm/CGData/CodeGenData.inc"
};
37
/// Returns the name of the codegen data section \p CGSK for the object file
/// format \p OF. Defined outside this header.
/// \p AddSegmentInfo defaults to true.
/// NOTE(review): presumably \p AddSegmentInfo controls whether a segment
/// prefix is included in the returned name -- confirm against the definition.
LLVM_ABI std::string getCodeGenDataSectionName(CGDataSectKind CGSK,
                                               Triple::ObjectFormatType OF,
                                               bool AddSegmentInfo = true);
41
42 enum class CGDataKind {
43 Unknown = 0x0,
44 // A function outlining info.
45 FunctionOutlinedHashTree = 0x1,
46 // A function merging info.
47 StableFunctionMergingMap = 0x2,
48 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/StableFunctionMergingMap)
49 };
50
/// Returns the error category paired with cgdata_error values;
/// make_error_code(cgdata_error) below hands it to std::error_code.
/// Defined outside this header.
LLVM_ABI const std::error_category &cgdata_category();
52
/// Error codes for codegen data. The numeric values are what
/// make_error_code() stores in the resulting std::error_code, so they are
/// written out explicitly; `success` is zero.
enum class cgdata_error {
  success = 0,
  eof = 1,
  bad_magic = 2,
  bad_header = 3,
  empty_cgdata = 4,
  malformed = 5,
  unsupported_version = 6,
};
62
make_error_code(cgdata_error E)63 inline std::error_code make_error_code(cgdata_error E) {
64 return std::error_code(static_cast<int>(E), cgdata_category());
65 }
66
67 class LLVM_ABI CGDataError : public ErrorInfo<CGDataError> {
68 public:
69 CGDataError(cgdata_error Err, const Twine &ErrStr = Twine())
Err(Err)70 : Err(Err), Msg(ErrStr.str()) {
71 assert(Err != cgdata_error::success && "Not an error");
72 }
73
74 std::string message() const override;
75
log(raw_ostream & OS)76 void log(raw_ostream &OS) const override { OS << message(); }
77
convertToErrorCode()78 std::error_code convertToErrorCode() const override {
79 return make_error_code(Err);
80 }
81
get()82 cgdata_error get() const { return Err; }
getMessage()83 const std::string &getMessage() const { return Msg; }
84
85 /// Consume an Error and return the raw enum value contained within it, and
86 /// the optional error message. The Error must either be a success value, or
87 /// contain a single CGDataError.
take(Error E)88 static std::pair<cgdata_error, std::string> take(Error E) {
89 auto Err = cgdata_error::success;
90 std::string Msg;
91 handleAllErrors(std::move(E), [&Err, &Msg](const CGDataError &IPE) {
92 assert(Err == cgdata_error::success && "Multiple errors encountered");
93 Err = IPE.get();
94 Msg = IPE.getMessage();
95 });
96 return {Err, Msg};
97 }
98
99 static char ID;
100
101 private:
102 cgdata_error Err;
103 std::string Msg;
104 };
105
/// Mode selector for codegen data: none, read, or write. Unscoped, so the
/// enumerators are visible directly in the enclosing namespace.
enum CGDataMode {
  None = 0,
  Read = 1,
  Write = 2,
};
111
112 class CodeGenData {
113 /// Global outlined hash tree that has oulined hash sequences across modules.
114 std::unique_ptr<OutlinedHashTree> PublishedHashTree;
115 /// Global stable function map that has stable function info across modules.
116 std::unique_ptr<StableFunctionMap> PublishedStableFunctionMap;
117
118 /// This flag is set when -fcodegen-data-generate is passed.
119 /// Or, it can be mutated with -fcodegen-data-thinlto-two-rounds.
120 bool EmitCGData;
121
122 /// This is a singleton instance which is thread-safe. Unlike profile data
123 /// which is largely function-based, codegen data describes the whole module.
124 /// Therefore, this can be initialized once, and can be used across modules
125 /// instead of constructing the same one for each codegen backend.
126 static std::unique_ptr<CodeGenData> Instance;
127 static std::once_flag OnceFlag;
128
129 CodeGenData() = default;
130
131 public:
132 ~CodeGenData() = default;
133
134 LLVM_ABI static CodeGenData &getInstance();
135
136 /// Returns true if we have a valid outlined hash tree.
hasOutlinedHashTree()137 bool hasOutlinedHashTree() {
138 return PublishedHashTree && !PublishedHashTree->empty();
139 }
hasStableFunctionMap()140 bool hasStableFunctionMap() {
141 return PublishedStableFunctionMap && !PublishedStableFunctionMap->empty();
142 }
143
144 /// Returns the outlined hash tree. This can be globally used in a read-only
145 /// manner.
getOutlinedHashTree()146 const OutlinedHashTree *getOutlinedHashTree() {
147 return PublishedHashTree.get();
148 }
getStableFunctionMap()149 const StableFunctionMap *getStableFunctionMap() {
150 return PublishedStableFunctionMap.get();
151 }
152
153 /// Returns true if we should write codegen data.
emitCGData()154 bool emitCGData() { return EmitCGData; }
155
156 /// Publish the (globally) merged or read outlined hash tree.
publishOutlinedHashTree(std::unique_ptr<OutlinedHashTree> HashTree)157 void publishOutlinedHashTree(std::unique_ptr<OutlinedHashTree> HashTree) {
158 PublishedHashTree = std::move(HashTree);
159 // Ensure we disable emitCGData as we do not want to read and write both.
160 EmitCGData = false;
161 }
162 void
publishStableFunctionMap(std::unique_ptr<StableFunctionMap> FunctionMap)163 publishStableFunctionMap(std::unique_ptr<StableFunctionMap> FunctionMap) {
164 PublishedStableFunctionMap = std::move(FunctionMap);
165 // Ensure we disable emitCGData as we do not want to read and write both.
166 EmitCGData = false;
167 }
168 };
169
170 namespace cgdata {
171
hasOutlinedHashTree()172 inline bool hasOutlinedHashTree() {
173 return CodeGenData::getInstance().hasOutlinedHashTree();
174 }
175
hasStableFunctionMap()176 inline bool hasStableFunctionMap() {
177 return CodeGenData::getInstance().hasStableFunctionMap();
178 }
179
getOutlinedHashTree()180 inline const OutlinedHashTree *getOutlinedHashTree() {
181 return CodeGenData::getInstance().getOutlinedHashTree();
182 }
183
getStableFunctionMap()184 inline const StableFunctionMap *getStableFunctionMap() {
185 return CodeGenData::getInstance().getStableFunctionMap();
186 }
187
emitCGData()188 inline bool emitCGData() { return CodeGenData::getInstance().emitCGData(); }
189
190 inline void
publishOutlinedHashTree(std::unique_ptr<OutlinedHashTree> HashTree)191 publishOutlinedHashTree(std::unique_ptr<OutlinedHashTree> HashTree) {
192 CodeGenData::getInstance().publishOutlinedHashTree(std::move(HashTree));
193 }
194
195 inline void
publishStableFunctionMap(std::unique_ptr<StableFunctionMap> FunctionMap)196 publishStableFunctionMap(std::unique_ptr<StableFunctionMap> FunctionMap) {
197 CodeGenData::getInstance().publishStableFunctionMap(std::move(FunctionMap));
198 }
199
200 struct StreamCacheData {
201 /// Backing buffer for serialized data stream.
202 SmallVector<SmallString<0>> Outputs;
203 /// Callback function to add serialized data to the stream.
204 AddStreamFn AddStream;
205 /// Backing buffer for cached data.
206 SmallVector<std::unique_ptr<MemoryBuffer>> Files;
207 /// Cache mechanism for storing data.
208 FileCache Cache;
209
StreamCacheDataStreamCacheData210 StreamCacheData(unsigned Size, const FileCache &OrigCache,
211 const Twine &CachePrefix)
212 : Outputs(Size), Files(Size) {
213 AddStream = [&](size_t Task, const Twine &ModuleName) {
214 return std::make_unique<CachedFileStream>(
215 std::make_unique<raw_svector_ostream>(Outputs[Task]));
216 };
217
218 if (OrigCache.isValid()) {
219 auto CGCacheOrErr =
220 localCache("ThinLTO", CachePrefix, OrigCache.getCacheDirectoryPath(),
221 [&](size_t Task, const Twine &ModuleName,
222 std::unique_ptr<MemoryBuffer> MB) {
223 Files[Task] = std::move(MB);
224 });
225 if (Error Err = CGCacheOrErr.takeError())
226 report_fatal_error(std::move(Err));
227 Cache = std::move(*CGCacheOrErr);
228 }
229 }
230 StreamCacheData() = delete;
231
232 /// Retrieve results from either the cache or the stream.
getResultStreamCacheData233 std::unique_ptr<SmallVector<StringRef>> getResult() {
234 unsigned NumOutputs = Outputs.size();
235 auto Result = std::make_unique<SmallVector<StringRef>>(NumOutputs);
236 for (unsigned I = 0; I < NumOutputs; ++I)
237 if (Files[I])
238 (*Result)[I] = Files[I]->getBuffer();
239 else
240 (*Result)[I] = Outputs[I];
241 return Result;
242 }
243 };
244
/// Save \p TheModule before the first codegen round.
/// \p Task represents the partition number in the parallel code generation
/// process. \p AddStream is the callback used to add the serialized module to
/// the stream.
/// Defined outside this header.
/// NOTE(review): the serialization format is not visible here; presumably
/// bitcode, since the matching loader takes a BitcodeModule -- confirm.
LLVM_ABI void saveModuleForTwoRounds(const Module &TheModule, unsigned Task,
                                     AddStreamFn AddStream);
251
/// Load the optimized bitcode module for the second codegen round.
/// \p OrigModule is the original bitcode module.
/// \p Task identifies the partition number in the parallel code generation
/// process. \p Context provides the environment settings for module operations.
/// \p IRFiles contains optimized bitcode module files needed for loading.
/// \return A unique_ptr to the loaded Module, or nullptr if loading fails.
/// Defined outside this header.
/// NOTE(review): presumably \p IRFiles is indexed by \p Task (one entry per
/// partition) -- confirm against the definition.
LLVM_ABI std::unique_ptr<Module>
loadModuleForTwoRounds(BitcodeModule &OrigModule, unsigned Task,
                       LLVMContext &Context, ArrayRef<StringRef> IRFiles);
261
/// Merge the codegen data from the scratch objects \p ObjectFiles from the
/// first codegen round.
/// \return the combined hash of the merged codegen data, or an Error (the
/// return type is Expected<stable_hash>). Defined outside this header.
/// NOTE(review): presumably the merged data is then published through
/// CodeGenData -- confirm against the definition.
LLVM_ABI Expected<stable_hash>
mergeCodeGenData(ArrayRef<StringRef> ObjectFiles);
267
/// Warning helpers; both overloads are defined outside this header.
/// The first takes ownership of the Error \p E; the second takes a message
/// with an optional \p Hint. \p Whence is optional in both (defaults to "").
/// NOTE(review): presumably \p Whence names the origin of the warning (e.g. a
/// file) and the output goes to the error stream -- confirm.
LLVM_ABI void warn(Error E, StringRef Whence = "");
LLVM_ABI void warn(Twine Message, StringRef Whence = "", StringRef Hint = "");
270
271 } // end namespace cgdata
272
273 namespace IndexedCGData {
274
// A signature for data validation. Stored in little-endian byte order the
// value reads as "\xffcgdata\x81":
//   byte 0 (lowest) = 0xff, then 'c' 'g' 'd' 'a' 't' 'a', byte 7 = 0x81.
constexpr uint64_t Magic = 0x81617461646763ffULL;
278
/// Versions of the indexed codegen data format.
enum CGDataVersion {
  // Version 1 is the first version. This version supports the outlined
  // hash tree.
  Version1 = 1,
  // Version 2 supports the stable function merging map.
  Version2 = 2,
  // Version 3 adds the total size of the Names in the stable function map so
  // we can skip reading them into the memory for non-assertion builds.
  Version3 = 3,
  // The version in use. CG_DATA_INDEX_VERSION is not defined in this header.
  // NOTE(review): presumably it comes from CodeGenData.inc, which is included
  // earlier in this file -- confirm, and keep it in sync when adding a
  // version above.
  CurrentVersion = CG_DATA_INDEX_VERSION
};
290 const uint64_t Version = CGDataVersion::CurrentVersion;
291
/// Header of indexed codegen data, obtained via readFromBuffer().
struct Header {
  // NOTE(review): presumably compared against IndexedCGData::Magic when the
  // header is read -- confirm in readFromBuffer().
  uint64_t Magic;
  // Format version. Note this field is 32 bits wide.
  uint32_t Version;
  // NOTE(review): presumably a bitmask of CGDataKind values -- confirm.
  uint32_t DataKind;
  // NOTE(review): presumably the position of each payload within the data,
  // in bytes -- confirm against the reader/writer.
  uint64_t OutlinedHashTreeOffset;
  uint64_t StableFunctionMapOffset;

  // New fields should only be added at the end to ensure that the size
  // computation is correct. The methods below need to be updated to ensure that
  // the new field is read correctly.

  // Reads a header struct from the buffer starting at \p Curr; returns the
  // parsed Header or an Error. Defined outside this header.
  LLVM_ABI static Expected<Header> readFromBuffer(const unsigned char *Curr);
};
306
307 } // end namespace IndexedCGData
308
309 } // end namespace llvm
310
#endif // LLVM_CGDATA_CODEGENDATA_H
312