xref: /freebsd/contrib/llvm-project/llvm/include/llvm/CGData/CodeGenData.h (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===- CodeGenData.h --------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains support for codegen data that has stable summary which
10 // can be used to optimize the code in the subsequent codegen.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_CGDATA_CODEGENDATA_H
15 #define LLVM_CGDATA_CODEGENDATA_H
16 
17 #include "llvm/ADT/BitmaskEnum.h"
18 #include "llvm/ADT/StableHashing.h"
19 #include "llvm/Bitcode/BitcodeReader.h"
20 #include "llvm/CGData/OutlinedHashTree.h"
21 #include "llvm/CGData/OutlinedHashTreeRecord.h"
22 #include "llvm/CGData/StableFunctionMapRecord.h"
23 #include "llvm/IR/Module.h"
24 #include "llvm/Object/ObjectFile.h"
25 #include "llvm/Support/Caching.h"
26 #include "llvm/Support/Compiler.h"
27 #include "llvm/Support/ErrorHandling.h"
28 #include "llvm/TargetParser/Triple.h"
29 #include <mutex>
30 
31 namespace llvm {
32 
/// Kinds of codegen data sections. The enumerators are generated by expanding
/// every CG_DATA_SECT_ENTRY in CodeGenData.inc; only the first field (Kind) of
/// each entry is used here, so the enumerator order follows the order of the
/// entries in that file.
enum CGDataSectKind {
#define CG_DATA_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) Kind,
#include "llvm/CGData/CodeGenData.inc"
};
37 
/// Returns the section name associated with the codegen data section kind
/// \p CGSK for the object file format \p OF.
/// NOTE(review): \p AddSegmentInfo (default true) presumably controls whether
/// segment information is included in the returned name -- confirm against
/// the out-of-line definition.
LLVM_ABI std::string getCodeGenDataSectionName(CGDataSectKind CGSK,
                                               Triple::ObjectFormatType OF,
                                               bool AddSegmentInfo = true);
41 
/// Kinds of codegen data. The enum is marked as a bitmask enum, and each
/// non-zero enumerator occupies a distinct bit.
enum class CGDataKind {
  Unknown = 0x0,
  // A function outlining info.
  FunctionOutlinedHashTree = 0x1,
  // A function merging info.
  StableFunctionMergingMap = 0x2,
  // Must name the largest enumerator value; update it when adding a new kind.
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/StableFunctionMergingMap)
};
50 
/// Returns the std::error_category that make_error_code() pairs with
/// cgdata_error values. Defined out of line.
LLVM_ABI const std::error_category &cgdata_category();
52 
/// Error codes reported while handling codegen data. The numeric values are
/// what make_error_code() stores in the resulting std::error_code.
enum class cgdata_error {
  success = 0,
  eof = 1,
  bad_magic = 2,
  bad_header = 3,
  empty_cgdata = 4,
  malformed = 5,
  unsupported_version = 6,
};
62 
make_error_code(cgdata_error E)63 inline std::error_code make_error_code(cgdata_error E) {
64   return std::error_code(static_cast<int>(E), cgdata_category());
65 }
66 
67 class LLVM_ABI CGDataError : public ErrorInfo<CGDataError> {
68 public:
69   CGDataError(cgdata_error Err, const Twine &ErrStr = Twine())
Err(Err)70       : Err(Err), Msg(ErrStr.str()) {
71     assert(Err != cgdata_error::success && "Not an error");
72   }
73 
74   std::string message() const override;
75 
log(raw_ostream & OS)76   void log(raw_ostream &OS) const override { OS << message(); }
77 
convertToErrorCode()78   std::error_code convertToErrorCode() const override {
79     return make_error_code(Err);
80   }
81 
get()82   cgdata_error get() const { return Err; }
getMessage()83   const std::string &getMessage() const { return Msg; }
84 
85   /// Consume an Error and return the raw enum value contained within it, and
86   /// the optional error message. The Error must either be a success value, or
87   /// contain a single CGDataError.
take(Error E)88   static std::pair<cgdata_error, std::string> take(Error E) {
89     auto Err = cgdata_error::success;
90     std::string Msg;
91     handleAllErrors(std::move(E), [&Err, &Msg](const CGDataError &IPE) {
92       assert(Err == cgdata_error::success && "Multiple errors encountered");
93       Err = IPE.get();
94       Msg = IPE.getMessage();
95     });
96     return {Err, Msg};
97   }
98 
99   static char ID;
100 
101 private:
102   cgdata_error Err;
103   std::string Msg;
104 };
105 
/// Mode selector for codegen data, with three states: None, Read and Write.
enum CGDataMode {
  None = 0,
  Read = 1,
  Write = 2,
};
111 
112 class CodeGenData {
113   /// Global outlined hash tree that has oulined hash sequences across modules.
114   std::unique_ptr<OutlinedHashTree> PublishedHashTree;
115   /// Global stable function map that has stable function info across modules.
116   std::unique_ptr<StableFunctionMap> PublishedStableFunctionMap;
117 
118   /// This flag is set when -fcodegen-data-generate is passed.
119   /// Or, it can be mutated with -fcodegen-data-thinlto-two-rounds.
120   bool EmitCGData;
121 
122   /// This is a singleton instance which is thread-safe. Unlike profile data
123   /// which is largely function-based, codegen data describes the whole module.
124   /// Therefore, this can be initialized once, and can be used across modules
125   /// instead of constructing the same one for each codegen backend.
126   static std::unique_ptr<CodeGenData> Instance;
127   static std::once_flag OnceFlag;
128 
129   CodeGenData() = default;
130 
131 public:
132   ~CodeGenData() = default;
133 
134   LLVM_ABI static CodeGenData &getInstance();
135 
136   /// Returns true if we have a valid outlined hash tree.
hasOutlinedHashTree()137   bool hasOutlinedHashTree() {
138     return PublishedHashTree && !PublishedHashTree->empty();
139   }
hasStableFunctionMap()140   bool hasStableFunctionMap() {
141     return PublishedStableFunctionMap && !PublishedStableFunctionMap->empty();
142   }
143 
144   /// Returns the outlined hash tree. This can be globally used in a read-only
145   /// manner.
getOutlinedHashTree()146   const OutlinedHashTree *getOutlinedHashTree() {
147     return PublishedHashTree.get();
148   }
getStableFunctionMap()149   const StableFunctionMap *getStableFunctionMap() {
150     return PublishedStableFunctionMap.get();
151   }
152 
153   /// Returns true if we should write codegen data.
emitCGData()154   bool emitCGData() { return EmitCGData; }
155 
156   /// Publish the (globally) merged or read outlined hash tree.
publishOutlinedHashTree(std::unique_ptr<OutlinedHashTree> HashTree)157   void publishOutlinedHashTree(std::unique_ptr<OutlinedHashTree> HashTree) {
158     PublishedHashTree = std::move(HashTree);
159     // Ensure we disable emitCGData as we do not want to read and write both.
160     EmitCGData = false;
161   }
162   void
publishStableFunctionMap(std::unique_ptr<StableFunctionMap> FunctionMap)163   publishStableFunctionMap(std::unique_ptr<StableFunctionMap> FunctionMap) {
164     PublishedStableFunctionMap = std::move(FunctionMap);
165     // Ensure we disable emitCGData as we do not want to read and write both.
166     EmitCGData = false;
167   }
168 };
169 
170 namespace cgdata {
171 
hasOutlinedHashTree()172 inline bool hasOutlinedHashTree() {
173   return CodeGenData::getInstance().hasOutlinedHashTree();
174 }
175 
hasStableFunctionMap()176 inline bool hasStableFunctionMap() {
177   return CodeGenData::getInstance().hasStableFunctionMap();
178 }
179 
getOutlinedHashTree()180 inline const OutlinedHashTree *getOutlinedHashTree() {
181   return CodeGenData::getInstance().getOutlinedHashTree();
182 }
183 
getStableFunctionMap()184 inline const StableFunctionMap *getStableFunctionMap() {
185   return CodeGenData::getInstance().getStableFunctionMap();
186 }
187 
emitCGData()188 inline bool emitCGData() { return CodeGenData::getInstance().emitCGData(); }
189 
190 inline void
publishOutlinedHashTree(std::unique_ptr<OutlinedHashTree> HashTree)191 publishOutlinedHashTree(std::unique_ptr<OutlinedHashTree> HashTree) {
192   CodeGenData::getInstance().publishOutlinedHashTree(std::move(HashTree));
193 }
194 
195 inline void
publishStableFunctionMap(std::unique_ptr<StableFunctionMap> FunctionMap)196 publishStableFunctionMap(std::unique_ptr<StableFunctionMap> FunctionMap) {
197   CodeGenData::getInstance().publishStableFunctionMap(std::move(FunctionMap));
198 }
199 
200 struct StreamCacheData {
201   /// Backing buffer for serialized data stream.
202   SmallVector<SmallString<0>> Outputs;
203   /// Callback function to add serialized data to the stream.
204   AddStreamFn AddStream;
205   /// Backing buffer for cached data.
206   SmallVector<std::unique_ptr<MemoryBuffer>> Files;
207   /// Cache mechanism for storing data.
208   FileCache Cache;
209 
StreamCacheDataStreamCacheData210   StreamCacheData(unsigned Size, const FileCache &OrigCache,
211                   const Twine &CachePrefix)
212       : Outputs(Size), Files(Size) {
213     AddStream = [&](size_t Task, const Twine &ModuleName) {
214       return std::make_unique<CachedFileStream>(
215           std::make_unique<raw_svector_ostream>(Outputs[Task]));
216     };
217 
218     if (OrigCache.isValid()) {
219       auto CGCacheOrErr =
220           localCache("ThinLTO", CachePrefix, OrigCache.getCacheDirectoryPath(),
221                      [&](size_t Task, const Twine &ModuleName,
222                          std::unique_ptr<MemoryBuffer> MB) {
223                        Files[Task] = std::move(MB);
224                      });
225       if (Error Err = CGCacheOrErr.takeError())
226         report_fatal_error(std::move(Err));
227       Cache = std::move(*CGCacheOrErr);
228     }
229   }
230   StreamCacheData() = delete;
231 
232   /// Retrieve results from either the cache or the stream.
getResultStreamCacheData233   std::unique_ptr<SmallVector<StringRef>> getResult() {
234     unsigned NumOutputs = Outputs.size();
235     auto Result = std::make_unique<SmallVector<StringRef>>(NumOutputs);
236     for (unsigned I = 0; I < NumOutputs; ++I)
237       if (Files[I])
238         (*Result)[I] = Files[I]->getBuffer();
239       else
240         (*Result)[I] = Outputs[I];
241     return Result;
242   }
243 };
244 
/// Save \p TheModule before the first codegen round.
/// \param TheModule the module to save; it is taken by const reference.
/// \param Task the partition number in the parallel code generation process.
/// \param AddStream the callback used to add the serialized module to the
/// stream.
LLVM_ABI void saveModuleForTwoRounds(const Module &TheModule, unsigned Task,
                                     AddStreamFn AddStream);
251 
/// Load the optimized bitcode module for the second codegen round.
/// \param OrigModule the original bitcode module.
/// \param Task the partition number in the parallel code generation process.
/// \param Context the environment settings for module operations.
/// \param IRFiles the optimized bitcode module files needed for loading.
/// NOTE(review): \p IRFiles is presumably indexed by \p Task -- confirm
/// against the definition.
/// \return A unique_ptr to the loaded Module, or nullptr if loading fails.
LLVM_ABI std::unique_ptr<Module>
loadModuleForTwoRounds(BitcodeModule &OrigModule, unsigned Task,
                       LLVMContext &Context, ArrayRef<StringRef> IRFiles);
261 
/// Merge the codegen data from the scratch objects \p ObjectFiles from the
/// first codegen round.
/// \param ObjectFiles the scratch objects, one StringRef per object.
/// \return the combined hash of the merged codegen data; the Expected return
/// type allows an Error to be returned instead.
LLVM_ABI Expected<stable_hash>
mergeCodeGenData(ArrayRef<StringRef> ObjectFiles);
267 
/// Warning helpers, defined out of line. The first overload takes ownership of
/// the Error \p E; the second takes a preformatted \p Message.
/// NOTE(review): \p Whence presumably names the origin of the warning (for
/// example an input file) and \p Hint an optional follow-up suggestion --
/// confirm against the definitions.
LLVM_ABI void warn(Error E, StringRef Whence = "");
LLVM_ABI void warn(Twine Message, StringRef Whence = "", StringRef Hint = "");
270 
271 } // end namespace cgdata
272 
273 namespace IndexedCGData {
274 
// Signature used for data validation: the byte sequence "\xffcgdata\x81"
// interpreted as a little-endian 64-bit integer.
constexpr uint64_t Magic = 0x81617461646763ffULL;
278 
/// Versions of the indexed codegen data format.
enum CGDataVersion {
  // Version 1 is the first version. This version supports the outlined
  // hash tree.
  Version1 = 1,
  // Version 2 supports the stable function merging map.
  Version2 = 2,
  // Version 3 adds the total size of the Names in the stable function map so
  // we can skip reading them into the memory for non-assertion builds.
  Version3 = 3,
  // CG_DATA_INDEX_VERSION is not defined in this header.
  // NOTE(review): presumably it is provided by CodeGenData.inc -- confirm.
  CurrentVersion = CG_DATA_INDEX_VERSION
};
// The current format version as a 64-bit integer constant.
const uint64_t Version = CGDataVersion::CurrentVersion;
291 
/// Header of the indexed codegen data.
struct Header {
  // NOTE(review): presumably compared against IndexedCGData::Magic when the
  // header is read -- confirm in readFromBuffer.
  uint64_t Magic;
  // Format version of the data (see CGDataVersion).
  uint32_t Version;
  // NOTE(review): presumably a bitmask of CGDataKind values -- confirm.
  uint32_t DataKind;
  // NOTE(review): the two offsets presumably locate the corresponding
  // payloads relative to the start of the data -- confirm.
  uint64_t OutlinedHashTreeOffset;
  uint64_t StableFunctionMapOffset;

  // New fields should only be added at the end to ensure that the size
  // computation is correct. The methods below need to be updated to ensure that
  // the new field is read correctly.

  // Reads a header struct from the buffer starting at \p Curr. Returns the
  // parsed Header, or an Error through Expected. Defined out of line.
  LLVM_ABI static Expected<Header> readFromBuffer(const unsigned char *Curr);
};
306 
307 } // end namespace IndexedCGData
308 
309 } // end namespace llvm
310 
#endif // LLVM_CGDATA_CODEGENDATA_H
312