xref: /freebsd/contrib/llvm-project/llvm/lib/CGData/CodeGenDataReader.cpp (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===- CodeGenDataReader.cpp ----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains support for reading codegen data.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/CGData/CodeGenDataReader.h"
14 #include "llvm/CGData/OutlinedHashTreeRecord.h"
15 #include "llvm/Object/ObjectFile.h"
16 #include "llvm/Support/CommandLine.h"
17 #include "llvm/Support/MemoryBuffer.h"
18 
19 #define DEBUG_TYPE "cg-data-reader"
20 
21 using namespace llvm;
22 
23 static cl::opt<bool> IndexedCodeGenDataReadFunctionMapNames(
24     "indexed-codegen-data-read-function-map-names", cl::init(true), cl::Hidden,
25     cl::desc("Read function map names in indexed CodeGenData. Can be "
26              "disabled to save memory and time for final consumption of the "
27              "indexed CodeGenData in production."));
28 
29 namespace llvm {
30 
31 static Expected<std::unique_ptr<MemoryBuffer>>
setupMemoryBuffer(const Twine & Filename,vfs::FileSystem & FS)32 setupMemoryBuffer(const Twine &Filename, vfs::FileSystem &FS) {
33   auto BufferOrErr = Filename.str() == "-" ? MemoryBuffer::getSTDIN()
34                                            : FS.getBufferForFile(Filename);
35   if (std::error_code EC = BufferOrErr.getError())
36     return errorCodeToError(EC);
37   return std::move(BufferOrErr.get());
38 }
39 
mergeFromObjectFile(const object::ObjectFile * Obj,OutlinedHashTreeRecord & GlobalOutlineRecord,StableFunctionMapRecord & GlobalFunctionMapRecord,stable_hash * CombinedHash)40 Error CodeGenDataReader::mergeFromObjectFile(
41     const object::ObjectFile *Obj, OutlinedHashTreeRecord &GlobalOutlineRecord,
42     StableFunctionMapRecord &GlobalFunctionMapRecord,
43     stable_hash *CombinedHash) {
44   Triple TT = Obj->makeTriple();
45   auto CGOutlineName =
46       getCodeGenDataSectionName(CG_outline, TT.getObjectFormat(), false);
47   auto CGMergeName =
48       getCodeGenDataSectionName(CG_merge, TT.getObjectFormat(), false);
49 
50   auto processSectionContents = [&](const StringRef &Name,
51                                     const StringRef &Contents) {
52     if (Name != CGOutlineName && Name != CGMergeName)
53       return;
54     if (CombinedHash)
55       *CombinedHash = stable_hash_combine(*CombinedHash, xxh3_64bits(Contents));
56     auto *Data = reinterpret_cast<const unsigned char *>(Contents.data());
57     auto *EndData = Data + Contents.size();
58     // In case dealing with an executable that has concatenated cgdata,
59     // we want to merge them into a single cgdata.
60     // Although it's not a typical workflow, we support this scenario
61     // by looping over all data in the sections.
62     if (Name == CGOutlineName) {
63       while (Data != EndData) {
64         OutlinedHashTreeRecord LocalOutlineRecord;
65         LocalOutlineRecord.deserialize(Data);
66         GlobalOutlineRecord.merge(LocalOutlineRecord);
67       }
68     } else if (Name == CGMergeName) {
69       while (Data != EndData) {
70         StableFunctionMapRecord LocalFunctionMapRecord;
71         LocalFunctionMapRecord.deserialize(Data);
72         GlobalFunctionMapRecord.merge(LocalFunctionMapRecord);
73       }
74     }
75   };
76 
77   for (auto &Section : Obj->sections()) {
78     Expected<StringRef> NameOrErr = Section.getName();
79     if (!NameOrErr)
80       return NameOrErr.takeError();
81     Expected<StringRef> ContentsOrErr = Section.getContents();
82     if (!ContentsOrErr)
83       return ContentsOrErr.takeError();
84     processSectionContents(*NameOrErr, *ContentsOrErr);
85   }
86 
87   return Error::success();
88 }
89 
read()90 Error IndexedCodeGenDataReader::read() {
91   using namespace support;
92 
93   // The smallest header with the version 1 is 24 bytes.
94   // Do not update this value even with the new version of the header.
95   const unsigned MinHeaderSize = 24;
96   if (DataBuffer->getBufferSize() < MinHeaderSize)
97     return error(cgdata_error::bad_header);
98 
99   auto *Start =
100       reinterpret_cast<const unsigned char *>(DataBuffer->getBufferStart());
101   auto *End =
102       reinterpret_cast<const unsigned char *>(DataBuffer->getBufferEnd());
103   if (auto E = IndexedCGData::Header::readFromBuffer(Start).moveInto(Header))
104     return E;
105 
106   if (hasOutlinedHashTree()) {
107     const unsigned char *Ptr = Start + Header.OutlinedHashTreeOffset;
108     if (Ptr >= End)
109       return error(cgdata_error::eof);
110     HashTreeRecord.deserialize(Ptr);
111   }
112   if (hasStableFunctionMap()) {
113     const unsigned char *Ptr = Start + Header.StableFunctionMapOffset;
114     if (Ptr >= End)
115       return error(cgdata_error::eof);
116     FunctionMapRecord.deserialize(Ptr, IndexedCodeGenDataReadFunctionMapNames);
117   }
118 
119   return success();
120 }
121 
122 Expected<std::unique_ptr<CodeGenDataReader>>
create(const Twine & Path,vfs::FileSystem & FS)123 CodeGenDataReader::create(const Twine &Path, vfs::FileSystem &FS) {
124   // Set up the buffer to read.
125   auto BufferOrError = setupMemoryBuffer(Path, FS);
126   if (Error E = BufferOrError.takeError())
127     return std::move(E);
128   return CodeGenDataReader::create(std::move(BufferOrError.get()));
129 }
130 
131 Expected<std::unique_ptr<CodeGenDataReader>>
create(std::unique_ptr<MemoryBuffer> Buffer)132 CodeGenDataReader::create(std::unique_ptr<MemoryBuffer> Buffer) {
133   if (Buffer->getBufferSize() == 0)
134     return make_error<CGDataError>(cgdata_error::empty_cgdata);
135 
136   std::unique_ptr<CodeGenDataReader> Reader;
137   // Create the reader.
138   if (IndexedCodeGenDataReader::hasFormat(*Buffer))
139     Reader = std::make_unique<IndexedCodeGenDataReader>(std::move(Buffer));
140   else if (TextCodeGenDataReader::hasFormat(*Buffer))
141     Reader = std::make_unique<TextCodeGenDataReader>(std::move(Buffer));
142   else
143     return make_error<CGDataError>(cgdata_error::malformed);
144 
145   // Initialize the reader and return the result.
146   if (Error E = Reader->read())
147     return std::move(E);
148 
149   return std::move(Reader);
150 }
151 
hasFormat(const MemoryBuffer & DataBuffer)152 bool IndexedCodeGenDataReader::hasFormat(const MemoryBuffer &DataBuffer) {
153   using namespace support;
154   if (DataBuffer.getBufferSize() < sizeof(IndexedCGData::Magic))
155     return false;
156 
157   uint64_t Magic = endian::read<uint64_t, llvm::endianness::little, aligned>(
158       DataBuffer.getBufferStart());
159   // Verify that it's magical.
160   return Magic == IndexedCGData::Magic;
161 }
162 
hasFormat(const MemoryBuffer & Buffer)163 bool TextCodeGenDataReader::hasFormat(const MemoryBuffer &Buffer) {
164   // Verify that this really looks like plain ASCII text by checking a
165   // 'reasonable' number of characters (up to the magic size).
166   StringRef Prefix = Buffer.getBuffer().take_front(sizeof(uint64_t));
167   return llvm::all_of(Prefix, [](char c) { return isPrint(c) || isSpace(c); });
168 }
read()169 Error TextCodeGenDataReader::read() {
170   using namespace support;
171 
172   // Parse the custom header line by line.
173   for (; !Line.is_at_eof(); ++Line) {
174     // Skip empty or whitespace-only lines
175     if (Line->trim().empty())
176       continue;
177 
178     if (!Line->starts_with(":"))
179       break;
180     StringRef Str = Line->drop_front().rtrim();
181     if (Str.equals_insensitive("outlined_hash_tree"))
182       DataKind |= CGDataKind::FunctionOutlinedHashTree;
183     else if (Str.equals_insensitive("stable_function_map"))
184       DataKind |= CGDataKind::StableFunctionMergingMap;
185     else
186       return error(cgdata_error::bad_header);
187   }
188 
189   // We treat an empty header (that is a comment # only) as a valid header.
190   if (Line.is_at_eof()) {
191     if (DataKind == CGDataKind::Unknown)
192       return Error::success();
193     return error(cgdata_error::bad_header);
194   }
195 
196   // The YAML docs follow after the header.
197   const char *Pos = Line->data();
198   size_t Size = reinterpret_cast<size_t>(DataBuffer->getBufferEnd()) -
199                 reinterpret_cast<size_t>(Pos);
200   yaml::Input YOS(StringRef(Pos, Size));
201   if (hasOutlinedHashTree())
202     HashTreeRecord.deserializeYAML(YOS);
203   if (hasStableFunctionMap())
204     FunctionMapRecord.deserializeYAML(YOS);
205 
206   return Error::success();
207 }
208 } // end namespace llvm
209