1 //===- CodeGenDataReader.cpp ----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains support for reading codegen data.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "llvm/CGData/CodeGenDataReader.h"
14 #include "llvm/CGData/OutlinedHashTreeRecord.h"
15 #include "llvm/Object/ObjectFile.h"
16 #include "llvm/Support/CommandLine.h"
17 #include "llvm/Support/MemoryBuffer.h"
18
19 #define DEBUG_TYPE "cg-data-reader"
20
21 using namespace llvm;
22
23 static cl::opt<bool> IndexedCodeGenDataReadFunctionMapNames(
24 "indexed-codegen-data-read-function-map-names", cl::init(true), cl::Hidden,
25 cl::desc("Read function map names in indexed CodeGenData. Can be "
26 "disabled to save memory and time for final consumption of the "
27 "indexed CodeGenData in production."));
28
29 namespace llvm {
30
31 static Expected<std::unique_ptr<MemoryBuffer>>
setupMemoryBuffer(const Twine & Filename,vfs::FileSystem & FS)32 setupMemoryBuffer(const Twine &Filename, vfs::FileSystem &FS) {
33 auto BufferOrErr = Filename.str() == "-" ? MemoryBuffer::getSTDIN()
34 : FS.getBufferForFile(Filename);
35 if (std::error_code EC = BufferOrErr.getError())
36 return errorCodeToError(EC);
37 return std::move(BufferOrErr.get());
38 }
39
mergeFromObjectFile(const object::ObjectFile * Obj,OutlinedHashTreeRecord & GlobalOutlineRecord,StableFunctionMapRecord & GlobalFunctionMapRecord,stable_hash * CombinedHash)40 Error CodeGenDataReader::mergeFromObjectFile(
41 const object::ObjectFile *Obj, OutlinedHashTreeRecord &GlobalOutlineRecord,
42 StableFunctionMapRecord &GlobalFunctionMapRecord,
43 stable_hash *CombinedHash) {
44 Triple TT = Obj->makeTriple();
45 auto CGOutlineName =
46 getCodeGenDataSectionName(CG_outline, TT.getObjectFormat(), false);
47 auto CGMergeName =
48 getCodeGenDataSectionName(CG_merge, TT.getObjectFormat(), false);
49
50 auto processSectionContents = [&](const StringRef &Name,
51 const StringRef &Contents) {
52 if (Name != CGOutlineName && Name != CGMergeName)
53 return;
54 if (CombinedHash)
55 *CombinedHash = stable_hash_combine(*CombinedHash, xxh3_64bits(Contents));
56 auto *Data = reinterpret_cast<const unsigned char *>(Contents.data());
57 auto *EndData = Data + Contents.size();
58 // In case dealing with an executable that has concatenated cgdata,
59 // we want to merge them into a single cgdata.
60 // Although it's not a typical workflow, we support this scenario
61 // by looping over all data in the sections.
62 if (Name == CGOutlineName) {
63 while (Data != EndData) {
64 OutlinedHashTreeRecord LocalOutlineRecord;
65 LocalOutlineRecord.deserialize(Data);
66 GlobalOutlineRecord.merge(LocalOutlineRecord);
67 }
68 } else if (Name == CGMergeName) {
69 while (Data != EndData) {
70 StableFunctionMapRecord LocalFunctionMapRecord;
71 LocalFunctionMapRecord.deserialize(Data);
72 GlobalFunctionMapRecord.merge(LocalFunctionMapRecord);
73 }
74 }
75 };
76
77 for (auto &Section : Obj->sections()) {
78 Expected<StringRef> NameOrErr = Section.getName();
79 if (!NameOrErr)
80 return NameOrErr.takeError();
81 Expected<StringRef> ContentsOrErr = Section.getContents();
82 if (!ContentsOrErr)
83 return ContentsOrErr.takeError();
84 processSectionContents(*NameOrErr, *ContentsOrErr);
85 }
86
87 return Error::success();
88 }
89
read()90 Error IndexedCodeGenDataReader::read() {
91 using namespace support;
92
93 // The smallest header with the version 1 is 24 bytes.
94 // Do not update this value even with the new version of the header.
95 const unsigned MinHeaderSize = 24;
96 if (DataBuffer->getBufferSize() < MinHeaderSize)
97 return error(cgdata_error::bad_header);
98
99 auto *Start =
100 reinterpret_cast<const unsigned char *>(DataBuffer->getBufferStart());
101 auto *End =
102 reinterpret_cast<const unsigned char *>(DataBuffer->getBufferEnd());
103 if (auto E = IndexedCGData::Header::readFromBuffer(Start).moveInto(Header))
104 return E;
105
106 if (hasOutlinedHashTree()) {
107 const unsigned char *Ptr = Start + Header.OutlinedHashTreeOffset;
108 if (Ptr >= End)
109 return error(cgdata_error::eof);
110 HashTreeRecord.deserialize(Ptr);
111 }
112 if (hasStableFunctionMap()) {
113 const unsigned char *Ptr = Start + Header.StableFunctionMapOffset;
114 if (Ptr >= End)
115 return error(cgdata_error::eof);
116 FunctionMapRecord.deserialize(Ptr, IndexedCodeGenDataReadFunctionMapNames);
117 }
118
119 return success();
120 }
121
122 Expected<std::unique_ptr<CodeGenDataReader>>
create(const Twine & Path,vfs::FileSystem & FS)123 CodeGenDataReader::create(const Twine &Path, vfs::FileSystem &FS) {
124 // Set up the buffer to read.
125 auto BufferOrError = setupMemoryBuffer(Path, FS);
126 if (Error E = BufferOrError.takeError())
127 return std::move(E);
128 return CodeGenDataReader::create(std::move(BufferOrError.get()));
129 }
130
131 Expected<std::unique_ptr<CodeGenDataReader>>
create(std::unique_ptr<MemoryBuffer> Buffer)132 CodeGenDataReader::create(std::unique_ptr<MemoryBuffer> Buffer) {
133 if (Buffer->getBufferSize() == 0)
134 return make_error<CGDataError>(cgdata_error::empty_cgdata);
135
136 std::unique_ptr<CodeGenDataReader> Reader;
137 // Create the reader.
138 if (IndexedCodeGenDataReader::hasFormat(*Buffer))
139 Reader = std::make_unique<IndexedCodeGenDataReader>(std::move(Buffer));
140 else if (TextCodeGenDataReader::hasFormat(*Buffer))
141 Reader = std::make_unique<TextCodeGenDataReader>(std::move(Buffer));
142 else
143 return make_error<CGDataError>(cgdata_error::malformed);
144
145 // Initialize the reader and return the result.
146 if (Error E = Reader->read())
147 return std::move(E);
148
149 return std::move(Reader);
150 }
151
hasFormat(const MemoryBuffer & DataBuffer)152 bool IndexedCodeGenDataReader::hasFormat(const MemoryBuffer &DataBuffer) {
153 using namespace support;
154 if (DataBuffer.getBufferSize() < sizeof(IndexedCGData::Magic))
155 return false;
156
157 uint64_t Magic = endian::read<uint64_t, llvm::endianness::little, aligned>(
158 DataBuffer.getBufferStart());
159 // Verify that it's magical.
160 return Magic == IndexedCGData::Magic;
161 }
162
hasFormat(const MemoryBuffer & Buffer)163 bool TextCodeGenDataReader::hasFormat(const MemoryBuffer &Buffer) {
164 // Verify that this really looks like plain ASCII text by checking a
165 // 'reasonable' number of characters (up to the magic size).
166 StringRef Prefix = Buffer.getBuffer().take_front(sizeof(uint64_t));
167 return llvm::all_of(Prefix, [](char c) { return isPrint(c) || isSpace(c); });
168 }
read()169 Error TextCodeGenDataReader::read() {
170 using namespace support;
171
172 // Parse the custom header line by line.
173 for (; !Line.is_at_eof(); ++Line) {
174 // Skip empty or whitespace-only lines
175 if (Line->trim().empty())
176 continue;
177
178 if (!Line->starts_with(":"))
179 break;
180 StringRef Str = Line->drop_front().rtrim();
181 if (Str.equals_insensitive("outlined_hash_tree"))
182 DataKind |= CGDataKind::FunctionOutlinedHashTree;
183 else if (Str.equals_insensitive("stable_function_map"))
184 DataKind |= CGDataKind::StableFunctionMergingMap;
185 else
186 return error(cgdata_error::bad_header);
187 }
188
189 // We treat an empty header (that is a comment # only) as a valid header.
190 if (Line.is_at_eof()) {
191 if (DataKind == CGDataKind::Unknown)
192 return Error::success();
193 return error(cgdata_error::bad_header);
194 }
195
196 // The YAML docs follow after the header.
197 const char *Pos = Line->data();
198 size_t Size = reinterpret_cast<size_t>(DataBuffer->getBufferEnd()) -
199 reinterpret_cast<size_t>(Pos);
200 yaml::Input YOS(StringRef(Pos, Size));
201 if (hasOutlinedHashTree())
202 HashTreeRecord.deserializeYAML(YOS);
203 if (hasStableFunctionMap())
204 FunctionMapRecord.deserializeYAML(YOS);
205
206 return Error::success();
207 }
208 } // end namespace llvm
209