1*700637cbSDimitry Andric //===- CodeGenDataReader.cpp ----------------------------------------------===//
2*700637cbSDimitry Andric //
3*700637cbSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*700637cbSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5*700637cbSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*700637cbSDimitry Andric //
7*700637cbSDimitry Andric //===----------------------------------------------------------------------===//
8*700637cbSDimitry Andric //
9*700637cbSDimitry Andric // This file contains support for reading codegen data.
10*700637cbSDimitry Andric //
11*700637cbSDimitry Andric //===----------------------------------------------------------------------===//
12*700637cbSDimitry Andric
13*700637cbSDimitry Andric #include "llvm/CGData/CodeGenDataReader.h"
14*700637cbSDimitry Andric #include "llvm/CGData/OutlinedHashTreeRecord.h"
15*700637cbSDimitry Andric #include "llvm/Object/ObjectFile.h"
16*700637cbSDimitry Andric #include "llvm/Support/CommandLine.h"
17*700637cbSDimitry Andric #include "llvm/Support/MemoryBuffer.h"
18*700637cbSDimitry Andric
19*700637cbSDimitry Andric #define DEBUG_TYPE "cg-data-reader"
20*700637cbSDimitry Andric
21*700637cbSDimitry Andric using namespace llvm;
22*700637cbSDimitry Andric
23*700637cbSDimitry Andric static cl::opt<bool> IndexedCodeGenDataReadFunctionMapNames(
24*700637cbSDimitry Andric "indexed-codegen-data-read-function-map-names", cl::init(true), cl::Hidden,
25*700637cbSDimitry Andric cl::desc("Read function map names in indexed CodeGenData. Can be "
26*700637cbSDimitry Andric "disabled to save memory and time for final consumption of the "
27*700637cbSDimitry Andric "indexed CodeGenData in production."));
28*700637cbSDimitry Andric
29*700637cbSDimitry Andric namespace llvm {
30*700637cbSDimitry Andric
31*700637cbSDimitry Andric static Expected<std::unique_ptr<MemoryBuffer>>
setupMemoryBuffer(const Twine & Filename,vfs::FileSystem & FS)32*700637cbSDimitry Andric setupMemoryBuffer(const Twine &Filename, vfs::FileSystem &FS) {
33*700637cbSDimitry Andric auto BufferOrErr = Filename.str() == "-" ? MemoryBuffer::getSTDIN()
34*700637cbSDimitry Andric : FS.getBufferForFile(Filename);
35*700637cbSDimitry Andric if (std::error_code EC = BufferOrErr.getError())
36*700637cbSDimitry Andric return errorCodeToError(EC);
37*700637cbSDimitry Andric return std::move(BufferOrErr.get());
38*700637cbSDimitry Andric }
39*700637cbSDimitry Andric
mergeFromObjectFile(const object::ObjectFile * Obj,OutlinedHashTreeRecord & GlobalOutlineRecord,StableFunctionMapRecord & GlobalFunctionMapRecord,stable_hash * CombinedHash)40*700637cbSDimitry Andric Error CodeGenDataReader::mergeFromObjectFile(
41*700637cbSDimitry Andric const object::ObjectFile *Obj, OutlinedHashTreeRecord &GlobalOutlineRecord,
42*700637cbSDimitry Andric StableFunctionMapRecord &GlobalFunctionMapRecord,
43*700637cbSDimitry Andric stable_hash *CombinedHash) {
44*700637cbSDimitry Andric Triple TT = Obj->makeTriple();
45*700637cbSDimitry Andric auto CGOutlineName =
46*700637cbSDimitry Andric getCodeGenDataSectionName(CG_outline, TT.getObjectFormat(), false);
47*700637cbSDimitry Andric auto CGMergeName =
48*700637cbSDimitry Andric getCodeGenDataSectionName(CG_merge, TT.getObjectFormat(), false);
49*700637cbSDimitry Andric
50*700637cbSDimitry Andric auto processSectionContents = [&](const StringRef &Name,
51*700637cbSDimitry Andric const StringRef &Contents) {
52*700637cbSDimitry Andric if (Name != CGOutlineName && Name != CGMergeName)
53*700637cbSDimitry Andric return;
54*700637cbSDimitry Andric if (CombinedHash)
55*700637cbSDimitry Andric *CombinedHash = stable_hash_combine(*CombinedHash, xxh3_64bits(Contents));
56*700637cbSDimitry Andric auto *Data = reinterpret_cast<const unsigned char *>(Contents.data());
57*700637cbSDimitry Andric auto *EndData = Data + Contents.size();
58*700637cbSDimitry Andric // In case dealing with an executable that has concatenated cgdata,
59*700637cbSDimitry Andric // we want to merge them into a single cgdata.
60*700637cbSDimitry Andric // Although it's not a typical workflow, we support this scenario
61*700637cbSDimitry Andric // by looping over all data in the sections.
62*700637cbSDimitry Andric if (Name == CGOutlineName) {
63*700637cbSDimitry Andric while (Data != EndData) {
64*700637cbSDimitry Andric OutlinedHashTreeRecord LocalOutlineRecord;
65*700637cbSDimitry Andric LocalOutlineRecord.deserialize(Data);
66*700637cbSDimitry Andric GlobalOutlineRecord.merge(LocalOutlineRecord);
67*700637cbSDimitry Andric }
68*700637cbSDimitry Andric } else if (Name == CGMergeName) {
69*700637cbSDimitry Andric while (Data != EndData) {
70*700637cbSDimitry Andric StableFunctionMapRecord LocalFunctionMapRecord;
71*700637cbSDimitry Andric LocalFunctionMapRecord.deserialize(Data);
72*700637cbSDimitry Andric GlobalFunctionMapRecord.merge(LocalFunctionMapRecord);
73*700637cbSDimitry Andric }
74*700637cbSDimitry Andric }
75*700637cbSDimitry Andric };
76*700637cbSDimitry Andric
77*700637cbSDimitry Andric for (auto &Section : Obj->sections()) {
78*700637cbSDimitry Andric Expected<StringRef> NameOrErr = Section.getName();
79*700637cbSDimitry Andric if (!NameOrErr)
80*700637cbSDimitry Andric return NameOrErr.takeError();
81*700637cbSDimitry Andric Expected<StringRef> ContentsOrErr = Section.getContents();
82*700637cbSDimitry Andric if (!ContentsOrErr)
83*700637cbSDimitry Andric return ContentsOrErr.takeError();
84*700637cbSDimitry Andric processSectionContents(*NameOrErr, *ContentsOrErr);
85*700637cbSDimitry Andric }
86*700637cbSDimitry Andric
87*700637cbSDimitry Andric return Error::success();
88*700637cbSDimitry Andric }
89*700637cbSDimitry Andric
read()90*700637cbSDimitry Andric Error IndexedCodeGenDataReader::read() {
91*700637cbSDimitry Andric using namespace support;
92*700637cbSDimitry Andric
93*700637cbSDimitry Andric // The smallest header with the version 1 is 24 bytes.
94*700637cbSDimitry Andric // Do not update this value even with the new version of the header.
95*700637cbSDimitry Andric const unsigned MinHeaderSize = 24;
96*700637cbSDimitry Andric if (DataBuffer->getBufferSize() < MinHeaderSize)
97*700637cbSDimitry Andric return error(cgdata_error::bad_header);
98*700637cbSDimitry Andric
99*700637cbSDimitry Andric auto *Start =
100*700637cbSDimitry Andric reinterpret_cast<const unsigned char *>(DataBuffer->getBufferStart());
101*700637cbSDimitry Andric auto *End =
102*700637cbSDimitry Andric reinterpret_cast<const unsigned char *>(DataBuffer->getBufferEnd());
103*700637cbSDimitry Andric if (auto E = IndexedCGData::Header::readFromBuffer(Start).moveInto(Header))
104*700637cbSDimitry Andric return E;
105*700637cbSDimitry Andric
106*700637cbSDimitry Andric if (hasOutlinedHashTree()) {
107*700637cbSDimitry Andric const unsigned char *Ptr = Start + Header.OutlinedHashTreeOffset;
108*700637cbSDimitry Andric if (Ptr >= End)
109*700637cbSDimitry Andric return error(cgdata_error::eof);
110*700637cbSDimitry Andric HashTreeRecord.deserialize(Ptr);
111*700637cbSDimitry Andric }
112*700637cbSDimitry Andric if (hasStableFunctionMap()) {
113*700637cbSDimitry Andric const unsigned char *Ptr = Start + Header.StableFunctionMapOffset;
114*700637cbSDimitry Andric if (Ptr >= End)
115*700637cbSDimitry Andric return error(cgdata_error::eof);
116*700637cbSDimitry Andric FunctionMapRecord.deserialize(Ptr, IndexedCodeGenDataReadFunctionMapNames);
117*700637cbSDimitry Andric }
118*700637cbSDimitry Andric
119*700637cbSDimitry Andric return success();
120*700637cbSDimitry Andric }
121*700637cbSDimitry Andric
122*700637cbSDimitry Andric Expected<std::unique_ptr<CodeGenDataReader>>
create(const Twine & Path,vfs::FileSystem & FS)123*700637cbSDimitry Andric CodeGenDataReader::create(const Twine &Path, vfs::FileSystem &FS) {
124*700637cbSDimitry Andric // Set up the buffer to read.
125*700637cbSDimitry Andric auto BufferOrError = setupMemoryBuffer(Path, FS);
126*700637cbSDimitry Andric if (Error E = BufferOrError.takeError())
127*700637cbSDimitry Andric return std::move(E);
128*700637cbSDimitry Andric return CodeGenDataReader::create(std::move(BufferOrError.get()));
129*700637cbSDimitry Andric }
130*700637cbSDimitry Andric
131*700637cbSDimitry Andric Expected<std::unique_ptr<CodeGenDataReader>>
create(std::unique_ptr<MemoryBuffer> Buffer)132*700637cbSDimitry Andric CodeGenDataReader::create(std::unique_ptr<MemoryBuffer> Buffer) {
133*700637cbSDimitry Andric if (Buffer->getBufferSize() == 0)
134*700637cbSDimitry Andric return make_error<CGDataError>(cgdata_error::empty_cgdata);
135*700637cbSDimitry Andric
136*700637cbSDimitry Andric std::unique_ptr<CodeGenDataReader> Reader;
137*700637cbSDimitry Andric // Create the reader.
138*700637cbSDimitry Andric if (IndexedCodeGenDataReader::hasFormat(*Buffer))
139*700637cbSDimitry Andric Reader = std::make_unique<IndexedCodeGenDataReader>(std::move(Buffer));
140*700637cbSDimitry Andric else if (TextCodeGenDataReader::hasFormat(*Buffer))
141*700637cbSDimitry Andric Reader = std::make_unique<TextCodeGenDataReader>(std::move(Buffer));
142*700637cbSDimitry Andric else
143*700637cbSDimitry Andric return make_error<CGDataError>(cgdata_error::malformed);
144*700637cbSDimitry Andric
145*700637cbSDimitry Andric // Initialize the reader and return the result.
146*700637cbSDimitry Andric if (Error E = Reader->read())
147*700637cbSDimitry Andric return std::move(E);
148*700637cbSDimitry Andric
149*700637cbSDimitry Andric return std::move(Reader);
150*700637cbSDimitry Andric }
151*700637cbSDimitry Andric
hasFormat(const MemoryBuffer & DataBuffer)152*700637cbSDimitry Andric bool IndexedCodeGenDataReader::hasFormat(const MemoryBuffer &DataBuffer) {
153*700637cbSDimitry Andric using namespace support;
154*700637cbSDimitry Andric if (DataBuffer.getBufferSize() < sizeof(IndexedCGData::Magic))
155*700637cbSDimitry Andric return false;
156*700637cbSDimitry Andric
157*700637cbSDimitry Andric uint64_t Magic = endian::read<uint64_t, llvm::endianness::little, aligned>(
158*700637cbSDimitry Andric DataBuffer.getBufferStart());
159*700637cbSDimitry Andric // Verify that it's magical.
160*700637cbSDimitry Andric return Magic == IndexedCGData::Magic;
161*700637cbSDimitry Andric }
162*700637cbSDimitry Andric
hasFormat(const MemoryBuffer & Buffer)163*700637cbSDimitry Andric bool TextCodeGenDataReader::hasFormat(const MemoryBuffer &Buffer) {
164*700637cbSDimitry Andric // Verify that this really looks like plain ASCII text by checking a
165*700637cbSDimitry Andric // 'reasonable' number of characters (up to the magic size).
166*700637cbSDimitry Andric StringRef Prefix = Buffer.getBuffer().take_front(sizeof(uint64_t));
167*700637cbSDimitry Andric return llvm::all_of(Prefix, [](char c) { return isPrint(c) || isSpace(c); });
168*700637cbSDimitry Andric }
read()169*700637cbSDimitry Andric Error TextCodeGenDataReader::read() {
170*700637cbSDimitry Andric using namespace support;
171*700637cbSDimitry Andric
172*700637cbSDimitry Andric // Parse the custom header line by line.
173*700637cbSDimitry Andric for (; !Line.is_at_eof(); ++Line) {
174*700637cbSDimitry Andric // Skip empty or whitespace-only lines
175*700637cbSDimitry Andric if (Line->trim().empty())
176*700637cbSDimitry Andric continue;
177*700637cbSDimitry Andric
178*700637cbSDimitry Andric if (!Line->starts_with(":"))
179*700637cbSDimitry Andric break;
180*700637cbSDimitry Andric StringRef Str = Line->drop_front().rtrim();
181*700637cbSDimitry Andric if (Str.equals_insensitive("outlined_hash_tree"))
182*700637cbSDimitry Andric DataKind |= CGDataKind::FunctionOutlinedHashTree;
183*700637cbSDimitry Andric else if (Str.equals_insensitive("stable_function_map"))
184*700637cbSDimitry Andric DataKind |= CGDataKind::StableFunctionMergingMap;
185*700637cbSDimitry Andric else
186*700637cbSDimitry Andric return error(cgdata_error::bad_header);
187*700637cbSDimitry Andric }
188*700637cbSDimitry Andric
189*700637cbSDimitry Andric // We treat an empty header (that is a comment # only) as a valid header.
190*700637cbSDimitry Andric if (Line.is_at_eof()) {
191*700637cbSDimitry Andric if (DataKind == CGDataKind::Unknown)
192*700637cbSDimitry Andric return Error::success();
193*700637cbSDimitry Andric return error(cgdata_error::bad_header);
194*700637cbSDimitry Andric }
195*700637cbSDimitry Andric
196*700637cbSDimitry Andric // The YAML docs follow after the header.
197*700637cbSDimitry Andric const char *Pos = Line->data();
198*700637cbSDimitry Andric size_t Size = reinterpret_cast<size_t>(DataBuffer->getBufferEnd()) -
199*700637cbSDimitry Andric reinterpret_cast<size_t>(Pos);
200*700637cbSDimitry Andric yaml::Input YOS(StringRef(Pos, Size));
201*700637cbSDimitry Andric if (hasOutlinedHashTree())
202*700637cbSDimitry Andric HashTreeRecord.deserializeYAML(YOS);
203*700637cbSDimitry Andric if (hasStableFunctionMap())
204*700637cbSDimitry Andric FunctionMapRecord.deserializeYAML(YOS);
205*700637cbSDimitry Andric
206*700637cbSDimitry Andric return Error::success();
207*700637cbSDimitry Andric }
208*700637cbSDimitry Andric } // end namespace llvm
209