xref: /freebsd/contrib/llvm-project/llvm/lib/CGData/CodeGenDataReader.cpp (revision 700637cbb5e582861067a11aaca4d053546871d2)
//===- CodeGenDataReader.cpp ----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains support for reading codegen data.
//
//===----------------------------------------------------------------------===//

12*700637cbSDimitry Andric 
13*700637cbSDimitry Andric #include "llvm/CGData/CodeGenDataReader.h"
14*700637cbSDimitry Andric #include "llvm/CGData/OutlinedHashTreeRecord.h"
15*700637cbSDimitry Andric #include "llvm/Object/ObjectFile.h"
16*700637cbSDimitry Andric #include "llvm/Support/CommandLine.h"
17*700637cbSDimitry Andric #include "llvm/Support/MemoryBuffer.h"
18*700637cbSDimitry Andric 
19*700637cbSDimitry Andric #define DEBUG_TYPE "cg-data-reader"
20*700637cbSDimitry Andric 
21*700637cbSDimitry Andric using namespace llvm;
22*700637cbSDimitry Andric 
23*700637cbSDimitry Andric static cl::opt<bool> IndexedCodeGenDataReadFunctionMapNames(
24*700637cbSDimitry Andric     "indexed-codegen-data-read-function-map-names", cl::init(true), cl::Hidden,
25*700637cbSDimitry Andric     cl::desc("Read function map names in indexed CodeGenData. Can be "
26*700637cbSDimitry Andric              "disabled to save memory and time for final consumption of the "
27*700637cbSDimitry Andric              "indexed CodeGenData in production."));
28*700637cbSDimitry Andric 
29*700637cbSDimitry Andric namespace llvm {
30*700637cbSDimitry Andric 
31*700637cbSDimitry Andric static Expected<std::unique_ptr<MemoryBuffer>>
setupMemoryBuffer(const Twine & Filename,vfs::FileSystem & FS)32*700637cbSDimitry Andric setupMemoryBuffer(const Twine &Filename, vfs::FileSystem &FS) {
33*700637cbSDimitry Andric   auto BufferOrErr = Filename.str() == "-" ? MemoryBuffer::getSTDIN()
34*700637cbSDimitry Andric                                            : FS.getBufferForFile(Filename);
35*700637cbSDimitry Andric   if (std::error_code EC = BufferOrErr.getError())
36*700637cbSDimitry Andric     return errorCodeToError(EC);
37*700637cbSDimitry Andric   return std::move(BufferOrErr.get());
38*700637cbSDimitry Andric }
39*700637cbSDimitry Andric 
mergeFromObjectFile(const object::ObjectFile * Obj,OutlinedHashTreeRecord & GlobalOutlineRecord,StableFunctionMapRecord & GlobalFunctionMapRecord,stable_hash * CombinedHash)40*700637cbSDimitry Andric Error CodeGenDataReader::mergeFromObjectFile(
41*700637cbSDimitry Andric     const object::ObjectFile *Obj, OutlinedHashTreeRecord &GlobalOutlineRecord,
42*700637cbSDimitry Andric     StableFunctionMapRecord &GlobalFunctionMapRecord,
43*700637cbSDimitry Andric     stable_hash *CombinedHash) {
44*700637cbSDimitry Andric   Triple TT = Obj->makeTriple();
45*700637cbSDimitry Andric   auto CGOutlineName =
46*700637cbSDimitry Andric       getCodeGenDataSectionName(CG_outline, TT.getObjectFormat(), false);
47*700637cbSDimitry Andric   auto CGMergeName =
48*700637cbSDimitry Andric       getCodeGenDataSectionName(CG_merge, TT.getObjectFormat(), false);
49*700637cbSDimitry Andric 
50*700637cbSDimitry Andric   auto processSectionContents = [&](const StringRef &Name,
51*700637cbSDimitry Andric                                     const StringRef &Contents) {
52*700637cbSDimitry Andric     if (Name != CGOutlineName && Name != CGMergeName)
53*700637cbSDimitry Andric       return;
54*700637cbSDimitry Andric     if (CombinedHash)
55*700637cbSDimitry Andric       *CombinedHash = stable_hash_combine(*CombinedHash, xxh3_64bits(Contents));
56*700637cbSDimitry Andric     auto *Data = reinterpret_cast<const unsigned char *>(Contents.data());
57*700637cbSDimitry Andric     auto *EndData = Data + Contents.size();
58*700637cbSDimitry Andric     // In case dealing with an executable that has concatenated cgdata,
59*700637cbSDimitry Andric     // we want to merge them into a single cgdata.
60*700637cbSDimitry Andric     // Although it's not a typical workflow, we support this scenario
61*700637cbSDimitry Andric     // by looping over all data in the sections.
62*700637cbSDimitry Andric     if (Name == CGOutlineName) {
63*700637cbSDimitry Andric       while (Data != EndData) {
64*700637cbSDimitry Andric         OutlinedHashTreeRecord LocalOutlineRecord;
65*700637cbSDimitry Andric         LocalOutlineRecord.deserialize(Data);
66*700637cbSDimitry Andric         GlobalOutlineRecord.merge(LocalOutlineRecord);
67*700637cbSDimitry Andric       }
68*700637cbSDimitry Andric     } else if (Name == CGMergeName) {
69*700637cbSDimitry Andric       while (Data != EndData) {
70*700637cbSDimitry Andric         StableFunctionMapRecord LocalFunctionMapRecord;
71*700637cbSDimitry Andric         LocalFunctionMapRecord.deserialize(Data);
72*700637cbSDimitry Andric         GlobalFunctionMapRecord.merge(LocalFunctionMapRecord);
73*700637cbSDimitry Andric       }
74*700637cbSDimitry Andric     }
75*700637cbSDimitry Andric   };
76*700637cbSDimitry Andric 
77*700637cbSDimitry Andric   for (auto &Section : Obj->sections()) {
78*700637cbSDimitry Andric     Expected<StringRef> NameOrErr = Section.getName();
79*700637cbSDimitry Andric     if (!NameOrErr)
80*700637cbSDimitry Andric       return NameOrErr.takeError();
81*700637cbSDimitry Andric     Expected<StringRef> ContentsOrErr = Section.getContents();
82*700637cbSDimitry Andric     if (!ContentsOrErr)
83*700637cbSDimitry Andric       return ContentsOrErr.takeError();
84*700637cbSDimitry Andric     processSectionContents(*NameOrErr, *ContentsOrErr);
85*700637cbSDimitry Andric   }
86*700637cbSDimitry Andric 
87*700637cbSDimitry Andric   return Error::success();
88*700637cbSDimitry Andric }
89*700637cbSDimitry Andric 
read()90*700637cbSDimitry Andric Error IndexedCodeGenDataReader::read() {
91*700637cbSDimitry Andric   using namespace support;
92*700637cbSDimitry Andric 
93*700637cbSDimitry Andric   // The smallest header with the version 1 is 24 bytes.
94*700637cbSDimitry Andric   // Do not update this value even with the new version of the header.
95*700637cbSDimitry Andric   const unsigned MinHeaderSize = 24;
96*700637cbSDimitry Andric   if (DataBuffer->getBufferSize() < MinHeaderSize)
97*700637cbSDimitry Andric     return error(cgdata_error::bad_header);
98*700637cbSDimitry Andric 
99*700637cbSDimitry Andric   auto *Start =
100*700637cbSDimitry Andric       reinterpret_cast<const unsigned char *>(DataBuffer->getBufferStart());
101*700637cbSDimitry Andric   auto *End =
102*700637cbSDimitry Andric       reinterpret_cast<const unsigned char *>(DataBuffer->getBufferEnd());
103*700637cbSDimitry Andric   if (auto E = IndexedCGData::Header::readFromBuffer(Start).moveInto(Header))
104*700637cbSDimitry Andric     return E;
105*700637cbSDimitry Andric 
106*700637cbSDimitry Andric   if (hasOutlinedHashTree()) {
107*700637cbSDimitry Andric     const unsigned char *Ptr = Start + Header.OutlinedHashTreeOffset;
108*700637cbSDimitry Andric     if (Ptr >= End)
109*700637cbSDimitry Andric       return error(cgdata_error::eof);
110*700637cbSDimitry Andric     HashTreeRecord.deserialize(Ptr);
111*700637cbSDimitry Andric   }
112*700637cbSDimitry Andric   if (hasStableFunctionMap()) {
113*700637cbSDimitry Andric     const unsigned char *Ptr = Start + Header.StableFunctionMapOffset;
114*700637cbSDimitry Andric     if (Ptr >= End)
115*700637cbSDimitry Andric       return error(cgdata_error::eof);
116*700637cbSDimitry Andric     FunctionMapRecord.deserialize(Ptr, IndexedCodeGenDataReadFunctionMapNames);
117*700637cbSDimitry Andric   }
118*700637cbSDimitry Andric 
119*700637cbSDimitry Andric   return success();
120*700637cbSDimitry Andric }
121*700637cbSDimitry Andric 
122*700637cbSDimitry Andric Expected<std::unique_ptr<CodeGenDataReader>>
create(const Twine & Path,vfs::FileSystem & FS)123*700637cbSDimitry Andric CodeGenDataReader::create(const Twine &Path, vfs::FileSystem &FS) {
124*700637cbSDimitry Andric   // Set up the buffer to read.
125*700637cbSDimitry Andric   auto BufferOrError = setupMemoryBuffer(Path, FS);
126*700637cbSDimitry Andric   if (Error E = BufferOrError.takeError())
127*700637cbSDimitry Andric     return std::move(E);
128*700637cbSDimitry Andric   return CodeGenDataReader::create(std::move(BufferOrError.get()));
129*700637cbSDimitry Andric }
130*700637cbSDimitry Andric 
131*700637cbSDimitry Andric Expected<std::unique_ptr<CodeGenDataReader>>
create(std::unique_ptr<MemoryBuffer> Buffer)132*700637cbSDimitry Andric CodeGenDataReader::create(std::unique_ptr<MemoryBuffer> Buffer) {
133*700637cbSDimitry Andric   if (Buffer->getBufferSize() == 0)
134*700637cbSDimitry Andric     return make_error<CGDataError>(cgdata_error::empty_cgdata);
135*700637cbSDimitry Andric 
136*700637cbSDimitry Andric   std::unique_ptr<CodeGenDataReader> Reader;
137*700637cbSDimitry Andric   // Create the reader.
138*700637cbSDimitry Andric   if (IndexedCodeGenDataReader::hasFormat(*Buffer))
139*700637cbSDimitry Andric     Reader = std::make_unique<IndexedCodeGenDataReader>(std::move(Buffer));
140*700637cbSDimitry Andric   else if (TextCodeGenDataReader::hasFormat(*Buffer))
141*700637cbSDimitry Andric     Reader = std::make_unique<TextCodeGenDataReader>(std::move(Buffer));
142*700637cbSDimitry Andric   else
143*700637cbSDimitry Andric     return make_error<CGDataError>(cgdata_error::malformed);
144*700637cbSDimitry Andric 
145*700637cbSDimitry Andric   // Initialize the reader and return the result.
146*700637cbSDimitry Andric   if (Error E = Reader->read())
147*700637cbSDimitry Andric     return std::move(E);
148*700637cbSDimitry Andric 
149*700637cbSDimitry Andric   return std::move(Reader);
150*700637cbSDimitry Andric }
151*700637cbSDimitry Andric 
hasFormat(const MemoryBuffer & DataBuffer)152*700637cbSDimitry Andric bool IndexedCodeGenDataReader::hasFormat(const MemoryBuffer &DataBuffer) {
153*700637cbSDimitry Andric   using namespace support;
154*700637cbSDimitry Andric   if (DataBuffer.getBufferSize() < sizeof(IndexedCGData::Magic))
155*700637cbSDimitry Andric     return false;
156*700637cbSDimitry Andric 
157*700637cbSDimitry Andric   uint64_t Magic = endian::read<uint64_t, llvm::endianness::little, aligned>(
158*700637cbSDimitry Andric       DataBuffer.getBufferStart());
159*700637cbSDimitry Andric   // Verify that it's magical.
160*700637cbSDimitry Andric   return Magic == IndexedCGData::Magic;
161*700637cbSDimitry Andric }
162*700637cbSDimitry Andric 
hasFormat(const MemoryBuffer & Buffer)163*700637cbSDimitry Andric bool TextCodeGenDataReader::hasFormat(const MemoryBuffer &Buffer) {
164*700637cbSDimitry Andric   // Verify that this really looks like plain ASCII text by checking a
165*700637cbSDimitry Andric   // 'reasonable' number of characters (up to the magic size).
166*700637cbSDimitry Andric   StringRef Prefix = Buffer.getBuffer().take_front(sizeof(uint64_t));
167*700637cbSDimitry Andric   return llvm::all_of(Prefix, [](char c) { return isPrint(c) || isSpace(c); });
168*700637cbSDimitry Andric }
read()169*700637cbSDimitry Andric Error TextCodeGenDataReader::read() {
170*700637cbSDimitry Andric   using namespace support;
171*700637cbSDimitry Andric 
172*700637cbSDimitry Andric   // Parse the custom header line by line.
173*700637cbSDimitry Andric   for (; !Line.is_at_eof(); ++Line) {
174*700637cbSDimitry Andric     // Skip empty or whitespace-only lines
175*700637cbSDimitry Andric     if (Line->trim().empty())
176*700637cbSDimitry Andric       continue;
177*700637cbSDimitry Andric 
178*700637cbSDimitry Andric     if (!Line->starts_with(":"))
179*700637cbSDimitry Andric       break;
180*700637cbSDimitry Andric     StringRef Str = Line->drop_front().rtrim();
181*700637cbSDimitry Andric     if (Str.equals_insensitive("outlined_hash_tree"))
182*700637cbSDimitry Andric       DataKind |= CGDataKind::FunctionOutlinedHashTree;
183*700637cbSDimitry Andric     else if (Str.equals_insensitive("stable_function_map"))
184*700637cbSDimitry Andric       DataKind |= CGDataKind::StableFunctionMergingMap;
185*700637cbSDimitry Andric     else
186*700637cbSDimitry Andric       return error(cgdata_error::bad_header);
187*700637cbSDimitry Andric   }
188*700637cbSDimitry Andric 
189*700637cbSDimitry Andric   // We treat an empty header (that is a comment # only) as a valid header.
190*700637cbSDimitry Andric   if (Line.is_at_eof()) {
191*700637cbSDimitry Andric     if (DataKind == CGDataKind::Unknown)
192*700637cbSDimitry Andric       return Error::success();
193*700637cbSDimitry Andric     return error(cgdata_error::bad_header);
194*700637cbSDimitry Andric   }
195*700637cbSDimitry Andric 
196*700637cbSDimitry Andric   // The YAML docs follow after the header.
197*700637cbSDimitry Andric   const char *Pos = Line->data();
198*700637cbSDimitry Andric   size_t Size = reinterpret_cast<size_t>(DataBuffer->getBufferEnd()) -
199*700637cbSDimitry Andric                 reinterpret_cast<size_t>(Pos);
200*700637cbSDimitry Andric   yaml::Input YOS(StringRef(Pos, Size));
201*700637cbSDimitry Andric   if (hasOutlinedHashTree())
202*700637cbSDimitry Andric     HashTreeRecord.deserializeYAML(YOS);
203*700637cbSDimitry Andric   if (hasStableFunctionMap())
204*700637cbSDimitry Andric     FunctionMapRecord.deserializeYAML(YOS);
205*700637cbSDimitry Andric 
206*700637cbSDimitry Andric   return Error::success();
207*700637cbSDimitry Andric }
208*700637cbSDimitry Andric } // end namespace llvm
209