1 //===- CodeGenDataReader.cpp ----------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains support for reading codegen data. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "llvm/CGData/CodeGenDataReader.h" 14 #include "llvm/CGData/OutlinedHashTreeRecord.h" 15 #include "llvm/Object/ObjectFile.h" 16 #include "llvm/Support/CommandLine.h" 17 #include "llvm/Support/MemoryBuffer.h" 18 19 #define DEBUG_TYPE "cg-data-reader" 20 21 using namespace llvm; 22 23 static cl::opt<bool> IndexedCodeGenDataReadFunctionMapNames( 24 "indexed-codegen-data-read-function-map-names", cl::init(true), cl::Hidden, 25 cl::desc("Read function map names in indexed CodeGenData. Can be " 26 "disabled to save memory and time for final consumption of the " 27 "indexed CodeGenData in production.")); 28 29 namespace llvm { 30 31 static Expected<std::unique_ptr<MemoryBuffer>> 32 setupMemoryBuffer(const Twine &Filename, vfs::FileSystem &FS) { 33 auto BufferOrErr = Filename.str() == "-" ? MemoryBuffer::getSTDIN() 34 : FS.getBufferForFile(Filename); 35 if (std::error_code EC = BufferOrErr.getError()) 36 return errorCodeToError(EC); 37 return std::move(BufferOrErr.get()); 38 } 39 40 Error CodeGenDataReader::mergeFromObjectFile( 41 const object::ObjectFile *Obj, OutlinedHashTreeRecord &GlobalOutlineRecord, 42 StableFunctionMapRecord &GlobalFunctionMapRecord, 43 stable_hash *CombinedHash) { 44 Triple TT = Obj->makeTriple(); 45 auto CGOutlineName = 46 getCodeGenDataSectionName(CG_outline, TT.getObjectFormat(), false); 47 auto CGMergeName = 48 getCodeGenDataSectionName(CG_merge, TT.getObjectFormat(), false); 49 50 auto processSectionContents = [&](const StringRef &Name, 51 const StringRef &Contents) { 52 if (Name != CGOutlineName && Name != CGMergeName) 53 return; 54 if (CombinedHash) 55 *CombinedHash = stable_hash_combine(*CombinedHash, xxh3_64bits(Contents)); 56 auto *Data = reinterpret_cast<const unsigned char *>(Contents.data()); 57 auto *EndData = Data + Contents.size(); 58 // In case dealing with an executable that has concatenated cgdata, 59 // we want to merge them into a single cgdata. 60 // Although it's not a typical workflow, we support this scenario 61 // by looping over all data in the sections. 62 if (Name == CGOutlineName) { 63 while (Data != EndData) { 64 OutlinedHashTreeRecord LocalOutlineRecord; 65 LocalOutlineRecord.deserialize(Data); 66 GlobalOutlineRecord.merge(LocalOutlineRecord); 67 } 68 } else if (Name == CGMergeName) { 69 while (Data != EndData) { 70 StableFunctionMapRecord LocalFunctionMapRecord; 71 LocalFunctionMapRecord.deserialize(Data); 72 GlobalFunctionMapRecord.merge(LocalFunctionMapRecord); 73 } 74 } 75 }; 76 77 for (auto &Section : Obj->sections()) { 78 Expected<StringRef> NameOrErr = Section.getName(); 79 if (!NameOrErr) 80 return NameOrErr.takeError(); 81 Expected<StringRef> ContentsOrErr = Section.getContents(); 82 if (!ContentsOrErr) 83 return ContentsOrErr.takeError(); 84 processSectionContents(*NameOrErr, *ContentsOrErr); 85 } 86 87 return Error::success(); 88 } 89 90 Error IndexedCodeGenDataReader::read() { 91 using namespace support; 92 93 // The smallest header with the version 1 is 24 bytes. 94 // Do not update this value even with the new version of the header. 95 const unsigned MinHeaderSize = 24; 96 if (DataBuffer->getBufferSize() < MinHeaderSize) 97 return error(cgdata_error::bad_header); 98 99 auto *Start = 100 reinterpret_cast<const unsigned char *>(DataBuffer->getBufferStart()); 101 auto *End = 102 reinterpret_cast<const unsigned char *>(DataBuffer->getBufferEnd()); 103 if (auto E = IndexedCGData::Header::readFromBuffer(Start).moveInto(Header)) 104 return E; 105 106 if (hasOutlinedHashTree()) { 107 const unsigned char *Ptr = Start + Header.OutlinedHashTreeOffset; 108 if (Ptr >= End) 109 return error(cgdata_error::eof); 110 HashTreeRecord.deserialize(Ptr); 111 } 112 if (hasStableFunctionMap()) { 113 const unsigned char *Ptr = Start + Header.StableFunctionMapOffset; 114 if (Ptr >= End) 115 return error(cgdata_error::eof); 116 FunctionMapRecord.deserialize(Ptr, IndexedCodeGenDataReadFunctionMapNames); 117 } 118 119 return success(); 120 } 121 122 Expected<std::unique_ptr<CodeGenDataReader>> 123 CodeGenDataReader::create(const Twine &Path, vfs::FileSystem &FS) { 124 // Set up the buffer to read. 125 auto BufferOrError = setupMemoryBuffer(Path, FS); 126 if (Error E = BufferOrError.takeError()) 127 return std::move(E); 128 return CodeGenDataReader::create(std::move(BufferOrError.get())); 129 } 130 131 Expected<std::unique_ptr<CodeGenDataReader>> 132 CodeGenDataReader::create(std::unique_ptr<MemoryBuffer> Buffer) { 133 if (Buffer->getBufferSize() == 0) 134 return make_error<CGDataError>(cgdata_error::empty_cgdata); 135 136 std::unique_ptr<CodeGenDataReader> Reader; 137 // Create the reader. 138 if (IndexedCodeGenDataReader::hasFormat(*Buffer)) 139 Reader = std::make_unique<IndexedCodeGenDataReader>(std::move(Buffer)); 140 else if (TextCodeGenDataReader::hasFormat(*Buffer)) 141 Reader = std::make_unique<TextCodeGenDataReader>(std::move(Buffer)); 142 else 143 return make_error<CGDataError>(cgdata_error::malformed); 144 145 // Initialize the reader and return the result. 146 if (Error E = Reader->read()) 147 return std::move(E); 148 149 return std::move(Reader); 150 } 151 152 bool IndexedCodeGenDataReader::hasFormat(const MemoryBuffer &DataBuffer) { 153 using namespace support; 154 if (DataBuffer.getBufferSize() < sizeof(IndexedCGData::Magic)) 155 return false; 156 157 uint64_t Magic = endian::read<uint64_t, llvm::endianness::little, aligned>( 158 DataBuffer.getBufferStart()); 159 // Verify that it's magical. 160 return Magic == IndexedCGData::Magic; 161 } 162 163 bool TextCodeGenDataReader::hasFormat(const MemoryBuffer &Buffer) { 164 // Verify that this really looks like plain ASCII text by checking a 165 // 'reasonable' number of characters (up to the magic size). 166 StringRef Prefix = Buffer.getBuffer().take_front(sizeof(uint64_t)); 167 return llvm::all_of(Prefix, [](char c) { return isPrint(c) || isSpace(c); }); 168 } 169 Error TextCodeGenDataReader::read() { 170 using namespace support; 171 172 // Parse the custom header line by line. 173 for (; !Line.is_at_eof(); ++Line) { 174 // Skip empty or whitespace-only lines 175 if (Line->trim().empty()) 176 continue; 177 178 if (!Line->starts_with(":")) 179 break; 180 StringRef Str = Line->drop_front().rtrim(); 181 if (Str.equals_insensitive("outlined_hash_tree")) 182 DataKind |= CGDataKind::FunctionOutlinedHashTree; 183 else if (Str.equals_insensitive("stable_function_map")) 184 DataKind |= CGDataKind::StableFunctionMergingMap; 185 else 186 return error(cgdata_error::bad_header); 187 } 188 189 // We treat an empty header (that is a comment # only) as a valid header. 190 if (Line.is_at_eof()) { 191 if (DataKind == CGDataKind::Unknown) 192 return Error::success(); 193 return error(cgdata_error::bad_header); 194 } 195 196 // The YAML docs follow after the header. 197 const char *Pos = Line->data(); 198 size_t Size = reinterpret_cast<size_t>(DataBuffer->getBufferEnd()) - 199 reinterpret_cast<size_t>(Pos); 200 yaml::Input YOS(StringRef(Pos, Size)); 201 if (hasOutlinedHashTree()) 202 HashTreeRecord.deserializeYAML(YOS); 203 if (hasStableFunctionMap()) 204 FunctionMapRecord.deserializeYAML(YOS); 205 206 return Error::success(); 207 } 208 } // end namespace llvm 209