1 //===- GsymCreator.cpp ----------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 //===----------------------------------------------------------------------===// 7 8 #include "llvm/DebugInfo/GSYM/GsymCreator.h" 9 #include "llvm/DebugInfo/GSYM/FileWriter.h" 10 #include "llvm/DebugInfo/GSYM/Header.h" 11 #include "llvm/DebugInfo/GSYM/LineTable.h" 12 #include "llvm/MC/StringTableBuilder.h" 13 #include "llvm/Support/raw_ostream.h" 14 15 #include <algorithm> 16 #include <cassert> 17 #include <functional> 18 #include <vector> 19 20 using namespace llvm; 21 using namespace gsym; 22 23 24 GsymCreator::GsymCreator() : StrTab(StringTableBuilder::ELF) { 25 insertFile(StringRef()); 26 } 27 28 uint32_t GsymCreator::insertFile(StringRef Path, 29 llvm::sys::path::Style Style) { 30 llvm::StringRef directory = llvm::sys::path::parent_path(Path, Style); 31 llvm::StringRef filename = llvm::sys::path::filename(Path, Style); 32 FileEntry FE(insertString(directory), insertString(filename)); 33 34 std::lock_guard<std::recursive_mutex> Guard(Mutex); 35 const auto NextIndex = Files.size(); 36 // Find FE in hash map and insert if not present. 37 auto R = FileEntryToIndex.insert(std::make_pair(FE, NextIndex)); 38 if (R.second) 39 Files.emplace_back(FE); 40 return R.first->second; 41 } 42 43 llvm::Error GsymCreator::save(StringRef Path, 44 llvm::support::endianness ByteOrder) const { 45 std::error_code EC; 46 raw_fd_ostream OutStrm(Path, EC); 47 if (EC) 48 return llvm::errorCodeToError(EC); 49 FileWriter O(OutStrm, ByteOrder); 50 return encode(O); 51 } 52 53 llvm::Error GsymCreator::encode(FileWriter &O) const { 54 std::lock_guard<std::recursive_mutex> Guard(Mutex); 55 if (Funcs.empty()) 56 return createStringError(std::errc::invalid_argument, 57 "no functions to encode"); 58 if (!Finalized) 59 return createStringError(std::errc::invalid_argument, 60 "GsymCreator wasn't finalized prior to encoding"); 61 62 if (Funcs.size() > UINT32_MAX) 63 return createStringError(std::errc::invalid_argument, 64 "too many FunctionInfos"); 65 const uint64_t MinAddr = Funcs.front().startAddress(); 66 const uint64_t MaxAddr = Funcs.back().startAddress(); 67 const uint64_t AddrDelta = MaxAddr - MinAddr; 68 Header Hdr; 69 Hdr.Magic = GSYM_MAGIC; 70 Hdr.Version = GSYM_VERSION; 71 Hdr.AddrOffSize = 0; 72 Hdr.UUIDSize = static_cast<uint8_t>(UUID.size()); 73 Hdr.BaseAddress = MinAddr; 74 Hdr.NumAddresses = static_cast<uint32_t>(Funcs.size()); 75 Hdr.StrtabOffset = 0; // We will fix this up later. 76 Hdr.StrtabOffset = 0; // We will fix this up later. 77 memset(Hdr.UUID, 0, sizeof(Hdr.UUID)); 78 if (UUID.size() > sizeof(Hdr.UUID)) 79 return createStringError(std::errc::invalid_argument, 80 "invalid UUID size %u", (uint32_t)UUID.size()); 81 // Set the address offset size correctly in the GSYM header. 82 if (AddrDelta <= UINT8_MAX) 83 Hdr.AddrOffSize = 1; 84 else if (AddrDelta <= UINT16_MAX) 85 Hdr.AddrOffSize = 2; 86 else if (AddrDelta <= UINT32_MAX) 87 Hdr.AddrOffSize = 4; 88 else 89 Hdr.AddrOffSize = 8; 90 // Copy the UUID value if we have one. 91 if (UUID.size() > 0) 92 memcpy(Hdr.UUID, UUID.data(), UUID.size()); 93 // Write out the header. 94 llvm::Error Err = Hdr.encode(O); 95 if (Err) 96 return Err; 97 98 // Write out the address offsets. 99 O.alignTo(Hdr.AddrOffSize); 100 for (const auto &FuncInfo : Funcs) { 101 uint64_t AddrOffset = FuncInfo.startAddress() - Hdr.BaseAddress; 102 switch(Hdr.AddrOffSize) { 103 case 1: O.writeU8(static_cast<uint8_t>(AddrOffset)); break; 104 case 2: O.writeU16(static_cast<uint16_t>(AddrOffset)); break; 105 case 4: O.writeU32(static_cast<uint32_t>(AddrOffset)); break; 106 case 8: O.writeU64(AddrOffset); break; 107 } 108 } 109 110 // Write out all zeros for the AddrInfoOffsets. 111 O.alignTo(4); 112 const off_t AddrInfoOffsetsOffset = O.tell(); 113 for (size_t i = 0, n = Funcs.size(); i < n; ++i) 114 O.writeU32(0); 115 116 // Write out the file table 117 O.alignTo(4); 118 assert(!Files.empty()); 119 assert(Files[0].Dir == 0); 120 assert(Files[0].Base == 0); 121 size_t NumFiles = Files.size(); 122 if (NumFiles > UINT32_MAX) 123 return createStringError(std::errc::invalid_argument, 124 "too many files"); 125 O.writeU32(static_cast<uint32_t>(NumFiles)); 126 for (auto File: Files) { 127 O.writeU32(File.Dir); 128 O.writeU32(File.Base); 129 } 130 131 // Write out the sting table. 132 const off_t StrtabOffset = O.tell(); 133 StrTab.write(O.get_stream()); 134 const off_t StrtabSize = O.tell() - StrtabOffset; 135 std::vector<uint32_t> AddrInfoOffsets; 136 137 // Write out the address infos for each function info. 138 for (const auto &FuncInfo : Funcs) { 139 if (Expected<uint64_t> OffsetOrErr = FuncInfo.encode(O)) 140 AddrInfoOffsets.push_back(OffsetOrErr.get()); 141 else 142 return OffsetOrErr.takeError(); 143 } 144 // Fixup the string table offset and size in the header 145 O.fixup32((uint32_t)StrtabOffset, offsetof(Header, StrtabOffset)); 146 O.fixup32((uint32_t)StrtabSize, offsetof(Header, StrtabSize)); 147 148 // Fixup all address info offsets 149 uint64_t Offset = 0; 150 for (auto AddrInfoOffset: AddrInfoOffsets) { 151 O.fixup32(AddrInfoOffset, AddrInfoOffsetsOffset + Offset); 152 Offset += 4; 153 } 154 return ErrorSuccess(); 155 } 156 157 llvm::Error GsymCreator::finalize(llvm::raw_ostream &OS) { 158 std::lock_guard<std::recursive_mutex> Guard(Mutex); 159 if (Finalized) 160 return createStringError(std::errc::invalid_argument, 161 "already finalized"); 162 Finalized = true; 163 164 // Sort function infos so we can emit sorted functions. 165 llvm::sort(Funcs.begin(), Funcs.end()); 166 167 // Don't let the string table indexes change by finalizing in order. 168 StrTab.finalizeInOrder(); 169 170 // Remove duplicates function infos that have both entries from debug info 171 // (DWARF or Breakpad) and entries from the SymbolTable. 172 // 173 // Also handle overlapping function. Usually there shouldn't be any, but they 174 // can and do happen in some rare cases. 175 // 176 // (a) (b) (c) 177 // ^ ^ ^ ^ 178 // |X |Y |X ^ |X 179 // | | | |Y | ^ 180 // | | | v v |Y 181 // v v v v 182 // 183 // In (a) and (b), Y is ignored and X will be reported for the full range. 184 // In (c), both functions will be included in the result and lookups for an 185 // address in the intersection will return Y because of binary search. 186 // 187 // Note that in case of (b), we cannot include Y in the result because then 188 // we wouldn't find any function for range (end of Y, end of X) 189 // with binary search 190 auto NumBefore = Funcs.size(); 191 auto Curr = Funcs.begin(); 192 auto Prev = Funcs.end(); 193 while (Curr != Funcs.end()) { 194 // Can't check for overlaps or same address ranges if we don't have a 195 // previous entry 196 if (Prev != Funcs.end()) { 197 if (Prev->Range.intersects(Curr->Range)) { 198 // Overlapping address ranges. 199 if (Prev->Range == Curr->Range) { 200 // Same address range. Check if one is from debug info and the other 201 // is from a symbol table. If so, then keep the one with debug info. 202 // Our sorting guarantees that entries with matching address ranges 203 // that have debug info are last in the sort. 204 if (*Prev == *Curr) { 205 // FunctionInfo entries match exactly (range, lines, inlines) 206 OS << "warning: duplicate function info entries, removing " 207 "duplicate:\n" 208 << *Curr << '\n'; 209 Curr = Funcs.erase(Prev); 210 } else { 211 if (!Prev->hasRichInfo() && Curr->hasRichInfo()) { 212 // Same address range, one with no debug info (symbol) and the 213 // next with debug info. Keep the latter. 214 Curr = Funcs.erase(Prev); 215 } else { 216 OS << "warning: same address range contains different debug " 217 << "info. Removing:\n" 218 << *Prev << "\nIn favor of this one:\n" 219 << *Curr << "\n"; 220 Curr = Funcs.erase(Prev); 221 } 222 } 223 } else { 224 // print warnings about overlaps 225 OS << "warning: function ranges overlap:\n" 226 << *Prev << "\n" 227 << *Curr << "\n"; 228 } 229 } else if (Prev->Range.size() == 0 && 230 Curr->Range.contains(Prev->Range.Start)) { 231 OS << "warning: removing symbol:\n" 232 << *Prev << "\nKeeping:\n" 233 << *Curr << "\n"; 234 Curr = Funcs.erase(Prev); 235 } 236 } 237 if (Curr == Funcs.end()) 238 break; 239 Prev = Curr++; 240 } 241 242 OS << "Pruned " << NumBefore - Funcs.size() << " functions, ended with " 243 << Funcs.size() << " total\n"; 244 return Error::success(); 245 } 246 247 uint32_t GsymCreator::insertString(StringRef S) { 248 std::lock_guard<std::recursive_mutex> Guard(Mutex); 249 if (S.empty()) 250 return 0; 251 return StrTab.add(S); 252 } 253 254 void GsymCreator::addFunctionInfo(FunctionInfo &&FI) { 255 std::lock_guard<std::recursive_mutex> Guard(Mutex); 256 Funcs.emplace_back(FI); 257 } 258 259 void GsymCreator::forEachFunctionInfo( 260 std::function<bool(FunctionInfo &)> const &Callback) { 261 std::lock_guard<std::recursive_mutex> Guard(Mutex); 262 for (auto &FI : Funcs) { 263 if (!Callback(FI)) 264 break; 265 } 266 } 267 268 void GsymCreator::forEachFunctionInfo( 269 std::function<bool(const FunctionInfo &)> const &Callback) const { 270 std::lock_guard<std::recursive_mutex> Guard(Mutex); 271 for (const auto &FI : Funcs) { 272 if (!Callback(FI)) 273 break; 274 } 275 } 276