1 //===- GsymCreator.cpp ----------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 //===----------------------------------------------------------------------===// 7 8 #include "llvm/DebugInfo/GSYM/GsymCreator.h" 9 #include "llvm/DebugInfo/GSYM/FileWriter.h" 10 #include "llvm/DebugInfo/GSYM/Header.h" 11 #include "llvm/DebugInfo/GSYM/LineTable.h" 12 #include "llvm/MC/StringTableBuilder.h" 13 #include "llvm/Support/raw_ostream.h" 14 15 #include <algorithm> 16 #include <cassert> 17 #include <functional> 18 #include <vector> 19 20 using namespace llvm; 21 using namespace gsym; 22 23 GsymCreator::GsymCreator(bool Quiet) 24 : StrTab(StringTableBuilder::ELF), Quiet(Quiet) { 25 insertFile(StringRef()); 26 } 27 28 uint32_t GsymCreator::insertFile(StringRef Path, llvm::sys::path::Style Style) { 29 llvm::StringRef directory = llvm::sys::path::parent_path(Path, Style); 30 llvm::StringRef filename = llvm::sys::path::filename(Path, Style); 31 // We must insert the strings first, then call the FileEntry constructor. 32 // If we inline the insertString() function call into the constructor, the 33 // call order is undefined due to parameter lists not having any ordering 34 // requirements. 35 const uint32_t Dir = insertString(directory); 36 const uint32_t Base = insertString(filename); 37 FileEntry FE(Dir, Base); 38 39 std::lock_guard<std::mutex> Guard(Mutex); 40 const auto NextIndex = Files.size(); 41 // Find FE in hash map and insert if not present. 42 auto R = FileEntryToIndex.insert(std::make_pair(FE, NextIndex)); 43 if (R.second) 44 Files.emplace_back(FE); 45 return R.first->second; 46 } 47 48 llvm::Error GsymCreator::save(StringRef Path, 49 llvm::support::endianness ByteOrder) const { 50 std::error_code EC; 51 raw_fd_ostream OutStrm(Path, EC); 52 if (EC) 53 return llvm::errorCodeToError(EC); 54 FileWriter O(OutStrm, ByteOrder); 55 return encode(O); 56 } 57 58 llvm::Error GsymCreator::encode(FileWriter &O) const { 59 std::lock_guard<std::mutex> Guard(Mutex); 60 if (Funcs.empty()) 61 return createStringError(std::errc::invalid_argument, 62 "no functions to encode"); 63 if (!Finalized) 64 return createStringError(std::errc::invalid_argument, 65 "GsymCreator wasn't finalized prior to encoding"); 66 67 if (Funcs.size() > UINT32_MAX) 68 return createStringError(std::errc::invalid_argument, 69 "too many FunctionInfos"); 70 71 const uint64_t MinAddr = 72 BaseAddress ? *BaseAddress : Funcs.front().startAddress(); 73 const uint64_t MaxAddr = Funcs.back().startAddress(); 74 const uint64_t AddrDelta = MaxAddr - MinAddr; 75 Header Hdr; 76 Hdr.Magic = GSYM_MAGIC; 77 Hdr.Version = GSYM_VERSION; 78 Hdr.AddrOffSize = 0; 79 Hdr.UUIDSize = static_cast<uint8_t>(UUID.size()); 80 Hdr.BaseAddress = MinAddr; 81 Hdr.NumAddresses = static_cast<uint32_t>(Funcs.size()); 82 Hdr.StrtabOffset = 0; // We will fix this up later. 83 Hdr.StrtabSize = 0; // We will fix this up later. 84 memset(Hdr.UUID, 0, sizeof(Hdr.UUID)); 85 if (UUID.size() > sizeof(Hdr.UUID)) 86 return createStringError(std::errc::invalid_argument, 87 "invalid UUID size %u", (uint32_t)UUID.size()); 88 // Set the address offset size correctly in the GSYM header. 89 if (AddrDelta <= UINT8_MAX) 90 Hdr.AddrOffSize = 1; 91 else if (AddrDelta <= UINT16_MAX) 92 Hdr.AddrOffSize = 2; 93 else if (AddrDelta <= UINT32_MAX) 94 Hdr.AddrOffSize = 4; 95 else 96 Hdr.AddrOffSize = 8; 97 // Copy the UUID value if we have one. 98 if (UUID.size() > 0) 99 memcpy(Hdr.UUID, UUID.data(), UUID.size()); 100 // Write out the header. 101 llvm::Error Err = Hdr.encode(O); 102 if (Err) 103 return Err; 104 105 // Write out the address offsets. 106 O.alignTo(Hdr.AddrOffSize); 107 for (const auto &FuncInfo : Funcs) { 108 uint64_t AddrOffset = FuncInfo.startAddress() - Hdr.BaseAddress; 109 switch (Hdr.AddrOffSize) { 110 case 1: 111 O.writeU8(static_cast<uint8_t>(AddrOffset)); 112 break; 113 case 2: 114 O.writeU16(static_cast<uint16_t>(AddrOffset)); 115 break; 116 case 4: 117 O.writeU32(static_cast<uint32_t>(AddrOffset)); 118 break; 119 case 8: 120 O.writeU64(AddrOffset); 121 break; 122 } 123 } 124 125 // Write out all zeros for the AddrInfoOffsets. 126 O.alignTo(4); 127 const off_t AddrInfoOffsetsOffset = O.tell(); 128 for (size_t i = 0, n = Funcs.size(); i < n; ++i) 129 O.writeU32(0); 130 131 // Write out the file table 132 O.alignTo(4); 133 assert(!Files.empty()); 134 assert(Files[0].Dir == 0); 135 assert(Files[0].Base == 0); 136 size_t NumFiles = Files.size(); 137 if (NumFiles > UINT32_MAX) 138 return createStringError(std::errc::invalid_argument, "too many files"); 139 O.writeU32(static_cast<uint32_t>(NumFiles)); 140 for (auto File : Files) { 141 O.writeU32(File.Dir); 142 O.writeU32(File.Base); 143 } 144 145 // Write out the sting table. 146 const off_t StrtabOffset = O.tell(); 147 StrTab.write(O.get_stream()); 148 const off_t StrtabSize = O.tell() - StrtabOffset; 149 std::vector<uint32_t> AddrInfoOffsets; 150 151 // Write out the address infos for each function info. 152 for (const auto &FuncInfo : Funcs) { 153 if (Expected<uint64_t> OffsetOrErr = FuncInfo.encode(O)) 154 AddrInfoOffsets.push_back(OffsetOrErr.get()); 155 else 156 return OffsetOrErr.takeError(); 157 } 158 // Fixup the string table offset and size in the header 159 O.fixup32((uint32_t)StrtabOffset, offsetof(Header, StrtabOffset)); 160 O.fixup32((uint32_t)StrtabSize, offsetof(Header, StrtabSize)); 161 162 // Fixup all address info offsets 163 uint64_t Offset = 0; 164 for (auto AddrInfoOffset : AddrInfoOffsets) { 165 O.fixup32(AddrInfoOffset, AddrInfoOffsetsOffset + Offset); 166 Offset += 4; 167 } 168 return ErrorSuccess(); 169 } 170 171 // Similar to std::remove_if, but the predicate is binary and it is passed both 172 // the previous and the current element. 173 template <class ForwardIt, class BinaryPredicate> 174 static ForwardIt removeIfBinary(ForwardIt FirstIt, ForwardIt LastIt, 175 BinaryPredicate Pred) { 176 if (FirstIt != LastIt) { 177 auto PrevIt = FirstIt++; 178 FirstIt = std::find_if(FirstIt, LastIt, [&](const auto &Curr) { 179 return Pred(*PrevIt++, Curr); 180 }); 181 if (FirstIt != LastIt) 182 for (ForwardIt CurrIt = FirstIt; ++CurrIt != LastIt;) 183 if (!Pred(*PrevIt, *CurrIt)) { 184 PrevIt = FirstIt; 185 *FirstIt++ = std::move(*CurrIt); 186 } 187 } 188 return FirstIt; 189 } 190 191 llvm::Error GsymCreator::finalize(llvm::raw_ostream &OS) { 192 std::lock_guard<std::mutex> Guard(Mutex); 193 if (Finalized) 194 return createStringError(std::errc::invalid_argument, "already finalized"); 195 Finalized = true; 196 197 // Sort function infos so we can emit sorted functions. 198 llvm::sort(Funcs); 199 200 // Don't let the string table indexes change by finalizing in order. 201 StrTab.finalizeInOrder(); 202 203 // Remove duplicates function infos that have both entries from debug info 204 // (DWARF or Breakpad) and entries from the SymbolTable. 205 // 206 // Also handle overlapping function. Usually there shouldn't be any, but they 207 // can and do happen in some rare cases. 208 // 209 // (a) (b) (c) 210 // ^ ^ ^ ^ 211 // |X |Y |X ^ |X 212 // | | | |Y | ^ 213 // | | | v v |Y 214 // v v v v 215 // 216 // In (a) and (b), Y is ignored and X will be reported for the full range. 217 // In (c), both functions will be included in the result and lookups for an 218 // address in the intersection will return Y because of binary search. 219 // 220 // Note that in case of (b), we cannot include Y in the result because then 221 // we wouldn't find any function for range (end of Y, end of X) 222 // with binary search 223 auto NumBefore = Funcs.size(); 224 Funcs.erase( 225 removeIfBinary(Funcs.begin(), Funcs.end(), 226 [&](const auto &Prev, const auto &Curr) { 227 // Empty ranges won't intersect, but we still need to 228 // catch the case where we have multiple symbols at the 229 // same address and coalesce them. 230 const bool ranges_equal = Prev.Range == Curr.Range; 231 if (ranges_equal || Prev.Range.intersects(Curr.Range)) { 232 // Overlapping ranges or empty identical ranges. 233 if (ranges_equal) { 234 // Same address range. Check if one is from debug 235 // info and the other is from a symbol table. If 236 // so, then keep the one with debug info. Our 237 // sorting guarantees that entries with matching 238 // address ranges that have debug info are last in 239 // the sort. 240 if (Prev == Curr) { 241 // FunctionInfo entries match exactly (range, 242 // lines, inlines) 243 244 // We used to output a warning here, but this was 245 // so frequent on some binaries, in particular 246 // when those were built with GCC, that it slowed 247 // down processing extremely. 248 return true; 249 } else { 250 if (!Prev.hasRichInfo() && Curr.hasRichInfo()) { 251 // Same address range, one with no debug info 252 // (symbol) and the next with debug info. Keep 253 // the latter. 254 return true; 255 } else { 256 if (!Quiet) { 257 OS << "warning: same address range contains " 258 "different debug " 259 << "info. Removing:\n" 260 << Prev << "\nIn favor of this one:\n" 261 << Curr << "\n"; 262 } 263 return true; 264 } 265 } 266 } else { 267 if (!Quiet) { // print warnings about overlaps 268 OS << "warning: function ranges overlap:\n" 269 << Prev << "\n" 270 << Curr << "\n"; 271 } 272 } 273 } else if (Prev.Range.size() == 0 && 274 Curr.Range.contains(Prev.Range.Start)) { 275 if (!Quiet) { 276 OS << "warning: removing symbol:\n" 277 << Prev << "\nKeeping:\n" 278 << Curr << "\n"; 279 } 280 return true; 281 } 282 283 return false; 284 }), 285 Funcs.end()); 286 287 // If our last function info entry doesn't have a size and if we have valid 288 // text ranges, we should set the size of the last entry since any search for 289 // a high address might match our last entry. By fixing up this size, we can 290 // help ensure we don't cause lookups to always return the last symbol that 291 // has no size when doing lookups. 292 if (!Funcs.empty() && Funcs.back().Range.size() == 0 && ValidTextRanges) { 293 if (auto Range = 294 ValidTextRanges->getRangeThatContains(Funcs.back().Range.Start)) { 295 Funcs.back().Range.End = Range->End; 296 } 297 } 298 OS << "Pruned " << NumBefore - Funcs.size() << " functions, ended with " 299 << Funcs.size() << " total\n"; 300 return Error::success(); 301 } 302 303 uint32_t GsymCreator::insertString(StringRef S, bool Copy) { 304 if (S.empty()) 305 return 0; 306 307 // The hash can be calculated outside the lock. 308 CachedHashStringRef CHStr(S); 309 std::lock_guard<std::mutex> Guard(Mutex); 310 if (Copy) { 311 // We need to provide backing storage for the string if requested 312 // since StringTableBuilder stores references to strings. Any string 313 // that comes from a section in an object file doesn't need to be 314 // copied, but any string created by code will need to be copied. 315 // This allows GsymCreator to be really fast when parsing DWARF and 316 // other object files as most strings don't need to be copied. 317 if (!StrTab.contains(CHStr)) 318 CHStr = CachedHashStringRef{StringStorage.insert(S).first->getKey(), 319 CHStr.hash()}; 320 } 321 return StrTab.add(CHStr); 322 } 323 324 void GsymCreator::addFunctionInfo(FunctionInfo &&FI) { 325 std::lock_guard<std::mutex> Guard(Mutex); 326 Ranges.insert(FI.Range); 327 Funcs.emplace_back(std::move(FI)); 328 } 329 330 void GsymCreator::forEachFunctionInfo( 331 std::function<bool(FunctionInfo &)> const &Callback) { 332 std::lock_guard<std::mutex> Guard(Mutex); 333 for (auto &FI : Funcs) { 334 if (!Callback(FI)) 335 break; 336 } 337 } 338 339 void GsymCreator::forEachFunctionInfo( 340 std::function<bool(const FunctionInfo &)> const &Callback) const { 341 std::lock_guard<std::mutex> Guard(Mutex); 342 for (const auto &FI : Funcs) { 343 if (!Callback(FI)) 344 break; 345 } 346 } 347 348 size_t GsymCreator::getNumFunctionInfos() const { 349 std::lock_guard<std::mutex> Guard(Mutex); 350 return Funcs.size(); 351 } 352 353 bool GsymCreator::IsValidTextAddress(uint64_t Addr) const { 354 if (ValidTextRanges) 355 return ValidTextRanges->contains(Addr); 356 return true; // No valid text ranges has been set, so accept all ranges. 357 } 358 359 bool GsymCreator::hasFunctionInfoForAddress(uint64_t Addr) const { 360 std::lock_guard<std::mutex> Guard(Mutex); 361 return Ranges.contains(Addr); 362 } 363