1 //===- GsymCreator.cpp ----------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 //===----------------------------------------------------------------------===// 7 8 #include "llvm/DebugInfo/GSYM/GsymCreator.h" 9 #include "llvm/DebugInfo/GSYM/FileWriter.h" 10 #include "llvm/DebugInfo/GSYM/Header.h" 11 #include "llvm/DebugInfo/GSYM/LineTable.h" 12 #include "llvm/MC/StringTableBuilder.h" 13 #include "llvm/Support/raw_ostream.h" 14 15 #include <algorithm> 16 #include <cassert> 17 #include <functional> 18 #include <vector> 19 20 using namespace llvm; 21 using namespace gsym; 22 23 24 GsymCreator::GsymCreator() : StrTab(StringTableBuilder::ELF) { 25 insertFile(StringRef()); 26 } 27 28 uint32_t GsymCreator::insertFile(StringRef Path, 29 llvm::sys::path::Style Style) { 30 llvm::StringRef directory = llvm::sys::path::parent_path(Path, Style); 31 llvm::StringRef filename = llvm::sys::path::filename(Path, Style); 32 // We must insert the strings first, then call the FileEntry constructor. 33 // If we inline the insertString() function call into the constructor, the 34 // call order is undefined due to parameter lists not having any ordering 35 // requirements. 36 const uint32_t Dir = insertString(directory); 37 const uint32_t Base = insertString(filename); 38 FileEntry FE(Dir, Base); 39 40 std::lock_guard<std::recursive_mutex> Guard(Mutex); 41 const auto NextIndex = Files.size(); 42 // Find FE in hash map and insert if not present. 43 auto R = FileEntryToIndex.insert(std::make_pair(FE, NextIndex)); 44 if (R.second) 45 Files.emplace_back(FE); 46 return R.first->second; 47 } 48 49 llvm::Error GsymCreator::save(StringRef Path, 50 llvm::support::endianness ByteOrder) const { 51 std::error_code EC; 52 raw_fd_ostream OutStrm(Path, EC); 53 if (EC) 54 return llvm::errorCodeToError(EC); 55 FileWriter O(OutStrm, ByteOrder); 56 return encode(O); 57 } 58 59 llvm::Error GsymCreator::encode(FileWriter &O) const { 60 std::lock_guard<std::recursive_mutex> Guard(Mutex); 61 if (Funcs.empty()) 62 return createStringError(std::errc::invalid_argument, 63 "no functions to encode"); 64 if (!Finalized) 65 return createStringError(std::errc::invalid_argument, 66 "GsymCreator wasn't finalized prior to encoding"); 67 68 if (Funcs.size() > UINT32_MAX) 69 return createStringError(std::errc::invalid_argument, 70 "too many FunctionInfos"); 71 72 const uint64_t MinAddr = BaseAddress ? *BaseAddress : Funcs.front().startAddress(); 73 const uint64_t MaxAddr = Funcs.back().startAddress(); 74 const uint64_t AddrDelta = MaxAddr - MinAddr; 75 Header Hdr; 76 Hdr.Magic = GSYM_MAGIC; 77 Hdr.Version = GSYM_VERSION; 78 Hdr.AddrOffSize = 0; 79 Hdr.UUIDSize = static_cast<uint8_t>(UUID.size()); 80 Hdr.BaseAddress = MinAddr; 81 Hdr.NumAddresses = static_cast<uint32_t>(Funcs.size()); 82 Hdr.StrtabOffset = 0; // We will fix this up later. 83 Hdr.StrtabSize = 0; // We will fix this up later. 84 memset(Hdr.UUID, 0, sizeof(Hdr.UUID)); 85 if (UUID.size() > sizeof(Hdr.UUID)) 86 return createStringError(std::errc::invalid_argument, 87 "invalid UUID size %u", (uint32_t)UUID.size()); 88 // Set the address offset size correctly in the GSYM header. 89 if (AddrDelta <= UINT8_MAX) 90 Hdr.AddrOffSize = 1; 91 else if (AddrDelta <= UINT16_MAX) 92 Hdr.AddrOffSize = 2; 93 else if (AddrDelta <= UINT32_MAX) 94 Hdr.AddrOffSize = 4; 95 else 96 Hdr.AddrOffSize = 8; 97 // Copy the UUID value if we have one. 98 if (UUID.size() > 0) 99 memcpy(Hdr.UUID, UUID.data(), UUID.size()); 100 // Write out the header. 101 llvm::Error Err = Hdr.encode(O); 102 if (Err) 103 return Err; 104 105 // Write out the address offsets. 106 O.alignTo(Hdr.AddrOffSize); 107 for (const auto &FuncInfo : Funcs) { 108 uint64_t AddrOffset = FuncInfo.startAddress() - Hdr.BaseAddress; 109 switch(Hdr.AddrOffSize) { 110 case 1: O.writeU8(static_cast<uint8_t>(AddrOffset)); break; 111 case 2: O.writeU16(static_cast<uint16_t>(AddrOffset)); break; 112 case 4: O.writeU32(static_cast<uint32_t>(AddrOffset)); break; 113 case 8: O.writeU64(AddrOffset); break; 114 } 115 } 116 117 // Write out all zeros for the AddrInfoOffsets. 118 O.alignTo(4); 119 const off_t AddrInfoOffsetsOffset = O.tell(); 120 for (size_t i = 0, n = Funcs.size(); i < n; ++i) 121 O.writeU32(0); 122 123 // Write out the file table 124 O.alignTo(4); 125 assert(!Files.empty()); 126 assert(Files[0].Dir == 0); 127 assert(Files[0].Base == 0); 128 size_t NumFiles = Files.size(); 129 if (NumFiles > UINT32_MAX) 130 return createStringError(std::errc::invalid_argument, 131 "too many files"); 132 O.writeU32(static_cast<uint32_t>(NumFiles)); 133 for (auto File: Files) { 134 O.writeU32(File.Dir); 135 O.writeU32(File.Base); 136 } 137 138 // Write out the sting table. 139 const off_t StrtabOffset = O.tell(); 140 StrTab.write(O.get_stream()); 141 const off_t StrtabSize = O.tell() - StrtabOffset; 142 std::vector<uint32_t> AddrInfoOffsets; 143 144 // Write out the address infos for each function info. 145 for (const auto &FuncInfo : Funcs) { 146 if (Expected<uint64_t> OffsetOrErr = FuncInfo.encode(O)) 147 AddrInfoOffsets.push_back(OffsetOrErr.get()); 148 else 149 return OffsetOrErr.takeError(); 150 } 151 // Fixup the string table offset and size in the header 152 O.fixup32((uint32_t)StrtabOffset, offsetof(Header, StrtabOffset)); 153 O.fixup32((uint32_t)StrtabSize, offsetof(Header, StrtabSize)); 154 155 // Fixup all address info offsets 156 uint64_t Offset = 0; 157 for (auto AddrInfoOffset: AddrInfoOffsets) { 158 O.fixup32(AddrInfoOffset, AddrInfoOffsetsOffset + Offset); 159 Offset += 4; 160 } 161 return ErrorSuccess(); 162 } 163 164 llvm::Error GsymCreator::finalize(llvm::raw_ostream &OS) { 165 std::lock_guard<std::recursive_mutex> Guard(Mutex); 166 if (Finalized) 167 return createStringError(std::errc::invalid_argument, 168 "already finalized"); 169 Finalized = true; 170 171 // Sort function infos so we can emit sorted functions. 172 llvm::sort(Funcs); 173 174 // Don't let the string table indexes change by finalizing in order. 175 StrTab.finalizeInOrder(); 176 177 // Remove duplicates function infos that have both entries from debug info 178 // (DWARF or Breakpad) and entries from the SymbolTable. 179 // 180 // Also handle overlapping function. Usually there shouldn't be any, but they 181 // can and do happen in some rare cases. 182 // 183 // (a) (b) (c) 184 // ^ ^ ^ ^ 185 // |X |Y |X ^ |X 186 // | | | |Y | ^ 187 // | | | v v |Y 188 // v v v v 189 // 190 // In (a) and (b), Y is ignored and X will be reported for the full range. 191 // In (c), both functions will be included in the result and lookups for an 192 // address in the intersection will return Y because of binary search. 193 // 194 // Note that in case of (b), we cannot include Y in the result because then 195 // we wouldn't find any function for range (end of Y, end of X) 196 // with binary search 197 auto NumBefore = Funcs.size(); 198 auto Curr = Funcs.begin(); 199 auto Prev = Funcs.end(); 200 while (Curr != Funcs.end()) { 201 // Can't check for overlaps or same address ranges if we don't have a 202 // previous entry 203 if (Prev != Funcs.end()) { 204 if (Prev->Range.intersects(Curr->Range)) { 205 // Overlapping address ranges. 206 if (Prev->Range == Curr->Range) { 207 // Same address range. Check if one is from debug info and the other 208 // is from a symbol table. If so, then keep the one with debug info. 209 // Our sorting guarantees that entries with matching address ranges 210 // that have debug info are last in the sort. 211 if (*Prev == *Curr) { 212 // FunctionInfo entries match exactly (range, lines, inlines) 213 OS << "warning: duplicate function info entries for range: " 214 << Curr->Range << '\n'; 215 Curr = Funcs.erase(Prev); 216 } else { 217 if (!Prev->hasRichInfo() && Curr->hasRichInfo()) { 218 // Same address range, one with no debug info (symbol) and the 219 // next with debug info. Keep the latter. 220 Curr = Funcs.erase(Prev); 221 } else { 222 OS << "warning: same address range contains different debug " 223 << "info. Removing:\n" 224 << *Prev << "\nIn favor of this one:\n" 225 << *Curr << "\n"; 226 Curr = Funcs.erase(Prev); 227 } 228 } 229 } else { 230 // print warnings about overlaps 231 OS << "warning: function ranges overlap:\n" 232 << *Prev << "\n" 233 << *Curr << "\n"; 234 } 235 } else if (Prev->Range.size() == 0 && 236 Curr->Range.contains(Prev->Range.Start)) { 237 OS << "warning: removing symbol:\n" 238 << *Prev << "\nKeeping:\n" 239 << *Curr << "\n"; 240 Curr = Funcs.erase(Prev); 241 } 242 } 243 if (Curr == Funcs.end()) 244 break; 245 Prev = Curr++; 246 } 247 248 // If our last function info entry doesn't have a size and if we have valid 249 // text ranges, we should set the size of the last entry since any search for 250 // a high address might match our last entry. By fixing up this size, we can 251 // help ensure we don't cause lookups to always return the last symbol that 252 // has no size when doing lookups. 253 if (!Funcs.empty() && Funcs.back().Range.size() == 0 && ValidTextRanges) { 254 if (auto Range = ValidTextRanges->getRangeThatContains( 255 Funcs.back().Range.Start)) { 256 Funcs.back().Range.End = Range->End; 257 } 258 } 259 OS << "Pruned " << NumBefore - Funcs.size() << " functions, ended with " 260 << Funcs.size() << " total\n"; 261 return Error::success(); 262 } 263 264 uint32_t GsymCreator::insertString(StringRef S, bool Copy) { 265 if (S.empty()) 266 return 0; 267 std::lock_guard<std::recursive_mutex> Guard(Mutex); 268 if (Copy) { 269 // We need to provide backing storage for the string if requested 270 // since StringTableBuilder stores references to strings. Any string 271 // that comes from a section in an object file doesn't need to be 272 // copied, but any string created by code will need to be copied. 273 // This allows GsymCreator to be really fast when parsing DWARF and 274 // other object files as most strings don't need to be copied. 275 CachedHashStringRef CHStr(S); 276 if (!StrTab.contains(CHStr)) 277 S = StringStorage.insert(S).first->getKey(); 278 } 279 return StrTab.add(S); 280 } 281 282 void GsymCreator::addFunctionInfo(FunctionInfo &&FI) { 283 std::lock_guard<std::recursive_mutex> Guard(Mutex); 284 Ranges.insert(FI.Range); 285 Funcs.emplace_back(FI); 286 } 287 288 void GsymCreator::forEachFunctionInfo( 289 std::function<bool(FunctionInfo &)> const &Callback) { 290 std::lock_guard<std::recursive_mutex> Guard(Mutex); 291 for (auto &FI : Funcs) { 292 if (!Callback(FI)) 293 break; 294 } 295 } 296 297 void GsymCreator::forEachFunctionInfo( 298 std::function<bool(const FunctionInfo &)> const &Callback) const { 299 std::lock_guard<std::recursive_mutex> Guard(Mutex); 300 for (const auto &FI : Funcs) { 301 if (!Callback(FI)) 302 break; 303 } 304 } 305 306 size_t GsymCreator::getNumFunctionInfos() const{ 307 std::lock_guard<std::recursive_mutex> Guard(Mutex); 308 return Funcs.size(); 309 } 310 311 bool GsymCreator::IsValidTextAddress(uint64_t Addr) const { 312 if (ValidTextRanges) 313 return ValidTextRanges->contains(Addr); 314 return true; // No valid text ranges has been set, so accept all ranges. 315 } 316 317 bool GsymCreator::hasFunctionInfoForAddress(uint64_t Addr) const { 318 std::lock_guard<std::recursive_mutex> Guard(Mutex); 319 return Ranges.contains(Addr); 320 } 321