xref: /freebsd/contrib/llvm-project/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
18bcb0991SDimitry Andric //===- GsymCreator.cpp ----------------------------------------------------===//
28bcb0991SDimitry Andric //
38bcb0991SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
48bcb0991SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
58bcb0991SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
68bcb0991SDimitry Andric //===----------------------------------------------------------------------===//
78bcb0991SDimitry Andric 
88bcb0991SDimitry Andric #include "llvm/DebugInfo/GSYM/GsymCreator.h"
98bcb0991SDimitry Andric #include "llvm/DebugInfo/GSYM/FileWriter.h"
108bcb0991SDimitry Andric #include "llvm/DebugInfo/GSYM/Header.h"
118bcb0991SDimitry Andric #include "llvm/DebugInfo/GSYM/LineTable.h"
12*0fca6ea1SDimitry Andric #include "llvm/DebugInfo/GSYM/OutputAggregator.h"
138bcb0991SDimitry Andric #include "llvm/MC/StringTableBuilder.h"
148bcb0991SDimitry Andric #include "llvm/Support/raw_ostream.h"
158bcb0991SDimitry Andric 
168bcb0991SDimitry Andric #include <algorithm>
178bcb0991SDimitry Andric #include <cassert>
188bcb0991SDimitry Andric #include <functional>
198bcb0991SDimitry Andric #include <vector>
208bcb0991SDimitry Andric 
218bcb0991SDimitry Andric using namespace llvm;
228bcb0991SDimitry Andric using namespace gsym;
238bcb0991SDimitry Andric 
GsymCreator(bool Quiet)24fe6060f1SDimitry Andric GsymCreator::GsymCreator(bool Quiet)
25fe6060f1SDimitry Andric     : StrTab(StringTableBuilder::ELF), Quiet(Quiet) {
268bcb0991SDimitry Andric   insertFile(StringRef());
278bcb0991SDimitry Andric }
288bcb0991SDimitry Andric 
insertFile(StringRef Path,llvm::sys::path::Style Style)29fe6060f1SDimitry Andric uint32_t GsymCreator::insertFile(StringRef Path, llvm::sys::path::Style Style) {
308bcb0991SDimitry Andric   llvm::StringRef directory = llvm::sys::path::parent_path(Path, Style);
318bcb0991SDimitry Andric   llvm::StringRef filename = llvm::sys::path::filename(Path, Style);
325ffd83dbSDimitry Andric   // We must insert the strings first, then call the FileEntry constructor.
335ffd83dbSDimitry Andric   // If we inline the insertString() function call into the constructor, the
345ffd83dbSDimitry Andric   // call order is undefined due to parameter lists not having any ordering
355ffd83dbSDimitry Andric   // requirements.
365ffd83dbSDimitry Andric   const uint32_t Dir = insertString(directory);
375ffd83dbSDimitry Andric   const uint32_t Base = insertString(filename);
3806c3fb27SDimitry Andric   return insertFileEntry(FileEntry(Dir, Base));
3906c3fb27SDimitry Andric }
408bcb0991SDimitry Andric 
insertFileEntry(FileEntry FE)4106c3fb27SDimitry Andric uint32_t GsymCreator::insertFileEntry(FileEntry FE) {
42fe6060f1SDimitry Andric   std::lock_guard<std::mutex> Guard(Mutex);
438bcb0991SDimitry Andric   const auto NextIndex = Files.size();
448bcb0991SDimitry Andric   // Find FE in hash map and insert if not present.
458bcb0991SDimitry Andric   auto R = FileEntryToIndex.insert(std::make_pair(FE, NextIndex));
468bcb0991SDimitry Andric   if (R.second)
478bcb0991SDimitry Andric     Files.emplace_back(FE);
488bcb0991SDimitry Andric   return R.first->second;
498bcb0991SDimitry Andric }
508bcb0991SDimitry Andric 
copyFile(const GsymCreator & SrcGC,uint32_t FileIdx)5106c3fb27SDimitry Andric uint32_t GsymCreator::copyFile(const GsymCreator &SrcGC, uint32_t FileIdx) {
5206c3fb27SDimitry Andric   // File index zero is reserved for a FileEntry with no directory and no
5306c3fb27SDimitry Andric   // filename. Any other file and we need to copy the strings for the directory
5406c3fb27SDimitry Andric   // and filename.
5506c3fb27SDimitry Andric   if (FileIdx == 0)
5606c3fb27SDimitry Andric     return 0;
5706c3fb27SDimitry Andric   const FileEntry SrcFE = SrcGC.Files[FileIdx];
5806c3fb27SDimitry Andric   // Copy the strings for the file and then add the newly converted file entry.
59*0fca6ea1SDimitry Andric   uint32_t Dir =
60*0fca6ea1SDimitry Andric       SrcFE.Dir == 0
61*0fca6ea1SDimitry Andric           ? 0
62*0fca6ea1SDimitry Andric           : StrTab.add(SrcGC.StringOffsetMap.find(SrcFE.Dir)->second);
6306c3fb27SDimitry Andric   uint32_t Base = StrTab.add(SrcGC.StringOffsetMap.find(SrcFE.Base)->second);
6406c3fb27SDimitry Andric   FileEntry DstFE(Dir, Base);
6506c3fb27SDimitry Andric   return insertFileEntry(DstFE);
6606c3fb27SDimitry Andric }
6706c3fb27SDimitry Andric 
save(StringRef Path,llvm::endianness ByteOrder,std::optional<uint64_t> SegmentSize) const685f757f3fSDimitry Andric llvm::Error GsymCreator::save(StringRef Path, llvm::endianness ByteOrder,
6906c3fb27SDimitry Andric                               std::optional<uint64_t> SegmentSize) const {
7006c3fb27SDimitry Andric   if (SegmentSize)
7106c3fb27SDimitry Andric     return saveSegments(Path, ByteOrder, *SegmentSize);
728bcb0991SDimitry Andric   std::error_code EC;
738bcb0991SDimitry Andric   raw_fd_ostream OutStrm(Path, EC);
748bcb0991SDimitry Andric   if (EC)
758bcb0991SDimitry Andric     return llvm::errorCodeToError(EC);
768bcb0991SDimitry Andric   FileWriter O(OutStrm, ByteOrder);
778bcb0991SDimitry Andric   return encode(O);
788bcb0991SDimitry Andric }
798bcb0991SDimitry Andric 
encode(FileWriter & O) const808bcb0991SDimitry Andric llvm::Error GsymCreator::encode(FileWriter &O) const {
81fe6060f1SDimitry Andric   std::lock_guard<std::mutex> Guard(Mutex);
828bcb0991SDimitry Andric   if (Funcs.empty())
838bcb0991SDimitry Andric     return createStringError(std::errc::invalid_argument,
848bcb0991SDimitry Andric                              "no functions to encode");
858bcb0991SDimitry Andric   if (!Finalized)
868bcb0991SDimitry Andric     return createStringError(std::errc::invalid_argument,
878bcb0991SDimitry Andric                              "GsymCreator wasn't finalized prior to encoding");
888bcb0991SDimitry Andric 
898bcb0991SDimitry Andric   if (Funcs.size() > UINT32_MAX)
908bcb0991SDimitry Andric     return createStringError(std::errc::invalid_argument,
918bcb0991SDimitry Andric                              "too many FunctionInfos");
925ffd83dbSDimitry Andric 
9306c3fb27SDimitry Andric   std::optional<uint64_t> BaseAddress = getBaseAddress();
9406c3fb27SDimitry Andric   // Base address should be valid if we have any functions.
9506c3fb27SDimitry Andric   if (!BaseAddress)
9606c3fb27SDimitry Andric     return createStringError(std::errc::invalid_argument,
9706c3fb27SDimitry Andric                              "invalid base address");
988bcb0991SDimitry Andric   Header Hdr;
998bcb0991SDimitry Andric   Hdr.Magic = GSYM_MAGIC;
1008bcb0991SDimitry Andric   Hdr.Version = GSYM_VERSION;
10106c3fb27SDimitry Andric   Hdr.AddrOffSize = getAddressOffsetSize();
1028bcb0991SDimitry Andric   Hdr.UUIDSize = static_cast<uint8_t>(UUID.size());
10306c3fb27SDimitry Andric   Hdr.BaseAddress = *BaseAddress;
1048bcb0991SDimitry Andric   Hdr.NumAddresses = static_cast<uint32_t>(Funcs.size());
1058bcb0991SDimitry Andric   Hdr.StrtabOffset = 0; // We will fix this up later.
1065ffd83dbSDimitry Andric   Hdr.StrtabSize = 0;   // We will fix this up later.
1078bcb0991SDimitry Andric   memset(Hdr.UUID, 0, sizeof(Hdr.UUID));
1088bcb0991SDimitry Andric   if (UUID.size() > sizeof(Hdr.UUID))
1098bcb0991SDimitry Andric     return createStringError(std::errc::invalid_argument,
1108bcb0991SDimitry Andric                              "invalid UUID size %u", (uint32_t)UUID.size());
1118bcb0991SDimitry Andric   // Copy the UUID value if we have one.
1128bcb0991SDimitry Andric   if (UUID.size() > 0)
1138bcb0991SDimitry Andric     memcpy(Hdr.UUID, UUID.data(), UUID.size());
1148bcb0991SDimitry Andric   // Write out the header.
1158bcb0991SDimitry Andric   llvm::Error Err = Hdr.encode(O);
1168bcb0991SDimitry Andric   if (Err)
1178bcb0991SDimitry Andric     return Err;
1188bcb0991SDimitry Andric 
11906c3fb27SDimitry Andric   const uint64_t MaxAddressOffset = getMaxAddressOffset();
1208bcb0991SDimitry Andric   // Write out the address offsets.
1218bcb0991SDimitry Andric   O.alignTo(Hdr.AddrOffSize);
1228bcb0991SDimitry Andric   for (const auto &FuncInfo : Funcs) {
1238bcb0991SDimitry Andric     uint64_t AddrOffset = FuncInfo.startAddress() - Hdr.BaseAddress;
12406c3fb27SDimitry Andric     // Make sure we calculated the address offsets byte size correctly by
12506c3fb27SDimitry Andric     // verifying the current address offset is within ranges. We have seen bugs
12606c3fb27SDimitry Andric     // introduced when the code changes that can cause problems here so it is
12706c3fb27SDimitry Andric     // good to catch this during testing.
12806c3fb27SDimitry Andric     assert(AddrOffset <= MaxAddressOffset);
12906c3fb27SDimitry Andric     (void)MaxAddressOffset;
1308bcb0991SDimitry Andric     switch (Hdr.AddrOffSize) {
131fe6060f1SDimitry Andric     case 1:
132fe6060f1SDimitry Andric       O.writeU8(static_cast<uint8_t>(AddrOffset));
133fe6060f1SDimitry Andric       break;
134fe6060f1SDimitry Andric     case 2:
135fe6060f1SDimitry Andric       O.writeU16(static_cast<uint16_t>(AddrOffset));
136fe6060f1SDimitry Andric       break;
137fe6060f1SDimitry Andric     case 4:
138fe6060f1SDimitry Andric       O.writeU32(static_cast<uint32_t>(AddrOffset));
139fe6060f1SDimitry Andric       break;
140fe6060f1SDimitry Andric     case 8:
141fe6060f1SDimitry Andric       O.writeU64(AddrOffset);
142fe6060f1SDimitry Andric       break;
1438bcb0991SDimitry Andric     }
1448bcb0991SDimitry Andric   }
1458bcb0991SDimitry Andric 
1468bcb0991SDimitry Andric   // Write out all zeros for the AddrInfoOffsets.
1478bcb0991SDimitry Andric   O.alignTo(4);
1488bcb0991SDimitry Andric   const off_t AddrInfoOffsetsOffset = O.tell();
1498bcb0991SDimitry Andric   for (size_t i = 0, n = Funcs.size(); i < n; ++i)
1508bcb0991SDimitry Andric     O.writeU32(0);
1518bcb0991SDimitry Andric 
1528bcb0991SDimitry Andric   // Write out the file table
1538bcb0991SDimitry Andric   O.alignTo(4);
1548bcb0991SDimitry Andric   assert(!Files.empty());
1558bcb0991SDimitry Andric   assert(Files[0].Dir == 0);
1568bcb0991SDimitry Andric   assert(Files[0].Base == 0);
1578bcb0991SDimitry Andric   size_t NumFiles = Files.size();
1588bcb0991SDimitry Andric   if (NumFiles > UINT32_MAX)
159fe6060f1SDimitry Andric     return createStringError(std::errc::invalid_argument, "too many files");
1608bcb0991SDimitry Andric   O.writeU32(static_cast<uint32_t>(NumFiles));
1618bcb0991SDimitry Andric   for (auto File : Files) {
1628bcb0991SDimitry Andric     O.writeU32(File.Dir);
1638bcb0991SDimitry Andric     O.writeU32(File.Base);
1648bcb0991SDimitry Andric   }
1658bcb0991SDimitry Andric 
16606c3fb27SDimitry Andric   // Write out the string table.
1678bcb0991SDimitry Andric   const off_t StrtabOffset = O.tell();
1688bcb0991SDimitry Andric   StrTab.write(O.get_stream());
1698bcb0991SDimitry Andric   const off_t StrtabSize = O.tell() - StrtabOffset;
1708bcb0991SDimitry Andric   std::vector<uint32_t> AddrInfoOffsets;
1718bcb0991SDimitry Andric 
1728bcb0991SDimitry Andric   // Write out the address infos for each function info.
1738bcb0991SDimitry Andric   for (const auto &FuncInfo : Funcs) {
1748bcb0991SDimitry Andric     if (Expected<uint64_t> OffsetOrErr = FuncInfo.encode(O))
1758bcb0991SDimitry Andric       AddrInfoOffsets.push_back(OffsetOrErr.get());
1768bcb0991SDimitry Andric     else
1778bcb0991SDimitry Andric       return OffsetOrErr.takeError();
1788bcb0991SDimitry Andric   }
1798bcb0991SDimitry Andric   // Fixup the string table offset and size in the header
1808bcb0991SDimitry Andric   O.fixup32((uint32_t)StrtabOffset, offsetof(Header, StrtabOffset));
1818bcb0991SDimitry Andric   O.fixup32((uint32_t)StrtabSize, offsetof(Header, StrtabSize));
1828bcb0991SDimitry Andric 
1838bcb0991SDimitry Andric   // Fixup all address info offsets
1848bcb0991SDimitry Andric   uint64_t Offset = 0;
1858bcb0991SDimitry Andric   for (auto AddrInfoOffset : AddrInfoOffsets) {
1868bcb0991SDimitry Andric     O.fixup32(AddrInfoOffset, AddrInfoOffsetsOffset + Offset);
1878bcb0991SDimitry Andric     Offset += 4;
1888bcb0991SDimitry Andric   }
1898bcb0991SDimitry Andric   return ErrorSuccess();
1908bcb0991SDimitry Andric }
1918bcb0991SDimitry Andric 
finalize(OutputAggregator & Out)192*0fca6ea1SDimitry Andric llvm::Error GsymCreator::finalize(OutputAggregator &Out) {
193fe6060f1SDimitry Andric   std::lock_guard<std::mutex> Guard(Mutex);
1948bcb0991SDimitry Andric   if (Finalized)
195fe6060f1SDimitry Andric     return createStringError(std::errc::invalid_argument, "already finalized");
1968bcb0991SDimitry Andric   Finalized = true;
1978bcb0991SDimitry Andric 
1988bcb0991SDimitry Andric   // Don't let the string table indexes change by finalizing in order.
1998bcb0991SDimitry Andric   StrTab.finalizeInOrder();
2008bcb0991SDimitry Andric 
2018bcb0991SDimitry Andric   // Remove duplicates function infos that have both entries from debug info
2028bcb0991SDimitry Andric   // (DWARF or Breakpad) and entries from the SymbolTable.
2038bcb0991SDimitry Andric   //
2048bcb0991SDimitry Andric   // Also handle overlapping function. Usually there shouldn't be any, but they
2058bcb0991SDimitry Andric   // can and do happen in some rare cases.
2068bcb0991SDimitry Andric   //
2078bcb0991SDimitry Andric   // (a)          (b)         (c)
2088bcb0991SDimitry Andric   //     ^  ^       ^            ^
2098bcb0991SDimitry Andric   //     |X |Y      |X ^         |X
2108bcb0991SDimitry Andric   //     |  |       |  |Y        |  ^
2118bcb0991SDimitry Andric   //     |  |       |  v         v  |Y
2128bcb0991SDimitry Andric   //     v  v       v               v
2138bcb0991SDimitry Andric   //
2148bcb0991SDimitry Andric   // In (a) and (b), Y is ignored and X will be reported for the full range.
2158bcb0991SDimitry Andric   // In (c), both functions will be included in the result and lookups for an
2168bcb0991SDimitry Andric   // address in the intersection will return Y because of binary search.
2178bcb0991SDimitry Andric   //
2188bcb0991SDimitry Andric   // Note that in case of (b), we cannot include Y in the result because then
2198bcb0991SDimitry Andric   // we wouldn't find any function for range (end of Y, end of X)
2208bcb0991SDimitry Andric   // with binary search
2215f757f3fSDimitry Andric 
2225f757f3fSDimitry Andric   const auto NumBefore = Funcs.size();
2235f757f3fSDimitry Andric   // Only sort and unique if this isn't a segment. If this is a segment we
2245f757f3fSDimitry Andric   // already finalized the main GsymCreator with all of the function infos
2255f757f3fSDimitry Andric   // and then the already sorted and uniqued function infos were added to this
2265f757f3fSDimitry Andric   // object.
2275f757f3fSDimitry Andric   if (!IsSegment) {
2285f757f3fSDimitry Andric     if (NumBefore > 1) {
2295f757f3fSDimitry Andric       // Sort function infos so we can emit sorted functions.
2305f757f3fSDimitry Andric       llvm::sort(Funcs);
2315f757f3fSDimitry Andric       std::vector<FunctionInfo> FinalizedFuncs;
2325f757f3fSDimitry Andric       FinalizedFuncs.reserve(Funcs.size());
2335f757f3fSDimitry Andric       FinalizedFuncs.emplace_back(std::move(Funcs.front()));
2345f757f3fSDimitry Andric       for (size_t Idx=1; Idx < NumBefore; ++Idx) {
2355f757f3fSDimitry Andric         FunctionInfo &Prev = FinalizedFuncs.back();
2365f757f3fSDimitry Andric         FunctionInfo &Curr = Funcs[Idx];
237fe6060f1SDimitry Andric         // Empty ranges won't intersect, but we still need to
238fe6060f1SDimitry Andric         // catch the case where we have multiple symbols at the
239fe6060f1SDimitry Andric         // same address and coalesce them.
240fe6060f1SDimitry Andric         const bool ranges_equal = Prev.Range == Curr.Range;
241fe6060f1SDimitry Andric         if (ranges_equal || Prev.Range.intersects(Curr.Range)) {
242fe6060f1SDimitry Andric           // Overlapping ranges or empty identical ranges.
243fe6060f1SDimitry Andric           if (ranges_equal) {
244fe6060f1SDimitry Andric             // Same address range. Check if one is from debug
245fe6060f1SDimitry Andric             // info and the other is from a symbol table. If
246fe6060f1SDimitry Andric             // so, then keep the one with debug info. Our
247fe6060f1SDimitry Andric             // sorting guarantees that entries with matching
248fe6060f1SDimitry Andric             // address ranges that have debug info are last in
249fe6060f1SDimitry Andric             // the sort.
2505f757f3fSDimitry Andric             if (!(Prev == Curr)) {
251*0fca6ea1SDimitry Andric               if (Prev.hasRichInfo() && Curr.hasRichInfo())
252*0fca6ea1SDimitry Andric                 Out.Report(
253*0fca6ea1SDimitry Andric                     "Duplicate address ranges with different debug info.",
254*0fca6ea1SDimitry Andric                     [&](raw_ostream &OS) {
255fe6060f1SDimitry Andric                       OS << "warning: same address range contains "
256fe6060f1SDimitry Andric                             "different debug "
2578bcb0991SDimitry Andric                          << "info. Removing:\n"
258fe6060f1SDimitry Andric                          << Prev << "\nIn favor of this one:\n"
259fe6060f1SDimitry Andric                          << Curr << "\n";
260*0fca6ea1SDimitry Andric                     });
261*0fca6ea1SDimitry Andric 
2625f757f3fSDimitry Andric               // We want to swap the current entry with the previous since
2635f757f3fSDimitry Andric               // later entries with the same range always have more debug info
2645f757f3fSDimitry Andric               // or different debug info.
2655f757f3fSDimitry Andric               std::swap(Prev, Curr);
2668bcb0991SDimitry Andric             }
2678bcb0991SDimitry Andric           } else {
268*0fca6ea1SDimitry Andric             Out.Report("Overlapping function ranges", [&](raw_ostream &OS) {
269*0fca6ea1SDimitry Andric               // print warnings about overlaps
2708bcb0991SDimitry Andric               OS << "warning: function ranges overlap:\n"
271fe6060f1SDimitry Andric                 << Prev << "\n"
272fe6060f1SDimitry Andric                 << Curr << "\n";
273*0fca6ea1SDimitry Andric             });
2745f757f3fSDimitry Andric             FinalizedFuncs.emplace_back(std::move(Curr));
275fe6060f1SDimitry Andric           }
2765f757f3fSDimitry Andric         } else {
2775f757f3fSDimitry Andric           if (Prev.Range.size() == 0 && Curr.Range.contains(Prev.Range.start())) {
2785f757f3fSDimitry Andric             // Symbols on macOS don't have address ranges, so if the range
2795f757f3fSDimitry Andric             // doesn't match and the size is zero, then we replace the empty
2805f757f3fSDimitry Andric             // symbol function info with the current one.
2815f757f3fSDimitry Andric             std::swap(Prev, Curr);
2825f757f3fSDimitry Andric           } else {
2835f757f3fSDimitry Andric             FinalizedFuncs.emplace_back(std::move(Curr));
2848bcb0991SDimitry Andric           }
2858bcb0991SDimitry Andric         }
2865f757f3fSDimitry Andric       }
2875f757f3fSDimitry Andric       std::swap(Funcs, FinalizedFuncs);
2885f757f3fSDimitry Andric     }
2895ffd83dbSDimitry Andric     // If our last function info entry doesn't have a size and if we have valid
2905ffd83dbSDimitry Andric     // text ranges, we should set the size of the last entry since any search for
2915ffd83dbSDimitry Andric     // a high address might match our last entry. By fixing up this size, we can
2925ffd83dbSDimitry Andric     // help ensure we don't cause lookups to always return the last symbol that
2935ffd83dbSDimitry Andric     // has no size when doing lookups.
2945ffd83dbSDimitry Andric     if (!Funcs.empty() && Funcs.back().Range.size() == 0 && ValidTextRanges) {
295fe6060f1SDimitry Andric       if (auto Range =
29681ad6265SDimitry Andric               ValidTextRanges->getRangeThatContains(Funcs.back().Range.start())) {
29781ad6265SDimitry Andric         Funcs.back().Range = {Funcs.back().Range.start(), Range->end()};
2985ffd83dbSDimitry Andric       }
2995ffd83dbSDimitry Andric     }
300*0fca6ea1SDimitry Andric     Out << "Pruned " << NumBefore - Funcs.size() << " functions, ended with "
3018bcb0991SDimitry Andric         << Funcs.size() << " total\n";
3025f757f3fSDimitry Andric   }
3038bcb0991SDimitry Andric   return Error::success();
3048bcb0991SDimitry Andric }
3058bcb0991SDimitry Andric 
copyString(const GsymCreator & SrcGC,uint32_t StrOff)30606c3fb27SDimitry Andric uint32_t GsymCreator::copyString(const GsymCreator &SrcGC, uint32_t StrOff) {
30706c3fb27SDimitry Andric   // String offset at zero is always the empty string, no copying needed.
30806c3fb27SDimitry Andric   if (StrOff == 0)
30906c3fb27SDimitry Andric     return 0;
31006c3fb27SDimitry Andric   return StrTab.add(SrcGC.StringOffsetMap.find(StrOff)->second);
31106c3fb27SDimitry Andric }
31206c3fb27SDimitry Andric 
insertString(StringRef S,bool Copy)3135ffd83dbSDimitry Andric uint32_t GsymCreator::insertString(StringRef S, bool Copy) {
3148bcb0991SDimitry Andric   if (S.empty())
3158bcb0991SDimitry Andric     return 0;
316fe6060f1SDimitry Andric 
317fe6060f1SDimitry Andric   // The hash can be calculated outside the lock.
318fe6060f1SDimitry Andric   CachedHashStringRef CHStr(S);
319fe6060f1SDimitry Andric   std::lock_guard<std::mutex> Guard(Mutex);
3205ffd83dbSDimitry Andric   if (Copy) {
3215ffd83dbSDimitry Andric     // We need to provide backing storage for the string if requested
3225ffd83dbSDimitry Andric     // since StringTableBuilder stores references to strings. Any string
3235ffd83dbSDimitry Andric     // that comes from a section in an object file doesn't need to be
3245ffd83dbSDimitry Andric     // copied, but any string created by code will need to be copied.
3255ffd83dbSDimitry Andric     // This allows GsymCreator to be really fast when parsing DWARF and
3265ffd83dbSDimitry Andric     // other object files as most strings don't need to be copied.
3275ffd83dbSDimitry Andric     if (!StrTab.contains(CHStr))
328fe6060f1SDimitry Andric       CHStr = CachedHashStringRef{StringStorage.insert(S).first->getKey(),
329fe6060f1SDimitry Andric                                   CHStr.hash()};
3305ffd83dbSDimitry Andric   }
33106c3fb27SDimitry Andric   const uint32_t StrOff = StrTab.add(CHStr);
33206c3fb27SDimitry Andric   // Save a mapping of string offsets to the cached string reference in case
33306c3fb27SDimitry Andric   // we need to segment the GSYM file and copy string from one string table to
33406c3fb27SDimitry Andric   // another.
33506c3fb27SDimitry Andric   if (StringOffsetMap.count(StrOff) == 0)
33606c3fb27SDimitry Andric     StringOffsetMap.insert(std::make_pair(StrOff, CHStr));
33706c3fb27SDimitry Andric   return StrOff;
3388bcb0991SDimitry Andric }
3398bcb0991SDimitry Andric 
addFunctionInfo(FunctionInfo && FI)3408bcb0991SDimitry Andric void GsymCreator::addFunctionInfo(FunctionInfo &&FI) {
341fe6060f1SDimitry Andric   std::lock_guard<std::mutex> Guard(Mutex);
342fe6060f1SDimitry Andric   Funcs.emplace_back(std::move(FI));
3438bcb0991SDimitry Andric }
3448bcb0991SDimitry Andric 
forEachFunctionInfo(std::function<bool (FunctionInfo &)> const & Callback)3458bcb0991SDimitry Andric void GsymCreator::forEachFunctionInfo(
3468bcb0991SDimitry Andric     std::function<bool(FunctionInfo &)> const &Callback) {
347fe6060f1SDimitry Andric   std::lock_guard<std::mutex> Guard(Mutex);
3488bcb0991SDimitry Andric   for (auto &FI : Funcs) {
3498bcb0991SDimitry Andric     if (!Callback(FI))
3508bcb0991SDimitry Andric       break;
3518bcb0991SDimitry Andric   }
3528bcb0991SDimitry Andric }
3538bcb0991SDimitry Andric 
forEachFunctionInfo(std::function<bool (const FunctionInfo &)> const & Callback) const3548bcb0991SDimitry Andric void GsymCreator::forEachFunctionInfo(
3558bcb0991SDimitry Andric     std::function<bool(const FunctionInfo &)> const &Callback) const {
356fe6060f1SDimitry Andric   std::lock_guard<std::mutex> Guard(Mutex);
3578bcb0991SDimitry Andric   for (const auto &FI : Funcs) {
3588bcb0991SDimitry Andric     if (!Callback(FI))
3598bcb0991SDimitry Andric       break;
3608bcb0991SDimitry Andric   }
3618bcb0991SDimitry Andric }
3625ffd83dbSDimitry Andric 
getNumFunctionInfos() const3635ffd83dbSDimitry Andric size_t GsymCreator::getNumFunctionInfos() const {
364fe6060f1SDimitry Andric   std::lock_guard<std::mutex> Guard(Mutex);
3655ffd83dbSDimitry Andric   return Funcs.size();
3665ffd83dbSDimitry Andric }
3675ffd83dbSDimitry Andric 
IsValidTextAddress(uint64_t Addr) const3685ffd83dbSDimitry Andric bool GsymCreator::IsValidTextAddress(uint64_t Addr) const {
3695ffd83dbSDimitry Andric   if (ValidTextRanges)
3705ffd83dbSDimitry Andric     return ValidTextRanges->contains(Addr);
3715ffd83dbSDimitry Andric   return true; // No valid text ranges has been set, so accept all ranges.
3725ffd83dbSDimitry Andric }
3735ffd83dbSDimitry Andric 
getFirstFunctionAddress() const37406c3fb27SDimitry Andric std::optional<uint64_t> GsymCreator::getFirstFunctionAddress() const {
3755f757f3fSDimitry Andric   // If we have finalized then Funcs are sorted. If we are a segment then
3765f757f3fSDimitry Andric   // Funcs will be sorted as well since function infos get added from an
3775f757f3fSDimitry Andric   // already finalized GsymCreator object where its functions were sorted and
3785f757f3fSDimitry Andric   // uniqued.
3795f757f3fSDimitry Andric   if ((Finalized || IsSegment) && !Funcs.empty())
38006c3fb27SDimitry Andric     return std::optional<uint64_t>(Funcs.front().startAddress());
38106c3fb27SDimitry Andric   return std::nullopt;
38206c3fb27SDimitry Andric }
38306c3fb27SDimitry Andric 
getLastFunctionAddress() const38406c3fb27SDimitry Andric std::optional<uint64_t> GsymCreator::getLastFunctionAddress() const {
3855f757f3fSDimitry Andric   // If we have finalized then Funcs are sorted. If we are a segment then
3865f757f3fSDimitry Andric   // Funcs will be sorted as well since function infos get added from an
3875f757f3fSDimitry Andric   // already finalized GsymCreator object where its functions were sorted and
3885f757f3fSDimitry Andric   // uniqued.
3895f757f3fSDimitry Andric   if ((Finalized || IsSegment) && !Funcs.empty())
39006c3fb27SDimitry Andric     return std::optional<uint64_t>(Funcs.back().startAddress());
39106c3fb27SDimitry Andric   return std::nullopt;
39206c3fb27SDimitry Andric }
39306c3fb27SDimitry Andric 
getBaseAddress() const39406c3fb27SDimitry Andric std::optional<uint64_t> GsymCreator::getBaseAddress() const {
39506c3fb27SDimitry Andric   if (BaseAddress)
39606c3fb27SDimitry Andric     return BaseAddress;
39706c3fb27SDimitry Andric   return getFirstFunctionAddress();
39806c3fb27SDimitry Andric }
39906c3fb27SDimitry Andric 
getMaxAddressOffset() const40006c3fb27SDimitry Andric uint64_t GsymCreator::getMaxAddressOffset() const {
40106c3fb27SDimitry Andric   switch (getAddressOffsetSize()) {
40206c3fb27SDimitry Andric     case 1: return UINT8_MAX;
40306c3fb27SDimitry Andric     case 2: return UINT16_MAX;
40406c3fb27SDimitry Andric     case 4: return UINT32_MAX;
40506c3fb27SDimitry Andric     case 8: return UINT64_MAX;
40606c3fb27SDimitry Andric   }
40706c3fb27SDimitry Andric   llvm_unreachable("invalid address offset");
40806c3fb27SDimitry Andric }
40906c3fb27SDimitry Andric 
getAddressOffsetSize() const41006c3fb27SDimitry Andric uint8_t GsymCreator::getAddressOffsetSize() const {
41106c3fb27SDimitry Andric   const std::optional<uint64_t> BaseAddress = getBaseAddress();
41206c3fb27SDimitry Andric   const std::optional<uint64_t> LastFuncAddr = getLastFunctionAddress();
41306c3fb27SDimitry Andric   if (BaseAddress && LastFuncAddr) {
41406c3fb27SDimitry Andric     const uint64_t AddrDelta = *LastFuncAddr - *BaseAddress;
41506c3fb27SDimitry Andric     if (AddrDelta <= UINT8_MAX)
41606c3fb27SDimitry Andric       return 1;
41706c3fb27SDimitry Andric     else if (AddrDelta <= UINT16_MAX)
41806c3fb27SDimitry Andric       return 2;
41906c3fb27SDimitry Andric     else if (AddrDelta <= UINT32_MAX)
42006c3fb27SDimitry Andric       return 4;
42106c3fb27SDimitry Andric     return 8;
42206c3fb27SDimitry Andric   }
42306c3fb27SDimitry Andric   return 1;
42406c3fb27SDimitry Andric }
42506c3fb27SDimitry Andric 
calculateHeaderAndTableSize() const42606c3fb27SDimitry Andric uint64_t GsymCreator::calculateHeaderAndTableSize() const {
42706c3fb27SDimitry Andric   uint64_t Size = sizeof(Header);
42806c3fb27SDimitry Andric   const size_t NumFuncs = Funcs.size();
42906c3fb27SDimitry Andric   // Add size of address offset table
43006c3fb27SDimitry Andric   Size += NumFuncs * getAddressOffsetSize();
43106c3fb27SDimitry Andric   // Add size of address info offsets which are 32 bit integers in version 1.
43206c3fb27SDimitry Andric   Size += NumFuncs * sizeof(uint32_t);
43306c3fb27SDimitry Andric   // Add file table size
43406c3fb27SDimitry Andric   Size += Files.size() * sizeof(FileEntry);
43506c3fb27SDimitry Andric   // Add string table size
43606c3fb27SDimitry Andric   Size += StrTab.getSize();
43706c3fb27SDimitry Andric 
43806c3fb27SDimitry Andric   return Size;
43906c3fb27SDimitry Andric }
44006c3fb27SDimitry Andric 
44106c3fb27SDimitry Andric // This function takes a InlineInfo class that was copy constructed from an
44206c3fb27SDimitry Andric // InlineInfo from the \a SrcGC and updates all members that point to strings
44306c3fb27SDimitry Andric // and files to point to strings and files from this GsymCreator.
fixupInlineInfo(const GsymCreator & SrcGC,InlineInfo & II)44406c3fb27SDimitry Andric void GsymCreator::fixupInlineInfo(const GsymCreator &SrcGC, InlineInfo &II) {
44506c3fb27SDimitry Andric   II.Name = copyString(SrcGC, II.Name);
44606c3fb27SDimitry Andric   II.CallFile = copyFile(SrcGC, II.CallFile);
44706c3fb27SDimitry Andric   for (auto &ChildII: II.Children)
44806c3fb27SDimitry Andric     fixupInlineInfo(SrcGC, ChildII);
44906c3fb27SDimitry Andric }
45006c3fb27SDimitry Andric 
copyFunctionInfo(const GsymCreator & SrcGC,size_t FuncIdx)45106c3fb27SDimitry Andric uint64_t GsymCreator::copyFunctionInfo(const GsymCreator &SrcGC, size_t FuncIdx) {
45206c3fb27SDimitry Andric   // To copy a function info we need to copy any files and strings over into
45306c3fb27SDimitry Andric   // this GsymCreator and then copy the function info and update the string
45406c3fb27SDimitry Andric   // table offsets to match the new offsets.
45506c3fb27SDimitry Andric   const FunctionInfo &SrcFI = SrcGC.Funcs[FuncIdx];
45606c3fb27SDimitry Andric 
45706c3fb27SDimitry Andric   FunctionInfo DstFI;
45806c3fb27SDimitry Andric   DstFI.Range = SrcFI.Range;
45906c3fb27SDimitry Andric   DstFI.Name = copyString(SrcGC, SrcFI.Name);
46006c3fb27SDimitry Andric   // Copy the line table if there is one.
46106c3fb27SDimitry Andric   if (SrcFI.OptLineTable) {
46206c3fb27SDimitry Andric     // Copy the entire line table.
46306c3fb27SDimitry Andric     DstFI.OptLineTable = LineTable(SrcFI.OptLineTable.value());
46406c3fb27SDimitry Andric     // Fixup all LineEntry::File entries which are indexes in the the file table
46506c3fb27SDimitry Andric     // from SrcGC and must be converted to file indexes from this GsymCreator.
46606c3fb27SDimitry Andric     LineTable &DstLT = DstFI.OptLineTable.value();
46706c3fb27SDimitry Andric     const size_t NumLines = DstLT.size();
46806c3fb27SDimitry Andric     for (size_t I=0; I<NumLines; ++I) {
46906c3fb27SDimitry Andric       LineEntry &LE = DstLT.get(I);
47006c3fb27SDimitry Andric       LE.File = copyFile(SrcGC, LE.File);
47106c3fb27SDimitry Andric     }
47206c3fb27SDimitry Andric   }
47306c3fb27SDimitry Andric   // Copy the inline information if needed.
47406c3fb27SDimitry Andric   if (SrcFI.Inline) {
47506c3fb27SDimitry Andric     // Make a copy of the source inline information.
47606c3fb27SDimitry Andric     DstFI.Inline = SrcFI.Inline.value();
47706c3fb27SDimitry Andric     // Fixup all strings and files in the copied inline information.
47806c3fb27SDimitry Andric     fixupInlineInfo(SrcGC, *DstFI.Inline);
47906c3fb27SDimitry Andric   }
48006c3fb27SDimitry Andric   std::lock_guard<std::mutex> Guard(Mutex);
4815f757f3fSDimitry Andric   Funcs.emplace_back(DstFI);
48206c3fb27SDimitry Andric   return Funcs.back().cacheEncoding();
48306c3fb27SDimitry Andric }
48406c3fb27SDimitry Andric 
saveSegments(StringRef Path,llvm::endianness ByteOrder,uint64_t SegmentSize) const48506c3fb27SDimitry Andric llvm::Error GsymCreator::saveSegments(StringRef Path,
4865f757f3fSDimitry Andric                                       llvm::endianness ByteOrder,
48706c3fb27SDimitry Andric                                       uint64_t SegmentSize) const {
48806c3fb27SDimitry Andric   if (SegmentSize == 0)
48906c3fb27SDimitry Andric     return createStringError(std::errc::invalid_argument,
49006c3fb27SDimitry Andric                              "invalid segment size zero");
49106c3fb27SDimitry Andric 
49206c3fb27SDimitry Andric   size_t FuncIdx = 0;
49306c3fb27SDimitry Andric   const size_t NumFuncs = Funcs.size();
49406c3fb27SDimitry Andric   while (FuncIdx < NumFuncs) {
49506c3fb27SDimitry Andric     llvm::Expected<std::unique_ptr<GsymCreator>> ExpectedGC =
49606c3fb27SDimitry Andric         createSegment(SegmentSize, FuncIdx);
49706c3fb27SDimitry Andric     if (ExpectedGC) {
49806c3fb27SDimitry Andric       GsymCreator *GC = ExpectedGC->get();
49906c3fb27SDimitry Andric       if (GC == NULL)
50006c3fb27SDimitry Andric         break; // We had not more functions to encode.
501*0fca6ea1SDimitry Andric       // Don't collect any messages at all
502*0fca6ea1SDimitry Andric       OutputAggregator Out(nullptr);
503*0fca6ea1SDimitry Andric       llvm::Error Err = GC->finalize(Out);
50406c3fb27SDimitry Andric       if (Err)
50506c3fb27SDimitry Andric         return Err;
50606c3fb27SDimitry Andric       std::string SegmentedGsymPath;
50706c3fb27SDimitry Andric       raw_string_ostream SGP(SegmentedGsymPath);
50806c3fb27SDimitry Andric       std::optional<uint64_t> FirstFuncAddr = GC->getFirstFunctionAddress();
50906c3fb27SDimitry Andric       if (FirstFuncAddr) {
51006c3fb27SDimitry Andric         SGP << Path << "-" << llvm::format_hex(*FirstFuncAddr, 1);
51106c3fb27SDimitry Andric         SGP.flush();
51206c3fb27SDimitry Andric         Err = GC->save(SegmentedGsymPath, ByteOrder, std::nullopt);
51306c3fb27SDimitry Andric         if (Err)
51406c3fb27SDimitry Andric           return Err;
51506c3fb27SDimitry Andric       }
51606c3fb27SDimitry Andric     } else {
51706c3fb27SDimitry Andric       return ExpectedGC.takeError();
51806c3fb27SDimitry Andric     }
51906c3fb27SDimitry Andric   }
52006c3fb27SDimitry Andric   return Error::success();
52106c3fb27SDimitry Andric }
52206c3fb27SDimitry Andric 
52306c3fb27SDimitry Andric llvm::Expected<std::unique_ptr<GsymCreator>>
createSegment(uint64_t SegmentSize,size_t & FuncIdx) const52406c3fb27SDimitry Andric GsymCreator::createSegment(uint64_t SegmentSize, size_t &FuncIdx) const {
52506c3fb27SDimitry Andric   // No function entries, return empty unique pointer
52606c3fb27SDimitry Andric   if (FuncIdx >= Funcs.size())
52706c3fb27SDimitry Andric     return std::unique_ptr<GsymCreator>();
52806c3fb27SDimitry Andric 
52906c3fb27SDimitry Andric   std::unique_ptr<GsymCreator> GC(new GsymCreator(/*Quiet=*/true));
5305f757f3fSDimitry Andric 
5315f757f3fSDimitry Andric   // Tell the creator that this is a segment.
5325f757f3fSDimitry Andric   GC->setIsSegment();
5335f757f3fSDimitry Andric 
53406c3fb27SDimitry Andric   // Set the base address if there is one.
53506c3fb27SDimitry Andric   if (BaseAddress)
53606c3fb27SDimitry Andric     GC->setBaseAddress(*BaseAddress);
53706c3fb27SDimitry Andric   // Copy the UUID value from this object into the new creator.
53806c3fb27SDimitry Andric   GC->setUUID(UUID);
53906c3fb27SDimitry Andric   const size_t NumFuncs = Funcs.size();
54006c3fb27SDimitry Andric   // Track how big the function infos are for the current segment so we can
54106c3fb27SDimitry Andric   // emit segments that are close to the requested size. It is quick math to
54206c3fb27SDimitry Andric   // determine the current header and tables sizes, so we can do that each loop.
54306c3fb27SDimitry Andric   uint64_t SegmentFuncInfosSize = 0;
54406c3fb27SDimitry Andric   for (; FuncIdx < NumFuncs; ++FuncIdx) {
54506c3fb27SDimitry Andric     const uint64_t HeaderAndTableSize = GC->calculateHeaderAndTableSize();
54606c3fb27SDimitry Andric     if (HeaderAndTableSize + SegmentFuncInfosSize >= SegmentSize) {
54706c3fb27SDimitry Andric       if (SegmentFuncInfosSize == 0)
54806c3fb27SDimitry Andric         return createStringError(std::errc::invalid_argument,
54906c3fb27SDimitry Andric                                  "a segment size of %" PRIu64 " is to small to "
55006c3fb27SDimitry Andric                                  "fit any function infos, specify a larger value",
55106c3fb27SDimitry Andric                                  SegmentSize);
55206c3fb27SDimitry Andric 
55306c3fb27SDimitry Andric       break;
55406c3fb27SDimitry Andric     }
55506c3fb27SDimitry Andric     SegmentFuncInfosSize += alignTo(GC->copyFunctionInfo(*this, FuncIdx), 4);
55606c3fb27SDimitry Andric   }
55706c3fb27SDimitry Andric   return std::move(GC);
55806c3fb27SDimitry Andric }
559