18bcb0991SDimitry Andric //===- GsymCreator.cpp ----------------------------------------------------===//
28bcb0991SDimitry Andric //
38bcb0991SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
48bcb0991SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
58bcb0991SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
68bcb0991SDimitry Andric //===----------------------------------------------------------------------===//
78bcb0991SDimitry Andric
88bcb0991SDimitry Andric #include "llvm/DebugInfo/GSYM/GsymCreator.h"
98bcb0991SDimitry Andric #include "llvm/DebugInfo/GSYM/FileWriter.h"
108bcb0991SDimitry Andric #include "llvm/DebugInfo/GSYM/Header.h"
118bcb0991SDimitry Andric #include "llvm/DebugInfo/GSYM/LineTable.h"
12*0fca6ea1SDimitry Andric #include "llvm/DebugInfo/GSYM/OutputAggregator.h"
138bcb0991SDimitry Andric #include "llvm/MC/StringTableBuilder.h"
148bcb0991SDimitry Andric #include "llvm/Support/raw_ostream.h"
158bcb0991SDimitry Andric
168bcb0991SDimitry Andric #include <algorithm>
178bcb0991SDimitry Andric #include <cassert>
188bcb0991SDimitry Andric #include <functional>
198bcb0991SDimitry Andric #include <vector>
208bcb0991SDimitry Andric
218bcb0991SDimitry Andric using namespace llvm;
228bcb0991SDimitry Andric using namespace gsym;
238bcb0991SDimitry Andric
GsymCreator(bool Quiet)24fe6060f1SDimitry Andric GsymCreator::GsymCreator(bool Quiet)
25fe6060f1SDimitry Andric : StrTab(StringTableBuilder::ELF), Quiet(Quiet) {
268bcb0991SDimitry Andric insertFile(StringRef());
278bcb0991SDimitry Andric }
288bcb0991SDimitry Andric
insertFile(StringRef Path,llvm::sys::path::Style Style)29fe6060f1SDimitry Andric uint32_t GsymCreator::insertFile(StringRef Path, llvm::sys::path::Style Style) {
308bcb0991SDimitry Andric llvm::StringRef directory = llvm::sys::path::parent_path(Path, Style);
318bcb0991SDimitry Andric llvm::StringRef filename = llvm::sys::path::filename(Path, Style);
325ffd83dbSDimitry Andric // We must insert the strings first, then call the FileEntry constructor.
335ffd83dbSDimitry Andric // If we inline the insertString() function call into the constructor, the
345ffd83dbSDimitry Andric // call order is undefined due to parameter lists not having any ordering
355ffd83dbSDimitry Andric // requirements.
365ffd83dbSDimitry Andric const uint32_t Dir = insertString(directory);
375ffd83dbSDimitry Andric const uint32_t Base = insertString(filename);
3806c3fb27SDimitry Andric return insertFileEntry(FileEntry(Dir, Base));
3906c3fb27SDimitry Andric }
408bcb0991SDimitry Andric
insertFileEntry(FileEntry FE)4106c3fb27SDimitry Andric uint32_t GsymCreator::insertFileEntry(FileEntry FE) {
42fe6060f1SDimitry Andric std::lock_guard<std::mutex> Guard(Mutex);
438bcb0991SDimitry Andric const auto NextIndex = Files.size();
448bcb0991SDimitry Andric // Find FE in hash map and insert if not present.
458bcb0991SDimitry Andric auto R = FileEntryToIndex.insert(std::make_pair(FE, NextIndex));
468bcb0991SDimitry Andric if (R.second)
478bcb0991SDimitry Andric Files.emplace_back(FE);
488bcb0991SDimitry Andric return R.first->second;
498bcb0991SDimitry Andric }
508bcb0991SDimitry Andric
copyFile(const GsymCreator & SrcGC,uint32_t FileIdx)5106c3fb27SDimitry Andric uint32_t GsymCreator::copyFile(const GsymCreator &SrcGC, uint32_t FileIdx) {
5206c3fb27SDimitry Andric // File index zero is reserved for a FileEntry with no directory and no
5306c3fb27SDimitry Andric // filename. Any other file and we need to copy the strings for the directory
5406c3fb27SDimitry Andric // and filename.
5506c3fb27SDimitry Andric if (FileIdx == 0)
5606c3fb27SDimitry Andric return 0;
5706c3fb27SDimitry Andric const FileEntry SrcFE = SrcGC.Files[FileIdx];
5806c3fb27SDimitry Andric // Copy the strings for the file and then add the newly converted file entry.
59*0fca6ea1SDimitry Andric uint32_t Dir =
60*0fca6ea1SDimitry Andric SrcFE.Dir == 0
61*0fca6ea1SDimitry Andric ? 0
62*0fca6ea1SDimitry Andric : StrTab.add(SrcGC.StringOffsetMap.find(SrcFE.Dir)->second);
6306c3fb27SDimitry Andric uint32_t Base = StrTab.add(SrcGC.StringOffsetMap.find(SrcFE.Base)->second);
6406c3fb27SDimitry Andric FileEntry DstFE(Dir, Base);
6506c3fb27SDimitry Andric return insertFileEntry(DstFE);
6606c3fb27SDimitry Andric }
6706c3fb27SDimitry Andric
save(StringRef Path,llvm::endianness ByteOrder,std::optional<uint64_t> SegmentSize) const685f757f3fSDimitry Andric llvm::Error GsymCreator::save(StringRef Path, llvm::endianness ByteOrder,
6906c3fb27SDimitry Andric std::optional<uint64_t> SegmentSize) const {
7006c3fb27SDimitry Andric if (SegmentSize)
7106c3fb27SDimitry Andric return saveSegments(Path, ByteOrder, *SegmentSize);
728bcb0991SDimitry Andric std::error_code EC;
738bcb0991SDimitry Andric raw_fd_ostream OutStrm(Path, EC);
748bcb0991SDimitry Andric if (EC)
758bcb0991SDimitry Andric return llvm::errorCodeToError(EC);
768bcb0991SDimitry Andric FileWriter O(OutStrm, ByteOrder);
778bcb0991SDimitry Andric return encode(O);
788bcb0991SDimitry Andric }
798bcb0991SDimitry Andric
encode(FileWriter & O) const808bcb0991SDimitry Andric llvm::Error GsymCreator::encode(FileWriter &O) const {
81fe6060f1SDimitry Andric std::lock_guard<std::mutex> Guard(Mutex);
828bcb0991SDimitry Andric if (Funcs.empty())
838bcb0991SDimitry Andric return createStringError(std::errc::invalid_argument,
848bcb0991SDimitry Andric "no functions to encode");
858bcb0991SDimitry Andric if (!Finalized)
868bcb0991SDimitry Andric return createStringError(std::errc::invalid_argument,
878bcb0991SDimitry Andric "GsymCreator wasn't finalized prior to encoding");
888bcb0991SDimitry Andric
898bcb0991SDimitry Andric if (Funcs.size() > UINT32_MAX)
908bcb0991SDimitry Andric return createStringError(std::errc::invalid_argument,
918bcb0991SDimitry Andric "too many FunctionInfos");
925ffd83dbSDimitry Andric
9306c3fb27SDimitry Andric std::optional<uint64_t> BaseAddress = getBaseAddress();
9406c3fb27SDimitry Andric // Base address should be valid if we have any functions.
9506c3fb27SDimitry Andric if (!BaseAddress)
9606c3fb27SDimitry Andric return createStringError(std::errc::invalid_argument,
9706c3fb27SDimitry Andric "invalid base address");
988bcb0991SDimitry Andric Header Hdr;
998bcb0991SDimitry Andric Hdr.Magic = GSYM_MAGIC;
1008bcb0991SDimitry Andric Hdr.Version = GSYM_VERSION;
10106c3fb27SDimitry Andric Hdr.AddrOffSize = getAddressOffsetSize();
1028bcb0991SDimitry Andric Hdr.UUIDSize = static_cast<uint8_t>(UUID.size());
10306c3fb27SDimitry Andric Hdr.BaseAddress = *BaseAddress;
1048bcb0991SDimitry Andric Hdr.NumAddresses = static_cast<uint32_t>(Funcs.size());
1058bcb0991SDimitry Andric Hdr.StrtabOffset = 0; // We will fix this up later.
1065ffd83dbSDimitry Andric Hdr.StrtabSize = 0; // We will fix this up later.
1078bcb0991SDimitry Andric memset(Hdr.UUID, 0, sizeof(Hdr.UUID));
1088bcb0991SDimitry Andric if (UUID.size() > sizeof(Hdr.UUID))
1098bcb0991SDimitry Andric return createStringError(std::errc::invalid_argument,
1108bcb0991SDimitry Andric "invalid UUID size %u", (uint32_t)UUID.size());
1118bcb0991SDimitry Andric // Copy the UUID value if we have one.
1128bcb0991SDimitry Andric if (UUID.size() > 0)
1138bcb0991SDimitry Andric memcpy(Hdr.UUID, UUID.data(), UUID.size());
1148bcb0991SDimitry Andric // Write out the header.
1158bcb0991SDimitry Andric llvm::Error Err = Hdr.encode(O);
1168bcb0991SDimitry Andric if (Err)
1178bcb0991SDimitry Andric return Err;
1188bcb0991SDimitry Andric
11906c3fb27SDimitry Andric const uint64_t MaxAddressOffset = getMaxAddressOffset();
1208bcb0991SDimitry Andric // Write out the address offsets.
1218bcb0991SDimitry Andric O.alignTo(Hdr.AddrOffSize);
1228bcb0991SDimitry Andric for (const auto &FuncInfo : Funcs) {
1238bcb0991SDimitry Andric uint64_t AddrOffset = FuncInfo.startAddress() - Hdr.BaseAddress;
12406c3fb27SDimitry Andric // Make sure we calculated the address offsets byte size correctly by
12506c3fb27SDimitry Andric // verifying the current address offset is within ranges. We have seen bugs
12606c3fb27SDimitry Andric // introduced when the code changes that can cause problems here so it is
12706c3fb27SDimitry Andric // good to catch this during testing.
12806c3fb27SDimitry Andric assert(AddrOffset <= MaxAddressOffset);
12906c3fb27SDimitry Andric (void)MaxAddressOffset;
1308bcb0991SDimitry Andric switch (Hdr.AddrOffSize) {
131fe6060f1SDimitry Andric case 1:
132fe6060f1SDimitry Andric O.writeU8(static_cast<uint8_t>(AddrOffset));
133fe6060f1SDimitry Andric break;
134fe6060f1SDimitry Andric case 2:
135fe6060f1SDimitry Andric O.writeU16(static_cast<uint16_t>(AddrOffset));
136fe6060f1SDimitry Andric break;
137fe6060f1SDimitry Andric case 4:
138fe6060f1SDimitry Andric O.writeU32(static_cast<uint32_t>(AddrOffset));
139fe6060f1SDimitry Andric break;
140fe6060f1SDimitry Andric case 8:
141fe6060f1SDimitry Andric O.writeU64(AddrOffset);
142fe6060f1SDimitry Andric break;
1438bcb0991SDimitry Andric }
1448bcb0991SDimitry Andric }
1458bcb0991SDimitry Andric
1468bcb0991SDimitry Andric // Write out all zeros for the AddrInfoOffsets.
1478bcb0991SDimitry Andric O.alignTo(4);
1488bcb0991SDimitry Andric const off_t AddrInfoOffsetsOffset = O.tell();
1498bcb0991SDimitry Andric for (size_t i = 0, n = Funcs.size(); i < n; ++i)
1508bcb0991SDimitry Andric O.writeU32(0);
1518bcb0991SDimitry Andric
1528bcb0991SDimitry Andric // Write out the file table
1538bcb0991SDimitry Andric O.alignTo(4);
1548bcb0991SDimitry Andric assert(!Files.empty());
1558bcb0991SDimitry Andric assert(Files[0].Dir == 0);
1568bcb0991SDimitry Andric assert(Files[0].Base == 0);
1578bcb0991SDimitry Andric size_t NumFiles = Files.size();
1588bcb0991SDimitry Andric if (NumFiles > UINT32_MAX)
159fe6060f1SDimitry Andric return createStringError(std::errc::invalid_argument, "too many files");
1608bcb0991SDimitry Andric O.writeU32(static_cast<uint32_t>(NumFiles));
1618bcb0991SDimitry Andric for (auto File : Files) {
1628bcb0991SDimitry Andric O.writeU32(File.Dir);
1638bcb0991SDimitry Andric O.writeU32(File.Base);
1648bcb0991SDimitry Andric }
1658bcb0991SDimitry Andric
16606c3fb27SDimitry Andric // Write out the string table.
1678bcb0991SDimitry Andric const off_t StrtabOffset = O.tell();
1688bcb0991SDimitry Andric StrTab.write(O.get_stream());
1698bcb0991SDimitry Andric const off_t StrtabSize = O.tell() - StrtabOffset;
1708bcb0991SDimitry Andric std::vector<uint32_t> AddrInfoOffsets;
1718bcb0991SDimitry Andric
1728bcb0991SDimitry Andric // Write out the address infos for each function info.
1738bcb0991SDimitry Andric for (const auto &FuncInfo : Funcs) {
1748bcb0991SDimitry Andric if (Expected<uint64_t> OffsetOrErr = FuncInfo.encode(O))
1758bcb0991SDimitry Andric AddrInfoOffsets.push_back(OffsetOrErr.get());
1768bcb0991SDimitry Andric else
1778bcb0991SDimitry Andric return OffsetOrErr.takeError();
1788bcb0991SDimitry Andric }
1798bcb0991SDimitry Andric // Fixup the string table offset and size in the header
1808bcb0991SDimitry Andric O.fixup32((uint32_t)StrtabOffset, offsetof(Header, StrtabOffset));
1818bcb0991SDimitry Andric O.fixup32((uint32_t)StrtabSize, offsetof(Header, StrtabSize));
1828bcb0991SDimitry Andric
1838bcb0991SDimitry Andric // Fixup all address info offsets
1848bcb0991SDimitry Andric uint64_t Offset = 0;
1858bcb0991SDimitry Andric for (auto AddrInfoOffset : AddrInfoOffsets) {
1868bcb0991SDimitry Andric O.fixup32(AddrInfoOffset, AddrInfoOffsetsOffset + Offset);
1878bcb0991SDimitry Andric Offset += 4;
1888bcb0991SDimitry Andric }
1898bcb0991SDimitry Andric return ErrorSuccess();
1908bcb0991SDimitry Andric }
1918bcb0991SDimitry Andric
finalize(OutputAggregator & Out)192*0fca6ea1SDimitry Andric llvm::Error GsymCreator::finalize(OutputAggregator &Out) {
193fe6060f1SDimitry Andric std::lock_guard<std::mutex> Guard(Mutex);
1948bcb0991SDimitry Andric if (Finalized)
195fe6060f1SDimitry Andric return createStringError(std::errc::invalid_argument, "already finalized");
1968bcb0991SDimitry Andric Finalized = true;
1978bcb0991SDimitry Andric
1988bcb0991SDimitry Andric // Don't let the string table indexes change by finalizing in order.
1998bcb0991SDimitry Andric StrTab.finalizeInOrder();
2008bcb0991SDimitry Andric
2018bcb0991SDimitry Andric // Remove duplicates function infos that have both entries from debug info
2028bcb0991SDimitry Andric // (DWARF or Breakpad) and entries from the SymbolTable.
2038bcb0991SDimitry Andric //
2048bcb0991SDimitry Andric // Also handle overlapping function. Usually there shouldn't be any, but they
2058bcb0991SDimitry Andric // can and do happen in some rare cases.
2068bcb0991SDimitry Andric //
2078bcb0991SDimitry Andric // (a) (b) (c)
2088bcb0991SDimitry Andric // ^ ^ ^ ^
2098bcb0991SDimitry Andric // |X |Y |X ^ |X
2108bcb0991SDimitry Andric // | | | |Y | ^
2118bcb0991SDimitry Andric // | | | v v |Y
2128bcb0991SDimitry Andric // v v v v
2138bcb0991SDimitry Andric //
2148bcb0991SDimitry Andric // In (a) and (b), Y is ignored and X will be reported for the full range.
2158bcb0991SDimitry Andric // In (c), both functions will be included in the result and lookups for an
2168bcb0991SDimitry Andric // address in the intersection will return Y because of binary search.
2178bcb0991SDimitry Andric //
2188bcb0991SDimitry Andric // Note that in case of (b), we cannot include Y in the result because then
2198bcb0991SDimitry Andric // we wouldn't find any function for range (end of Y, end of X)
2208bcb0991SDimitry Andric // with binary search
2215f757f3fSDimitry Andric
2225f757f3fSDimitry Andric const auto NumBefore = Funcs.size();
2235f757f3fSDimitry Andric // Only sort and unique if this isn't a segment. If this is a segment we
2245f757f3fSDimitry Andric // already finalized the main GsymCreator with all of the function infos
2255f757f3fSDimitry Andric // and then the already sorted and uniqued function infos were added to this
2265f757f3fSDimitry Andric // object.
2275f757f3fSDimitry Andric if (!IsSegment) {
2285f757f3fSDimitry Andric if (NumBefore > 1) {
2295f757f3fSDimitry Andric // Sort function infos so we can emit sorted functions.
2305f757f3fSDimitry Andric llvm::sort(Funcs);
2315f757f3fSDimitry Andric std::vector<FunctionInfo> FinalizedFuncs;
2325f757f3fSDimitry Andric FinalizedFuncs.reserve(Funcs.size());
2335f757f3fSDimitry Andric FinalizedFuncs.emplace_back(std::move(Funcs.front()));
2345f757f3fSDimitry Andric for (size_t Idx=1; Idx < NumBefore; ++Idx) {
2355f757f3fSDimitry Andric FunctionInfo &Prev = FinalizedFuncs.back();
2365f757f3fSDimitry Andric FunctionInfo &Curr = Funcs[Idx];
237fe6060f1SDimitry Andric // Empty ranges won't intersect, but we still need to
238fe6060f1SDimitry Andric // catch the case where we have multiple symbols at the
239fe6060f1SDimitry Andric // same address and coalesce them.
240fe6060f1SDimitry Andric const bool ranges_equal = Prev.Range == Curr.Range;
241fe6060f1SDimitry Andric if (ranges_equal || Prev.Range.intersects(Curr.Range)) {
242fe6060f1SDimitry Andric // Overlapping ranges or empty identical ranges.
243fe6060f1SDimitry Andric if (ranges_equal) {
244fe6060f1SDimitry Andric // Same address range. Check if one is from debug
245fe6060f1SDimitry Andric // info and the other is from a symbol table. If
246fe6060f1SDimitry Andric // so, then keep the one with debug info. Our
247fe6060f1SDimitry Andric // sorting guarantees that entries with matching
248fe6060f1SDimitry Andric // address ranges that have debug info are last in
249fe6060f1SDimitry Andric // the sort.
2505f757f3fSDimitry Andric if (!(Prev == Curr)) {
251*0fca6ea1SDimitry Andric if (Prev.hasRichInfo() && Curr.hasRichInfo())
252*0fca6ea1SDimitry Andric Out.Report(
253*0fca6ea1SDimitry Andric "Duplicate address ranges with different debug info.",
254*0fca6ea1SDimitry Andric [&](raw_ostream &OS) {
255fe6060f1SDimitry Andric OS << "warning: same address range contains "
256fe6060f1SDimitry Andric "different debug "
2578bcb0991SDimitry Andric << "info. Removing:\n"
258fe6060f1SDimitry Andric << Prev << "\nIn favor of this one:\n"
259fe6060f1SDimitry Andric << Curr << "\n";
260*0fca6ea1SDimitry Andric });
261*0fca6ea1SDimitry Andric
2625f757f3fSDimitry Andric // We want to swap the current entry with the previous since
2635f757f3fSDimitry Andric // later entries with the same range always have more debug info
2645f757f3fSDimitry Andric // or different debug info.
2655f757f3fSDimitry Andric std::swap(Prev, Curr);
2668bcb0991SDimitry Andric }
2678bcb0991SDimitry Andric } else {
268*0fca6ea1SDimitry Andric Out.Report("Overlapping function ranges", [&](raw_ostream &OS) {
269*0fca6ea1SDimitry Andric // print warnings about overlaps
2708bcb0991SDimitry Andric OS << "warning: function ranges overlap:\n"
271fe6060f1SDimitry Andric << Prev << "\n"
272fe6060f1SDimitry Andric << Curr << "\n";
273*0fca6ea1SDimitry Andric });
2745f757f3fSDimitry Andric FinalizedFuncs.emplace_back(std::move(Curr));
275fe6060f1SDimitry Andric }
2765f757f3fSDimitry Andric } else {
2775f757f3fSDimitry Andric if (Prev.Range.size() == 0 && Curr.Range.contains(Prev.Range.start())) {
2785f757f3fSDimitry Andric // Symbols on macOS don't have address ranges, so if the range
2795f757f3fSDimitry Andric // doesn't match and the size is zero, then we replace the empty
2805f757f3fSDimitry Andric // symbol function info with the current one.
2815f757f3fSDimitry Andric std::swap(Prev, Curr);
2825f757f3fSDimitry Andric } else {
2835f757f3fSDimitry Andric FinalizedFuncs.emplace_back(std::move(Curr));
2848bcb0991SDimitry Andric }
2858bcb0991SDimitry Andric }
2865f757f3fSDimitry Andric }
2875f757f3fSDimitry Andric std::swap(Funcs, FinalizedFuncs);
2885f757f3fSDimitry Andric }
2895ffd83dbSDimitry Andric // If our last function info entry doesn't have a size and if we have valid
2905ffd83dbSDimitry Andric // text ranges, we should set the size of the last entry since any search for
2915ffd83dbSDimitry Andric // a high address might match our last entry. By fixing up this size, we can
2925ffd83dbSDimitry Andric // help ensure we don't cause lookups to always return the last symbol that
2935ffd83dbSDimitry Andric // has no size when doing lookups.
2945ffd83dbSDimitry Andric if (!Funcs.empty() && Funcs.back().Range.size() == 0 && ValidTextRanges) {
295fe6060f1SDimitry Andric if (auto Range =
29681ad6265SDimitry Andric ValidTextRanges->getRangeThatContains(Funcs.back().Range.start())) {
29781ad6265SDimitry Andric Funcs.back().Range = {Funcs.back().Range.start(), Range->end()};
2985ffd83dbSDimitry Andric }
2995ffd83dbSDimitry Andric }
300*0fca6ea1SDimitry Andric Out << "Pruned " << NumBefore - Funcs.size() << " functions, ended with "
3018bcb0991SDimitry Andric << Funcs.size() << " total\n";
3025f757f3fSDimitry Andric }
3038bcb0991SDimitry Andric return Error::success();
3048bcb0991SDimitry Andric }
3058bcb0991SDimitry Andric
copyString(const GsymCreator & SrcGC,uint32_t StrOff)30606c3fb27SDimitry Andric uint32_t GsymCreator::copyString(const GsymCreator &SrcGC, uint32_t StrOff) {
30706c3fb27SDimitry Andric // String offset at zero is always the empty string, no copying needed.
30806c3fb27SDimitry Andric if (StrOff == 0)
30906c3fb27SDimitry Andric return 0;
31006c3fb27SDimitry Andric return StrTab.add(SrcGC.StringOffsetMap.find(StrOff)->second);
31106c3fb27SDimitry Andric }
31206c3fb27SDimitry Andric
insertString(StringRef S,bool Copy)3135ffd83dbSDimitry Andric uint32_t GsymCreator::insertString(StringRef S, bool Copy) {
3148bcb0991SDimitry Andric if (S.empty())
3158bcb0991SDimitry Andric return 0;
316fe6060f1SDimitry Andric
317fe6060f1SDimitry Andric // The hash can be calculated outside the lock.
318fe6060f1SDimitry Andric CachedHashStringRef CHStr(S);
319fe6060f1SDimitry Andric std::lock_guard<std::mutex> Guard(Mutex);
3205ffd83dbSDimitry Andric if (Copy) {
3215ffd83dbSDimitry Andric // We need to provide backing storage for the string if requested
3225ffd83dbSDimitry Andric // since StringTableBuilder stores references to strings. Any string
3235ffd83dbSDimitry Andric // that comes from a section in an object file doesn't need to be
3245ffd83dbSDimitry Andric // copied, but any string created by code will need to be copied.
3255ffd83dbSDimitry Andric // This allows GsymCreator to be really fast when parsing DWARF and
3265ffd83dbSDimitry Andric // other object files as most strings don't need to be copied.
3275ffd83dbSDimitry Andric if (!StrTab.contains(CHStr))
328fe6060f1SDimitry Andric CHStr = CachedHashStringRef{StringStorage.insert(S).first->getKey(),
329fe6060f1SDimitry Andric CHStr.hash()};
3305ffd83dbSDimitry Andric }
33106c3fb27SDimitry Andric const uint32_t StrOff = StrTab.add(CHStr);
33206c3fb27SDimitry Andric // Save a mapping of string offsets to the cached string reference in case
33306c3fb27SDimitry Andric // we need to segment the GSYM file and copy string from one string table to
33406c3fb27SDimitry Andric // another.
33506c3fb27SDimitry Andric if (StringOffsetMap.count(StrOff) == 0)
33606c3fb27SDimitry Andric StringOffsetMap.insert(std::make_pair(StrOff, CHStr));
33706c3fb27SDimitry Andric return StrOff;
3388bcb0991SDimitry Andric }
3398bcb0991SDimitry Andric
addFunctionInfo(FunctionInfo && FI)3408bcb0991SDimitry Andric void GsymCreator::addFunctionInfo(FunctionInfo &&FI) {
341fe6060f1SDimitry Andric std::lock_guard<std::mutex> Guard(Mutex);
342fe6060f1SDimitry Andric Funcs.emplace_back(std::move(FI));
3438bcb0991SDimitry Andric }
3448bcb0991SDimitry Andric
forEachFunctionInfo(std::function<bool (FunctionInfo &)> const & Callback)3458bcb0991SDimitry Andric void GsymCreator::forEachFunctionInfo(
3468bcb0991SDimitry Andric std::function<bool(FunctionInfo &)> const &Callback) {
347fe6060f1SDimitry Andric std::lock_guard<std::mutex> Guard(Mutex);
3488bcb0991SDimitry Andric for (auto &FI : Funcs) {
3498bcb0991SDimitry Andric if (!Callback(FI))
3508bcb0991SDimitry Andric break;
3518bcb0991SDimitry Andric }
3528bcb0991SDimitry Andric }
3538bcb0991SDimitry Andric
forEachFunctionInfo(std::function<bool (const FunctionInfo &)> const & Callback) const3548bcb0991SDimitry Andric void GsymCreator::forEachFunctionInfo(
3558bcb0991SDimitry Andric std::function<bool(const FunctionInfo &)> const &Callback) const {
356fe6060f1SDimitry Andric std::lock_guard<std::mutex> Guard(Mutex);
3578bcb0991SDimitry Andric for (const auto &FI : Funcs) {
3588bcb0991SDimitry Andric if (!Callback(FI))
3598bcb0991SDimitry Andric break;
3608bcb0991SDimitry Andric }
3618bcb0991SDimitry Andric }
3625ffd83dbSDimitry Andric
getNumFunctionInfos() const3635ffd83dbSDimitry Andric size_t GsymCreator::getNumFunctionInfos() const {
364fe6060f1SDimitry Andric std::lock_guard<std::mutex> Guard(Mutex);
3655ffd83dbSDimitry Andric return Funcs.size();
3665ffd83dbSDimitry Andric }
3675ffd83dbSDimitry Andric
IsValidTextAddress(uint64_t Addr) const3685ffd83dbSDimitry Andric bool GsymCreator::IsValidTextAddress(uint64_t Addr) const {
3695ffd83dbSDimitry Andric if (ValidTextRanges)
3705ffd83dbSDimitry Andric return ValidTextRanges->contains(Addr);
3715ffd83dbSDimitry Andric return true; // No valid text ranges has been set, so accept all ranges.
3725ffd83dbSDimitry Andric }
3735ffd83dbSDimitry Andric
getFirstFunctionAddress() const37406c3fb27SDimitry Andric std::optional<uint64_t> GsymCreator::getFirstFunctionAddress() const {
3755f757f3fSDimitry Andric // If we have finalized then Funcs are sorted. If we are a segment then
3765f757f3fSDimitry Andric // Funcs will be sorted as well since function infos get added from an
3775f757f3fSDimitry Andric // already finalized GsymCreator object where its functions were sorted and
3785f757f3fSDimitry Andric // uniqued.
3795f757f3fSDimitry Andric if ((Finalized || IsSegment) && !Funcs.empty())
38006c3fb27SDimitry Andric return std::optional<uint64_t>(Funcs.front().startAddress());
38106c3fb27SDimitry Andric return std::nullopt;
38206c3fb27SDimitry Andric }
38306c3fb27SDimitry Andric
getLastFunctionAddress() const38406c3fb27SDimitry Andric std::optional<uint64_t> GsymCreator::getLastFunctionAddress() const {
3855f757f3fSDimitry Andric // If we have finalized then Funcs are sorted. If we are a segment then
3865f757f3fSDimitry Andric // Funcs will be sorted as well since function infos get added from an
3875f757f3fSDimitry Andric // already finalized GsymCreator object where its functions were sorted and
3885f757f3fSDimitry Andric // uniqued.
3895f757f3fSDimitry Andric if ((Finalized || IsSegment) && !Funcs.empty())
39006c3fb27SDimitry Andric return std::optional<uint64_t>(Funcs.back().startAddress());
39106c3fb27SDimitry Andric return std::nullopt;
39206c3fb27SDimitry Andric }
39306c3fb27SDimitry Andric
getBaseAddress() const39406c3fb27SDimitry Andric std::optional<uint64_t> GsymCreator::getBaseAddress() const {
39506c3fb27SDimitry Andric if (BaseAddress)
39606c3fb27SDimitry Andric return BaseAddress;
39706c3fb27SDimitry Andric return getFirstFunctionAddress();
39806c3fb27SDimitry Andric }
39906c3fb27SDimitry Andric
getMaxAddressOffset() const40006c3fb27SDimitry Andric uint64_t GsymCreator::getMaxAddressOffset() const {
40106c3fb27SDimitry Andric switch (getAddressOffsetSize()) {
40206c3fb27SDimitry Andric case 1: return UINT8_MAX;
40306c3fb27SDimitry Andric case 2: return UINT16_MAX;
40406c3fb27SDimitry Andric case 4: return UINT32_MAX;
40506c3fb27SDimitry Andric case 8: return UINT64_MAX;
40606c3fb27SDimitry Andric }
40706c3fb27SDimitry Andric llvm_unreachable("invalid address offset");
40806c3fb27SDimitry Andric }
40906c3fb27SDimitry Andric
getAddressOffsetSize() const41006c3fb27SDimitry Andric uint8_t GsymCreator::getAddressOffsetSize() const {
41106c3fb27SDimitry Andric const std::optional<uint64_t> BaseAddress = getBaseAddress();
41206c3fb27SDimitry Andric const std::optional<uint64_t> LastFuncAddr = getLastFunctionAddress();
41306c3fb27SDimitry Andric if (BaseAddress && LastFuncAddr) {
41406c3fb27SDimitry Andric const uint64_t AddrDelta = *LastFuncAddr - *BaseAddress;
41506c3fb27SDimitry Andric if (AddrDelta <= UINT8_MAX)
41606c3fb27SDimitry Andric return 1;
41706c3fb27SDimitry Andric else if (AddrDelta <= UINT16_MAX)
41806c3fb27SDimitry Andric return 2;
41906c3fb27SDimitry Andric else if (AddrDelta <= UINT32_MAX)
42006c3fb27SDimitry Andric return 4;
42106c3fb27SDimitry Andric return 8;
42206c3fb27SDimitry Andric }
42306c3fb27SDimitry Andric return 1;
42406c3fb27SDimitry Andric }
42506c3fb27SDimitry Andric
calculateHeaderAndTableSize() const42606c3fb27SDimitry Andric uint64_t GsymCreator::calculateHeaderAndTableSize() const {
42706c3fb27SDimitry Andric uint64_t Size = sizeof(Header);
42806c3fb27SDimitry Andric const size_t NumFuncs = Funcs.size();
42906c3fb27SDimitry Andric // Add size of address offset table
43006c3fb27SDimitry Andric Size += NumFuncs * getAddressOffsetSize();
43106c3fb27SDimitry Andric // Add size of address info offsets which are 32 bit integers in version 1.
43206c3fb27SDimitry Andric Size += NumFuncs * sizeof(uint32_t);
43306c3fb27SDimitry Andric // Add file table size
43406c3fb27SDimitry Andric Size += Files.size() * sizeof(FileEntry);
43506c3fb27SDimitry Andric // Add string table size
43606c3fb27SDimitry Andric Size += StrTab.getSize();
43706c3fb27SDimitry Andric
43806c3fb27SDimitry Andric return Size;
43906c3fb27SDimitry Andric }
44006c3fb27SDimitry Andric
44106c3fb27SDimitry Andric // This function takes a InlineInfo class that was copy constructed from an
44206c3fb27SDimitry Andric // InlineInfo from the \a SrcGC and updates all members that point to strings
44306c3fb27SDimitry Andric // and files to point to strings and files from this GsymCreator.
fixupInlineInfo(const GsymCreator & SrcGC,InlineInfo & II)44406c3fb27SDimitry Andric void GsymCreator::fixupInlineInfo(const GsymCreator &SrcGC, InlineInfo &II) {
44506c3fb27SDimitry Andric II.Name = copyString(SrcGC, II.Name);
44606c3fb27SDimitry Andric II.CallFile = copyFile(SrcGC, II.CallFile);
44706c3fb27SDimitry Andric for (auto &ChildII: II.Children)
44806c3fb27SDimitry Andric fixupInlineInfo(SrcGC, ChildII);
44906c3fb27SDimitry Andric }
45006c3fb27SDimitry Andric
copyFunctionInfo(const GsymCreator & SrcGC,size_t FuncIdx)45106c3fb27SDimitry Andric uint64_t GsymCreator::copyFunctionInfo(const GsymCreator &SrcGC, size_t FuncIdx) {
45206c3fb27SDimitry Andric // To copy a function info we need to copy any files and strings over into
45306c3fb27SDimitry Andric // this GsymCreator and then copy the function info and update the string
45406c3fb27SDimitry Andric // table offsets to match the new offsets.
45506c3fb27SDimitry Andric const FunctionInfo &SrcFI = SrcGC.Funcs[FuncIdx];
45606c3fb27SDimitry Andric
45706c3fb27SDimitry Andric FunctionInfo DstFI;
45806c3fb27SDimitry Andric DstFI.Range = SrcFI.Range;
45906c3fb27SDimitry Andric DstFI.Name = copyString(SrcGC, SrcFI.Name);
46006c3fb27SDimitry Andric // Copy the line table if there is one.
46106c3fb27SDimitry Andric if (SrcFI.OptLineTable) {
46206c3fb27SDimitry Andric // Copy the entire line table.
46306c3fb27SDimitry Andric DstFI.OptLineTable = LineTable(SrcFI.OptLineTable.value());
46406c3fb27SDimitry Andric // Fixup all LineEntry::File entries which are indexes in the the file table
46506c3fb27SDimitry Andric // from SrcGC and must be converted to file indexes from this GsymCreator.
46606c3fb27SDimitry Andric LineTable &DstLT = DstFI.OptLineTable.value();
46706c3fb27SDimitry Andric const size_t NumLines = DstLT.size();
46806c3fb27SDimitry Andric for (size_t I=0; I<NumLines; ++I) {
46906c3fb27SDimitry Andric LineEntry &LE = DstLT.get(I);
47006c3fb27SDimitry Andric LE.File = copyFile(SrcGC, LE.File);
47106c3fb27SDimitry Andric }
47206c3fb27SDimitry Andric }
47306c3fb27SDimitry Andric // Copy the inline information if needed.
47406c3fb27SDimitry Andric if (SrcFI.Inline) {
47506c3fb27SDimitry Andric // Make a copy of the source inline information.
47606c3fb27SDimitry Andric DstFI.Inline = SrcFI.Inline.value();
47706c3fb27SDimitry Andric // Fixup all strings and files in the copied inline information.
47806c3fb27SDimitry Andric fixupInlineInfo(SrcGC, *DstFI.Inline);
47906c3fb27SDimitry Andric }
48006c3fb27SDimitry Andric std::lock_guard<std::mutex> Guard(Mutex);
4815f757f3fSDimitry Andric Funcs.emplace_back(DstFI);
48206c3fb27SDimitry Andric return Funcs.back().cacheEncoding();
48306c3fb27SDimitry Andric }
48406c3fb27SDimitry Andric
saveSegments(StringRef Path,llvm::endianness ByteOrder,uint64_t SegmentSize) const48506c3fb27SDimitry Andric llvm::Error GsymCreator::saveSegments(StringRef Path,
4865f757f3fSDimitry Andric llvm::endianness ByteOrder,
48706c3fb27SDimitry Andric uint64_t SegmentSize) const {
48806c3fb27SDimitry Andric if (SegmentSize == 0)
48906c3fb27SDimitry Andric return createStringError(std::errc::invalid_argument,
49006c3fb27SDimitry Andric "invalid segment size zero");
49106c3fb27SDimitry Andric
49206c3fb27SDimitry Andric size_t FuncIdx = 0;
49306c3fb27SDimitry Andric const size_t NumFuncs = Funcs.size();
49406c3fb27SDimitry Andric while (FuncIdx < NumFuncs) {
49506c3fb27SDimitry Andric llvm::Expected<std::unique_ptr<GsymCreator>> ExpectedGC =
49606c3fb27SDimitry Andric createSegment(SegmentSize, FuncIdx);
49706c3fb27SDimitry Andric if (ExpectedGC) {
49806c3fb27SDimitry Andric GsymCreator *GC = ExpectedGC->get();
49906c3fb27SDimitry Andric if (GC == NULL)
50006c3fb27SDimitry Andric break; // We had not more functions to encode.
501*0fca6ea1SDimitry Andric // Don't collect any messages at all
502*0fca6ea1SDimitry Andric OutputAggregator Out(nullptr);
503*0fca6ea1SDimitry Andric llvm::Error Err = GC->finalize(Out);
50406c3fb27SDimitry Andric if (Err)
50506c3fb27SDimitry Andric return Err;
50606c3fb27SDimitry Andric std::string SegmentedGsymPath;
50706c3fb27SDimitry Andric raw_string_ostream SGP(SegmentedGsymPath);
50806c3fb27SDimitry Andric std::optional<uint64_t> FirstFuncAddr = GC->getFirstFunctionAddress();
50906c3fb27SDimitry Andric if (FirstFuncAddr) {
51006c3fb27SDimitry Andric SGP << Path << "-" << llvm::format_hex(*FirstFuncAddr, 1);
51106c3fb27SDimitry Andric SGP.flush();
51206c3fb27SDimitry Andric Err = GC->save(SegmentedGsymPath, ByteOrder, std::nullopt);
51306c3fb27SDimitry Andric if (Err)
51406c3fb27SDimitry Andric return Err;
51506c3fb27SDimitry Andric }
51606c3fb27SDimitry Andric } else {
51706c3fb27SDimitry Andric return ExpectedGC.takeError();
51806c3fb27SDimitry Andric }
51906c3fb27SDimitry Andric }
52006c3fb27SDimitry Andric return Error::success();
52106c3fb27SDimitry Andric }
52206c3fb27SDimitry Andric
52306c3fb27SDimitry Andric llvm::Expected<std::unique_ptr<GsymCreator>>
createSegment(uint64_t SegmentSize,size_t & FuncIdx) const52406c3fb27SDimitry Andric GsymCreator::createSegment(uint64_t SegmentSize, size_t &FuncIdx) const {
52506c3fb27SDimitry Andric // No function entries, return empty unique pointer
52606c3fb27SDimitry Andric if (FuncIdx >= Funcs.size())
52706c3fb27SDimitry Andric return std::unique_ptr<GsymCreator>();
52806c3fb27SDimitry Andric
52906c3fb27SDimitry Andric std::unique_ptr<GsymCreator> GC(new GsymCreator(/*Quiet=*/true));
5305f757f3fSDimitry Andric
5315f757f3fSDimitry Andric // Tell the creator that this is a segment.
5325f757f3fSDimitry Andric GC->setIsSegment();
5335f757f3fSDimitry Andric
53406c3fb27SDimitry Andric // Set the base address if there is one.
53506c3fb27SDimitry Andric if (BaseAddress)
53606c3fb27SDimitry Andric GC->setBaseAddress(*BaseAddress);
53706c3fb27SDimitry Andric // Copy the UUID value from this object into the new creator.
53806c3fb27SDimitry Andric GC->setUUID(UUID);
53906c3fb27SDimitry Andric const size_t NumFuncs = Funcs.size();
54006c3fb27SDimitry Andric // Track how big the function infos are for the current segment so we can
54106c3fb27SDimitry Andric // emit segments that are close to the requested size. It is quick math to
54206c3fb27SDimitry Andric // determine the current header and tables sizes, so we can do that each loop.
54306c3fb27SDimitry Andric uint64_t SegmentFuncInfosSize = 0;
54406c3fb27SDimitry Andric for (; FuncIdx < NumFuncs; ++FuncIdx) {
54506c3fb27SDimitry Andric const uint64_t HeaderAndTableSize = GC->calculateHeaderAndTableSize();
54606c3fb27SDimitry Andric if (HeaderAndTableSize + SegmentFuncInfosSize >= SegmentSize) {
54706c3fb27SDimitry Andric if (SegmentFuncInfosSize == 0)
54806c3fb27SDimitry Andric return createStringError(std::errc::invalid_argument,
54906c3fb27SDimitry Andric "a segment size of %" PRIu64 " is to small to "
55006c3fb27SDimitry Andric "fit any function infos, specify a larger value",
55106c3fb27SDimitry Andric SegmentSize);
55206c3fb27SDimitry Andric
55306c3fb27SDimitry Andric break;
55406c3fb27SDimitry Andric }
55506c3fb27SDimitry Andric SegmentFuncInfosSize += alignTo(GC->copyFunctionInfo(*this, FuncIdx), 4);
55606c3fb27SDimitry Andric }
55706c3fb27SDimitry Andric return std::move(GC);
55806c3fb27SDimitry Andric }
559