xref: /freebsd/contrib/llvm-project/lld/MachO/MapFile.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1fe6060f1SDimitry Andric //===- MapFile.cpp --------------------------------------------------------===//
2fe6060f1SDimitry Andric //
3fe6060f1SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4fe6060f1SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5fe6060f1SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6fe6060f1SDimitry Andric //
7fe6060f1SDimitry Andric //===----------------------------------------------------------------------===//
8fe6060f1SDimitry Andric //
9bdd1243dSDimitry Andric // This file implements the -map option, which maps address ranges to their
10bdd1243dSDimitry Andric // respective contents, plus the input file these contents were originally from.
11bdd1243dSDimitry Andric // The contents (typically symbols) are listed in address order. Dead-stripped
12bdd1243dSDimitry Andric // contents are included as well.
13fe6060f1SDimitry Andric //
14fe6060f1SDimitry Andric // # Path: test
15fe6060f1SDimitry Andric // # Arch: x86_64
16fe6060f1SDimitry Andric // # Object files:
17fe6060f1SDimitry Andric // [  0] linker synthesized
18fe6060f1SDimitry Andric // [  1] a.o
19fe6060f1SDimitry Andric // # Sections:
20fe6060f1SDimitry Andric // # Address    Size       Segment  Section
21fe6060f1SDimitry Andric // 0x1000005C0  0x0000004C __TEXT   __text
22fe6060f1SDimitry Andric // # Symbols:
23bdd1243dSDimitry Andric // # Address    Size       File  Name
24bdd1243dSDimitry Andric // 0x1000005C0  0x00000001 [  1] _main
25bdd1243dSDimitry Andric // # Dead Stripped Symbols:
26bdd1243dSDimitry Andric // #            Size       File  Name
27bdd1243dSDimitry Andric // <<dead>>     0x00000001 [  1] _foo
28fe6060f1SDimitry Andric //
29fe6060f1SDimitry Andric //===----------------------------------------------------------------------===//
30fe6060f1SDimitry Andric 
31fe6060f1SDimitry Andric #include "MapFile.h"
32bdd1243dSDimitry Andric #include "ConcatOutputSection.h"
33fe6060f1SDimitry Andric #include "Config.h"
34fe6060f1SDimitry Andric #include "InputFiles.h"
35fe6060f1SDimitry Andric #include "InputSection.h"
36fe6060f1SDimitry Andric #include "OutputSegment.h"
37fe6060f1SDimitry Andric #include "Symbols.h"
3881ad6265SDimitry Andric #include "SyntheticSections.h"
39fe6060f1SDimitry Andric #include "Target.h"
40bdd1243dSDimitry Andric #include "lld/Common/ErrorHandler.h"
41bdd1243dSDimitry Andric #include "llvm/ADT/DenseMap.h"
42fe6060f1SDimitry Andric #include "llvm/Support/Parallel.h"
43fe6060f1SDimitry Andric #include "llvm/Support/TimeProfiler.h"
44fe6060f1SDimitry Andric 
45fe6060f1SDimitry Andric using namespace llvm;
46fe6060f1SDimitry Andric using namespace llvm::sys;
47fe6060f1SDimitry Andric using namespace lld;
48fe6060f1SDimitry Andric using namespace lld::macho;
49fe6060f1SDimitry Andric 
50bdd1243dSDimitry Andric struct CStringInfo {
51bdd1243dSDimitry Andric   uint32_t fileIndex;
52bdd1243dSDimitry Andric   StringRef str;
53bdd1243dSDimitry Andric };
54bdd1243dSDimitry Andric 
55bdd1243dSDimitry Andric struct MapInfo {
56bdd1243dSDimitry Andric   SmallVector<InputFile *> files;
57bdd1243dSDimitry Andric   SmallVector<Defined *> deadSymbols;
58bdd1243dSDimitry Andric   DenseMap<const OutputSection *,
59bdd1243dSDimitry Andric            SmallVector<std::pair<uint64_t /*addr*/, CStringInfo>>>
60bdd1243dSDimitry Andric       liveCStringsForSection;
61bdd1243dSDimitry Andric   SmallVector<CStringInfo> deadCStrings;
62bdd1243dSDimitry Andric };
63bdd1243dSDimitry Andric 
gatherMapInfo()64bdd1243dSDimitry Andric static MapInfo gatherMapInfo() {
65bdd1243dSDimitry Andric   MapInfo info;
66bdd1243dSDimitry Andric   for (InputFile *file : inputFiles) {
67bdd1243dSDimitry Andric     bool isReferencedFile = false;
68bdd1243dSDimitry Andric 
69bdd1243dSDimitry Andric     if (isa<ObjFile>(file) || isa<BitcodeFile>(file)) {
70bdd1243dSDimitry Andric       uint32_t fileIndex = info.files.size() + 1;
71bdd1243dSDimitry Andric 
72bdd1243dSDimitry Andric       // Gather the dead symbols. We don't have to bother with the live ones
73bdd1243dSDimitry Andric       // because we will pick them up as we iterate over the OutputSections
74bdd1243dSDimitry Andric       // later.
75bdd1243dSDimitry Andric       for (Symbol *sym : file->symbols) {
76fe6060f1SDimitry Andric         if (auto *d = dyn_cast_or_null<Defined>(sym))
77bdd1243dSDimitry Andric           // Only emit the prevailing definition of a symbol. Also, don't emit
78bdd1243dSDimitry Andric           // the symbol if it is part of a cstring section (we use the literal
79bdd1243dSDimitry Andric           // value instead, similar to ld64)
80*0fca6ea1SDimitry Andric           if (d->isec() && d->getFile() == file &&
81*0fca6ea1SDimitry Andric               !isa<CStringInputSection>(d->isec())) {
82bdd1243dSDimitry Andric             isReferencedFile = true;
83bdd1243dSDimitry Andric             if (!d->isLive())
84bdd1243dSDimitry Andric               info.deadSymbols.push_back(d);
85bdd1243dSDimitry Andric           }
86bdd1243dSDimitry Andric       }
87bdd1243dSDimitry Andric 
88bdd1243dSDimitry Andric       // Gather all the cstrings (both live and dead). A CString(Output)Section
89bdd1243dSDimitry Andric       // doesn't provide us a way of figuring out which InputSections its
90bdd1243dSDimitry Andric       // cstring contents came from, so we need to build up that mapping here.
91bdd1243dSDimitry Andric       for (const Section *sec : file->sections) {
92bdd1243dSDimitry Andric         for (const Subsection &subsec : sec->subsections) {
93bdd1243dSDimitry Andric           if (auto isec = dyn_cast<CStringInputSection>(subsec.isec)) {
94bdd1243dSDimitry Andric             auto &liveCStrings = info.liveCStringsForSection[isec->parent];
95bdd1243dSDimitry Andric             for (const auto &[i, piece] : llvm::enumerate(isec->pieces)) {
96bdd1243dSDimitry Andric               if (piece.live)
97bdd1243dSDimitry Andric                 liveCStrings.push_back({isec->parent->addr + piece.outSecOff,
98bdd1243dSDimitry Andric                                         {fileIndex, isec->getStringRef(i)}});
99bdd1243dSDimitry Andric               else
100bdd1243dSDimitry Andric                 info.deadCStrings.push_back({fileIndex, isec->getStringRef(i)});
101bdd1243dSDimitry Andric               isReferencedFile = true;
102bdd1243dSDimitry Andric             }
1031fd87a68SDimitry Andric           } else {
10481ad6265SDimitry Andric             break;
10581ad6265SDimitry Andric           }
10681ad6265SDimitry Andric         }
107bdd1243dSDimitry Andric       }
108bdd1243dSDimitry Andric     } else if (const auto *dylibFile = dyn_cast<DylibFile>(file)) {
109bdd1243dSDimitry Andric       isReferencedFile = dylibFile->isReferenced();
110bdd1243dSDimitry Andric     }
111fe6060f1SDimitry Andric 
112bdd1243dSDimitry Andric     if (isReferencedFile)
113bdd1243dSDimitry Andric       info.files.push_back(file);
114bdd1243dSDimitry Andric   }
115bdd1243dSDimitry Andric 
116bdd1243dSDimitry Andric   // cstrings are not stored in sorted order in their OutputSections, so we sort
117bdd1243dSDimitry Andric   // them here.
118bdd1243dSDimitry Andric   for (auto &liveCStrings : info.liveCStringsForSection)
119bdd1243dSDimitry Andric     parallelSort(liveCStrings.second, [](const auto &p1, const auto &p2) {
120bdd1243dSDimitry Andric       return p1.first < p2.first;
121bdd1243dSDimitry Andric     });
122bdd1243dSDimitry Andric   return info;
123bdd1243dSDimitry Andric }
124bdd1243dSDimitry Andric 
12506c3fb27SDimitry Andric // We use this instead of `toString(const InputFile *)` as we don't want to
12606c3fb27SDimitry Andric // include the dylib install name in our output.
printFileName(raw_fd_ostream & os,const InputFile * f)12706c3fb27SDimitry Andric static void printFileName(raw_fd_ostream &os, const InputFile *f) {
12806c3fb27SDimitry Andric   if (f->archiveName.empty())
12906c3fb27SDimitry Andric     os << f->getName();
13006c3fb27SDimitry Andric   else
13106c3fb27SDimitry Andric     os << f->archiveName << "(" << path::filename(f->getName()) + ")";
13206c3fb27SDimitry Andric }
13306c3fb27SDimitry Andric 
134bdd1243dSDimitry Andric // For printing the contents of the __stubs and __la_symbol_ptr sections.
printStubsEntries(raw_fd_ostream & os,const DenseMap<lld::macho::InputFile *,uint32_t> & readerToFileOrdinal,const OutputSection * osec,size_t entrySize)13506c3fb27SDimitry Andric static void printStubsEntries(
136bdd1243dSDimitry Andric     raw_fd_ostream &os,
137bdd1243dSDimitry Andric     const DenseMap<lld::macho::InputFile *, uint32_t> &readerToFileOrdinal,
138bdd1243dSDimitry Andric     const OutputSection *osec, size_t entrySize) {
139bdd1243dSDimitry Andric   for (const Symbol *sym : in.stubs->getEntries())
140bdd1243dSDimitry Andric     os << format("0x%08llX\t0x%08zX\t[%3u] %s\n",
141bdd1243dSDimitry Andric                  osec->addr + sym->stubsIndex * entrySize, entrySize,
142bdd1243dSDimitry Andric                  readerToFileOrdinal.lookup(sym->getFile()),
143bdd1243dSDimitry Andric                  sym->getName().str().data());
144bdd1243dSDimitry Andric }
145bdd1243dSDimitry Andric 
printNonLazyPointerSection(raw_fd_ostream & os,NonLazyPointerSectionBase * osec)14606c3fb27SDimitry Andric static void printNonLazyPointerSection(raw_fd_ostream &os,
147bdd1243dSDimitry Andric                                        NonLazyPointerSectionBase *osec) {
148bdd1243dSDimitry Andric   // ld64 considers stubs to belong to particular files, but considers GOT
149bdd1243dSDimitry Andric   // entries to be linker-synthesized. Not sure why they made that decision, but
150bdd1243dSDimitry Andric   // I think we can follow suit unless there's demand for better symbol-to-file
151bdd1243dSDimitry Andric   // associations.
152bdd1243dSDimitry Andric   for (const Symbol *sym : osec->getEntries())
153bdd1243dSDimitry Andric     os << format("0x%08llX\t0x%08zX\t[  0] non-lazy-pointer-to-local: %s\n",
154bdd1243dSDimitry Andric                  osec->addr + sym->gotIndex * target->wordSize,
155bdd1243dSDimitry Andric                  target->wordSize, sym->getName().str().data());
156fe6060f1SDimitry Andric }
157fe6060f1SDimitry Andric 
getSymSizeForMap(Defined * sym)158*0fca6ea1SDimitry Andric static uint64_t getSymSizeForMap(Defined *sym) {
159*0fca6ea1SDimitry Andric   if (sym->wasIdenticalCodeFolded)
160*0fca6ea1SDimitry Andric     return 0;
161*0fca6ea1SDimitry Andric   return sym->size;
162*0fca6ea1SDimitry Andric }
163*0fca6ea1SDimitry Andric 
writeMapFile()164fe6060f1SDimitry Andric void macho::writeMapFile() {
165fe6060f1SDimitry Andric   if (config->mapFile.empty())
166fe6060f1SDimitry Andric     return;
167fe6060f1SDimitry Andric 
168fe6060f1SDimitry Andric   TimeTraceScope timeScope("Write map file");
169fe6060f1SDimitry Andric 
170fe6060f1SDimitry Andric   // Open a map file for writing.
171fe6060f1SDimitry Andric   std::error_code ec;
172fe6060f1SDimitry Andric   raw_fd_ostream os(config->mapFile, ec, sys::fs::OF_None);
173fe6060f1SDimitry Andric   if (ec) {
174fe6060f1SDimitry Andric     error("cannot open " + config->mapFile + ": " + ec.message());
175fe6060f1SDimitry Andric     return;
176fe6060f1SDimitry Andric   }
177fe6060f1SDimitry Andric 
178fe6060f1SDimitry Andric   os << format("# Path: %s\n", config->outputFile.str().c_str());
179fe6060f1SDimitry Andric   os << format("# Arch: %s\n",
180fe6060f1SDimitry Andric                getArchitectureName(config->arch()).str().c_str());
181fe6060f1SDimitry Andric 
182bdd1243dSDimitry Andric   MapInfo info = gatherMapInfo();
183bdd1243dSDimitry Andric 
184fe6060f1SDimitry Andric   os << "# Object files:\n";
185fe6060f1SDimitry Andric   os << format("[%3u] %s\n", 0, (const char *)"linker synthesized");
186fe6060f1SDimitry Andric   uint32_t fileIndex = 1;
187fe6060f1SDimitry Andric   DenseMap<lld::macho::InputFile *, uint32_t> readerToFileOrdinal;
188bdd1243dSDimitry Andric   for (InputFile *file : info.files) {
18906c3fb27SDimitry Andric     os << format("[%3u] ", fileIndex);
19006c3fb27SDimitry Andric     printFileName(os, file);
19106c3fb27SDimitry Andric     os << "\n";
192fe6060f1SDimitry Andric     readerToFileOrdinal[file] = fileIndex++;
193fe6060f1SDimitry Andric   }
194fe6060f1SDimitry Andric 
195fe6060f1SDimitry Andric   os << "# Sections:\n";
196fe6060f1SDimitry Andric   os << "# Address\tSize    \tSegment\tSection\n";
197fe6060f1SDimitry Andric   for (OutputSegment *seg : outputSegments)
198fe6060f1SDimitry Andric     for (OutputSection *osec : seg->getSections()) {
199fe6060f1SDimitry Andric       if (osec->isHidden())
200fe6060f1SDimitry Andric         continue;
201fe6060f1SDimitry Andric 
202fe6060f1SDimitry Andric       os << format("0x%08llX\t0x%08llX\t%s\t%s\n", osec->addr, osec->getSize(),
203fe6060f1SDimitry Andric                    seg->name.str().c_str(), osec->name.str().c_str());
204fe6060f1SDimitry Andric     }
205fe6060f1SDimitry Andric 
206*0fca6ea1SDimitry Andric   // Shared function to print an array of symbols.
207*0fca6ea1SDimitry Andric   auto printIsecArrSyms = [&](const std::vector<ConcatInputSection *> &arr) {
208*0fca6ea1SDimitry Andric     for (const ConcatInputSection *isec : arr) {
209*0fca6ea1SDimitry Andric       for (Defined *sym : isec->symbols) {
210*0fca6ea1SDimitry Andric         if (!(isPrivateLabel(sym->getName()) && getSymSizeForMap(sym) == 0))
211*0fca6ea1SDimitry Andric           os << format("0x%08llX\t0x%08llX\t[%3u] %s\n", sym->getVA(),
212*0fca6ea1SDimitry Andric                        getSymSizeForMap(sym),
213*0fca6ea1SDimitry Andric                        readerToFileOrdinal[sym->getFile()],
214*0fca6ea1SDimitry Andric                        sym->getName().str().data());
215*0fca6ea1SDimitry Andric       }
216*0fca6ea1SDimitry Andric     }
217*0fca6ea1SDimitry Andric   };
218*0fca6ea1SDimitry Andric 
219fe6060f1SDimitry Andric   os << "# Symbols:\n";
220bdd1243dSDimitry Andric   os << "# Address\tSize    \tFile  Name\n";
221bdd1243dSDimitry Andric   for (const OutputSegment *seg : outputSegments) {
222bdd1243dSDimitry Andric     for (const OutputSection *osec : seg->getSections()) {
223bdd1243dSDimitry Andric       if (auto *concatOsec = dyn_cast<ConcatOutputSection>(osec)) {
224*0fca6ea1SDimitry Andric         printIsecArrSyms(concatOsec->inputs);
225bdd1243dSDimitry Andric       } else if (osec == in.cStringSection || osec == in.objcMethnameSection) {
226bdd1243dSDimitry Andric         const auto &liveCStrings = info.liveCStringsForSection.lookup(osec);
227bdd1243dSDimitry Andric         uint64_t lastAddr = 0; // strings will never start at address 0, so this
228bdd1243dSDimitry Andric                                // is a sentinel value
229bdd1243dSDimitry Andric         for (const auto &[addr, info] : liveCStrings) {
230bdd1243dSDimitry Andric           uint64_t size = 0;
231bdd1243dSDimitry Andric           if (addr != lastAddr)
232bdd1243dSDimitry Andric             size = info.str.size() + 1; // include null terminator
233bdd1243dSDimitry Andric           lastAddr = addr;
234bdd1243dSDimitry Andric           os << format("0x%08llX\t0x%08llX\t[%3u] literal string: ", addr, size,
235bdd1243dSDimitry Andric                        info.fileIndex);
236bdd1243dSDimitry Andric           os.write_escaped(info.str) << "\n";
237bdd1243dSDimitry Andric         }
238bdd1243dSDimitry Andric       } else if (osec == (void *)in.unwindInfo) {
239bdd1243dSDimitry Andric         os << format("0x%08llX\t0x%08llX\t[  0] compact unwind info\n",
240bdd1243dSDimitry Andric                      osec->addr, osec->getSize());
241bdd1243dSDimitry Andric       } else if (osec == in.stubs) {
242bdd1243dSDimitry Andric         printStubsEntries(os, readerToFileOrdinal, osec, target->stubSize);
243bdd1243dSDimitry Andric       } else if (osec == in.lazyPointers) {
244bdd1243dSDimitry Andric         printStubsEntries(os, readerToFileOrdinal, osec, target->wordSize);
245bdd1243dSDimitry Andric       } else if (osec == in.stubHelper) {
246bdd1243dSDimitry Andric         // yes, ld64 calls it "helper helper"...
247bdd1243dSDimitry Andric         os << format("0x%08llX\t0x%08llX\t[  0] helper helper\n", osec->addr,
248bdd1243dSDimitry Andric                      osec->getSize());
249bdd1243dSDimitry Andric       } else if (osec == in.got) {
250bdd1243dSDimitry Andric         printNonLazyPointerSection(os, in.got);
251bdd1243dSDimitry Andric       } else if (osec == in.tlvPointers) {
252bdd1243dSDimitry Andric         printNonLazyPointerSection(os, in.tlvPointers);
253*0fca6ea1SDimitry Andric       } else if (osec == in.objcMethList) {
254*0fca6ea1SDimitry Andric         printIsecArrSyms(in.objcMethList->getInputs());
255bdd1243dSDimitry Andric       }
256bdd1243dSDimitry Andric       // TODO print other synthetic sections
257bdd1243dSDimitry Andric     }
258fe6060f1SDimitry Andric   }
259fe6060f1SDimitry Andric 
2601fd87a68SDimitry Andric   if (config->deadStrip) {
2611fd87a68SDimitry Andric     os << "# Dead Stripped Symbols:\n";
262bdd1243dSDimitry Andric     os << "#        \tSize    \tFile  Name\n";
263bdd1243dSDimitry Andric     for (Defined *sym : info.deadSymbols) {
2641fd87a68SDimitry Andric       assert(!sym->isLive());
265*0fca6ea1SDimitry Andric       os << format("<<dead>>\t0x%08llX\t[%3u] %s\n", getSymSizeForMap(sym),
266bdd1243dSDimitry Andric                    readerToFileOrdinal[sym->getFile()],
267bdd1243dSDimitry Andric                    sym->getName().str().data());
268bdd1243dSDimitry Andric     }
269bdd1243dSDimitry Andric     for (CStringInfo &cstrInfo : info.deadCStrings) {
270bdd1243dSDimitry Andric       os << format("<<dead>>\t0x%08zX\t[%3u] literal string: ",
271bdd1243dSDimitry Andric                    cstrInfo.str.size() + 1, cstrInfo.fileIndex);
272bdd1243dSDimitry Andric       os.write_escaped(cstrInfo.str) << "\n";
2731fd87a68SDimitry Andric     }
2741fd87a68SDimitry Andric   }
275fe6060f1SDimitry Andric }
276