xref: /freebsd/contrib/llvm-project/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===-- ObjectFileELF.cpp -------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "ObjectFileELF.h"
10 
11 #include <algorithm>
12 #include <cassert>
13 #include <optional>
14 #include <unordered_map>
15 
16 #include "lldb/Core/Module.h"
17 #include "lldb/Core/ModuleSpec.h"
18 #include "lldb/Core/PluginManager.h"
19 #include "lldb/Core/Progress.h"
20 #include "lldb/Core/Section.h"
21 #include "lldb/Host/FileSystem.h"
22 #include "lldb/Host/LZMA.h"
23 #include "lldb/Symbol/DWARFCallFrameInfo.h"
24 #include "lldb/Symbol/SymbolContext.h"
25 #include "lldb/Target/Process.h"
26 #include "lldb/Target/SectionLoadList.h"
27 #include "lldb/Target/Target.h"
28 #include "lldb/Utility/ArchSpec.h"
29 #include "lldb/Utility/DataBufferHeap.h"
30 #include "lldb/Utility/FileSpecList.h"
31 #include "lldb/Utility/LLDBLog.h"
32 #include "lldb/Utility/Log.h"
33 #include "lldb/Utility/RangeMap.h"
34 #include "lldb/Utility/Status.h"
35 #include "lldb/Utility/Stream.h"
36 #include "lldb/Utility/Timer.h"
37 #include "llvm/ADT/IntervalMap.h"
38 #include "llvm/ADT/PointerUnion.h"
39 #include "llvm/ADT/StringRef.h"
40 #include "llvm/BinaryFormat/ELF.h"
41 #include "llvm/Object/Decompressor.h"
42 #include "llvm/Support/ARMBuildAttributes.h"
43 #include "llvm/Support/CRC.h"
44 #include "llvm/Support/FormatVariadic.h"
45 #include "llvm/Support/MathExtras.h"
46 #include "llvm/Support/MemoryBuffer.h"
47 #include "llvm/Support/MipsABIFlags.h"
48 
// Expands to one complete `case` arm for the enumerator `def`: the
// enumerator's spelling is printed through `s->Printf`, left-justified in a
// field of `width` columns, and control then leaves the enclosing switch.
#define CASE_AND_STREAM(s, def, width)                                         \
  case def: {                                                                  \
    s->Printf("%-*s", width, #def);                                            \
    break;                                                                     \
  }
53 
54 using namespace lldb;
55 using namespace lldb_private;
56 using namespace elf;
57 using namespace llvm::ELF;
58 
59 LLDB_PLUGIN_DEFINE(ObjectFileELF)
60 
61 // ELF note owner definitions
62 static const char *const LLDB_NT_OWNER_FREEBSD = "FreeBSD";
63 static const char *const LLDB_NT_OWNER_GNU = "GNU";
64 static const char *const LLDB_NT_OWNER_NETBSD = "NetBSD";
65 static const char *const LLDB_NT_OWNER_NETBSDCORE = "NetBSD-CORE";
66 static const char *const LLDB_NT_OWNER_OPENBSD = "OpenBSD";
67 static const char *const LLDB_NT_OWNER_ANDROID = "Android";
68 static const char *const LLDB_NT_OWNER_CORE = "CORE";
69 static const char *const LLDB_NT_OWNER_LINUX = "LINUX";
70 
71 // ELF note type definitions
72 static const elf_word LLDB_NT_FREEBSD_ABI_TAG = 0x01;
73 static const elf_word LLDB_NT_FREEBSD_ABI_SIZE = 4;
74 
75 static const elf_word LLDB_NT_GNU_ABI_TAG = 0x01;
76 static const elf_word LLDB_NT_GNU_ABI_SIZE = 16;
77 
78 static const elf_word LLDB_NT_GNU_BUILD_ID_TAG = 0x03;
79 
80 static const elf_word LLDB_NT_NETBSD_IDENT_TAG = 1;
81 static const elf_word LLDB_NT_NETBSD_IDENT_DESCSZ = 4;
82 static const elf_word LLDB_NT_NETBSD_IDENT_NAMESZ = 7;
83 static const elf_word LLDB_NT_NETBSD_PROCINFO = 1;
84 
85 // GNU ABI note OS constants
86 static const elf_word LLDB_NT_GNU_ABI_OS_LINUX = 0x00;
87 static const elf_word LLDB_NT_GNU_ABI_OS_HURD = 0x01;
88 static const elf_word LLDB_NT_GNU_ABI_OS_SOLARIS = 0x02;
89 
90 namespace {
91 
92 //===----------------------------------------------------------------------===//
93 /// \class ELFRelocation
94 /// Generic wrapper for ELFRel and ELFRela.
95 ///
96 /// This helper class allows us to parse both ELFRel and ELFRela relocation
97 /// entries in a generic manner.
98 class ELFRelocation {
99 public:
100   /// Constructs an ELFRelocation entry with a personality as given by @p
101   /// type.
102   ///
103   /// \param type Either DT_REL or DT_RELA.  Any other value is invalid.
104   ELFRelocation(unsigned type);
105 
106   ~ELFRelocation();
107 
108   bool Parse(const lldb_private::DataExtractor &data, lldb::offset_t *offset);
109 
110   static unsigned RelocType32(const ELFRelocation &rel);
111 
112   static unsigned RelocType64(const ELFRelocation &rel);
113 
114   static unsigned RelocSymbol32(const ELFRelocation &rel);
115 
116   static unsigned RelocSymbol64(const ELFRelocation &rel);
117 
118   static elf_addr RelocOffset32(const ELFRelocation &rel);
119 
120   static elf_addr RelocOffset64(const ELFRelocation &rel);
121 
122   static elf_sxword RelocAddend32(const ELFRelocation &rel);
123 
124   static elf_sxword RelocAddend64(const ELFRelocation &rel);
125 
IsRela()126   bool IsRela() { return (llvm::isa<ELFRela *>(reloc)); }
127 
128 private:
129   typedef llvm::PointerUnion<ELFRel *, ELFRela *> RelocUnion;
130 
131   RelocUnion reloc;
132 };
133 } // end anonymous namespace
134 
ELFRelocation(unsigned type)135 ELFRelocation::ELFRelocation(unsigned type) {
136   if (type == DT_REL || type == SHT_REL)
137     reloc = new ELFRel();
138   else if (type == DT_RELA || type == SHT_RELA)
139     reloc = new ELFRela();
140   else {
141     assert(false && "unexpected relocation type");
142     reloc = static_cast<ELFRel *>(nullptr);
143   }
144 }
145 
~ELFRelocation()146 ELFRelocation::~ELFRelocation() {
147   if (auto *elfrel = llvm::dyn_cast<ELFRel *>(reloc))
148     delete elfrel;
149   else
150     delete llvm::cast<ELFRela *>(reloc);
151 }
152 
Parse(const lldb_private::DataExtractor & data,lldb::offset_t * offset)153 bool ELFRelocation::Parse(const lldb_private::DataExtractor &data,
154                           lldb::offset_t *offset) {
155   if (auto *elfrel = llvm::dyn_cast<ELFRel *>(reloc))
156     return elfrel->Parse(data, offset);
157   else
158     return llvm::cast<ELFRela *>(reloc)->Parse(data, offset);
159 }
160 
RelocType32(const ELFRelocation & rel)161 unsigned ELFRelocation::RelocType32(const ELFRelocation &rel) {
162   if (auto *elfrel = llvm::dyn_cast<ELFRel *>(rel.reloc))
163     return ELFRel::RelocType32(*elfrel);
164   else
165     return ELFRela::RelocType32(*llvm::cast<ELFRela *>(rel.reloc));
166 }
167 
RelocType64(const ELFRelocation & rel)168 unsigned ELFRelocation::RelocType64(const ELFRelocation &rel) {
169   if (auto *elfrel = llvm::dyn_cast<ELFRel *>(rel.reloc))
170     return ELFRel::RelocType64(*elfrel);
171   else
172     return ELFRela::RelocType64(*llvm::cast<ELFRela *>(rel.reloc));
173 }
174 
RelocSymbol32(const ELFRelocation & rel)175 unsigned ELFRelocation::RelocSymbol32(const ELFRelocation &rel) {
176   if (auto *elfrel = llvm::dyn_cast<ELFRel *>(rel.reloc))
177     return ELFRel::RelocSymbol32(*elfrel);
178   else
179     return ELFRela::RelocSymbol32(*llvm::cast<ELFRela *>(rel.reloc));
180 }
181 
RelocSymbol64(const ELFRelocation & rel)182 unsigned ELFRelocation::RelocSymbol64(const ELFRelocation &rel) {
183   if (auto *elfrel = llvm::dyn_cast<ELFRel *>(rel.reloc))
184     return ELFRel::RelocSymbol64(*elfrel);
185   else
186     return ELFRela::RelocSymbol64(*llvm::cast<ELFRela *>(rel.reloc));
187 }
188 
RelocOffset32(const ELFRelocation & rel)189 elf_addr ELFRelocation::RelocOffset32(const ELFRelocation &rel) {
190   if (auto *elfrel = llvm::dyn_cast<ELFRel *>(rel.reloc))
191     return elfrel->r_offset;
192   else
193     return llvm::cast<ELFRela *>(rel.reloc)->r_offset;
194 }
195 
RelocOffset64(const ELFRelocation & rel)196 elf_addr ELFRelocation::RelocOffset64(const ELFRelocation &rel) {
197   if (auto *elfrel = llvm::dyn_cast<ELFRel *>(rel.reloc))
198     return elfrel->r_offset;
199   else
200     return llvm::cast<ELFRela *>(rel.reloc)->r_offset;
201 }
202 
RelocAddend32(const ELFRelocation & rel)203 elf_sxword ELFRelocation::RelocAddend32(const ELFRelocation &rel) {
204   if (llvm::isa<ELFRel *>(rel.reloc))
205     return 0;
206   else
207     return llvm::cast<ELFRela *>(rel.reloc)->r_addend;
208 }
209 
RelocAddend64(const ELFRelocation & rel)210 elf_sxword  ELFRelocation::RelocAddend64(const ELFRelocation &rel) {
211   if (llvm::isa<ELFRel *>(rel.reloc))
212     return 0;
213   else
214     return llvm::cast<ELFRela *>(rel.reloc)->r_addend;
215 }
216 
SegmentID(size_t PHdrIndex)217 static user_id_t SegmentID(size_t PHdrIndex) {
218   return ~user_id_t(PHdrIndex);
219 }
220 
Parse(const DataExtractor & data,lldb::offset_t * offset)221 bool ELFNote::Parse(const DataExtractor &data, lldb::offset_t *offset) {
222   // Read all fields.
223   if (data.GetU32(offset, &n_namesz, 3) == nullptr)
224     return false;
225 
226   // The name field is required to be nul-terminated, and n_namesz includes the
227   // terminating nul in observed implementations (contrary to the ELF-64 spec).
228   // A special case is needed for cores generated by some older Linux versions,
229   // which write a note named "CORE" without a nul terminator and n_namesz = 4.
230   if (n_namesz == 4) {
231     char buf[4];
232     if (data.ExtractBytes(*offset, 4, data.GetByteOrder(), buf) != 4)
233       return false;
234     if (strncmp(buf, "CORE", 4) == 0) {
235       n_name = "CORE";
236       *offset += 4;
237       return true;
238     }
239   }
240 
241   const char *cstr = data.GetCStr(offset, llvm::alignTo(n_namesz, 4));
242   if (cstr == nullptr) {
243     Log *log = GetLog(LLDBLog::Symbols);
244     LLDB_LOGF(log, "Failed to parse note name lacking nul terminator");
245 
246     return false;
247   }
248   n_name = cstr;
249   return true;
250 }
251 
mipsVariantFromElfFlags(const elf::ELFHeader & header)252 static uint32_t mipsVariantFromElfFlags (const elf::ELFHeader &header) {
253   const uint32_t mips_arch = header.e_flags & llvm::ELF::EF_MIPS_ARCH;
254   uint32_t endian = header.e_ident[EI_DATA];
255   uint32_t arch_variant = ArchSpec::eMIPSSubType_unknown;
256   uint32_t fileclass = header.e_ident[EI_CLASS];
257 
258   // If there aren't any elf flags available (e.g core elf file) then return
259   // default
260   // 32 or 64 bit arch (without any architecture revision) based on object file's class.
261   if (header.e_type == ET_CORE) {
262     switch (fileclass) {
263     case llvm::ELF::ELFCLASS32:
264       return (endian == ELFDATA2LSB) ? ArchSpec::eMIPSSubType_mips32el
265                                      : ArchSpec::eMIPSSubType_mips32;
266     case llvm::ELF::ELFCLASS64:
267       return (endian == ELFDATA2LSB) ? ArchSpec::eMIPSSubType_mips64el
268                                      : ArchSpec::eMIPSSubType_mips64;
269     default:
270       return arch_variant;
271     }
272   }
273 
274   switch (mips_arch) {
275   case llvm::ELF::EF_MIPS_ARCH_1:
276   case llvm::ELF::EF_MIPS_ARCH_2:
277   case llvm::ELF::EF_MIPS_ARCH_32:
278     return (endian == ELFDATA2LSB) ? ArchSpec::eMIPSSubType_mips32el
279                                    : ArchSpec::eMIPSSubType_mips32;
280   case llvm::ELF::EF_MIPS_ARCH_32R2:
281     return (endian == ELFDATA2LSB) ? ArchSpec::eMIPSSubType_mips32r2el
282                                    : ArchSpec::eMIPSSubType_mips32r2;
283   case llvm::ELF::EF_MIPS_ARCH_32R6:
284     return (endian == ELFDATA2LSB) ? ArchSpec::eMIPSSubType_mips32r6el
285                                    : ArchSpec::eMIPSSubType_mips32r6;
286   case llvm::ELF::EF_MIPS_ARCH_3:
287   case llvm::ELF::EF_MIPS_ARCH_4:
288   case llvm::ELF::EF_MIPS_ARCH_5:
289   case llvm::ELF::EF_MIPS_ARCH_64:
290     return (endian == ELFDATA2LSB) ? ArchSpec::eMIPSSubType_mips64el
291                                    : ArchSpec::eMIPSSubType_mips64;
292   case llvm::ELF::EF_MIPS_ARCH_64R2:
293     return (endian == ELFDATA2LSB) ? ArchSpec::eMIPSSubType_mips64r2el
294                                    : ArchSpec::eMIPSSubType_mips64r2;
295   case llvm::ELF::EF_MIPS_ARCH_64R6:
296     return (endian == ELFDATA2LSB) ? ArchSpec::eMIPSSubType_mips64r6el
297                                    : ArchSpec::eMIPSSubType_mips64r6;
298   default:
299     break;
300   }
301 
302   return arch_variant;
303 }
304 
riscvVariantFromElfFlags(const elf::ELFHeader & header)305 static uint32_t riscvVariantFromElfFlags(const elf::ELFHeader &header) {
306   uint32_t fileclass = header.e_ident[EI_CLASS];
307   switch (fileclass) {
308   case llvm::ELF::ELFCLASS32:
309     return ArchSpec::eRISCVSubType_riscv32;
310   case llvm::ELF::ELFCLASS64:
311     return ArchSpec::eRISCVSubType_riscv64;
312   default:
313     return ArchSpec::eRISCVSubType_unknown;
314   }
315 }
316 
ppc64VariantFromElfFlags(const elf::ELFHeader & header)317 static uint32_t ppc64VariantFromElfFlags(const elf::ELFHeader &header) {
318   uint32_t endian = header.e_ident[EI_DATA];
319   if (endian == ELFDATA2LSB)
320     return ArchSpec::eCore_ppc64le_generic;
321   else
322     return ArchSpec::eCore_ppc64_generic;
323 }
324 
loongarchVariantFromElfFlags(const elf::ELFHeader & header)325 static uint32_t loongarchVariantFromElfFlags(const elf::ELFHeader &header) {
326   uint32_t fileclass = header.e_ident[EI_CLASS];
327   switch (fileclass) {
328   case llvm::ELF::ELFCLASS32:
329     return ArchSpec::eLoongArchSubType_loongarch32;
330   case llvm::ELF::ELFCLASS64:
331     return ArchSpec::eLoongArchSubType_loongarch64;
332   default:
333     return ArchSpec::eLoongArchSubType_unknown;
334   }
335 }
336 
subTypeFromElfHeader(const elf::ELFHeader & header)337 static uint32_t subTypeFromElfHeader(const elf::ELFHeader &header) {
338   if (header.e_machine == llvm::ELF::EM_MIPS)
339     return mipsVariantFromElfFlags(header);
340   else if (header.e_machine == llvm::ELF::EM_PPC64)
341     return ppc64VariantFromElfFlags(header);
342   else if (header.e_machine == llvm::ELF::EM_RISCV)
343     return riscvVariantFromElfFlags(header);
344   else if (header.e_machine == llvm::ELF::EM_LOONGARCH)
345     return loongarchVariantFromElfFlags(header);
346 
347   return LLDB_INVALID_CPUTYPE;
348 }
349 
350 char ObjectFileELF::ID;
351 
352 // Arbitrary constant used as UUID prefix for core files.
353 const uint32_t ObjectFileELF::g_core_uuid_magic(0xE210C);
354 
355 // Static methods.
Initialize()356 void ObjectFileELF::Initialize() {
357   PluginManager::RegisterPlugin(GetPluginNameStatic(),
358                                 GetPluginDescriptionStatic(), CreateInstance,
359                                 CreateMemoryInstance, GetModuleSpecifications);
360 }
361 
Terminate()362 void ObjectFileELF::Terminate() {
363   PluginManager::UnregisterPlugin(CreateInstance);
364 }
365 
CreateInstance(const lldb::ModuleSP & module_sp,DataBufferSP data_sp,lldb::offset_t data_offset,const lldb_private::FileSpec * file,lldb::offset_t file_offset,lldb::offset_t length)366 ObjectFile *ObjectFileELF::CreateInstance(const lldb::ModuleSP &module_sp,
367                                           DataBufferSP data_sp,
368                                           lldb::offset_t data_offset,
369                                           const lldb_private::FileSpec *file,
370                                           lldb::offset_t file_offset,
371                                           lldb::offset_t length) {
372   bool mapped_writable = false;
373   if (!data_sp) {
374     data_sp = MapFileDataWritable(*file, length, file_offset);
375     if (!data_sp)
376       return nullptr;
377     data_offset = 0;
378     mapped_writable = true;
379   }
380 
381   assert(data_sp);
382 
383   if (data_sp->GetByteSize() <= (llvm::ELF::EI_NIDENT + data_offset))
384     return nullptr;
385 
386   const uint8_t *magic = data_sp->GetBytes() + data_offset;
387   if (!ELFHeader::MagicBytesMatch(magic))
388     return nullptr;
389 
390   // Update the data to contain the entire file if it doesn't already
391   if (data_sp->GetByteSize() < length) {
392     data_sp = MapFileDataWritable(*file, length, file_offset);
393     if (!data_sp)
394       return nullptr;
395     data_offset = 0;
396     mapped_writable = true;
397     magic = data_sp->GetBytes();
398   }
399 
400   // If we didn't map the data as writable take ownership of the buffer.
401   if (!mapped_writable) {
402     data_sp = std::make_shared<DataBufferHeap>(data_sp->GetBytes(),
403                                                data_sp->GetByteSize());
404     data_offset = 0;
405     magic = data_sp->GetBytes();
406   }
407 
408   unsigned address_size = ELFHeader::AddressSizeInBytes(magic);
409   if (address_size == 4 || address_size == 8) {
410     std::unique_ptr<ObjectFileELF> objfile_up(new ObjectFileELF(
411         module_sp, data_sp, data_offset, file, file_offset, length));
412     ArchSpec spec = objfile_up->GetArchitecture();
413     if (spec && objfile_up->SetModulesArchitecture(spec))
414       return objfile_up.release();
415   }
416 
417   return nullptr;
418 }
419 
CreateMemoryInstance(const lldb::ModuleSP & module_sp,WritableDataBufferSP data_sp,const lldb::ProcessSP & process_sp,lldb::addr_t header_addr)420 ObjectFile *ObjectFileELF::CreateMemoryInstance(
421     const lldb::ModuleSP &module_sp, WritableDataBufferSP data_sp,
422     const lldb::ProcessSP &process_sp, lldb::addr_t header_addr) {
423   if (!data_sp || data_sp->GetByteSize() < (llvm::ELF::EI_NIDENT))
424     return nullptr;
425   const uint8_t *magic = data_sp->GetBytes();
426   if (!ELFHeader::MagicBytesMatch(magic))
427     return nullptr;
428   // Read the ELF header first so we can figure out how many bytes we need
429   // to read to get as least the ELF header + program headers.
430   DataExtractor data;
431   data.SetData(data_sp);
432   elf::ELFHeader hdr;
433   lldb::offset_t offset = 0;
434   if (!hdr.Parse(data, &offset))
435     return nullptr;
436 
437   // Make sure the address size is set correctly in the ELF header.
438   if (!hdr.Is32Bit() && !hdr.Is64Bit())
439     return nullptr;
440   // Figure out where the program headers end and read enough bytes to get the
441   // program headers in their entirety.
442   lldb::offset_t end_phdrs = hdr.e_phoff + (hdr.e_phentsize * hdr.e_phnum);
443   if (end_phdrs > data_sp->GetByteSize())
444     data_sp = ReadMemory(process_sp, header_addr, end_phdrs);
445 
446   std::unique_ptr<ObjectFileELF> objfile_up(
447       new ObjectFileELF(module_sp, data_sp, process_sp, header_addr));
448   ArchSpec spec = objfile_up->GetArchitecture();
449   if (spec && objfile_up->SetModulesArchitecture(spec))
450     return objfile_up.release();
451 
452   return nullptr;
453 }
454 
MagicBytesMatch(DataBufferSP & data_sp,lldb::addr_t data_offset,lldb::addr_t data_length)455 bool ObjectFileELF::MagicBytesMatch(DataBufferSP &data_sp,
456                                     lldb::addr_t data_offset,
457                                     lldb::addr_t data_length) {
458   if (data_sp &&
459       data_sp->GetByteSize() > (llvm::ELF::EI_NIDENT + data_offset)) {
460     const uint8_t *magic = data_sp->GetBytes() + data_offset;
461     return ELFHeader::MagicBytesMatch(magic);
462   }
463   return false;
464 }
465 
calc_crc32(uint32_t init,const DataExtractor & data)466 static uint32_t calc_crc32(uint32_t init, const DataExtractor &data) {
467   return llvm::crc32(init,
468                      llvm::ArrayRef(data.GetDataStart(), data.GetByteSize()));
469 }
470 
CalculateELFNotesSegmentsCRC32(const ProgramHeaderColl & program_headers,DataExtractor & object_data)471 uint32_t ObjectFileELF::CalculateELFNotesSegmentsCRC32(
472     const ProgramHeaderColl &program_headers, DataExtractor &object_data) {
473 
474   uint32_t core_notes_crc = 0;
475 
476   for (const ELFProgramHeader &H : program_headers) {
477     if (H.p_type == llvm::ELF::PT_NOTE) {
478       const elf_off ph_offset = H.p_offset;
479       const size_t ph_size = H.p_filesz;
480 
481       DataExtractor segment_data;
482       if (segment_data.SetData(object_data, ph_offset, ph_size) != ph_size) {
483         // The ELF program header contained incorrect data, probably corefile
484         // is incomplete or corrupted.
485         break;
486       }
487 
488       core_notes_crc = calc_crc32(core_notes_crc, segment_data);
489     }
490   }
491 
492   return core_notes_crc;
493 }
494 
OSABIAsCString(unsigned char osabi_byte)495 static const char *OSABIAsCString(unsigned char osabi_byte) {
496 #define _MAKE_OSABI_CASE(x)                                                    \
497   case x:                                                                      \
498     return #x
499   switch (osabi_byte) {
500     _MAKE_OSABI_CASE(ELFOSABI_NONE);
501     _MAKE_OSABI_CASE(ELFOSABI_HPUX);
502     _MAKE_OSABI_CASE(ELFOSABI_NETBSD);
503     _MAKE_OSABI_CASE(ELFOSABI_GNU);
504     _MAKE_OSABI_CASE(ELFOSABI_HURD);
505     _MAKE_OSABI_CASE(ELFOSABI_SOLARIS);
506     _MAKE_OSABI_CASE(ELFOSABI_AIX);
507     _MAKE_OSABI_CASE(ELFOSABI_IRIX);
508     _MAKE_OSABI_CASE(ELFOSABI_FREEBSD);
509     _MAKE_OSABI_CASE(ELFOSABI_TRU64);
510     _MAKE_OSABI_CASE(ELFOSABI_MODESTO);
511     _MAKE_OSABI_CASE(ELFOSABI_OPENBSD);
512     _MAKE_OSABI_CASE(ELFOSABI_OPENVMS);
513     _MAKE_OSABI_CASE(ELFOSABI_NSK);
514     _MAKE_OSABI_CASE(ELFOSABI_AROS);
515     _MAKE_OSABI_CASE(ELFOSABI_FENIXOS);
516     _MAKE_OSABI_CASE(ELFOSABI_C6000_ELFABI);
517     _MAKE_OSABI_CASE(ELFOSABI_C6000_LINUX);
518     _MAKE_OSABI_CASE(ELFOSABI_ARM);
519     _MAKE_OSABI_CASE(ELFOSABI_STANDALONE);
520   default:
521     return "<unknown-osabi>";
522   }
523 #undef _MAKE_OSABI_CASE
524 }
525 
526 //
527 // WARNING : This function is being deprecated
528 // It's functionality has moved to ArchSpec::SetArchitecture This function is
529 // only being kept to validate the move.
530 //
531 // TODO : Remove this function
GetOsFromOSABI(unsigned char osabi_byte,llvm::Triple::OSType & ostype)532 static bool GetOsFromOSABI(unsigned char osabi_byte,
533                            llvm::Triple::OSType &ostype) {
534   switch (osabi_byte) {
535   case ELFOSABI_AIX:
536     ostype = llvm::Triple::OSType::AIX;
537     break;
538   case ELFOSABI_FREEBSD:
539     ostype = llvm::Triple::OSType::FreeBSD;
540     break;
541   case ELFOSABI_GNU:
542     ostype = llvm::Triple::OSType::Linux;
543     break;
544   case ELFOSABI_NETBSD:
545     ostype = llvm::Triple::OSType::NetBSD;
546     break;
547   case ELFOSABI_OPENBSD:
548     ostype = llvm::Triple::OSType::OpenBSD;
549     break;
550   case ELFOSABI_SOLARIS:
551     ostype = llvm::Triple::OSType::Solaris;
552     break;
553   default:
554     ostype = llvm::Triple::OSType::UnknownOS;
555   }
556   return ostype != llvm::Triple::OSType::UnknownOS;
557 }
558 
GetModuleSpecifications(const lldb_private::FileSpec & file,lldb::DataBufferSP & data_sp,lldb::offset_t data_offset,lldb::offset_t file_offset,lldb::offset_t length,lldb_private::ModuleSpecList & specs)559 size_t ObjectFileELF::GetModuleSpecifications(
560     const lldb_private::FileSpec &file, lldb::DataBufferSP &data_sp,
561     lldb::offset_t data_offset, lldb::offset_t file_offset,
562     lldb::offset_t length, lldb_private::ModuleSpecList &specs) {
563   Log *log = GetLog(LLDBLog::Modules);
564 
565   const size_t initial_count = specs.GetSize();
566 
567   if (ObjectFileELF::MagicBytesMatch(data_sp, 0, data_sp->GetByteSize())) {
568     DataExtractor data;
569     data.SetData(data_sp);
570     elf::ELFHeader header;
571     lldb::offset_t header_offset = data_offset;
572     if (header.Parse(data, &header_offset)) {
573       if (data_sp) {
574         ModuleSpec spec(file);
575         // In Android API level 23 and above, bionic dynamic linker is able to
576         // load .so file directly from zip file. In that case, .so file is
577         // page aligned and uncompressed, and this module spec should retain the
578         // .so file offset and file size to pass through the information from
579         // lldb-server to LLDB. For normal file, file_offset should be 0,
580         // length should be the size of the file.
581         spec.SetObjectOffset(file_offset);
582         spec.SetObjectSize(length);
583 
584         const uint32_t sub_type = subTypeFromElfHeader(header);
585         spec.GetArchitecture().SetArchitecture(
586             eArchTypeELF, header.e_machine, sub_type, header.e_ident[EI_OSABI]);
587 
588         if (spec.GetArchitecture().IsValid()) {
589           llvm::Triple::OSType ostype;
590           llvm::Triple::VendorType vendor;
591           llvm::Triple::OSType spec_ostype =
592               spec.GetArchitecture().GetTriple().getOS();
593 
594           LLDB_LOGF(log, "ObjectFileELF::%s file '%s' module OSABI: %s",
595                     __FUNCTION__, file.GetPath().c_str(),
596                     OSABIAsCString(header.e_ident[EI_OSABI]));
597 
598           // SetArchitecture should have set the vendor to unknown
599           vendor = spec.GetArchitecture().GetTriple().getVendor();
600           assert(vendor == llvm::Triple::UnknownVendor);
601           UNUSED_IF_ASSERT_DISABLED(vendor);
602 
603           //
604           // Validate it is ok to remove GetOsFromOSABI
605           GetOsFromOSABI(header.e_ident[EI_OSABI], ostype);
606           assert(spec_ostype == ostype);
607           if (spec_ostype != llvm::Triple::OSType::UnknownOS) {
608             LLDB_LOGF(log,
609                       "ObjectFileELF::%s file '%s' set ELF module OS type "
610                       "from ELF header OSABI.",
611                       __FUNCTION__, file.GetPath().c_str());
612           }
613 
614           // When ELF file does not contain GNU build ID, the later code will
615           // calculate CRC32 with this data_sp file_offset and length. It is
616           // important for Android zip .so file, which is a slice of a file,
617           // to not access the outside of the file slice range.
618           if (data_sp->GetByteSize() < length)
619             data_sp = MapFileData(file, length, file_offset);
620           if (data_sp)
621             data.SetData(data_sp);
622           // In case there is header extension in the section #0, the header we
623           // parsed above could have sentinel values for e_phnum, e_shnum, and
624           // e_shstrndx.  In this case we need to reparse the header with a
625           // bigger data source to get the actual values.
626           if (header.HasHeaderExtension()) {
627             lldb::offset_t header_offset = data_offset;
628             header.Parse(data, &header_offset);
629           }
630 
631           uint32_t gnu_debuglink_crc = 0;
632           std::string gnu_debuglink_file;
633           SectionHeaderColl section_headers;
634           lldb_private::UUID &uuid = spec.GetUUID();
635 
636           GetSectionHeaderInfo(section_headers, data, header, uuid,
637                                gnu_debuglink_file, gnu_debuglink_crc,
638                                spec.GetArchitecture());
639 
640           llvm::Triple &spec_triple = spec.GetArchitecture().GetTriple();
641 
642           LLDB_LOGF(log,
643                     "ObjectFileELF::%s file '%s' module set to triple: %s "
644                     "(architecture %s)",
645                     __FUNCTION__, file.GetPath().c_str(),
646                     spec_triple.getTriple().c_str(),
647                     spec.GetArchitecture().GetArchitectureName());
648 
649           if (!uuid.IsValid()) {
650             uint32_t core_notes_crc = 0;
651 
652             if (!gnu_debuglink_crc) {
653               LLDB_SCOPED_TIMERF(
654                   "Calculating module crc32 %s with size %" PRIu64 " KiB",
655                   file.GetFilename().AsCString(),
656                   (length - file_offset) / 1024);
657 
658               // For core files - which usually don't happen to have a
659               // gnu_debuglink, and are pretty bulky - calculating whole
660               // contents crc32 would be too much of luxury.  Thus we will need
661               // to fallback to something simpler.
662               if (header.e_type == llvm::ELF::ET_CORE) {
663                 ProgramHeaderColl program_headers;
664                 GetProgramHeaderInfo(program_headers, data, header);
665 
666                 core_notes_crc =
667                     CalculateELFNotesSegmentsCRC32(program_headers, data);
668               } else {
669                 gnu_debuglink_crc = calc_crc32(0, data);
670               }
671             }
672             using u32le = llvm::support::ulittle32_t;
673             if (gnu_debuglink_crc) {
674               // Use 4 bytes of crc from the .gnu_debuglink section.
675               u32le data(gnu_debuglink_crc);
676               uuid = UUID(&data, sizeof(data));
677             } else if (core_notes_crc) {
678               // Use 8 bytes - first 4 bytes for *magic* prefix, mainly to make
679               // it look different form .gnu_debuglink crc followed by 4 bytes
680               // of note segments crc.
681               u32le data[] = {u32le(g_core_uuid_magic), u32le(core_notes_crc)};
682               uuid = UUID(data, sizeof(data));
683             }
684           }
685 
686           specs.Append(spec);
687         }
688       }
689     }
690   }
691 
692   return specs.GetSize() - initial_count;
693 }
694 
695 // ObjectFile protocol
696 
ObjectFileELF(const lldb::ModuleSP & module_sp,DataBufferSP data_sp,lldb::offset_t data_offset,const FileSpec * file,lldb::offset_t file_offset,lldb::offset_t length)697 ObjectFileELF::ObjectFileELF(const lldb::ModuleSP &module_sp,
698                              DataBufferSP data_sp, lldb::offset_t data_offset,
699                              const FileSpec *file, lldb::offset_t file_offset,
700                              lldb::offset_t length)
701     : ObjectFile(module_sp, file, file_offset, length, data_sp, data_offset) {
702   if (file)
703     m_file = *file;
704 }
705 
ObjectFileELF(const lldb::ModuleSP & module_sp,DataBufferSP header_data_sp,const lldb::ProcessSP & process_sp,addr_t header_addr)706 ObjectFileELF::ObjectFileELF(const lldb::ModuleSP &module_sp,
707                              DataBufferSP header_data_sp,
708                              const lldb::ProcessSP &process_sp,
709                              addr_t header_addr)
710     : ObjectFile(module_sp, process_sp, header_addr, header_data_sp) {}
711 
IsExecutable() const712 bool ObjectFileELF::IsExecutable() const {
713   return ((m_header.e_type & ET_EXEC) != 0) || (m_header.e_entry != 0);
714 }
715 
SetLoadAddress(Target & target,lldb::addr_t value,bool value_is_offset)716 bool ObjectFileELF::SetLoadAddress(Target &target, lldb::addr_t value,
717                                    bool value_is_offset) {
718   ModuleSP module_sp = GetModule();
719   if (module_sp) {
720     size_t num_loaded_sections = 0;
721     SectionList *section_list = GetSectionList();
722     if (section_list) {
723       if (!value_is_offset) {
724         addr_t base = GetBaseAddress().GetFileAddress();
725         if (base == LLDB_INVALID_ADDRESS)
726           return false;
727         value -= base;
728       }
729 
730       const size_t num_sections = section_list->GetSize();
731       size_t sect_idx = 0;
732 
733       for (sect_idx = 0; sect_idx < num_sections; ++sect_idx) {
734         // Iterate through the object file sections to find all of the sections
735         // that have SHF_ALLOC in their flag bits.
736         SectionSP section_sp(section_list->GetSectionAtIndex(sect_idx));
737 
738         // PT_TLS segments can have the same p_vaddr and p_paddr as other
739         // PT_LOAD segments so we shouldn't load them. If we do load them, then
740         // the SectionLoadList will incorrectly fill in the instance variable
741         // SectionLoadList::m_addr_to_sect with the same address as a PT_LOAD
742         // segment and we won't be able to resolve addresses in the PT_LOAD
743         // segment whose p_vaddr entry matches that of the PT_TLS. Any variables
744         // that appear in the PT_TLS segments get resolved by the DWARF
745         // expressions. If this ever changes we will need to fix all object
746         // file plug-ins, but until then, we don't want PT_TLS segments to
747         // remove the entry from SectionLoadList::m_addr_to_sect when we call
748         // SetSectionLoadAddress() below.
749         if (section_sp->IsThreadSpecific())
750           continue;
751         if (section_sp->Test(SHF_ALLOC) ||
752             section_sp->GetType() == eSectionTypeContainer) {
753           lldb::addr_t load_addr = section_sp->GetFileAddress();
754           // We don't want to update the load address of a section with type
755           // eSectionTypeAbsoluteAddress as they already have the absolute load
756           // address already specified
757           if (section_sp->GetType() != eSectionTypeAbsoluteAddress)
758             load_addr += value;
759 
760           // On 32-bit systems the load address have to fit into 4 bytes. The
761           // rest of the bytes are the overflow from the addition.
762           if (GetAddressByteSize() == 4)
763             load_addr &= 0xFFFFFFFF;
764 
765           if (target.SetSectionLoadAddress(section_sp, load_addr))
766             ++num_loaded_sections;
767         }
768       }
769       return num_loaded_sections > 0;
770     }
771   }
772   return false;
773 }
774 
GetByteOrder() const775 ByteOrder ObjectFileELF::GetByteOrder() const {
776   if (m_header.e_ident[EI_DATA] == ELFDATA2MSB)
777     return eByteOrderBig;
778   if (m_header.e_ident[EI_DATA] == ELFDATA2LSB)
779     return eByteOrderLittle;
780   return eByteOrderInvalid;
781 }
782 
GetAddressByteSize() const783 uint32_t ObjectFileELF::GetAddressByteSize() const {
784   return m_data.GetAddressByteSize();
785 }
786 
GetAddressClass(addr_t file_addr)787 AddressClass ObjectFileELF::GetAddressClass(addr_t file_addr) {
788   Symtab *symtab = GetSymtab();
789   if (!symtab)
790     return AddressClass::eUnknown;
791 
792   // The address class is determined based on the symtab. Ask it from the
793   // object file what contains the symtab information.
794   ObjectFile *symtab_objfile = symtab->GetObjectFile();
795   if (symtab_objfile != nullptr && symtab_objfile != this)
796     return symtab_objfile->GetAddressClass(file_addr);
797 
798   auto res = ObjectFile::GetAddressClass(file_addr);
799   if (res != AddressClass::eCode)
800     return res;
801 
802   auto ub = m_address_class_map.upper_bound(file_addr);
803   if (ub == m_address_class_map.begin()) {
804     // No entry in the address class map before the address. Return default
805     // address class for an address in a code section.
806     return AddressClass::eCode;
807   }
808 
809   // Move iterator to the address class entry preceding address
810   --ub;
811 
812   return ub->second;
813 }
814 
SectionIndex(const SectionHeaderCollIter & I)815 size_t ObjectFileELF::SectionIndex(const SectionHeaderCollIter &I) {
816   return std::distance(m_section_headers.begin(), I);
817 }
818 
SectionIndex(const SectionHeaderCollConstIter & I) const819 size_t ObjectFileELF::SectionIndex(const SectionHeaderCollConstIter &I) const {
820   return std::distance(m_section_headers.begin(), I);
821 }
822 
ParseHeader()823 bool ObjectFileELF::ParseHeader() {
824   lldb::offset_t offset = 0;
825   return m_header.Parse(m_data, &offset);
826 }
827 
GetUUID()828 UUID ObjectFileELF::GetUUID() {
829   // Need to parse the section list to get the UUIDs, so make sure that's been
830   // done.
831   if (!ParseSectionHeaders() && GetType() != ObjectFile::eTypeCoreFile)
832     return UUID();
833 
834   if (!m_uuid) {
835     using u32le = llvm::support::ulittle32_t;
836     if (GetType() == ObjectFile::eTypeCoreFile) {
837       uint32_t core_notes_crc = 0;
838 
839       if (!ParseProgramHeaders())
840         return UUID();
841 
842       core_notes_crc =
843           CalculateELFNotesSegmentsCRC32(m_program_headers, m_data);
844 
845       if (core_notes_crc) {
846         // Use 8 bytes - first 4 bytes for *magic* prefix, mainly to make it
847         // look different form .gnu_debuglink crc - followed by 4 bytes of note
848         // segments crc.
849         u32le data[] = {u32le(g_core_uuid_magic), u32le(core_notes_crc)};
850         m_uuid = UUID(data, sizeof(data));
851       }
852     } else {
853       if (!m_gnu_debuglink_crc)
854         m_gnu_debuglink_crc = calc_crc32(0, m_data);
855       if (m_gnu_debuglink_crc) {
856         // Use 4 bytes of crc from the .gnu_debuglink section.
857         u32le data(m_gnu_debuglink_crc);
858         m_uuid = UUID(&data, sizeof(data));
859       }
860     }
861   }
862 
863   return m_uuid;
864 }
865 
GetDebugLink()866 std::optional<FileSpec> ObjectFileELF::GetDebugLink() {
867   if (m_gnu_debuglink_file.empty())
868     return std::nullopt;
869   return FileSpec(m_gnu_debuglink_file);
870 }
871 
GetDependentModules(FileSpecList & files)872 uint32_t ObjectFileELF::GetDependentModules(FileSpecList &files) {
873   size_t num_modules = ParseDependentModules();
874   uint32_t num_specs = 0;
875 
876   for (unsigned i = 0; i < num_modules; ++i) {
877     if (files.AppendIfUnique(m_filespec_up->GetFileSpecAtIndex(i)))
878       num_specs++;
879   }
880 
881   return num_specs;
882 }
883 
GetImageInfoAddress(Target * target)884 Address ObjectFileELF::GetImageInfoAddress(Target *target) {
885   if (!ParseDynamicSymbols())
886     return Address();
887 
888   SectionList *section_list = GetSectionList();
889   if (!section_list)
890     return Address();
891 
892   for (size_t i = 0; i < m_dynamic_symbols.size(); ++i) {
893     const ELFDynamic &symbol = m_dynamic_symbols[i].symbol;
894 
895     if (symbol.d_tag != DT_DEBUG && symbol.d_tag != DT_MIPS_RLD_MAP &&
896         symbol.d_tag != DT_MIPS_RLD_MAP_REL)
897       continue;
898 
899     // Compute the offset as the number of previous entries plus the size of
900     // d_tag.
901     const addr_t offset = (i * 2 + 1) * GetAddressByteSize();
902     const addr_t d_file_addr = m_dynamic_base_addr + offset;
903     Address d_addr;
904     if (!d_addr.ResolveAddressUsingFileSections(d_file_addr, GetSectionList()))
905       return Address();
906     if (symbol.d_tag == DT_DEBUG)
907       return d_addr;
908 
909     // MIPS executables uses DT_MIPS_RLD_MAP_REL to support PIE. DT_MIPS_RLD_MAP
910     // exists in non-PIE.
911     if ((symbol.d_tag == DT_MIPS_RLD_MAP ||
912          symbol.d_tag == DT_MIPS_RLD_MAP_REL) &&
913         target) {
914       const addr_t d_load_addr = d_addr.GetLoadAddress(target);
915       if (d_load_addr == LLDB_INVALID_ADDRESS)
916         return Address();
917 
918       Status error;
919       if (symbol.d_tag == DT_MIPS_RLD_MAP) {
920         // DT_MIPS_RLD_MAP tag stores an absolute address of the debug pointer.
921         Address addr;
922         if (target->ReadPointerFromMemory(d_load_addr, error, addr, true))
923           return addr;
924       }
925       if (symbol.d_tag == DT_MIPS_RLD_MAP_REL) {
926         // DT_MIPS_RLD_MAP_REL tag stores the offset to the debug pointer,
927         // relative to the address of the tag.
928         uint64_t rel_offset;
929         rel_offset = target->ReadUnsignedIntegerFromMemory(
930             d_load_addr, GetAddressByteSize(), UINT64_MAX, error, true);
931         if (error.Success() && rel_offset != UINT64_MAX) {
932           Address addr;
933           addr_t debug_ptr_address =
934               d_load_addr - GetAddressByteSize() + rel_offset;
935           addr.SetOffset(debug_ptr_address);
936           return addr;
937         }
938       }
939     }
940   }
941   return Address();
942 }
943 
GetEntryPointAddress()944 lldb_private::Address ObjectFileELF::GetEntryPointAddress() {
945   if (m_entry_point_address.IsValid())
946     return m_entry_point_address;
947 
948   if (!ParseHeader() || !IsExecutable())
949     return m_entry_point_address;
950 
951   SectionList *section_list = GetSectionList();
952   addr_t offset = m_header.e_entry;
953 
954   if (!section_list)
955     m_entry_point_address.SetOffset(offset);
956   else
957     m_entry_point_address.ResolveAddressUsingFileSections(offset, section_list);
958   return m_entry_point_address;
959 }
960 
GetBaseAddress()961 Address ObjectFileELF::GetBaseAddress() {
962   if (GetType() == ObjectFile::eTypeObjectFile) {
963     for (SectionHeaderCollIter I = std::next(m_section_headers.begin());
964          I != m_section_headers.end(); ++I) {
965       const ELFSectionHeaderInfo &header = *I;
966       if (header.sh_flags & SHF_ALLOC)
967         return Address(GetSectionList()->FindSectionByID(SectionIndex(I)), 0);
968     }
969     return LLDB_INVALID_ADDRESS;
970   }
971 
972   for (const auto &EnumPHdr : llvm::enumerate(ProgramHeaders())) {
973     const ELFProgramHeader &H = EnumPHdr.value();
974     if (H.p_type != PT_LOAD)
975       continue;
976 
977     return Address(
978         GetSectionList()->FindSectionByID(SegmentID(EnumPHdr.index())), 0);
979   }
980   return LLDB_INVALID_ADDRESS;
981 }
982 
ParseDependentModules()983 size_t ObjectFileELF::ParseDependentModules() {
984   if (m_filespec_up)
985     return m_filespec_up->GetSize();
986 
987   m_filespec_up = std::make_unique<FileSpecList>();
988 
989   if (ParseDynamicSymbols()) {
990     for (const auto &entry : m_dynamic_symbols) {
991       if (entry.symbol.d_tag != DT_NEEDED)
992         continue;
993       if (!entry.name.empty()) {
994         FileSpec file_spec(entry.name);
995         FileSystem::Instance().Resolve(file_spec);
996         m_filespec_up->Append(file_spec);
997       }
998     }
999   }
1000   return m_filespec_up->GetSize();
1001 }
1002 
1003 // GetProgramHeaderInfo
GetProgramHeaderInfo(ProgramHeaderColl & program_headers,DataExtractor & object_data,const ELFHeader & header)1004 size_t ObjectFileELF::GetProgramHeaderInfo(ProgramHeaderColl &program_headers,
1005                                            DataExtractor &object_data,
1006                                            const ELFHeader &header) {
1007   // We have already parsed the program headers
1008   if (!program_headers.empty())
1009     return program_headers.size();
1010 
1011   // If there are no program headers to read we are done.
1012   if (header.e_phnum == 0)
1013     return 0;
1014 
1015   program_headers.resize(header.e_phnum);
1016   if (program_headers.size() != header.e_phnum)
1017     return 0;
1018 
1019   const size_t ph_size = header.e_phnum * header.e_phentsize;
1020   const elf_off ph_offset = header.e_phoff;
1021   DataExtractor data;
1022   if (data.SetData(object_data, ph_offset, ph_size) != ph_size)
1023     return 0;
1024 
1025   uint32_t idx;
1026   lldb::offset_t offset;
1027   for (idx = 0, offset = 0; idx < header.e_phnum; ++idx) {
1028     if (!program_headers[idx].Parse(data, &offset))
1029       break;
1030   }
1031 
1032   if (idx < program_headers.size())
1033     program_headers.resize(idx);
1034 
1035   return program_headers.size();
1036 }
1037 
1038 // ParseProgramHeaders
ParseProgramHeaders()1039 bool ObjectFileELF::ParseProgramHeaders() {
1040   return GetProgramHeaderInfo(m_program_headers, m_data, m_header) != 0;
1041 }
1042 
1043 lldb_private::Status
RefineModuleDetailsFromNote(lldb_private::DataExtractor & data,lldb_private::ArchSpec & arch_spec,lldb_private::UUID & uuid)1044 ObjectFileELF::RefineModuleDetailsFromNote(lldb_private::DataExtractor &data,
1045                                            lldb_private::ArchSpec &arch_spec,
1046                                            lldb_private::UUID &uuid) {
1047   Log *log = GetLog(LLDBLog::Modules);
1048   Status error;
1049 
1050   lldb::offset_t offset = 0;
1051 
1052   while (true) {
1053     // Parse the note header.  If this fails, bail out.
1054     const lldb::offset_t note_offset = offset;
1055     ELFNote note = ELFNote();
1056     if (!note.Parse(data, &offset)) {
1057       // We're done.
1058       return error;
1059     }
1060 
1061     LLDB_LOGF(log, "ObjectFileELF::%s parsing note name='%s', type=%" PRIu32,
1062               __FUNCTION__, note.n_name.c_str(), note.n_type);
1063 
1064     // Process FreeBSD ELF notes.
1065     if ((note.n_name == LLDB_NT_OWNER_FREEBSD) &&
1066         (note.n_type == LLDB_NT_FREEBSD_ABI_TAG) &&
1067         (note.n_descsz == LLDB_NT_FREEBSD_ABI_SIZE)) {
1068       // Pull out the min version info.
1069       uint32_t version_info;
1070       if (data.GetU32(&offset, &version_info, 1) == nullptr) {
1071         error =
1072             Status::FromErrorString("failed to read FreeBSD ABI note payload");
1073         return error;
1074       }
1075 
1076       // Convert the version info into a major/minor number.
1077       const uint32_t version_major = version_info / 100000;
1078       const uint32_t version_minor = (version_info / 1000) % 100;
1079 
1080       char os_name[32];
1081       snprintf(os_name, sizeof(os_name), "freebsd%" PRIu32 ".%" PRIu32,
1082                version_major, version_minor);
1083 
1084       // Set the elf OS version to FreeBSD.  Also clear the vendor.
1085       arch_spec.GetTriple().setOSName(os_name);
1086       arch_spec.GetTriple().setVendor(llvm::Triple::VendorType::UnknownVendor);
1087 
1088       LLDB_LOGF(log,
1089                 "ObjectFileELF::%s detected FreeBSD %" PRIu32 ".%" PRIu32
1090                 ".%" PRIu32,
1091                 __FUNCTION__, version_major, version_minor,
1092                 static_cast<uint32_t>(version_info % 1000));
1093     }
1094     // Process GNU ELF notes.
1095     else if (note.n_name == LLDB_NT_OWNER_GNU) {
1096       switch (note.n_type) {
1097       case LLDB_NT_GNU_ABI_TAG:
1098         if (note.n_descsz == LLDB_NT_GNU_ABI_SIZE) {
1099           // Pull out the min OS version supporting the ABI.
1100           uint32_t version_info[4];
1101           if (data.GetU32(&offset, &version_info[0], note.n_descsz / 4) ==
1102               nullptr) {
1103             error =
1104                 Status::FromErrorString("failed to read GNU ABI note payload");
1105             return error;
1106           }
1107 
1108           // Set the OS per the OS field.
1109           switch (version_info[0]) {
1110           case LLDB_NT_GNU_ABI_OS_LINUX:
1111             arch_spec.GetTriple().setOS(llvm::Triple::OSType::Linux);
1112             arch_spec.GetTriple().setVendor(
1113                 llvm::Triple::VendorType::UnknownVendor);
1114             LLDB_LOGF(log,
1115                       "ObjectFileELF::%s detected Linux, min version %" PRIu32
1116                       ".%" PRIu32 ".%" PRIu32,
1117                       __FUNCTION__, version_info[1], version_info[2],
1118                       version_info[3]);
1119             // FIXME we have the minimal version number, we could be propagating
1120             // that.  version_info[1] = OS Major, version_info[2] = OS Minor,
1121             // version_info[3] = Revision.
1122             break;
1123           case LLDB_NT_GNU_ABI_OS_HURD:
1124             arch_spec.GetTriple().setOS(llvm::Triple::OSType::UnknownOS);
1125             arch_spec.GetTriple().setVendor(
1126                 llvm::Triple::VendorType::UnknownVendor);
1127             LLDB_LOGF(log,
1128                       "ObjectFileELF::%s detected Hurd (unsupported), min "
1129                       "version %" PRIu32 ".%" PRIu32 ".%" PRIu32,
1130                       __FUNCTION__, version_info[1], version_info[2],
1131                       version_info[3]);
1132             break;
1133           case LLDB_NT_GNU_ABI_OS_SOLARIS:
1134             arch_spec.GetTriple().setOS(llvm::Triple::OSType::Solaris);
1135             arch_spec.GetTriple().setVendor(
1136                 llvm::Triple::VendorType::UnknownVendor);
1137             LLDB_LOGF(log,
1138                       "ObjectFileELF::%s detected Solaris, min version %" PRIu32
1139                       ".%" PRIu32 ".%" PRIu32,
1140                       __FUNCTION__, version_info[1], version_info[2],
1141                       version_info[3]);
1142             break;
1143           default:
1144             LLDB_LOGF(log,
1145                       "ObjectFileELF::%s unrecognized OS in note, id %" PRIu32
1146                       ", min version %" PRIu32 ".%" PRIu32 ".%" PRIu32,
1147                       __FUNCTION__, version_info[0], version_info[1],
1148                       version_info[2], version_info[3]);
1149             break;
1150           }
1151         }
1152         break;
1153 
1154       case LLDB_NT_GNU_BUILD_ID_TAG:
1155         // Only bother processing this if we don't already have the uuid set.
1156         if (!uuid.IsValid()) {
1157           // 16 bytes is UUID|MD5, 20 bytes is SHA1. Other linkers may produce a
1158           // build-id of a different length. Accept it as long as it's at least
1159           // 4 bytes as it will be better than our own crc32.
1160           if (note.n_descsz >= 4) {
1161             if (const uint8_t *buf = data.PeekData(offset, note.n_descsz)) {
1162               // Save the build id as the UUID for the module.
1163               uuid = UUID(buf, note.n_descsz);
1164             } else {
1165               error = Status::FromErrorString(
1166                   "failed to read GNU_BUILD_ID note payload");
1167               return error;
1168             }
1169           }
1170         }
1171         break;
1172       }
1173       if (arch_spec.IsMIPS() &&
1174           arch_spec.GetTriple().getOS() == llvm::Triple::OSType::UnknownOS)
1175         // The note.n_name == LLDB_NT_OWNER_GNU is valid for Linux platform
1176         arch_spec.GetTriple().setOS(llvm::Triple::OSType::Linux);
1177     }
1178     // Process NetBSD ELF executables and shared libraries
1179     else if ((note.n_name == LLDB_NT_OWNER_NETBSD) &&
1180              (note.n_type == LLDB_NT_NETBSD_IDENT_TAG) &&
1181              (note.n_descsz == LLDB_NT_NETBSD_IDENT_DESCSZ) &&
1182              (note.n_namesz == LLDB_NT_NETBSD_IDENT_NAMESZ)) {
1183       // Pull out the version info.
1184       uint32_t version_info;
1185       if (data.GetU32(&offset, &version_info, 1) == nullptr) {
1186         error =
1187             Status::FromErrorString("failed to read NetBSD ABI note payload");
1188         return error;
1189       }
1190       // Convert the version info into a major/minor/patch number.
1191       //     #define __NetBSD_Version__ MMmmrrpp00
1192       //
1193       //     M = major version
1194       //     m = minor version; a minor number of 99 indicates current.
1195       //     r = 0 (since NetBSD 3.0 not used)
1196       //     p = patchlevel
1197       const uint32_t version_major = version_info / 100000000;
1198       const uint32_t version_minor = (version_info % 100000000) / 1000000;
1199       const uint32_t version_patch = (version_info % 10000) / 100;
1200       // Set the elf OS version to NetBSD.  Also clear the vendor.
1201       arch_spec.GetTriple().setOSName(
1202           llvm::formatv("netbsd{0}.{1}.{2}", version_major, version_minor,
1203                         version_patch).str());
1204       arch_spec.GetTriple().setVendor(llvm::Triple::VendorType::UnknownVendor);
1205     }
1206     // Process NetBSD ELF core(5) notes
1207     else if ((note.n_name == LLDB_NT_OWNER_NETBSDCORE) &&
1208              (note.n_type == LLDB_NT_NETBSD_PROCINFO)) {
1209       // Set the elf OS version to NetBSD.  Also clear the vendor.
1210       arch_spec.GetTriple().setOS(llvm::Triple::OSType::NetBSD);
1211       arch_spec.GetTriple().setVendor(llvm::Triple::VendorType::UnknownVendor);
1212     }
1213     // Process OpenBSD ELF notes.
1214     else if (note.n_name == LLDB_NT_OWNER_OPENBSD) {
1215       // Set the elf OS version to OpenBSD.  Also clear the vendor.
1216       arch_spec.GetTriple().setOS(llvm::Triple::OSType::OpenBSD);
1217       arch_spec.GetTriple().setVendor(llvm::Triple::VendorType::UnknownVendor);
1218     } else if (note.n_name == LLDB_NT_OWNER_ANDROID) {
1219       arch_spec.GetTriple().setOS(llvm::Triple::OSType::Linux);
1220       arch_spec.GetTriple().setEnvironment(
1221           llvm::Triple::EnvironmentType::Android);
1222     } else if (note.n_name == LLDB_NT_OWNER_LINUX) {
1223       // This is sometimes found in core files and usually contains extended
1224       // register info
1225       arch_spec.GetTriple().setOS(llvm::Triple::OSType::Linux);
1226     } else if (note.n_name == LLDB_NT_OWNER_CORE) {
1227       // Parse the NT_FILE to look for stuff in paths to shared libraries
1228       // The contents look like this in a 64 bit ELF core file:
1229       //
1230       // count     = 0x000000000000000a (10)
1231       // page_size = 0x0000000000001000 (4096)
1232       // Index start              end                file_ofs           path
1233       // ===== ------------------ ------------------ ------------------ -------------------------------------
1234       // [  0] 0x0000000000401000 0x0000000000000000                    /tmp/a.out
1235       // [  1] 0x0000000000600000 0x0000000000601000 0x0000000000000000 /tmp/a.out
1236       // [  2] 0x0000000000601000 0x0000000000602000 0x0000000000000001 /tmp/a.out
1237       // [  3] 0x00007fa79c9ed000 0x00007fa79cba8000 0x0000000000000000 /lib/x86_64-linux-gnu/libc-2.19.so
1238       // [  4] 0x00007fa79cba8000 0x00007fa79cda7000 0x00000000000001bb /lib/x86_64-linux-gnu/libc-2.19.so
1239       // [  5] 0x00007fa79cda7000 0x00007fa79cdab000 0x00000000000001ba /lib/x86_64-linux-gnu/libc-2.19.so
1240       // [  6] 0x00007fa79cdab000 0x00007fa79cdad000 0x00000000000001be /lib/x86_64-linux-gnu/libc-2.19.so
1241       // [  7] 0x00007fa79cdb2000 0x00007fa79cdd5000 0x0000000000000000 /lib/x86_64-linux-gnu/ld-2.19.so
1242       // [  8] 0x00007fa79cfd4000 0x00007fa79cfd5000 0x0000000000000022 /lib/x86_64-linux-gnu/ld-2.19.so
1243       // [  9] 0x00007fa79cfd5000 0x00007fa79cfd6000 0x0000000000000023 /lib/x86_64-linux-gnu/ld-2.19.so
1244       //
1245       // In the 32 bit ELFs the count, page_size, start, end, file_ofs are
1246       // uint32_t.
1247       //
1248       // For reference: see readelf source code (in binutils).
1249       if (note.n_type == NT_FILE) {
1250         uint64_t count = data.GetAddress(&offset);
1251         const char *cstr;
1252         data.GetAddress(&offset); // Skip page size
1253         offset += count * 3 *
1254                   data.GetAddressByteSize(); // Skip all start/end/file_ofs
1255         for (size_t i = 0; i < count; ++i) {
1256           cstr = data.GetCStr(&offset);
1257           if (cstr == nullptr) {
1258             error = Status::FromErrorStringWithFormat(
1259                 "ObjectFileELF::%s trying to read "
1260                 "at an offset after the end "
1261                 "(GetCStr returned nullptr)",
1262                 __FUNCTION__);
1263             return error;
1264           }
1265           llvm::StringRef path(cstr);
1266           if (path.contains("/lib/x86_64-linux-gnu") || path.contains("/lib/i386-linux-gnu")) {
1267             arch_spec.GetTriple().setOS(llvm::Triple::OSType::Linux);
1268             break;
1269           }
1270         }
1271         if (arch_spec.IsMIPS() &&
1272             arch_spec.GetTriple().getOS() == llvm::Triple::OSType::UnknownOS)
1273           // In case of MIPSR6, the LLDB_NT_OWNER_GNU note is missing for some
1274           // cases (e.g. compile with -nostdlib) Hence set OS to Linux
1275           arch_spec.GetTriple().setOS(llvm::Triple::OSType::Linux);
1276       }
1277     }
1278 
1279     // Calculate the offset of the next note just in case "offset" has been
1280     // used to poke at the contents of the note data
1281     offset = note_offset + note.GetByteSize();
1282   }
1283 
1284   return error;
1285 }
1286 
ParseARMAttributes(DataExtractor & data,uint64_t length,ArchSpec & arch_spec)1287 void ObjectFileELF::ParseARMAttributes(DataExtractor &data, uint64_t length,
1288                                        ArchSpec &arch_spec) {
1289   lldb::offset_t Offset = 0;
1290 
1291   uint8_t FormatVersion = data.GetU8(&Offset);
1292   if (FormatVersion != llvm::ELFAttrs::Format_Version)
1293     return;
1294 
1295   Offset = Offset + sizeof(uint32_t); // Section Length
1296   llvm::StringRef VendorName = data.GetCStr(&Offset);
1297 
1298   if (VendorName != "aeabi")
1299     return;
1300 
1301   if (arch_spec.GetTriple().getEnvironment() ==
1302       llvm::Triple::UnknownEnvironment)
1303     arch_spec.GetTriple().setEnvironment(llvm::Triple::EABI);
1304 
1305   while (Offset < length) {
1306     uint8_t Tag = data.GetU8(&Offset);
1307     uint32_t Size = data.GetU32(&Offset);
1308 
1309     if (Tag != llvm::ARMBuildAttrs::File || Size == 0)
1310       continue;
1311 
1312     while (Offset < length) {
1313       uint64_t Tag = data.GetULEB128(&Offset);
1314       switch (Tag) {
1315       default:
1316         if (Tag < 32)
1317           data.GetULEB128(&Offset);
1318         else if (Tag % 2 == 0)
1319           data.GetULEB128(&Offset);
1320         else
1321           data.GetCStr(&Offset);
1322 
1323         break;
1324 
1325       case llvm::ARMBuildAttrs::CPU_raw_name:
1326       case llvm::ARMBuildAttrs::CPU_name:
1327         data.GetCStr(&Offset);
1328 
1329         break;
1330 
1331       case llvm::ARMBuildAttrs::ABI_VFP_args: {
1332         uint64_t VFPArgs = data.GetULEB128(&Offset);
1333 
1334         if (VFPArgs == llvm::ARMBuildAttrs::BaseAAPCS) {
1335           if (arch_spec.GetTriple().getEnvironment() ==
1336                   llvm::Triple::UnknownEnvironment ||
1337               arch_spec.GetTriple().getEnvironment() == llvm::Triple::EABIHF)
1338             arch_spec.GetTriple().setEnvironment(llvm::Triple::EABI);
1339 
1340           arch_spec.SetFlags(ArchSpec::eARM_abi_soft_float);
1341         } else if (VFPArgs == llvm::ARMBuildAttrs::HardFPAAPCS) {
1342           if (arch_spec.GetTriple().getEnvironment() ==
1343                   llvm::Triple::UnknownEnvironment ||
1344               arch_spec.GetTriple().getEnvironment() == llvm::Triple::EABI)
1345             arch_spec.GetTriple().setEnvironment(llvm::Triple::EABIHF);
1346 
1347           arch_spec.SetFlags(ArchSpec::eARM_abi_hard_float);
1348         }
1349 
1350         break;
1351       }
1352       }
1353     }
1354   }
1355 }
1356 
1357 // GetSectionHeaderInfo
GetSectionHeaderInfo(SectionHeaderColl & section_headers,DataExtractor & object_data,const elf::ELFHeader & header,lldb_private::UUID & uuid,std::string & gnu_debuglink_file,uint32_t & gnu_debuglink_crc,ArchSpec & arch_spec)1358 size_t ObjectFileELF::GetSectionHeaderInfo(SectionHeaderColl &section_headers,
1359                                            DataExtractor &object_data,
1360                                            const elf::ELFHeader &header,
1361                                            lldb_private::UUID &uuid,
1362                                            std::string &gnu_debuglink_file,
1363                                            uint32_t &gnu_debuglink_crc,
1364                                            ArchSpec &arch_spec) {
1365   // Don't reparse the section headers if we already did that.
1366   if (!section_headers.empty())
1367     return section_headers.size();
1368 
1369   // Only initialize the arch_spec to okay defaults if they're not already set.
1370   // We'll refine this with note data as we parse the notes.
1371   if (arch_spec.GetTriple().getOS() == llvm::Triple::OSType::UnknownOS) {
1372     llvm::Triple::OSType ostype;
1373     llvm::Triple::OSType spec_ostype;
1374     const uint32_t sub_type = subTypeFromElfHeader(header);
1375     arch_spec.SetArchitecture(eArchTypeELF, header.e_machine, sub_type,
1376                               header.e_ident[EI_OSABI]);
1377 
1378     // Validate if it is ok to remove GetOsFromOSABI. Note, that now the OS is
1379     // determined based on EI_OSABI flag and the info extracted from ELF notes
1380     // (see RefineModuleDetailsFromNote). However in some cases that still
1381     // might be not enough: for example a shared library might not have any
1382     // notes at all and have EI_OSABI flag set to System V, as result the OS
1383     // will be set to UnknownOS.
1384     GetOsFromOSABI(header.e_ident[EI_OSABI], ostype);
1385     spec_ostype = arch_spec.GetTriple().getOS();
1386     assert(spec_ostype == ostype);
1387     UNUSED_IF_ASSERT_DISABLED(spec_ostype);
1388   }
1389 
1390   if (arch_spec.GetMachine() == llvm::Triple::mips ||
1391       arch_spec.GetMachine() == llvm::Triple::mipsel ||
1392       arch_spec.GetMachine() == llvm::Triple::mips64 ||
1393       arch_spec.GetMachine() == llvm::Triple::mips64el) {
1394     switch (header.e_flags & llvm::ELF::EF_MIPS_ARCH_ASE) {
1395     case llvm::ELF::EF_MIPS_MICROMIPS:
1396       arch_spec.SetFlags(ArchSpec::eMIPSAse_micromips);
1397       break;
1398     case llvm::ELF::EF_MIPS_ARCH_ASE_M16:
1399       arch_spec.SetFlags(ArchSpec::eMIPSAse_mips16);
1400       break;
1401     case llvm::ELF::EF_MIPS_ARCH_ASE_MDMX:
1402       arch_spec.SetFlags(ArchSpec::eMIPSAse_mdmx);
1403       break;
1404     default:
1405       break;
1406     }
1407   }
1408 
1409   if (arch_spec.GetMachine() == llvm::Triple::arm ||
1410       arch_spec.GetMachine() == llvm::Triple::thumb) {
1411     if (header.e_flags & llvm::ELF::EF_ARM_SOFT_FLOAT)
1412       arch_spec.SetFlags(ArchSpec::eARM_abi_soft_float);
1413     else if (header.e_flags & llvm::ELF::EF_ARM_VFP_FLOAT)
1414       arch_spec.SetFlags(ArchSpec::eARM_abi_hard_float);
1415   }
1416 
1417   if (arch_spec.GetMachine() == llvm::Triple::riscv32 ||
1418       arch_spec.GetMachine() == llvm::Triple::riscv64) {
1419     uint32_t flags = arch_spec.GetFlags();
1420 
1421     if (header.e_flags & llvm::ELF::EF_RISCV_RVC)
1422       flags |= ArchSpec::eRISCV_rvc;
1423     if (header.e_flags & llvm::ELF::EF_RISCV_RVE)
1424       flags |= ArchSpec::eRISCV_rve;
1425 
1426     if ((header.e_flags & llvm::ELF::EF_RISCV_FLOAT_ABI_SINGLE) ==
1427         llvm::ELF::EF_RISCV_FLOAT_ABI_SINGLE)
1428       flags |= ArchSpec::eRISCV_float_abi_single;
1429     else if ((header.e_flags & llvm::ELF::EF_RISCV_FLOAT_ABI_DOUBLE) ==
1430              llvm::ELF::EF_RISCV_FLOAT_ABI_DOUBLE)
1431       flags |= ArchSpec::eRISCV_float_abi_double;
1432     else if ((header.e_flags & llvm::ELF::EF_RISCV_FLOAT_ABI_QUAD) ==
1433              llvm::ELF::EF_RISCV_FLOAT_ABI_QUAD)
1434       flags |= ArchSpec::eRISCV_float_abi_quad;
1435 
1436     arch_spec.SetFlags(flags);
1437   }
1438 
1439   if (arch_spec.GetMachine() == llvm::Triple::loongarch32 ||
1440       arch_spec.GetMachine() == llvm::Triple::loongarch64) {
1441     uint32_t flags = arch_spec.GetFlags();
1442     switch (header.e_flags & llvm::ELF::EF_LOONGARCH_ABI_MODIFIER_MASK) {
1443     case llvm::ELF::EF_LOONGARCH_ABI_SINGLE_FLOAT:
1444       flags |= ArchSpec::eLoongArch_abi_single_float;
1445       break;
1446     case llvm::ELF::EF_LOONGARCH_ABI_DOUBLE_FLOAT:
1447       flags |= ArchSpec::eLoongArch_abi_double_float;
1448       break;
1449     case llvm::ELF::EF_LOONGARCH_ABI_SOFT_FLOAT:
1450       break;
1451     }
1452 
1453     arch_spec.SetFlags(flags);
1454   }
1455 
1456   // If there are no section headers we are done.
1457   if (header.e_shnum == 0)
1458     return 0;
1459 
1460   Log *log = GetLog(LLDBLog::Modules);
1461 
1462   section_headers.resize(header.e_shnum);
1463   if (section_headers.size() != header.e_shnum)
1464     return 0;
1465 
1466   const size_t sh_size = header.e_shnum * header.e_shentsize;
1467   const elf_off sh_offset = header.e_shoff;
1468   DataExtractor sh_data;
1469   if (sh_data.SetData(object_data, sh_offset, sh_size) != sh_size)
1470     return 0;
1471 
1472   uint32_t idx;
1473   lldb::offset_t offset;
1474   for (idx = 0, offset = 0; idx < header.e_shnum; ++idx) {
1475     if (!section_headers[idx].Parse(sh_data, &offset))
1476       break;
1477   }
1478   if (idx < section_headers.size())
1479     section_headers.resize(idx);
1480 
1481   const unsigned strtab_idx = header.e_shstrndx;
1482   if (strtab_idx && strtab_idx < section_headers.size()) {
1483     const ELFSectionHeaderInfo &sheader = section_headers[strtab_idx];
1484     const size_t byte_size = sheader.sh_size;
1485     const Elf64_Off offset = sheader.sh_offset;
1486     lldb_private::DataExtractor shstr_data;
1487 
1488     if (shstr_data.SetData(object_data, offset, byte_size) == byte_size) {
1489       for (SectionHeaderCollIter I = section_headers.begin();
1490            I != section_headers.end(); ++I) {
1491         static ConstString g_sect_name_gnu_debuglink(".gnu_debuglink");
1492         const ELFSectionHeaderInfo &sheader = *I;
1493         const uint64_t section_size =
1494             sheader.sh_type == SHT_NOBITS ? 0 : sheader.sh_size;
1495         ConstString name(shstr_data.PeekCStr(I->sh_name));
1496 
1497         I->section_name = name;
1498 
1499         if (arch_spec.IsMIPS()) {
1500           uint32_t arch_flags = arch_spec.GetFlags();
1501           DataExtractor data;
1502           if (sheader.sh_type == SHT_MIPS_ABIFLAGS) {
1503 
1504             if (section_size && (data.SetData(object_data, sheader.sh_offset,
1505                                               section_size) == section_size)) {
1506               // MIPS ASE Mask is at offset 12 in MIPS.abiflags section
1507               lldb::offset_t offset = 12; // MIPS ABI Flags Version: 0
1508               arch_flags |= data.GetU32(&offset);
1509 
1510               // The floating point ABI is at offset 7
1511               offset = 7;
1512               switch (data.GetU8(&offset)) {
1513               case llvm::Mips::Val_GNU_MIPS_ABI_FP_ANY:
1514                 arch_flags |= lldb_private::ArchSpec::eMIPS_ABI_FP_ANY;
1515                 break;
1516               case llvm::Mips::Val_GNU_MIPS_ABI_FP_DOUBLE:
1517                 arch_flags |= lldb_private::ArchSpec::eMIPS_ABI_FP_DOUBLE;
1518                 break;
1519               case llvm::Mips::Val_GNU_MIPS_ABI_FP_SINGLE:
1520                 arch_flags |= lldb_private::ArchSpec::eMIPS_ABI_FP_SINGLE;
1521                 break;
1522               case llvm::Mips::Val_GNU_MIPS_ABI_FP_SOFT:
1523                 arch_flags |= lldb_private::ArchSpec::eMIPS_ABI_FP_SOFT;
1524                 break;
1525               case llvm::Mips::Val_GNU_MIPS_ABI_FP_OLD_64:
1526                 arch_flags |= lldb_private::ArchSpec::eMIPS_ABI_FP_OLD_64;
1527                 break;
1528               case llvm::Mips::Val_GNU_MIPS_ABI_FP_XX:
1529                 arch_flags |= lldb_private::ArchSpec::eMIPS_ABI_FP_XX;
1530                 break;
1531               case llvm::Mips::Val_GNU_MIPS_ABI_FP_64:
1532                 arch_flags |= lldb_private::ArchSpec::eMIPS_ABI_FP_64;
1533                 break;
1534               case llvm::Mips::Val_GNU_MIPS_ABI_FP_64A:
1535                 arch_flags |= lldb_private::ArchSpec::eMIPS_ABI_FP_64A;
1536                 break;
1537               }
1538             }
1539           }
1540           // Settings appropriate ArchSpec ABI Flags
1541           switch (header.e_flags & llvm::ELF::EF_MIPS_ABI) {
1542           case llvm::ELF::EF_MIPS_ABI_O32:
1543             arch_flags |= lldb_private::ArchSpec::eMIPSABI_O32;
1544             break;
1545           case EF_MIPS_ABI_O64:
1546             arch_flags |= lldb_private::ArchSpec::eMIPSABI_O64;
1547             break;
1548           case EF_MIPS_ABI_EABI32:
1549             arch_flags |= lldb_private::ArchSpec::eMIPSABI_EABI32;
1550             break;
1551           case EF_MIPS_ABI_EABI64:
1552             arch_flags |= lldb_private::ArchSpec::eMIPSABI_EABI64;
1553             break;
1554           default:
1555             // ABI Mask doesn't cover N32 and N64 ABI.
1556             if (header.e_ident[EI_CLASS] == llvm::ELF::ELFCLASS64)
1557               arch_flags |= lldb_private::ArchSpec::eMIPSABI_N64;
1558             else if (header.e_flags & llvm::ELF::EF_MIPS_ABI2)
1559               arch_flags |= lldb_private::ArchSpec::eMIPSABI_N32;
1560             break;
1561           }
1562           arch_spec.SetFlags(arch_flags);
1563         }
1564 
1565         if (arch_spec.GetMachine() == llvm::Triple::arm ||
1566             arch_spec.GetMachine() == llvm::Triple::thumb) {
1567           DataExtractor data;
1568 
1569           if (sheader.sh_type == SHT_ARM_ATTRIBUTES && section_size != 0 &&
1570               data.SetData(object_data, sheader.sh_offset, section_size) == section_size)
1571             ParseARMAttributes(data, section_size, arch_spec);
1572         }
1573 
1574         if (name == g_sect_name_gnu_debuglink) {
1575           DataExtractor data;
1576           if (section_size && (data.SetData(object_data, sheader.sh_offset,
1577                                             section_size) == section_size)) {
1578             lldb::offset_t gnu_debuglink_offset = 0;
1579             gnu_debuglink_file = data.GetCStr(&gnu_debuglink_offset);
1580             gnu_debuglink_offset = llvm::alignTo(gnu_debuglink_offset, 4);
1581             data.GetU32(&gnu_debuglink_offset, &gnu_debuglink_crc, 1);
1582           }
1583         }
1584 
1585         // Process ELF note section entries.
1586         bool is_note_header = (sheader.sh_type == SHT_NOTE);
1587 
1588         // The section header ".note.android.ident" is stored as a
1589         // PROGBITS type header but it is actually a note header.
1590         static ConstString g_sect_name_android_ident(".note.android.ident");
1591         if (!is_note_header && name == g_sect_name_android_ident)
1592           is_note_header = true;
1593 
1594         if (is_note_header) {
1595           // Allow notes to refine module info.
1596           DataExtractor data;
1597           if (section_size && (data.SetData(object_data, sheader.sh_offset,
1598                                             section_size) == section_size)) {
1599             Status error = RefineModuleDetailsFromNote(data, arch_spec, uuid);
1600             if (error.Fail()) {
1601               LLDB_LOGF(log, "ObjectFileELF::%s ELF note processing failed: %s",
1602                         __FUNCTION__, error.AsCString());
1603             }
1604           }
1605         }
1606       }
1607 
1608       // Make any unknown triple components to be unspecified unknowns.
1609       if (arch_spec.GetTriple().getVendor() == llvm::Triple::UnknownVendor)
1610         arch_spec.GetTriple().setVendorName(llvm::StringRef());
1611       if (arch_spec.GetTriple().getOS() == llvm::Triple::UnknownOS)
1612         arch_spec.GetTriple().setOSName(llvm::StringRef());
1613 
1614       return section_headers.size();
1615     }
1616   }
1617 
1618   section_headers.clear();
1619   return 0;
1620 }
1621 
1622 llvm::StringRef
StripLinkerSymbolAnnotations(llvm::StringRef symbol_name) const1623 ObjectFileELF::StripLinkerSymbolAnnotations(llvm::StringRef symbol_name) const {
1624   size_t pos = symbol_name.find('@');
1625   return symbol_name.substr(0, pos);
1626 }
1627 
1628 // ParseSectionHeaders
ParseSectionHeaders()1629 size_t ObjectFileELF::ParseSectionHeaders() {
1630   return GetSectionHeaderInfo(m_section_headers, m_data, m_header, m_uuid,
1631                               m_gnu_debuglink_file, m_gnu_debuglink_crc,
1632                               m_arch_spec);
1633 }
1634 
1635 const ObjectFileELF::ELFSectionHeaderInfo *
GetSectionHeaderByIndex(lldb::user_id_t id)1636 ObjectFileELF::GetSectionHeaderByIndex(lldb::user_id_t id) {
1637   if (!ParseSectionHeaders())
1638     return nullptr;
1639 
1640   if (id < m_section_headers.size())
1641     return &m_section_headers[id];
1642 
1643   return nullptr;
1644 }
1645 
GetSectionIndexByName(const char * name)1646 lldb::user_id_t ObjectFileELF::GetSectionIndexByName(const char *name) {
1647   if (!name || !name[0] || !ParseSectionHeaders())
1648     return 0;
1649   for (size_t i = 1; i < m_section_headers.size(); ++i)
1650     if (m_section_headers[i].section_name == ConstString(name))
1651       return i;
1652   return 0;
1653 }
1654 
GetSectionTypeFromName(llvm::StringRef Name)1655 static SectionType GetSectionTypeFromName(llvm::StringRef Name) {
1656   if (Name.consume_front(".debug_"))
1657     return ObjectFile::GetDWARFSectionTypeFromName(Name);
1658 
1659   return llvm::StringSwitch<SectionType>(Name)
1660       .Case(".ARM.exidx", eSectionTypeARMexidx)
1661       .Case(".ARM.extab", eSectionTypeARMextab)
1662       .Case(".ctf", eSectionTypeDebug)
1663       .Cases(".data", ".tdata", eSectionTypeData)
1664       .Case(".eh_frame", eSectionTypeEHFrame)
1665       .Case(".gnu_debugaltlink", eSectionTypeDWARFGNUDebugAltLink)
1666       .Case(".gosymtab", eSectionTypeGoSymtab)
1667       .Case(".text", eSectionTypeCode)
1668       .Case(".lldbsummaries", lldb::eSectionTypeLLDBTypeSummaries)
1669       .Case(".lldbformatters", lldb::eSectionTypeLLDBFormatters)
1670       .Case(".swift_ast", eSectionTypeSwiftModules)
1671       .Default(eSectionTypeOther);
1672 }
1673 
GetSectionType(const ELFSectionHeaderInfo & H) const1674 SectionType ObjectFileELF::GetSectionType(const ELFSectionHeaderInfo &H) const {
1675   switch (H.sh_type) {
1676   case SHT_PROGBITS:
1677     if (H.sh_flags & SHF_EXECINSTR)
1678       return eSectionTypeCode;
1679     break;
1680   case SHT_NOBITS:
1681     if (H.sh_flags & SHF_ALLOC)
1682       return eSectionTypeZeroFill;
1683     break;
1684   case SHT_SYMTAB:
1685     return eSectionTypeELFSymbolTable;
1686   case SHT_DYNSYM:
1687     return eSectionTypeELFDynamicSymbols;
1688   case SHT_RELA:
1689   case SHT_REL:
1690     return eSectionTypeELFRelocationEntries;
1691   case SHT_DYNAMIC:
1692     return eSectionTypeELFDynamicLinkInfo;
1693   }
1694   return GetSectionTypeFromName(H.section_name.GetStringRef());
1695 }
1696 
GetTargetByteSize(SectionType Type,const ArchSpec & arch)1697 static uint32_t GetTargetByteSize(SectionType Type, const ArchSpec &arch) {
1698   switch (Type) {
1699   case eSectionTypeData:
1700   case eSectionTypeZeroFill:
1701     return arch.GetDataByteSize();
1702   case eSectionTypeCode:
1703     return arch.GetCodeByteSize();
1704   default:
1705     return 1;
1706   }
1707 }
1708 
GetPermissions(const ELFSectionHeader & H)1709 static Permissions GetPermissions(const ELFSectionHeader &H) {
1710   Permissions Perm = Permissions(0);
1711   if (H.sh_flags & SHF_ALLOC)
1712     Perm |= ePermissionsReadable;
1713   if (H.sh_flags & SHF_WRITE)
1714     Perm |= ePermissionsWritable;
1715   if (H.sh_flags & SHF_EXECINSTR)
1716     Perm |= ePermissionsExecutable;
1717   return Perm;
1718 }
1719 
GetPermissions(const ELFProgramHeader & H)1720 static Permissions GetPermissions(const ELFProgramHeader &H) {
1721   Permissions Perm = Permissions(0);
1722   if (H.p_flags & PF_R)
1723     Perm |= ePermissionsReadable;
1724   if (H.p_flags & PF_W)
1725     Perm |= ePermissionsWritable;
1726   if (H.p_flags & PF_X)
1727     Perm |= ePermissionsExecutable;
1728   return Perm;
1729 }
1730 
1731 namespace {
1732 
1733 using VMRange = lldb_private::Range<addr_t, addr_t>;
1734 
1735 struct SectionAddressInfo {
1736   SectionSP Segment;
1737   VMRange Range;
1738 };
1739 
1740 // (Unlinked) ELF object files usually have 0 for every section address, meaning
1741 // we need to compute synthetic addresses in order for "file addresses" from
1742 // different sections to not overlap. This class handles that logic.
1743 class VMAddressProvider {
1744   using VMMap = llvm::IntervalMap<addr_t, SectionSP, 4,
1745                                        llvm::IntervalMapHalfOpenInfo<addr_t>>;
1746 
1747   ObjectFile::Type ObjectType;
1748   addr_t NextVMAddress = 0;
1749   VMMap::Allocator Alloc;
1750   VMMap Segments{Alloc};
1751   VMMap Sections{Alloc};
1752   lldb_private::Log *Log = GetLog(LLDBLog::Modules);
1753   size_t SegmentCount = 0;
1754   std::string SegmentName;
1755 
GetVMRange(const ELFSectionHeader & H)1756   VMRange GetVMRange(const ELFSectionHeader &H) {
1757     addr_t Address = H.sh_addr;
1758     addr_t Size = H.sh_flags & SHF_ALLOC ? H.sh_size : 0;
1759 
1760     // When this is a debug file for relocatable file, the address is all zero
1761     // and thus needs to use accumulate method
1762     if ((ObjectType == ObjectFile::Type::eTypeObjectFile ||
1763          (ObjectType == ObjectFile::Type::eTypeDebugInfo && H.sh_addr == 0)) &&
1764         Segments.empty() && (H.sh_flags & SHF_ALLOC)) {
1765       NextVMAddress =
1766           llvm::alignTo(NextVMAddress, std::max<addr_t>(H.sh_addralign, 1));
1767       Address = NextVMAddress;
1768       NextVMAddress += Size;
1769     }
1770     return VMRange(Address, Size);
1771   }
1772 
1773 public:
VMAddressProvider(ObjectFile::Type Type,llvm::StringRef SegmentName)1774   VMAddressProvider(ObjectFile::Type Type, llvm::StringRef SegmentName)
1775       : ObjectType(Type), SegmentName(std::string(SegmentName)) {}
1776 
GetNextSegmentName() const1777   std::string GetNextSegmentName() const {
1778     return llvm::formatv("{0}[{1}]", SegmentName, SegmentCount).str();
1779   }
1780 
GetAddressInfo(const ELFProgramHeader & H)1781   std::optional<VMRange> GetAddressInfo(const ELFProgramHeader &H) {
1782     if (H.p_memsz == 0) {
1783       LLDB_LOG(Log, "Ignoring zero-sized {0} segment. Corrupt object file?",
1784                SegmentName);
1785       return std::nullopt;
1786     }
1787 
1788     if (Segments.overlaps(H.p_vaddr, H.p_vaddr + H.p_memsz)) {
1789       LLDB_LOG(Log, "Ignoring overlapping {0} segment. Corrupt object file?",
1790                SegmentName);
1791       return std::nullopt;
1792     }
1793     return VMRange(H.p_vaddr, H.p_memsz);
1794   }
1795 
GetAddressInfo(const ELFSectionHeader & H)1796   std::optional<SectionAddressInfo> GetAddressInfo(const ELFSectionHeader &H) {
1797     VMRange Range = GetVMRange(H);
1798     SectionSP Segment;
1799     auto It = Segments.find(Range.GetRangeBase());
1800     if ((H.sh_flags & SHF_ALLOC) && It.valid()) {
1801       addr_t MaxSize;
1802       if (It.start() <= Range.GetRangeBase()) {
1803         MaxSize = It.stop() - Range.GetRangeBase();
1804         Segment = *It;
1805       } else
1806         MaxSize = It.start() - Range.GetRangeBase();
1807       if (Range.GetByteSize() > MaxSize) {
1808         LLDB_LOG(Log, "Shortening section crossing segment boundaries. "
1809                       "Corrupt object file?");
1810         Range.SetByteSize(MaxSize);
1811       }
1812     }
1813     if (Range.GetByteSize() > 0 &&
1814         Sections.overlaps(Range.GetRangeBase(), Range.GetRangeEnd())) {
1815       LLDB_LOG(Log, "Ignoring overlapping section. Corrupt object file?");
1816       return std::nullopt;
1817     }
1818     if (Segment)
1819       Range.Slide(-Segment->GetFileAddress());
1820     return SectionAddressInfo{Segment, Range};
1821   }
1822 
AddSegment(const VMRange & Range,SectionSP Seg)1823   void AddSegment(const VMRange &Range, SectionSP Seg) {
1824     Segments.insert(Range.GetRangeBase(), Range.GetRangeEnd(), std::move(Seg));
1825     ++SegmentCount;
1826   }
1827 
AddSection(SectionAddressInfo Info,SectionSP Sect)1828   void AddSection(SectionAddressInfo Info, SectionSP Sect) {
1829     if (Info.Range.GetByteSize() == 0)
1830       return;
1831     if (Info.Segment)
1832       Info.Range.Slide(Info.Segment->GetFileAddress());
1833     Sections.insert(Info.Range.GetRangeBase(), Info.Range.GetRangeEnd(),
1834                     std::move(Sect));
1835   }
1836 };
1837 }
1838 
1839 // We have to do this because ELF doesn't have section IDs, and also
1840 // doesn't require section names to be unique.  (We use the section index
1841 // for section IDs, but that isn't guaranteed to be the same in separate
1842 // debug images.)
FindMatchingSection(const SectionList & section_list,SectionSP section)1843 static SectionSP FindMatchingSection(const SectionList &section_list,
1844                                      SectionSP section) {
1845   SectionSP sect_sp;
1846 
1847   addr_t vm_addr = section->GetFileAddress();
1848   ConstString name = section->GetName();
1849   offset_t byte_size = section->GetByteSize();
1850   bool thread_specific = section->IsThreadSpecific();
1851   uint32_t permissions = section->GetPermissions();
1852   uint32_t alignment = section->GetLog2Align();
1853 
1854   for (auto sect : section_list) {
1855     if (sect->GetName() == name &&
1856         sect->IsThreadSpecific() == thread_specific &&
1857         sect->GetPermissions() == permissions &&
1858         sect->GetByteSize() == byte_size && sect->GetFileAddress() == vm_addr &&
1859         sect->GetLog2Align() == alignment) {
1860       sect_sp = sect;
1861       break;
1862     } else {
1863       sect_sp = FindMatchingSection(sect->GetChildren(), section);
1864       if (sect_sp)
1865         break;
1866     }
1867   }
1868 
1869   return sect_sp;
1870 }
1871 
CreateSections(SectionList & unified_section_list)1872 void ObjectFileELF::CreateSections(SectionList &unified_section_list) {
1873   if (m_sections_up)
1874     return;
1875 
1876   m_sections_up = std::make_unique<SectionList>();
1877   VMAddressProvider regular_provider(GetType(), "PT_LOAD");
1878   VMAddressProvider tls_provider(GetType(), "PT_TLS");
1879 
1880   for (const auto &EnumPHdr : llvm::enumerate(ProgramHeaders())) {
1881     const ELFProgramHeader &PHdr = EnumPHdr.value();
1882     if (PHdr.p_type != PT_LOAD && PHdr.p_type != PT_TLS)
1883       continue;
1884 
1885     VMAddressProvider &provider =
1886         PHdr.p_type == PT_TLS ? tls_provider : regular_provider;
1887     auto InfoOr = provider.GetAddressInfo(PHdr);
1888     if (!InfoOr)
1889       continue;
1890 
1891     uint32_t Log2Align = llvm::Log2_64(std::max<elf_xword>(PHdr.p_align, 1));
1892     SectionSP Segment = std::make_shared<Section>(
1893         GetModule(), this, SegmentID(EnumPHdr.index()),
1894         ConstString(provider.GetNextSegmentName()), eSectionTypeContainer,
1895         InfoOr->GetRangeBase(), InfoOr->GetByteSize(), PHdr.p_offset,
1896         PHdr.p_filesz, Log2Align, /*flags*/ 0);
1897     Segment->SetPermissions(GetPermissions(PHdr));
1898     Segment->SetIsThreadSpecific(PHdr.p_type == PT_TLS);
1899     m_sections_up->AddSection(Segment);
1900 
1901     provider.AddSegment(*InfoOr, std::move(Segment));
1902   }
1903 
1904   ParseSectionHeaders();
1905   if (m_section_headers.empty())
1906     return;
1907 
1908   for (SectionHeaderCollIter I = std::next(m_section_headers.begin());
1909        I != m_section_headers.end(); ++I) {
1910     const ELFSectionHeaderInfo &header = *I;
1911 
1912     ConstString &name = I->section_name;
1913     const uint64_t file_size =
1914         header.sh_type == SHT_NOBITS ? 0 : header.sh_size;
1915 
1916     VMAddressProvider &provider =
1917         header.sh_flags & SHF_TLS ? tls_provider : regular_provider;
1918     auto InfoOr = provider.GetAddressInfo(header);
1919     if (!InfoOr)
1920       continue;
1921 
1922     SectionType sect_type = GetSectionType(header);
1923 
1924     const uint32_t target_bytes_size =
1925         GetTargetByteSize(sect_type, m_arch_spec);
1926 
1927     elf::elf_xword log2align =
1928         (header.sh_addralign == 0) ? 0 : llvm::Log2_64(header.sh_addralign);
1929 
1930     SectionSP section_sp(new Section(
1931         InfoOr->Segment, GetModule(), // Module to which this section belongs.
1932         this,            // ObjectFile to which this section belongs and should
1933                          // read section data from.
1934         SectionIndex(I), // Section ID.
1935         name,            // Section name.
1936         sect_type,       // Section type.
1937         InfoOr->Range.GetRangeBase(), // VM address.
1938         InfoOr->Range.GetByteSize(),  // VM size in bytes of this section.
1939         header.sh_offset,             // Offset of this section in the file.
1940         file_size,           // Size of the section as found in the file.
1941         log2align,           // Alignment of the section
1942         header.sh_flags,     // Flags for this section.
1943         target_bytes_size)); // Number of host bytes per target byte
1944 
1945     section_sp->SetPermissions(GetPermissions(header));
1946     section_sp->SetIsThreadSpecific(header.sh_flags & SHF_TLS);
1947     (InfoOr->Segment ? InfoOr->Segment->GetChildren() : *m_sections_up)
1948         .AddSection(section_sp);
1949     provider.AddSection(std::move(*InfoOr), std::move(section_sp));
1950   }
1951 
1952   // For eTypeDebugInfo files, the Symbol Vendor will take care of updating the
1953   // unified section list.
1954   if (GetType() != eTypeDebugInfo)
1955     unified_section_list = *m_sections_up;
1956 
1957   // If there's a .gnu_debugdata section, we'll try to read the .symtab that's
1958   // embedded in there and replace the one in the original object file (if any).
1959   // If there's none in the orignal object file, we add it to it.
1960   if (auto gdd_obj_file = GetGnuDebugDataObjectFile()) {
1961     if (auto gdd_objfile_section_list = gdd_obj_file->GetSectionList()) {
1962       if (SectionSP symtab_section_sp =
1963               gdd_objfile_section_list->FindSectionByType(
1964                   eSectionTypeELFSymbolTable, true)) {
1965         SectionSP module_section_sp = unified_section_list.FindSectionByType(
1966             eSectionTypeELFSymbolTable, true);
1967         if (module_section_sp)
1968           unified_section_list.ReplaceSection(module_section_sp->GetID(),
1969                                               symtab_section_sp);
1970         else
1971           unified_section_list.AddSection(symtab_section_sp);
1972       }
1973     }
1974   }
1975 }
1976 
GetGnuDebugDataObjectFile()1977 std::shared_ptr<ObjectFileELF> ObjectFileELF::GetGnuDebugDataObjectFile() {
1978   if (m_gnu_debug_data_object_file != nullptr)
1979     return m_gnu_debug_data_object_file;
1980 
1981   SectionSP section =
1982       GetSectionList()->FindSectionByName(ConstString(".gnu_debugdata"));
1983   if (!section)
1984     return nullptr;
1985 
1986   if (!lldb_private::lzma::isAvailable()) {
1987     GetModule()->ReportWarning(
1988         "No LZMA support found for reading .gnu_debugdata section");
1989     return nullptr;
1990   }
1991 
1992   // Uncompress the data
1993   DataExtractor data;
1994   section->GetSectionData(data);
1995   llvm::SmallVector<uint8_t, 0> uncompressedData;
1996   auto err = lldb_private::lzma::uncompress(data.GetData(), uncompressedData);
1997   if (err) {
1998     GetModule()->ReportWarning(
1999         "An error occurred while decompression the section {0}: {1}",
2000         section->GetName().AsCString(), llvm::toString(std::move(err)).c_str());
2001     return nullptr;
2002   }
2003 
2004   // Construct ObjectFileELF object from decompressed buffer
2005   DataBufferSP gdd_data_buf(
2006       new DataBufferHeap(uncompressedData.data(), uncompressedData.size()));
2007   auto fspec = GetFileSpec().CopyByAppendingPathComponent(
2008       llvm::StringRef("gnu_debugdata"));
2009   m_gnu_debug_data_object_file.reset(new ObjectFileELF(
2010       GetModule(), gdd_data_buf, 0, &fspec, 0, gdd_data_buf->GetByteSize()));
2011 
2012   // This line is essential; otherwise a breakpoint can be set but not hit.
2013   m_gnu_debug_data_object_file->SetType(ObjectFile::eTypeDebugInfo);
2014 
2015   ArchSpec spec = m_gnu_debug_data_object_file->GetArchitecture();
2016   if (spec && m_gnu_debug_data_object_file->SetModulesArchitecture(spec))
2017     return m_gnu_debug_data_object_file;
2018 
2019   return nullptr;
2020 }
2021 
// Find the arm/aarch64 mapping symbol character in the given symbol name.
// Mapping symbols have the form "$<char>[.<any>]*". A mapping symbol that is
// prefixed by an arbitrary string is recognized as well, because when objcopy
// adds a prefix to every symbol in an object file the mapping symbols get
// prefixed too. Only the first '$' in the name is examined. Returns the
// mapping character, or '\0' if the name is null or is not a mapping symbol.
static char FindArmAarch64MappingSymbol(const char *symbol_name) {
  const char *marker = symbol_name ? ::strchr(symbol_name, '$') : nullptr;
  if (marker == nullptr)
    return '\0';

  // The character right after '$' is the candidate mapping character.
  const char mapping_char = marker[1];
  if (mapping_char == '\0')
    return '\0';

  // It only counts if the name ends there or a ".<any>" suffix follows.
  const char following = marker[2];
  return (following == '\0' || following == '.') ? mapping_char : '\0';
}
2039 
// Helpers for testing a MIPS symbol's "other" byte: STO_MIPS_ISA masks bits
// 6-7, and IS_MICROMIPS is true when those two bits equal STO_MICROMIPS.
#define STO_MIPS_ISA 0xC0  // 3 << 6
#define STO_MICROMIPS 0x80 // 2 << 6
#define IS_MICROMIPS(ST_OTHER) (STO_MICROMIPS == ((ST_OTHER) & STO_MIPS_ISA))
2043 
2044 // private
2045 std::pair<unsigned, ObjectFileELF::FileAddressToAddressClassMap>
ParseSymbols(Symtab * symtab,user_id_t start_id,SectionList * section_list,const size_t num_symbols,const DataExtractor & symtab_data,const DataExtractor & strtab_data)2046 ObjectFileELF::ParseSymbols(Symtab *symtab, user_id_t start_id,
2047                             SectionList *section_list, const size_t num_symbols,
2048                             const DataExtractor &symtab_data,
2049                             const DataExtractor &strtab_data) {
2050   ELFSymbol symbol;
2051   lldb::offset_t offset = 0;
2052   // The changes these symbols would make to the class map. We will also update
2053   // m_address_class_map but need to tell the caller what changed because the
2054   // caller may be another object file.
2055   FileAddressToAddressClassMap address_class_map;
2056 
2057   static ConstString text_section_name(".text");
2058   static ConstString init_section_name(".init");
2059   static ConstString fini_section_name(".fini");
2060   static ConstString ctors_section_name(".ctors");
2061   static ConstString dtors_section_name(".dtors");
2062 
2063   static ConstString data_section_name(".data");
2064   static ConstString rodata_section_name(".rodata");
2065   static ConstString rodata1_section_name(".rodata1");
2066   static ConstString data2_section_name(".data1");
2067   static ConstString bss_section_name(".bss");
2068   static ConstString opd_section_name(".opd"); // For ppc64
2069 
  // On Android the oatdata and the oatexec symbols in the oat and odex files
  // cover the full .text section, which causes issues: the user is shown an
  // unusable symbol name and unwinding becomes very slow, because the
  // instruction emulation based unwind plans try to emulate all instructions
  // in these symbols. Don't add these symbols to the symbol list as they have
  // no use for the debugger and they cause a lot of trouble. Filtering can't
  // be restricted to Android because these special object files don't contain
  // the note section specifying the environment as Android, but the custom
  // extension and file name make it highly unlikely that this will collide
  // with anything else.
2080   llvm::StringRef file_extension = m_file.GetFileNameExtension();
2081   bool skip_oatdata_oatexec =
2082       file_extension == ".oat" || file_extension == ".odex";
2083 
2084   ArchSpec arch = GetArchitecture();
2085   ModuleSP module_sp(GetModule());
2086   SectionList *module_section_list =
2087       module_sp ? module_sp->GetSectionList() : nullptr;
2088 
2089   // We might have debug information in a separate object, in which case
2090   // we need to map the sections from that object to the sections in the
2091   // main object during symbol lookup.  If we had to compare the sections
2092   // for every single symbol, that would be expensive, so this map is
2093   // used to accelerate the process.
2094   std::unordered_map<lldb::SectionSP, lldb::SectionSP> section_map;
2095 
2096   unsigned i;
2097   for (i = 0; i < num_symbols; ++i) {
2098     if (!symbol.Parse(symtab_data, &offset))
2099       break;
2100 
2101     const char *symbol_name = strtab_data.PeekCStr(symbol.st_name);
2102     if (!symbol_name)
2103       symbol_name = "";
2104 
2105     // No need to add non-section symbols that have no names
2106     if (symbol.getType() != STT_SECTION &&
2107         (symbol_name == nullptr || symbol_name[0] == '\0'))
2108       continue;
2109 
2110     // Skipping oatdata and oatexec sections if it is requested. See details
2111     // above the definition of skip_oatdata_oatexec for the reasons.
2112     if (skip_oatdata_oatexec && (::strcmp(symbol_name, "oatdata") == 0 ||
2113                                  ::strcmp(symbol_name, "oatexec") == 0))
2114       continue;
2115 
2116     SectionSP symbol_section_sp;
2117     SymbolType symbol_type = eSymbolTypeInvalid;
2118     Elf64_Half shndx = symbol.st_shndx;
2119 
2120     switch (shndx) {
2121     case SHN_ABS:
2122       symbol_type = eSymbolTypeAbsolute;
2123       break;
2124     case SHN_UNDEF:
2125       symbol_type = eSymbolTypeUndefined;
2126       break;
2127     default:
2128       symbol_section_sp = section_list->FindSectionByID(shndx);
2129       break;
2130     }
2131 
2132     // If a symbol is undefined do not process it further even if it has a STT
2133     // type
2134     if (symbol_type != eSymbolTypeUndefined) {
2135       switch (symbol.getType()) {
2136       default:
2137       case STT_NOTYPE:
2138         // The symbol's type is not specified.
2139         break;
2140 
2141       case STT_OBJECT:
2142         // The symbol is associated with a data object, such as a variable, an
2143         // array, etc.
2144         symbol_type = eSymbolTypeData;
2145         break;
2146 
2147       case STT_FUNC:
2148         // The symbol is associated with a function or other executable code.
2149         symbol_type = eSymbolTypeCode;
2150         break;
2151 
2152       case STT_SECTION:
2153         // The symbol is associated with a section. Symbol table entries of
2154         // this type exist primarily for relocation and normally have STB_LOCAL
2155         // binding.
2156         break;
2157 
2158       case STT_FILE:
2159         // Conventionally, the symbol's name gives the name of the source file
2160         // associated with the object file. A file symbol has STB_LOCAL
2161         // binding, its section index is SHN_ABS, and it precedes the other
2162         // STB_LOCAL symbols for the file, if it is present.
2163         symbol_type = eSymbolTypeSourceFile;
2164         break;
2165 
2166       case STT_GNU_IFUNC:
2167         // The symbol is associated with an indirect function. The actual
2168         // function will be resolved if it is referenced.
2169         symbol_type = eSymbolTypeResolver;
2170         break;
2171       }
2172     }
2173 
2174     if (symbol_type == eSymbolTypeInvalid && symbol.getType() != STT_SECTION) {
2175       if (symbol_section_sp) {
2176         ConstString sect_name = symbol_section_sp->GetName();
2177         if (sect_name == text_section_name || sect_name == init_section_name ||
2178             sect_name == fini_section_name || sect_name == ctors_section_name ||
2179             sect_name == dtors_section_name) {
2180           symbol_type = eSymbolTypeCode;
2181         } else if (sect_name == data_section_name ||
2182                    sect_name == data2_section_name ||
2183                    sect_name == rodata_section_name ||
2184                    sect_name == rodata1_section_name ||
2185                    sect_name == bss_section_name) {
2186           symbol_type = eSymbolTypeData;
2187         }
2188       }
2189     }
2190 
2191     int64_t symbol_value_offset = 0;
2192     uint32_t additional_flags = 0;
2193 
2194     if (arch.IsValid()) {
2195       if (arch.GetMachine() == llvm::Triple::arm) {
2196         if (symbol.getBinding() == STB_LOCAL) {
2197           char mapping_symbol = FindArmAarch64MappingSymbol(symbol_name);
2198           if (symbol_type == eSymbolTypeCode) {
2199             switch (mapping_symbol) {
2200             case 'a':
2201               // $a[.<any>]* - marks an ARM instruction sequence
2202               address_class_map[symbol.st_value] = AddressClass::eCode;
2203               break;
2204             case 'b':
2205             case 't':
2206               // $b[.<any>]* - marks a THUMB BL instruction sequence
2207               // $t[.<any>]* - marks a THUMB instruction sequence
2208               address_class_map[symbol.st_value] =
2209                   AddressClass::eCodeAlternateISA;
2210               break;
2211             case 'd':
2212               // $d[.<any>]* - marks a data item sequence (e.g. lit pool)
2213               address_class_map[symbol.st_value] = AddressClass::eData;
2214               break;
2215             }
2216           }
2217           if (mapping_symbol)
2218             continue;
2219         }
2220       } else if (arch.GetMachine() == llvm::Triple::aarch64) {
2221         if (symbol.getBinding() == STB_LOCAL) {
2222           char mapping_symbol = FindArmAarch64MappingSymbol(symbol_name);
2223           if (symbol_type == eSymbolTypeCode) {
2224             switch (mapping_symbol) {
2225             case 'x':
2226               // $x[.<any>]* - marks an A64 instruction sequence
2227               address_class_map[symbol.st_value] = AddressClass::eCode;
2228               break;
2229             case 'd':
2230               // $d[.<any>]* - marks a data item sequence (e.g. lit pool)
2231               address_class_map[symbol.st_value] = AddressClass::eData;
2232               break;
2233             }
2234           }
2235           if (mapping_symbol)
2236             continue;
2237         }
2238       }
2239 
2240       if (arch.GetMachine() == llvm::Triple::arm) {
2241         if (symbol_type == eSymbolTypeCode) {
2242           if (symbol.st_value & 1) {
2243             // Subtracting 1 from the address effectively unsets the low order
2244             // bit, which results in the address actually pointing to the
2245             // beginning of the symbol. This delta will be used below in
2246             // conjunction with symbol.st_value to produce the final
2247             // symbol_value that we store in the symtab.
2248             symbol_value_offset = -1;
2249             address_class_map[symbol.st_value ^ 1] =
2250                 AddressClass::eCodeAlternateISA;
2251           } else {
2252             // This address is ARM
2253             address_class_map[symbol.st_value] = AddressClass::eCode;
2254           }
2255         }
2256       }
2257 
2258       /*
2259        * MIPS:
2260        * The bit #0 of an address is used for ISA mode (1 for microMIPS, 0 for
2261        * MIPS).
2262        * This allows processor to switch between microMIPS and MIPS without any
2263        * need
2264        * for special mode-control register. However, apart from .debug_line,
2265        * none of
2266        * the ELF/DWARF sections set the ISA bit (for symbol or section). Use
2267        * st_other
2268        * flag to check whether the symbol is microMIPS and then set the address
2269        * class
2270        * accordingly.
2271       */
2272       if (arch.IsMIPS()) {
2273         if (IS_MICROMIPS(symbol.st_other))
2274           address_class_map[symbol.st_value] = AddressClass::eCodeAlternateISA;
2275         else if ((symbol.st_value & 1) && (symbol_type == eSymbolTypeCode)) {
2276           symbol.st_value = symbol.st_value & (~1ull);
2277           address_class_map[symbol.st_value] = AddressClass::eCodeAlternateISA;
2278         } else {
2279           if (symbol_type == eSymbolTypeCode)
2280             address_class_map[symbol.st_value] = AddressClass::eCode;
2281           else if (symbol_type == eSymbolTypeData)
2282             address_class_map[symbol.st_value] = AddressClass::eData;
2283           else
2284             address_class_map[symbol.st_value] = AddressClass::eUnknown;
2285         }
2286       }
2287     }
2288 
2289     // symbol_value_offset may contain 0 for ARM symbols or -1 for THUMB
2290     // symbols. See above for more details.
2291     uint64_t symbol_value = symbol.st_value + symbol_value_offset;
2292 
2293     if (symbol_section_sp &&
2294         CalculateType() != ObjectFile::Type::eTypeObjectFile)
2295       symbol_value -= symbol_section_sp->GetFileAddress();
2296 
2297     if (symbol_section_sp && module_section_list &&
2298         module_section_list != section_list) {
2299       auto section_it = section_map.find(symbol_section_sp);
2300       if (section_it == section_map.end()) {
2301         section_it = section_map
2302                          .emplace(symbol_section_sp,
2303                                   FindMatchingSection(*module_section_list,
2304                                                       symbol_section_sp))
2305                          .first;
2306       }
2307       if (section_it->second)
2308         symbol_section_sp = section_it->second;
2309     }
2310 
2311     bool is_global = symbol.getBinding() == STB_GLOBAL;
2312     uint32_t flags = symbol.st_other << 8 | symbol.st_info | additional_flags;
2313     llvm::StringRef symbol_ref(symbol_name);
2314 
2315     // Symbol names may contain @VERSION suffixes. Find those and strip them
2316     // temporarily.
2317     size_t version_pos = symbol_ref.find('@');
2318     bool has_suffix = version_pos != llvm::StringRef::npos;
2319     llvm::StringRef symbol_bare = symbol_ref.substr(0, version_pos);
2320     Mangled mangled(symbol_bare);
2321 
2322     // Now append the suffix back to mangled and unmangled names. Only do it if
2323     // the demangling was successful (string is not empty).
2324     if (has_suffix) {
2325       llvm::StringRef suffix = symbol_ref.substr(version_pos);
2326 
2327       llvm::StringRef mangled_name = mangled.GetMangledName().GetStringRef();
2328       if (!mangled_name.empty())
2329         mangled.SetMangledName(ConstString((mangled_name + suffix).str()));
2330 
2331       ConstString demangled = mangled.GetDemangledName();
2332       llvm::StringRef demangled_name = demangled.GetStringRef();
2333       if (!demangled_name.empty())
2334         mangled.SetDemangledName(ConstString((demangled_name + suffix).str()));
2335     }
2336 
2337     // In ELF all symbol should have a valid size but it is not true for some
2338     // function symbols coming from hand written assembly. As none of the
2339     // function symbol should have 0 size we try to calculate the size for
2340     // these symbols in the symtab with saying that their original size is not
2341     // valid.
2342     bool symbol_size_valid =
2343         symbol.st_size != 0 || symbol.getType() != STT_FUNC;
2344 
2345     bool is_trampoline = false;
2346     if (arch.IsValid() && (arch.GetMachine() == llvm::Triple::aarch64)) {
2347       // On AArch64, trampolines are registered as code.
2348       // If we detect a trampoline (which starts with __AArch64ADRPThunk_ or
2349       // __AArch64AbsLongThunk_) we register the symbol as a trampoline. This
2350       // way we will be able to detect the trampoline when we step in a function
2351       // and step through the trampoline.
2352       if (symbol_type == eSymbolTypeCode) {
2353         llvm::StringRef trampoline_name = mangled.GetName().GetStringRef();
2354         if (trampoline_name.starts_with("__AArch64ADRPThunk_") ||
2355             trampoline_name.starts_with("__AArch64AbsLongThunk_")) {
2356           symbol_type = eSymbolTypeTrampoline;
2357           is_trampoline = true;
2358         }
2359       }
2360     }
2361 
2362     Symbol dc_symbol(
2363         i + start_id, // ID is the original symbol table index.
2364         mangled,
2365         symbol_type,                    // Type of this symbol
2366         is_global,                      // Is this globally visible?
2367         false,                          // Is this symbol debug info?
2368         is_trampoline,                  // Is this symbol a trampoline?
2369         false,                          // Is this symbol artificial?
2370         AddressRange(symbol_section_sp, // Section in which this symbol is
2371                                         // defined or null.
2372                      symbol_value,      // Offset in section or symbol value.
2373                      symbol.st_size),   // Size in bytes of this symbol.
2374         symbol_size_valid,              // Symbol size is valid
2375         has_suffix,                     // Contains linker annotations?
2376         flags);                         // Symbol flags.
2377     if (symbol.getBinding() == STB_WEAK)
2378       dc_symbol.SetIsWeak(true);
2379     symtab->AddSymbol(dc_symbol);
2380   }
2381 
2382   m_address_class_map.merge(address_class_map);
2383   return {i, address_class_map};
2384 }
2385 
2386 std::pair<unsigned, ObjectFileELF::FileAddressToAddressClassMap>
ParseSymbolTable(Symtab * symbol_table,user_id_t start_id,lldb_private::Section * symtab)2387 ObjectFileELF::ParseSymbolTable(Symtab *symbol_table, user_id_t start_id,
2388                                 lldb_private::Section *symtab) {
2389   if (symtab->GetObjectFile() != this) {
2390     // If the symbol table section is owned by a different object file, have it
2391     // do the parsing.
2392     ObjectFileELF *obj_file_elf =
2393         static_cast<ObjectFileELF *>(symtab->GetObjectFile());
2394     auto [num_symbols, address_class_map] =
2395         obj_file_elf->ParseSymbolTable(symbol_table, start_id, symtab);
2396 
2397     // The other object file returned the changes it made to its address
2398     // class map, make the same changes to ours.
2399     m_address_class_map.merge(address_class_map);
2400 
2401     return {num_symbols, address_class_map};
2402   }
2403 
2404   // Get section list for this object file.
2405   SectionList *section_list = m_sections_up.get();
2406   if (!section_list)
2407     return {};
2408 
2409   user_id_t symtab_id = symtab->GetID();
2410   const ELFSectionHeaderInfo *symtab_hdr = GetSectionHeaderByIndex(symtab_id);
2411   assert(symtab_hdr->sh_type == SHT_SYMTAB ||
2412          symtab_hdr->sh_type == SHT_DYNSYM);
2413 
2414   // sh_link: section header index of associated string table.
2415   user_id_t strtab_id = symtab_hdr->sh_link;
2416   Section *strtab = section_list->FindSectionByID(strtab_id).get();
2417 
2418   if (symtab && strtab) {
2419     assert(symtab->GetObjectFile() == this);
2420     assert(strtab->GetObjectFile() == this);
2421 
2422     DataExtractor symtab_data;
2423     DataExtractor strtab_data;
2424     if (ReadSectionData(symtab, symtab_data) &&
2425         ReadSectionData(strtab, strtab_data)) {
2426       size_t num_symbols = symtab_data.GetByteSize() / symtab_hdr->sh_entsize;
2427 
2428       return ParseSymbols(symbol_table, start_id, section_list, num_symbols,
2429                           symtab_data, strtab_data);
2430     }
2431   }
2432 
2433   return {0, {}};
2434 }
2435 
ParseDynamicSymbols()2436 size_t ObjectFileELF::ParseDynamicSymbols() {
2437   if (m_dynamic_symbols.size())
2438     return m_dynamic_symbols.size();
2439 
2440   std::optional<DataExtractor> dynamic_data = GetDynamicData();
2441   if (!dynamic_data)
2442     return 0;
2443 
2444   ELFDynamicWithName e;
2445   lldb::offset_t cursor = 0;
2446   while (e.symbol.Parse(*dynamic_data, &cursor)) {
2447     m_dynamic_symbols.push_back(e);
2448     if (e.symbol.d_tag == DT_NULL)
2449       break;
2450   }
2451   if (std::optional<DataExtractor> dynstr_data = GetDynstrData()) {
2452     for (ELFDynamicWithName &entry : m_dynamic_symbols) {
2453       switch (entry.symbol.d_tag) {
2454       case DT_NEEDED:
2455       case DT_SONAME:
2456       case DT_RPATH:
2457       case DT_RUNPATH:
2458       case DT_AUXILIARY:
2459       case DT_FILTER: {
2460         lldb::offset_t cursor = entry.symbol.d_val;
2461         const char *name = dynstr_data->GetCStr(&cursor);
2462         if (name)
2463           entry.name = std::string(name);
2464         break;
2465       }
2466       default:
2467         break;
2468       }
2469     }
2470   }
2471   return m_dynamic_symbols.size();
2472 }
2473 
FindDynamicSymbol(unsigned tag)2474 const ELFDynamic *ObjectFileELF::FindDynamicSymbol(unsigned tag) {
2475   if (!ParseDynamicSymbols())
2476     return nullptr;
2477   for (const auto &entry : m_dynamic_symbols) {
2478     if (entry.symbol.d_tag == tag)
2479       return &entry.symbol;
2480   }
2481   return nullptr;
2482 }
2483 
PLTRelocationType()2484 unsigned ObjectFileELF::PLTRelocationType() {
2485   // DT_PLTREL
2486   //  This member specifies the type of relocation entry to which the
2487   //  procedure linkage table refers. The d_val member holds DT_REL or
2488   //  DT_RELA, as appropriate. All relocations in a procedure linkage table
2489   //  must use the same relocation.
2490   const ELFDynamic *symbol = FindDynamicSymbol(DT_PLTREL);
2491 
2492   if (symbol)
2493     return symbol->d_val;
2494 
2495   return 0;
2496 }
2497 
2498 // Returns the size of the normal plt entries and the offset of the first
2499 // normal plt entry. The 0th entry in the plt table is usually a resolution
2500 // entry which have different size in some architectures then the rest of the
2501 // plt entries.
2502 static std::pair<uint64_t, uint64_t>
GetPltEntrySizeAndOffset(const ELFSectionHeader * rel_hdr,const ELFSectionHeader * plt_hdr)2503 GetPltEntrySizeAndOffset(const ELFSectionHeader *rel_hdr,
2504                          const ELFSectionHeader *plt_hdr) {
2505   const elf_xword num_relocations = rel_hdr->sh_size / rel_hdr->sh_entsize;
2506 
2507   // Clang 3.3 sets entsize to 4 for 32-bit binaries, but the plt entries are
2508   // 16 bytes. So round the entsize up by the alignment if addralign is set.
2509   elf_xword plt_entsize =
2510       plt_hdr->sh_addralign
2511           ? llvm::alignTo(plt_hdr->sh_entsize, plt_hdr->sh_addralign)
2512           : plt_hdr->sh_entsize;
2513 
2514   // Some linkers e.g ld for arm, fill plt_hdr->sh_entsize field incorrectly.
2515   // PLT entries relocation code in general requires multiple instruction and
2516   // should be greater than 4 bytes in most cases. Try to guess correct size
2517   // just in case.
2518   if (plt_entsize <= 4) {
2519     // The linker haven't set the plt_hdr->sh_entsize field. Try to guess the
2520     // size of the plt entries based on the number of entries and the size of
2521     // the plt section with the assumption that the size of the 0th entry is at
2522     // least as big as the size of the normal entries and it isn't much bigger
2523     // then that.
2524     if (plt_hdr->sh_addralign)
2525       plt_entsize = plt_hdr->sh_size / plt_hdr->sh_addralign /
2526                     (num_relocations + 1) * plt_hdr->sh_addralign;
2527     else
2528       plt_entsize = plt_hdr->sh_size / (num_relocations + 1);
2529   }
2530 
2531   elf_xword plt_offset = plt_hdr->sh_size - num_relocations * plt_entsize;
2532 
2533   return std::make_pair(plt_entsize, plt_offset);
2534 }
2535 
ParsePLTRelocations(Symtab * symbol_table,user_id_t start_id,unsigned rel_type,const ELFHeader * hdr,const ELFSectionHeader * rel_hdr,const ELFSectionHeader * plt_hdr,const ELFSectionHeader * sym_hdr,const lldb::SectionSP & plt_section_sp,DataExtractor & rel_data,DataExtractor & symtab_data,DataExtractor & strtab_data)2536 static unsigned ParsePLTRelocations(
2537     Symtab *symbol_table, user_id_t start_id, unsigned rel_type,
2538     const ELFHeader *hdr, const ELFSectionHeader *rel_hdr,
2539     const ELFSectionHeader *plt_hdr, const ELFSectionHeader *sym_hdr,
2540     const lldb::SectionSP &plt_section_sp, DataExtractor &rel_data,
2541     DataExtractor &symtab_data, DataExtractor &strtab_data) {
2542   ELFRelocation rel(rel_type);
2543   ELFSymbol symbol;
2544   lldb::offset_t offset = 0;
2545 
2546   uint64_t plt_offset, plt_entsize;
2547   std::tie(plt_entsize, plt_offset) =
2548       GetPltEntrySizeAndOffset(rel_hdr, plt_hdr);
2549   const elf_xword num_relocations = rel_hdr->sh_size / rel_hdr->sh_entsize;
2550 
2551   typedef unsigned (*reloc_info_fn)(const ELFRelocation &rel);
2552   reloc_info_fn reloc_type;
2553   reloc_info_fn reloc_symbol;
2554 
2555   if (hdr->Is32Bit()) {
2556     reloc_type = ELFRelocation::RelocType32;
2557     reloc_symbol = ELFRelocation::RelocSymbol32;
2558   } else {
2559     reloc_type = ELFRelocation::RelocType64;
2560     reloc_symbol = ELFRelocation::RelocSymbol64;
2561   }
2562 
2563   unsigned slot_type = hdr->GetRelocationJumpSlotType();
2564   unsigned i;
2565   for (i = 0; i < num_relocations; ++i) {
2566     if (!rel.Parse(rel_data, &offset))
2567       break;
2568 
2569     if (reloc_type(rel) != slot_type)
2570       continue;
2571 
2572     lldb::offset_t symbol_offset = reloc_symbol(rel) * sym_hdr->sh_entsize;
2573     if (!symbol.Parse(symtab_data, &symbol_offset))
2574       break;
2575 
2576     const char *symbol_name = strtab_data.PeekCStr(symbol.st_name);
2577     uint64_t plt_index = plt_offset + i * plt_entsize;
2578 
2579     Symbol jump_symbol(
2580         i + start_id,          // Symbol table index
2581         symbol_name,           // symbol name.
2582         eSymbolTypeTrampoline, // Type of this symbol
2583         false,                 // Is this globally visible?
2584         false,                 // Is this symbol debug info?
2585         true,                  // Is this symbol a trampoline?
2586         true,                  // Is this symbol artificial?
2587         plt_section_sp, // Section in which this symbol is defined or null.
2588         plt_index,      // Offset in section or symbol value.
2589         plt_entsize,    // Size in bytes of this symbol.
2590         true,           // Size is valid
2591         false,          // Contains linker annotations?
2592         0);             // Symbol flags.
2593 
2594     symbol_table->AddSymbol(jump_symbol);
2595   }
2596 
2597   return i;
2598 }
2599 
2600 unsigned
ParseTrampolineSymbols(Symtab * symbol_table,user_id_t start_id,const ELFSectionHeaderInfo * rel_hdr,user_id_t rel_id)2601 ObjectFileELF::ParseTrampolineSymbols(Symtab *symbol_table, user_id_t start_id,
2602                                       const ELFSectionHeaderInfo *rel_hdr,
2603                                       user_id_t rel_id) {
2604   assert(rel_hdr->sh_type == SHT_RELA || rel_hdr->sh_type == SHT_REL);
2605 
2606   // The link field points to the associated symbol table.
2607   user_id_t symtab_id = rel_hdr->sh_link;
2608 
2609   // If the link field doesn't point to the appropriate symbol name table then
2610   // try to find it by name as some compiler don't fill in the link fields.
2611   if (!symtab_id)
2612     symtab_id = GetSectionIndexByName(".dynsym");
2613 
2614   // Get PLT section.  We cannot use rel_hdr->sh_info, since current linkers
2615   // point that to the .got.plt or .got section instead of .plt.
2616   user_id_t plt_id = GetSectionIndexByName(".plt");
2617 
2618   if (!symtab_id || !plt_id)
2619     return 0;
2620 
2621   const ELFSectionHeaderInfo *plt_hdr = GetSectionHeaderByIndex(plt_id);
2622   if (!plt_hdr)
2623     return 0;
2624 
2625   const ELFSectionHeaderInfo *sym_hdr = GetSectionHeaderByIndex(symtab_id);
2626   if (!sym_hdr)
2627     return 0;
2628 
2629   SectionList *section_list = m_sections_up.get();
2630   if (!section_list)
2631     return 0;
2632 
2633   Section *rel_section = section_list->FindSectionByID(rel_id).get();
2634   if (!rel_section)
2635     return 0;
2636 
2637   SectionSP plt_section_sp(section_list->FindSectionByID(plt_id));
2638   if (!plt_section_sp)
2639     return 0;
2640 
2641   Section *symtab = section_list->FindSectionByID(symtab_id).get();
2642   if (!symtab)
2643     return 0;
2644 
2645   // sh_link points to associated string table.
2646   Section *strtab = section_list->FindSectionByID(sym_hdr->sh_link).get();
2647   if (!strtab)
2648     return 0;
2649 
2650   DataExtractor rel_data;
2651   if (!ReadSectionData(rel_section, rel_data))
2652     return 0;
2653 
2654   DataExtractor symtab_data;
2655   if (!ReadSectionData(symtab, symtab_data))
2656     return 0;
2657 
2658   DataExtractor strtab_data;
2659   if (!ReadSectionData(strtab, strtab_data))
2660     return 0;
2661 
2662   unsigned rel_type = PLTRelocationType();
2663   if (!rel_type)
2664     return 0;
2665 
2666   return ParsePLTRelocations(symbol_table, start_id, rel_type, &m_header,
2667                              rel_hdr, plt_hdr, sym_hdr, plt_section_sp,
2668                              rel_data, symtab_data, strtab_data);
2669 }
2670 
ApplyELF64ABS64Relocation(Symtab * symtab,ELFRelocation & rel,DataExtractor & debug_data,Section * rel_section)2671 static void ApplyELF64ABS64Relocation(Symtab *symtab, ELFRelocation &rel,
2672                                       DataExtractor &debug_data,
2673                                       Section *rel_section) {
2674   Symbol *symbol = symtab->FindSymbolByID(ELFRelocation::RelocSymbol64(rel));
2675   if (symbol) {
2676     addr_t value = symbol->GetAddressRef().GetFileAddress();
2677     DataBufferSP &data_buffer_sp = debug_data.GetSharedDataBuffer();
2678     // ObjectFileELF creates a WritableDataBuffer in CreateInstance.
2679     WritableDataBuffer *data_buffer =
2680         llvm::cast<WritableDataBuffer>(data_buffer_sp.get());
2681     uint64_t *dst = reinterpret_cast<uint64_t *>(
2682         data_buffer->GetBytes() + rel_section->GetFileOffset() +
2683         ELFRelocation::RelocOffset64(rel));
2684     uint64_t val_offset = value + ELFRelocation::RelocAddend64(rel);
2685     memcpy(dst, &val_offset, sizeof(uint64_t));
2686   }
2687 }
2688 
ApplyELF64ABS32Relocation(Symtab * symtab,ELFRelocation & rel,DataExtractor & debug_data,Section * rel_section,bool is_signed)2689 static void ApplyELF64ABS32Relocation(Symtab *symtab, ELFRelocation &rel,
2690                                       DataExtractor &debug_data,
2691                                       Section *rel_section, bool is_signed) {
2692   Symbol *symbol = symtab->FindSymbolByID(ELFRelocation::RelocSymbol64(rel));
2693   if (symbol) {
2694     addr_t value = symbol->GetAddressRef().GetFileAddress();
2695     value += ELFRelocation::RelocAddend32(rel);
2696     if ((!is_signed && (value > UINT32_MAX)) ||
2697         (is_signed &&
2698          ((int64_t)value > INT32_MAX || (int64_t)value < INT32_MIN))) {
2699       Log *log = GetLog(LLDBLog::Modules);
2700       LLDB_LOGF(log, "Failed to apply debug info relocations");
2701       return;
2702     }
2703     uint32_t truncated_addr = (value & 0xFFFFFFFF);
2704     DataBufferSP &data_buffer_sp = debug_data.GetSharedDataBuffer();
2705     // ObjectFileELF creates a WritableDataBuffer in CreateInstance.
2706     WritableDataBuffer *data_buffer =
2707         llvm::cast<WritableDataBuffer>(data_buffer_sp.get());
2708     uint32_t *dst = reinterpret_cast<uint32_t *>(
2709         data_buffer->GetBytes() + rel_section->GetFileOffset() +
2710         ELFRelocation::RelocOffset32(rel));
2711     memcpy(dst, &truncated_addr, sizeof(uint32_t));
2712   }
2713 }
2714 
ApplyELF32ABS32RelRelocation(Symtab * symtab,ELFRelocation & rel,DataExtractor & debug_data,Section * rel_section)2715 static void ApplyELF32ABS32RelRelocation(Symtab *symtab, ELFRelocation &rel,
2716                                          DataExtractor &debug_data,
2717                                          Section *rel_section) {
2718   Log *log = GetLog(LLDBLog::Modules);
2719   Symbol *symbol = symtab->FindSymbolByID(ELFRelocation::RelocSymbol32(rel));
2720   if (symbol) {
2721     addr_t value = symbol->GetAddressRef().GetFileAddress();
2722     if (value == LLDB_INVALID_ADDRESS) {
2723       const char *name = symbol->GetName().GetCString();
2724       LLDB_LOGF(log, "Debug info symbol invalid: %s", name);
2725       return;
2726     }
2727     assert(llvm::isUInt<32>(value) && "Valid addresses are 32-bit");
2728     DataBufferSP &data_buffer_sp = debug_data.GetSharedDataBuffer();
2729     // ObjectFileELF creates a WritableDataBuffer in CreateInstance.
2730     WritableDataBuffer *data_buffer =
2731         llvm::cast<WritableDataBuffer>(data_buffer_sp.get());
2732     uint8_t *dst = data_buffer->GetBytes() + rel_section->GetFileOffset() +
2733                    ELFRelocation::RelocOffset32(rel);
2734     // Implicit addend is stored inline as a signed value.
2735     int32_t addend;
2736     memcpy(&addend, dst, sizeof(int32_t));
2737     // The sum must be positive. This extra check prevents UB from overflow in
2738     // the actual range check below.
2739     if (addend < 0 && static_cast<uint32_t>(-addend) > value) {
2740       LLDB_LOGF(log, "Debug info relocation overflow: 0x%" PRIx64,
2741                 static_cast<int64_t>(value) + addend);
2742       return;
2743     }
2744     if (!llvm::isUInt<32>(value + addend)) {
2745       LLDB_LOGF(log, "Debug info relocation out of range: 0x%" PRIx64, value);
2746       return;
2747     }
2748     uint32_t addr = value + addend;
2749     memcpy(dst, &addr, sizeof(uint32_t));
2750   }
2751 }
2752 
ApplyRelocations(Symtab * symtab,const ELFHeader * hdr,const ELFSectionHeader * rel_hdr,const ELFSectionHeader * symtab_hdr,const ELFSectionHeader * debug_hdr,DataExtractor & rel_data,DataExtractor & symtab_data,DataExtractor & debug_data,Section * rel_section)2753 unsigned ObjectFileELF::ApplyRelocations(
2754     Symtab *symtab, const ELFHeader *hdr, const ELFSectionHeader *rel_hdr,
2755     const ELFSectionHeader *symtab_hdr, const ELFSectionHeader *debug_hdr,
2756     DataExtractor &rel_data, DataExtractor &symtab_data,
2757     DataExtractor &debug_data, Section *rel_section) {
2758   ELFRelocation rel(rel_hdr->sh_type);
2759   lldb::addr_t offset = 0;
2760   const unsigned num_relocations = rel_hdr->sh_size / rel_hdr->sh_entsize;
2761   typedef unsigned (*reloc_info_fn)(const ELFRelocation &rel);
2762   reloc_info_fn reloc_type;
2763   reloc_info_fn reloc_symbol;
2764 
2765   if (hdr->Is32Bit()) {
2766     reloc_type = ELFRelocation::RelocType32;
2767     reloc_symbol = ELFRelocation::RelocSymbol32;
2768   } else {
2769     reloc_type = ELFRelocation::RelocType64;
2770     reloc_symbol = ELFRelocation::RelocSymbol64;
2771   }
2772 
2773   for (unsigned i = 0; i < num_relocations; ++i) {
2774     if (!rel.Parse(rel_data, &offset)) {
2775       GetModule()->ReportError(".rel{0}[{1:d}] failed to parse relocation",
2776                                rel_section->GetName().AsCString(), i);
2777       break;
2778     }
2779     Symbol *symbol = nullptr;
2780 
2781     if (hdr->Is32Bit()) {
2782       switch (hdr->e_machine) {
2783       case llvm::ELF::EM_ARM:
2784         switch (reloc_type(rel)) {
2785         case R_ARM_ABS32:
2786           ApplyELF32ABS32RelRelocation(symtab, rel, debug_data, rel_section);
2787           break;
2788         case R_ARM_REL32:
2789           GetModule()->ReportError("unsupported AArch32 relocation:"
2790                                    " .rel{0}[{1}], type {2}",
2791                                    rel_section->GetName().AsCString(), i,
2792                                    reloc_type(rel));
2793           break;
2794         default:
2795           assert(false && "unexpected relocation type");
2796         }
2797         break;
2798       case llvm::ELF::EM_386:
2799         switch (reloc_type(rel)) {
2800         case R_386_32:
2801           symbol = symtab->FindSymbolByID(reloc_symbol(rel));
2802           if (symbol) {
2803             addr_t f_offset =
2804                 rel_section->GetFileOffset() + ELFRelocation::RelocOffset32(rel);
2805             DataBufferSP &data_buffer_sp = debug_data.GetSharedDataBuffer();
2806             // ObjectFileELF creates a WritableDataBuffer in CreateInstance.
2807             WritableDataBuffer *data_buffer =
2808                 llvm::cast<WritableDataBuffer>(data_buffer_sp.get());
2809             uint32_t *dst = reinterpret_cast<uint32_t *>(
2810                 data_buffer->GetBytes() + f_offset);
2811 
2812             addr_t value = symbol->GetAddressRef().GetFileAddress();
2813             if (rel.IsRela()) {
2814               value += ELFRelocation::RelocAddend32(rel);
2815             } else {
2816               value += *dst;
2817             }
2818             *dst = value;
2819           } else {
2820             GetModule()->ReportError(".rel{0}[{1}] unknown symbol id: {2:d}",
2821                                     rel_section->GetName().AsCString(), i,
2822                                     reloc_symbol(rel));
2823           }
2824           break;
2825         case R_386_NONE:
2826         case R_386_PC32:
2827           GetModule()->ReportError("unsupported i386 relocation:"
2828                                    " .rel{0}[{1}], type {2}",
2829                                    rel_section->GetName().AsCString(), i,
2830                                    reloc_type(rel));
2831           break;
2832         default:
2833           assert(false && "unexpected relocation type");
2834           break;
2835         }
2836         break;
2837       default:
2838         GetModule()->ReportError("unsupported 32-bit ELF machine arch: {0}", hdr->e_machine);
2839         break;
2840       }
2841     } else {
2842       switch (hdr->e_machine) {
2843       case llvm::ELF::EM_AARCH64:
2844         switch (reloc_type(rel)) {
2845         case R_AARCH64_ABS64:
2846           ApplyELF64ABS64Relocation(symtab, rel, debug_data, rel_section);
2847           break;
2848         case R_AARCH64_ABS32:
2849           ApplyELF64ABS32Relocation(symtab, rel, debug_data, rel_section, true);
2850           break;
2851         default:
2852           assert(false && "unexpected relocation type");
2853         }
2854         break;
2855       case llvm::ELF::EM_LOONGARCH:
2856         switch (reloc_type(rel)) {
2857         case R_LARCH_64:
2858           ApplyELF64ABS64Relocation(symtab, rel, debug_data, rel_section);
2859           break;
2860         case R_LARCH_32:
2861           ApplyELF64ABS32Relocation(symtab, rel, debug_data, rel_section, true);
2862           break;
2863         default:
2864           assert(false && "unexpected relocation type");
2865         }
2866         break;
2867       case llvm::ELF::EM_X86_64:
2868         switch (reloc_type(rel)) {
2869         case R_X86_64_64:
2870           ApplyELF64ABS64Relocation(symtab, rel, debug_data, rel_section);
2871           break;
2872         case R_X86_64_32:
2873           ApplyELF64ABS32Relocation(symtab, rel, debug_data, rel_section,
2874                                     false);
2875           break;
2876         case R_X86_64_32S:
2877           ApplyELF64ABS32Relocation(symtab, rel, debug_data, rel_section, true);
2878           break;
2879         case R_X86_64_PC32:
2880         default:
2881           assert(false && "unexpected relocation type");
2882         }
2883         break;
2884       default:
2885         GetModule()->ReportError("unsupported 64-bit ELF machine arch: {0}", hdr->e_machine);
2886         break;
2887       }
2888     }
2889   }
2890 
2891   return 0;
2892 }
2893 
RelocateDebugSections(const ELFSectionHeader * rel_hdr,user_id_t rel_id,lldb_private::Symtab * thetab)2894 unsigned ObjectFileELF::RelocateDebugSections(const ELFSectionHeader *rel_hdr,
2895                                               user_id_t rel_id,
2896                                               lldb_private::Symtab *thetab) {
2897   assert(rel_hdr->sh_type == SHT_RELA || rel_hdr->sh_type == SHT_REL);
2898 
2899   // Parse in the section list if needed.
2900   SectionList *section_list = GetSectionList();
2901   if (!section_list)
2902     return 0;
2903 
2904   user_id_t symtab_id = rel_hdr->sh_link;
2905   user_id_t debug_id = rel_hdr->sh_info;
2906 
2907   const ELFSectionHeader *symtab_hdr = GetSectionHeaderByIndex(symtab_id);
2908   if (!symtab_hdr)
2909     return 0;
2910 
2911   const ELFSectionHeader *debug_hdr = GetSectionHeaderByIndex(debug_id);
2912   if (!debug_hdr)
2913     return 0;
2914 
2915   Section *rel = section_list->FindSectionByID(rel_id).get();
2916   if (!rel)
2917     return 0;
2918 
2919   Section *symtab = section_list->FindSectionByID(symtab_id).get();
2920   if (!symtab)
2921     return 0;
2922 
2923   Section *debug = section_list->FindSectionByID(debug_id).get();
2924   if (!debug)
2925     return 0;
2926 
2927   DataExtractor rel_data;
2928   DataExtractor symtab_data;
2929   DataExtractor debug_data;
2930 
2931   if (GetData(rel->GetFileOffset(), rel->GetFileSize(), rel_data) &&
2932       GetData(symtab->GetFileOffset(), symtab->GetFileSize(), symtab_data) &&
2933       GetData(debug->GetFileOffset(), debug->GetFileSize(), debug_data)) {
2934     ApplyRelocations(thetab, &m_header, rel_hdr, symtab_hdr, debug_hdr,
2935                      rel_data, symtab_data, debug_data, debug);
2936   }
2937 
2938   return 0;
2939 }
2940 
ParseSymtab(Symtab & lldb_symtab)2941 void ObjectFileELF::ParseSymtab(Symtab &lldb_symtab) {
2942   ModuleSP module_sp(GetModule());
2943   if (!module_sp)
2944     return;
2945 
2946   Progress progress("Parsing symbol table",
2947                     m_file.GetFilename().AsCString("<Unknown>"));
2948   ElapsedTime elapsed(module_sp->GetSymtabParseTime());
2949 
2950   // We always want to use the main object file so we (hopefully) only have one
2951   // cached copy of our symtab, dynamic sections, etc.
2952   ObjectFile *module_obj_file = module_sp->GetObjectFile();
2953   if (module_obj_file && module_obj_file != this)
2954     return module_obj_file->ParseSymtab(lldb_symtab);
2955 
2956   SectionList *section_list = module_sp->GetSectionList();
2957   if (!section_list)
2958     return;
2959 
2960   uint64_t symbol_id = 0;
2961 
2962   // Sharable objects and dynamic executables usually have 2 distinct symbol
2963   // tables, one named ".symtab", and the other ".dynsym". The dynsym is a
2964   // smaller version of the symtab that only contains global symbols. The
2965   // information found in the dynsym is therefore also found in the symtab,
2966   // while the reverse is not necessarily true.
2967   Section *symtab =
2968       section_list->FindSectionByType(eSectionTypeELFSymbolTable, true).get();
2969   if (symtab) {
2970     auto [num_symbols, address_class_map] =
2971         ParseSymbolTable(&lldb_symtab, symbol_id, symtab);
2972     m_address_class_map.merge(address_class_map);
2973     symbol_id += num_symbols;
2974   }
2975 
2976   // The symtab section is non-allocable and can be stripped, while the
2977   // .dynsym section which should always be always be there. To support the
2978   // minidebuginfo case we parse .dynsym when there's a .gnu_debuginfo
2979   // section, nomatter if .symtab was already parsed or not. This is because
2980   // minidebuginfo normally removes the .symtab symbols which have their
2981   // matching .dynsym counterparts.
2982   if (!symtab ||
2983       GetSectionList()->FindSectionByName(ConstString(".gnu_debugdata"))) {
2984     Section *dynsym =
2985         section_list->FindSectionByType(eSectionTypeELFDynamicSymbols, true)
2986             .get();
2987     if (dynsym) {
2988       auto [num_symbols, address_class_map] =
2989           ParseSymbolTable(&lldb_symtab, symbol_id, dynsym);
2990       symbol_id += num_symbols;
2991       m_address_class_map.merge(address_class_map);
2992     } else {
2993       // Try and read the dynamic symbol table from the .dynamic section.
2994       uint32_t dynamic_num_symbols = 0;
2995       std::optional<DataExtractor> symtab_data =
2996           GetDynsymDataFromDynamic(dynamic_num_symbols);
2997       std::optional<DataExtractor> strtab_data = GetDynstrData();
2998       if (symtab_data && strtab_data) {
2999         auto [num_symbols_parsed, address_class_map] = ParseSymbols(
3000             &lldb_symtab, symbol_id, section_list, dynamic_num_symbols,
3001             symtab_data.value(), strtab_data.value());
3002         symbol_id += num_symbols_parsed;
3003         m_address_class_map.merge(address_class_map);
3004       }
3005     }
3006   }
3007 
3008   // DT_JMPREL
3009   //      If present, this entry's d_ptr member holds the address of
3010   //      relocation
3011   //      entries associated solely with the procedure linkage table.
3012   //      Separating
3013   //      these relocation entries lets the dynamic linker ignore them during
3014   //      process initialization, if lazy binding is enabled. If this entry is
3015   //      present, the related entries of types DT_PLTRELSZ and DT_PLTREL must
3016   //      also be present.
3017   const ELFDynamic *symbol = FindDynamicSymbol(DT_JMPREL);
3018   if (symbol) {
3019     // Synthesize trampoline symbols to help navigate the PLT.
3020     addr_t addr = symbol->d_ptr;
3021     Section *reloc_section =
3022         section_list->FindSectionContainingFileAddress(addr).get();
3023     if (reloc_section) {
3024       user_id_t reloc_id = reloc_section->GetID();
3025       const ELFSectionHeaderInfo *reloc_header =
3026           GetSectionHeaderByIndex(reloc_id);
3027       if (reloc_header)
3028         ParseTrampolineSymbols(&lldb_symtab, symbol_id, reloc_header, reloc_id);
3029     }
3030   }
3031 
3032   if (DWARFCallFrameInfo *eh_frame =
3033           GetModule()->GetUnwindTable().GetEHFrameInfo()) {
3034     ParseUnwindSymbols(&lldb_symtab, eh_frame);
3035   }
3036 
3037   // In the event that there's no symbol entry for the entry point we'll
3038   // artificially create one. We delegate to the symtab object the figuring
3039   // out of the proper size, this will usually make it span til the next
3040   // symbol it finds in the section. This means that if there are missing
3041   // symbols the entry point might span beyond its function definition.
3042   // We're fine with this as it doesn't make it worse than not having a
3043   // symbol entry at all.
3044   if (CalculateType() == eTypeExecutable) {
3045     ArchSpec arch = GetArchitecture();
3046     auto entry_point_addr = GetEntryPointAddress();
3047     bool is_valid_entry_point =
3048         entry_point_addr.IsValid() && entry_point_addr.IsSectionOffset();
3049     addr_t entry_point_file_addr = entry_point_addr.GetFileAddress();
3050     if (is_valid_entry_point && !lldb_symtab.FindSymbolContainingFileAddress(
3051                                     entry_point_file_addr)) {
3052       uint64_t symbol_id = lldb_symtab.GetNumSymbols();
3053       // Don't set the name for any synthetic symbols, the Symbol
3054       // object will generate one if needed when the name is accessed
3055       // via accessors.
3056       SectionSP section_sp = entry_point_addr.GetSection();
3057       Symbol symbol(
3058           /*symID=*/symbol_id,
3059           /*name=*/llvm::StringRef(), // Name will be auto generated.
3060           /*type=*/eSymbolTypeCode,
3061           /*external=*/true,
3062           /*is_debug=*/false,
3063           /*is_trampoline=*/false,
3064           /*is_artificial=*/true,
3065           /*section_sp=*/section_sp,
3066           /*offset=*/0,
3067           /*size=*/0, // FDE can span multiple symbols so don't use its size.
3068           /*size_is_valid=*/false,
3069           /*contains_linker_annotations=*/false,
3070           /*flags=*/0);
3071       // When the entry point is arm thumb we need to explicitly set its
3072       // class address to reflect that. This is important because expression
3073       // evaluation relies on correctly setting a breakpoint at this
3074       // address.
3075       if (arch.GetMachine() == llvm::Triple::arm &&
3076           (entry_point_file_addr & 1)) {
3077         symbol.GetAddressRef().SetOffset(entry_point_addr.GetOffset() ^ 1);
3078         m_address_class_map[entry_point_file_addr ^ 1] =
3079             AddressClass::eCodeAlternateISA;
3080       } else {
3081         m_address_class_map[entry_point_file_addr] = AddressClass::eCode;
3082       }
3083       lldb_symtab.AddSymbol(symbol);
3084     }
3085   }
3086 }
3087 
RelocateSection(lldb_private::Section * section)3088 void ObjectFileELF::RelocateSection(lldb_private::Section *section)
3089 {
3090   static const char *debug_prefix = ".debug";
3091 
3092   // Set relocated bit so we stop getting called, regardless of whether we
3093   // actually relocate.
3094   section->SetIsRelocated(true);
3095 
3096   // We only relocate in ELF relocatable files
3097   if (CalculateType() != eTypeObjectFile)
3098     return;
3099 
3100   const char *section_name = section->GetName().GetCString();
3101   // Can't relocate that which can't be named
3102   if (section_name == nullptr)
3103     return;
3104 
3105   // We don't relocate non-debug sections at the moment
3106   if (strncmp(section_name, debug_prefix, strlen(debug_prefix)))
3107     return;
3108 
3109   // Relocation section names to look for
3110   std::string needle = std::string(".rel") + section_name;
3111   std::string needlea = std::string(".rela") + section_name;
3112 
3113   for (SectionHeaderCollIter I = m_section_headers.begin();
3114        I != m_section_headers.end(); ++I) {
3115     if (I->sh_type == SHT_RELA || I->sh_type == SHT_REL) {
3116       const char *hay_name = I->section_name.GetCString();
3117       if (hay_name == nullptr)
3118         continue;
3119       if (needle == hay_name || needlea == hay_name) {
3120         const ELFSectionHeader &reloc_header = *I;
3121         user_id_t reloc_id = SectionIndex(I);
3122         RelocateDebugSections(&reloc_header, reloc_id, GetSymtab());
3123         break;
3124       }
3125     }
3126   }
3127 }
3128 
ParseUnwindSymbols(Symtab * symbol_table,DWARFCallFrameInfo * eh_frame)3129 void ObjectFileELF::ParseUnwindSymbols(Symtab *symbol_table,
3130                                        DWARFCallFrameInfo *eh_frame) {
3131   SectionList *section_list = GetSectionList();
3132   if (!section_list)
3133     return;
3134 
3135   // First we save the new symbols into a separate list and add them to the
3136   // symbol table after we collected all symbols we want to add. This is
3137   // neccessary because adding a new symbol invalidates the internal index of
3138   // the symtab what causing the next lookup to be slow because it have to
3139   // recalculate the index first.
3140   std::vector<Symbol> new_symbols;
3141 
3142   size_t num_symbols = symbol_table->GetNumSymbols();
3143   uint64_t last_symbol_id =
3144       num_symbols ? symbol_table->SymbolAtIndex(num_symbols - 1)->GetID() : 0;
3145   eh_frame->ForEachFDEEntries([&](lldb::addr_t file_addr, uint32_t size,
3146                                   dw_offset_t) {
3147     Symbol *symbol = symbol_table->FindSymbolAtFileAddress(file_addr);
3148     if (symbol) {
3149       if (!symbol->GetByteSizeIsValid()) {
3150         symbol->SetByteSize(size);
3151         symbol->SetSizeIsSynthesized(true);
3152       }
3153     } else {
3154       SectionSP section_sp =
3155           section_list->FindSectionContainingFileAddress(file_addr);
3156       if (section_sp) {
3157         addr_t offset = file_addr - section_sp->GetFileAddress();
3158         uint64_t symbol_id = ++last_symbol_id;
3159         // Don't set the name for any synthetic symbols, the Symbol
3160         // object will generate one if needed when the name is accessed
3161         // via accessors.
3162         Symbol eh_symbol(
3163             /*symID=*/symbol_id,
3164             /*name=*/llvm::StringRef(), // Name will be auto generated.
3165             /*type=*/eSymbolTypeCode,
3166             /*external=*/true,
3167             /*is_debug=*/false,
3168             /*is_trampoline=*/false,
3169             /*is_artificial=*/true,
3170             /*section_sp=*/section_sp,
3171             /*offset=*/offset,
3172             /*size=*/0, // FDE can span multiple symbols so don't use its size.
3173             /*size_is_valid=*/false,
3174             /*contains_linker_annotations=*/false,
3175             /*flags=*/0);
3176         new_symbols.push_back(eh_symbol);
3177       }
3178     }
3179     return true;
3180   });
3181 
3182   for (const Symbol &s : new_symbols)
3183     symbol_table->AddSymbol(s);
3184 }
3185 
IsStripped()3186 bool ObjectFileELF::IsStripped() {
3187   // TODO: determine this for ELF
3188   return false;
3189 }
3190 
3191 //===----------------------------------------------------------------------===//
3192 // Dump
3193 //
3194 // Dump the specifics of the runtime file container (such as any headers
3195 // segments, sections, etc).
Dump(Stream * s)3196 void ObjectFileELF::Dump(Stream *s) {
3197   ModuleSP module_sp(GetModule());
3198   if (!module_sp) {
3199     return;
3200   }
3201 
3202   std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
3203   s->Printf("%p: ", static_cast<void *>(this));
3204   s->Indent();
3205   s->PutCString("ObjectFileELF");
3206 
3207   ArchSpec header_arch = GetArchitecture();
3208 
3209   *s << ", file = '" << m_file
3210      << "', arch = " << header_arch.GetArchitectureName();
3211   if (m_memory_addr != LLDB_INVALID_ADDRESS)
3212     s->Printf(", addr = %#16.16" PRIx64, m_memory_addr);
3213   s->EOL();
3214 
3215   DumpELFHeader(s, m_header);
3216   s->EOL();
3217   DumpELFProgramHeaders(s);
3218   s->EOL();
3219   DumpELFSectionHeaders(s);
3220   s->EOL();
3221   SectionList *section_list = GetSectionList();
3222   if (section_list)
3223     section_list->Dump(s->AsRawOstream(), s->GetIndentLevel(), nullptr, true,
3224                        UINT32_MAX);
3225   Symtab *symtab = GetSymtab();
3226   if (symtab)
3227     symtab->Dump(s, nullptr, eSortOrderNone);
3228   s->EOL();
3229   DumpDependentModules(s);
3230   s->EOL();
3231   DumpELFDynamic(s);
3232   s->EOL();
3233   Address image_info_addr = GetImageInfoAddress(nullptr);
3234   if (image_info_addr.IsValid())
3235     s->Printf("image_info_address = %#16.16" PRIx64 "\n",
3236               image_info_addr.GetFileAddress());
3237 }
3238 
3239 // DumpELFHeader
3240 //
3241 // Dump the ELF header to the specified output stream
DumpELFHeader(Stream * s,const ELFHeader & header)3242 void ObjectFileELF::DumpELFHeader(Stream *s, const ELFHeader &header) {
3243   s->PutCString("ELF Header\n");
3244   s->Printf("e_ident[EI_MAG0   ] = 0x%2.2x\n", header.e_ident[EI_MAG0]);
3245   s->Printf("e_ident[EI_MAG1   ] = 0x%2.2x '%c'\n", header.e_ident[EI_MAG1],
3246             header.e_ident[EI_MAG1]);
3247   s->Printf("e_ident[EI_MAG2   ] = 0x%2.2x '%c'\n", header.e_ident[EI_MAG2],
3248             header.e_ident[EI_MAG2]);
3249   s->Printf("e_ident[EI_MAG3   ] = 0x%2.2x '%c'\n", header.e_ident[EI_MAG3],
3250             header.e_ident[EI_MAG3]);
3251 
3252   s->Printf("e_ident[EI_CLASS  ] = 0x%2.2x\n", header.e_ident[EI_CLASS]);
3253   s->Printf("e_ident[EI_DATA   ] = 0x%2.2x ", header.e_ident[EI_DATA]);
3254   DumpELFHeader_e_ident_EI_DATA(s, header.e_ident[EI_DATA]);
3255   s->Printf("\ne_ident[EI_VERSION] = 0x%2.2x\n", header.e_ident[EI_VERSION]);
3256   s->Printf("e_ident[EI_PAD    ] = 0x%2.2x\n", header.e_ident[EI_PAD]);
3257 
3258   s->Printf("e_type      = 0x%4.4x ", header.e_type);
3259   DumpELFHeader_e_type(s, header.e_type);
3260   s->Printf("\ne_machine   = 0x%4.4x\n", header.e_machine);
3261   s->Printf("e_version   = 0x%8.8x\n", header.e_version);
3262   s->Printf("e_entry     = 0x%8.8" PRIx64 "\n", header.e_entry);
3263   s->Printf("e_phoff     = 0x%8.8" PRIx64 "\n", header.e_phoff);
3264   s->Printf("e_shoff     = 0x%8.8" PRIx64 "\n", header.e_shoff);
3265   s->Printf("e_flags     = 0x%8.8x\n", header.e_flags);
3266   s->Printf("e_ehsize    = 0x%4.4x\n", header.e_ehsize);
3267   s->Printf("e_phentsize = 0x%4.4x\n", header.e_phentsize);
3268   s->Printf("e_phnum     = 0x%8.8x\n", header.e_phnum);
3269   s->Printf("e_shentsize = 0x%4.4x\n", header.e_shentsize);
3270   s->Printf("e_shnum     = 0x%8.8x\n", header.e_shnum);
3271   s->Printf("e_shstrndx  = 0x%8.8x\n", header.e_shstrndx);
3272 }
3273 
3274 // DumpELFHeader_e_type
3275 //
3276 // Dump an token value for the ELF header member e_type
DumpELFHeader_e_type(Stream * s,elf_half e_type)3277 void ObjectFileELF::DumpELFHeader_e_type(Stream *s, elf_half e_type) {
3278   switch (e_type) {
3279   case ET_NONE:
3280     *s << "ET_NONE";
3281     break;
3282   case ET_REL:
3283     *s << "ET_REL";
3284     break;
3285   case ET_EXEC:
3286     *s << "ET_EXEC";
3287     break;
3288   case ET_DYN:
3289     *s << "ET_DYN";
3290     break;
3291   case ET_CORE:
3292     *s << "ET_CORE";
3293     break;
3294   default:
3295     break;
3296   }
3297 }
3298 
3299 // DumpELFHeader_e_ident_EI_DATA
3300 //
3301 // Dump an token value for the ELF header member e_ident[EI_DATA]
DumpELFHeader_e_ident_EI_DATA(Stream * s,unsigned char ei_data)3302 void ObjectFileELF::DumpELFHeader_e_ident_EI_DATA(Stream *s,
3303                                                   unsigned char ei_data) {
3304   switch (ei_data) {
3305   case ELFDATANONE:
3306     *s << "ELFDATANONE";
3307     break;
3308   case ELFDATA2LSB:
3309     *s << "ELFDATA2LSB - Little Endian";
3310     break;
3311   case ELFDATA2MSB:
3312     *s << "ELFDATA2MSB - Big Endian";
3313     break;
3314   default:
3315     break;
3316   }
3317 }
3318 
3319 // DumpELFProgramHeader
3320 //
3321 // Dump a single ELF program header to the specified output stream
DumpELFProgramHeader(Stream * s,const ELFProgramHeader & ph)3322 void ObjectFileELF::DumpELFProgramHeader(Stream *s,
3323                                          const ELFProgramHeader &ph) {
3324   DumpELFProgramHeader_p_type(s, ph.p_type);
3325   s->Printf(" %8.8" PRIx64 " %8.8" PRIx64 " %8.8" PRIx64, ph.p_offset,
3326             ph.p_vaddr, ph.p_paddr);
3327   s->Printf(" %8.8" PRIx64 " %8.8" PRIx64 " %8.8x (", ph.p_filesz, ph.p_memsz,
3328             ph.p_flags);
3329 
3330   DumpELFProgramHeader_p_flags(s, ph.p_flags);
3331   s->Printf(") %8.8" PRIx64, ph.p_align);
3332 }
3333 
3334 // DumpELFProgramHeader_p_type
3335 //
3336 // Dump an token value for the ELF program header member p_type which describes
3337 // the type of the program header
DumpELFProgramHeader_p_type(Stream * s,elf_word p_type)3338 void ObjectFileELF::DumpELFProgramHeader_p_type(Stream *s, elf_word p_type) {
3339   const int kStrWidth = 15;
3340   switch (p_type) {
3341     CASE_AND_STREAM(s, PT_NULL, kStrWidth);
3342     CASE_AND_STREAM(s, PT_LOAD, kStrWidth);
3343     CASE_AND_STREAM(s, PT_DYNAMIC, kStrWidth);
3344     CASE_AND_STREAM(s, PT_INTERP, kStrWidth);
3345     CASE_AND_STREAM(s, PT_NOTE, kStrWidth);
3346     CASE_AND_STREAM(s, PT_SHLIB, kStrWidth);
3347     CASE_AND_STREAM(s, PT_PHDR, kStrWidth);
3348     CASE_AND_STREAM(s, PT_TLS, kStrWidth);
3349     CASE_AND_STREAM(s, PT_GNU_EH_FRAME, kStrWidth);
3350   default:
3351     s->Printf("0x%8.8x%*s", p_type, kStrWidth - 10, "");
3352     break;
3353   }
3354 }
3355 
3356 // DumpELFProgramHeader_p_flags
3357 //
3358 // Dump an token value for the ELF program header member p_flags
DumpELFProgramHeader_p_flags(Stream * s,elf_word p_flags)3359 void ObjectFileELF::DumpELFProgramHeader_p_flags(Stream *s, elf_word p_flags) {
3360   *s << ((p_flags & PF_X) ? "PF_X" : "    ")
3361      << (((p_flags & PF_X) && (p_flags & PF_W)) ? '+' : ' ')
3362      << ((p_flags & PF_W) ? "PF_W" : "    ")
3363      << (((p_flags & PF_W) && (p_flags & PF_R)) ? '+' : ' ')
3364      << ((p_flags & PF_R) ? "PF_R" : "    ");
3365 }
3366 
3367 // DumpELFProgramHeaders
3368 //
3369 // Dump all of the ELF program header to the specified output stream
DumpELFProgramHeaders(Stream * s)3370 void ObjectFileELF::DumpELFProgramHeaders(Stream *s) {
3371   if (!ParseProgramHeaders())
3372     return;
3373 
3374   s->PutCString("Program Headers\n");
3375   s->PutCString("IDX  p_type          p_offset p_vaddr  p_paddr  "
3376                 "p_filesz p_memsz  p_flags                   p_align\n");
3377   s->PutCString("==== --------------- -------- -------- -------- "
3378                 "-------- -------- ------------------------- --------\n");
3379 
3380   for (const auto &H : llvm::enumerate(m_program_headers)) {
3381     s->Format("[{0,2}] ", H.index());
3382     ObjectFileELF::DumpELFProgramHeader(s, H.value());
3383     s->EOL();
3384   }
3385 }
3386 
3387 // DumpELFSectionHeader
3388 //
3389 // Dump a single ELF section header to the specified output stream
DumpELFSectionHeader(Stream * s,const ELFSectionHeaderInfo & sh)3390 void ObjectFileELF::DumpELFSectionHeader(Stream *s,
3391                                          const ELFSectionHeaderInfo &sh) {
3392   s->Printf("%8.8x ", sh.sh_name);
3393   DumpELFSectionHeader_sh_type(s, sh.sh_type);
3394   s->Printf(" %8.8" PRIx64 " (", sh.sh_flags);
3395   DumpELFSectionHeader_sh_flags(s, sh.sh_flags);
3396   s->Printf(") %8.8" PRIx64 " %8.8" PRIx64 " %8.8" PRIx64, sh.sh_addr,
3397             sh.sh_offset, sh.sh_size);
3398   s->Printf(" %8.8x %8.8x", sh.sh_link, sh.sh_info);
3399   s->Printf(" %8.8" PRIx64 " %8.8" PRIx64, sh.sh_addralign, sh.sh_entsize);
3400 }
3401 
3402 // DumpELFSectionHeader_sh_type
3403 //
3404 // Dump an token value for the ELF section header member sh_type which
3405 // describes the type of the section
DumpELFSectionHeader_sh_type(Stream * s,elf_word sh_type)3406 void ObjectFileELF::DumpELFSectionHeader_sh_type(Stream *s, elf_word sh_type) {
3407   const int kStrWidth = 12;
3408   switch (sh_type) {
3409     CASE_AND_STREAM(s, SHT_NULL, kStrWidth);
3410     CASE_AND_STREAM(s, SHT_PROGBITS, kStrWidth);
3411     CASE_AND_STREAM(s, SHT_SYMTAB, kStrWidth);
3412     CASE_AND_STREAM(s, SHT_STRTAB, kStrWidth);
3413     CASE_AND_STREAM(s, SHT_RELA, kStrWidth);
3414     CASE_AND_STREAM(s, SHT_HASH, kStrWidth);
3415     CASE_AND_STREAM(s, SHT_DYNAMIC, kStrWidth);
3416     CASE_AND_STREAM(s, SHT_NOTE, kStrWidth);
3417     CASE_AND_STREAM(s, SHT_NOBITS, kStrWidth);
3418     CASE_AND_STREAM(s, SHT_REL, kStrWidth);
3419     CASE_AND_STREAM(s, SHT_SHLIB, kStrWidth);
3420     CASE_AND_STREAM(s, SHT_DYNSYM, kStrWidth);
3421     CASE_AND_STREAM(s, SHT_LOPROC, kStrWidth);
3422     CASE_AND_STREAM(s, SHT_HIPROC, kStrWidth);
3423     CASE_AND_STREAM(s, SHT_LOUSER, kStrWidth);
3424     CASE_AND_STREAM(s, SHT_HIUSER, kStrWidth);
3425   default:
3426     s->Printf("0x%8.8x%*s", sh_type, kStrWidth - 10, "");
3427     break;
3428   }
3429 }
3430 
3431 // DumpELFSectionHeader_sh_flags
3432 //
3433 // Dump an token value for the ELF section header member sh_flags
DumpELFSectionHeader_sh_flags(Stream * s,elf_xword sh_flags)3434 void ObjectFileELF::DumpELFSectionHeader_sh_flags(Stream *s,
3435                                                   elf_xword sh_flags) {
3436   *s << ((sh_flags & SHF_WRITE) ? "WRITE" : "     ")
3437      << (((sh_flags & SHF_WRITE) && (sh_flags & SHF_ALLOC)) ? '+' : ' ')
3438      << ((sh_flags & SHF_ALLOC) ? "ALLOC" : "     ")
3439      << (((sh_flags & SHF_ALLOC) && (sh_flags & SHF_EXECINSTR)) ? '+' : ' ')
3440      << ((sh_flags & SHF_EXECINSTR) ? "EXECINSTR" : "         ");
3441 }
3442 
3443 // DumpELFSectionHeaders
3444 //
3445 // Dump all of the ELF section header to the specified output stream
DumpELFSectionHeaders(Stream * s)3446 void ObjectFileELF::DumpELFSectionHeaders(Stream *s) {
3447   if (!ParseSectionHeaders())
3448     return;
3449 
3450   s->PutCString("Section Headers\n");
3451   s->PutCString("IDX  name     type         flags                            "
3452                 "addr     offset   size     link     info     addralgn "
3453                 "entsize  Name\n");
3454   s->PutCString("==== -------- ------------ -------------------------------- "
3455                 "-------- -------- -------- -------- -------- -------- "
3456                 "-------- ====================\n");
3457 
3458   uint32_t idx = 0;
3459   for (SectionHeaderCollConstIter I = m_section_headers.begin();
3460        I != m_section_headers.end(); ++I, ++idx) {
3461     s->Printf("[%2u] ", idx);
3462     ObjectFileELF::DumpELFSectionHeader(s, *I);
3463     const char *section_name = I->section_name.AsCString("");
3464     if (section_name)
3465       *s << ' ' << section_name << "\n";
3466   }
3467 }
3468 
DumpDependentModules(lldb_private::Stream * s)3469 void ObjectFileELF::DumpDependentModules(lldb_private::Stream *s) {
3470   size_t num_modules = ParseDependentModules();
3471 
3472   if (num_modules > 0) {
3473     s->PutCString("Dependent Modules:\n");
3474     for (unsigned i = 0; i < num_modules; ++i) {
3475       const FileSpec &spec = m_filespec_up->GetFileSpecAtIndex(i);
3476       s->Printf("   %s\n", spec.GetFilename().GetCString());
3477     }
3478   }
3479 }
3480 
getDynamicTagAsString(uint16_t Arch,uint64_t Type)3481 std::string static getDynamicTagAsString(uint16_t Arch, uint64_t Type) {
3482 #define DYNAMIC_STRINGIFY_ENUM(tag, value)                                     \
3483   case value:                                                                  \
3484     return #tag;
3485 
3486 #define DYNAMIC_TAG(n, v)
3487   switch (Arch) {
3488   case llvm::ELF::EM_AARCH64:
3489     switch (Type) {
3490 #define AARCH64_DYNAMIC_TAG(name, value) DYNAMIC_STRINGIFY_ENUM(name, value)
3491 #include "llvm/BinaryFormat/DynamicTags.def"
3492 #undef AARCH64_DYNAMIC_TAG
3493     }
3494     break;
3495 
3496   case llvm::ELF::EM_HEXAGON:
3497     switch (Type) {
3498 #define HEXAGON_DYNAMIC_TAG(name, value) DYNAMIC_STRINGIFY_ENUM(name, value)
3499 #include "llvm/BinaryFormat/DynamicTags.def"
3500 #undef HEXAGON_DYNAMIC_TAG
3501     }
3502     break;
3503 
3504   case llvm::ELF::EM_MIPS:
3505     switch (Type) {
3506 #define MIPS_DYNAMIC_TAG(name, value) DYNAMIC_STRINGIFY_ENUM(name, value)
3507 #include "llvm/BinaryFormat/DynamicTags.def"
3508 #undef MIPS_DYNAMIC_TAG
3509     }
3510     break;
3511 
3512   case llvm::ELF::EM_PPC:
3513     switch (Type) {
3514 #define PPC_DYNAMIC_TAG(name, value) DYNAMIC_STRINGIFY_ENUM(name, value)
3515 #include "llvm/BinaryFormat/DynamicTags.def"
3516 #undef PPC_DYNAMIC_TAG
3517     }
3518     break;
3519 
3520   case llvm::ELF::EM_PPC64:
3521     switch (Type) {
3522 #define PPC64_DYNAMIC_TAG(name, value) DYNAMIC_STRINGIFY_ENUM(name, value)
3523 #include "llvm/BinaryFormat/DynamicTags.def"
3524 #undef PPC64_DYNAMIC_TAG
3525     }
3526     break;
3527 
3528   case llvm::ELF::EM_RISCV:
3529     switch (Type) {
3530 #define RISCV_DYNAMIC_TAG(name, value) DYNAMIC_STRINGIFY_ENUM(name, value)
3531 #include "llvm/BinaryFormat/DynamicTags.def"
3532 #undef RISCV_DYNAMIC_TAG
3533     }
3534     break;
3535   }
3536 #undef DYNAMIC_TAG
3537   switch (Type) {
3538 // Now handle all dynamic tags except the architecture specific ones
3539 #define AARCH64_DYNAMIC_TAG(name, value)
3540 #define MIPS_DYNAMIC_TAG(name, value)
3541 #define HEXAGON_DYNAMIC_TAG(name, value)
3542 #define PPC_DYNAMIC_TAG(name, value)
3543 #define PPC64_DYNAMIC_TAG(name, value)
3544 #define RISCV_DYNAMIC_TAG(name, value)
3545 // Also ignore marker tags such as DT_HIOS (maps to DT_VERNEEDNUM), etc.
3546 #define DYNAMIC_TAG_MARKER(name, value)
3547 #define DYNAMIC_TAG(name, value)                                               \
3548   case value:                                                                  \
3549     return #name;
3550 #include "llvm/BinaryFormat/DynamicTags.def"
3551 #undef DYNAMIC_TAG
3552 #undef AARCH64_DYNAMIC_TAG
3553 #undef MIPS_DYNAMIC_TAG
3554 #undef HEXAGON_DYNAMIC_TAG
3555 #undef PPC_DYNAMIC_TAG
3556 #undef PPC64_DYNAMIC_TAG
3557 #undef RISCV_DYNAMIC_TAG
3558 #undef DYNAMIC_TAG_MARKER
3559 #undef DYNAMIC_STRINGIFY_ENUM
3560   default:
3561     return "<unknown:>0x" + llvm::utohexstr(Type, true);
3562   }
3563 }
3564 
DumpELFDynamic(lldb_private::Stream * s)3565 void ObjectFileELF::DumpELFDynamic(lldb_private::Stream *s) {
3566   ParseDynamicSymbols();
3567   if (m_dynamic_symbols.empty())
3568     return;
3569 
3570   s->PutCString(".dynamic:\n");
3571   s->PutCString("IDX  d_tag            d_val/d_ptr\n");
3572   s->PutCString("==== ---------------- ------------------\n");
3573   uint32_t idx = 0;
3574   for (const auto &entry : m_dynamic_symbols) {
3575     s->Printf("[%2u] ", idx++);
3576     s->Printf(
3577         "%-16s 0x%16.16" PRIx64,
3578         getDynamicTagAsString(m_header.e_machine, entry.symbol.d_tag).c_str(),
3579         entry.symbol.d_ptr);
3580     if (!entry.name.empty())
3581       s->Printf(" \"%s\"", entry.name.c_str());
3582     s->EOL();
3583   }
3584 }
3585 
GetArchitecture()3586 ArchSpec ObjectFileELF::GetArchitecture() {
3587   if (!ParseHeader())
3588     return ArchSpec();
3589 
3590   if (m_section_headers.empty()) {
3591     // Allow elf notes to be parsed which may affect the detected architecture.
3592     ParseSectionHeaders();
3593   }
3594 
3595   if (CalculateType() == eTypeCoreFile &&
3596       !m_arch_spec.TripleOSWasSpecified()) {
3597     // Core files don't have section headers yet they have PT_NOTE program
3598     // headers that might shed more light on the architecture
3599     for (const elf::ELFProgramHeader &H : ProgramHeaders()) {
3600       if (H.p_type != PT_NOTE || H.p_offset == 0 || H.p_filesz == 0)
3601         continue;
3602       DataExtractor data;
3603       if (data.SetData(m_data, H.p_offset, H.p_filesz) == H.p_filesz) {
3604         UUID uuid;
3605         RefineModuleDetailsFromNote(data, m_arch_spec, uuid);
3606       }
3607     }
3608   }
3609   return m_arch_spec;
3610 }
3611 
CalculateType()3612 ObjectFile::Type ObjectFileELF::CalculateType() {
3613   switch (m_header.e_type) {
3614   case llvm::ELF::ET_NONE:
3615     // 0 - No file type
3616     return eTypeUnknown;
3617 
3618   case llvm::ELF::ET_REL:
3619     // 1 - Relocatable file
3620     return eTypeObjectFile;
3621 
3622   case llvm::ELF::ET_EXEC:
3623     // 2 - Executable file
3624     return eTypeExecutable;
3625 
3626   case llvm::ELF::ET_DYN:
3627     // 3 - Shared object file
3628     return eTypeSharedLibrary;
3629 
3630   case ET_CORE:
3631     // 4 - Core file
3632     return eTypeCoreFile;
3633 
3634   default:
3635     break;
3636   }
3637   return eTypeUnknown;
3638 }
3639 
CalculateStrata()3640 ObjectFile::Strata ObjectFileELF::CalculateStrata() {
3641   switch (m_header.e_type) {
3642   case llvm::ELF::ET_NONE:
3643     // 0 - No file type
3644     return eStrataUnknown;
3645 
3646   case llvm::ELF::ET_REL:
3647     // 1 - Relocatable file
3648     return eStrataUnknown;
3649 
3650   case llvm::ELF::ET_EXEC:
3651     // 2 - Executable file
3652     {
3653       SectionList *section_list = GetSectionList();
3654       if (section_list) {
3655         static ConstString loader_section_name(".interp");
3656         SectionSP loader_section =
3657             section_list->FindSectionByName(loader_section_name);
3658         if (loader_section) {
3659           char buffer[256];
3660           size_t read_size =
3661               ReadSectionData(loader_section.get(), 0, buffer, sizeof(buffer));
3662 
3663           // We compare the content of .interp section
3664           // It will contains \0 when counting read_size, so the size needs to
3665           // decrease by one
3666           llvm::StringRef loader_name(buffer, read_size - 1);
3667           llvm::StringRef freebsd_kernel_loader_name("/red/herring");
3668           if (loader_name == freebsd_kernel_loader_name)
3669             return eStrataKernel;
3670         }
3671       }
3672       return eStrataUser;
3673     }
3674 
3675   case llvm::ELF::ET_DYN:
3676     // 3 - Shared object file
3677     // TODO: is there any way to detect that an shared library is a kernel
3678     // related executable by inspecting the program headers, section headers,
3679     // symbols, or any other flag bits???
3680     return eStrataUnknown;
3681 
3682   case ET_CORE:
3683     // 4 - Core file
3684     // TODO: is there any way to detect that an core file is a kernel
3685     // related executable by inspecting the program headers, section headers,
3686     // symbols, or any other flag bits???
3687     return eStrataUnknown;
3688 
3689   default:
3690     break;
3691   }
3692   return eStrataUnknown;
3693 }
3694 
ReadSectionData(Section * section,lldb::offset_t section_offset,void * dst,size_t dst_len)3695 size_t ObjectFileELF::ReadSectionData(Section *section,
3696                        lldb::offset_t section_offset, void *dst,
3697                        size_t dst_len) {
3698   // If some other objectfile owns this data, pass this to them.
3699   if (section->GetObjectFile() != this)
3700     return section->GetObjectFile()->ReadSectionData(section, section_offset,
3701                                                      dst, dst_len);
3702 
3703   if (!section->Test(SHF_COMPRESSED))
3704     return ObjectFile::ReadSectionData(section, section_offset, dst, dst_len);
3705 
3706   // For compressed sections we need to read to full data to be able to
3707   // decompress.
3708   DataExtractor data;
3709   ReadSectionData(section, data);
3710   return data.CopyData(section_offset, dst_len, dst);
3711 }
3712 
ReadSectionData(Section * section,DataExtractor & section_data)3713 size_t ObjectFileELF::ReadSectionData(Section *section,
3714                                       DataExtractor &section_data) {
3715   // If some other objectfile owns this data, pass this to them.
3716   if (section->GetObjectFile() != this)
3717     return section->GetObjectFile()->ReadSectionData(section, section_data);
3718 
3719   size_t result = ObjectFile::ReadSectionData(section, section_data);
3720   if (result == 0 || !(section->Get() & llvm::ELF::SHF_COMPRESSED))
3721     return result;
3722 
3723   auto Decompressor = llvm::object::Decompressor::create(
3724       section->GetName().GetStringRef(),
3725       {reinterpret_cast<const char *>(section_data.GetDataStart()),
3726        size_t(section_data.GetByteSize())},
3727       GetByteOrder() == eByteOrderLittle, GetAddressByteSize() == 8);
3728   if (!Decompressor) {
3729     GetModule()->ReportWarning(
3730         "Unable to initialize decompressor for section '{0}': {1}",
3731         section->GetName().GetCString(),
3732         llvm::toString(Decompressor.takeError()).c_str());
3733     section_data.Clear();
3734     return 0;
3735   }
3736 
3737   auto buffer_sp =
3738       std::make_shared<DataBufferHeap>(Decompressor->getDecompressedSize(), 0);
3739   if (auto error = Decompressor->decompress(
3740           {buffer_sp->GetBytes(), size_t(buffer_sp->GetByteSize())})) {
3741     GetModule()->ReportWarning("Decompression of section '{0}' failed: {1}",
3742                                section->GetName().GetCString(),
3743                                llvm::toString(std::move(error)).c_str());
3744     section_data.Clear();
3745     return 0;
3746   }
3747 
3748   section_data.SetData(buffer_sp);
3749   return buffer_sp->GetByteSize();
3750 }
3751 
ProgramHeaders()3752 llvm::ArrayRef<ELFProgramHeader> ObjectFileELF::ProgramHeaders() {
3753   ParseProgramHeaders();
3754   return m_program_headers;
3755 }
3756 
GetSegmentData(const ELFProgramHeader & H)3757 DataExtractor ObjectFileELF::GetSegmentData(const ELFProgramHeader &H) {
3758   // Try and read the program header from our cached m_data which can come from
3759   // the file on disk being mmap'ed or from the initial part of the ELF file we
3760   // read from memory and cached.
3761   DataExtractor data = DataExtractor(m_data, H.p_offset, H.p_filesz);
3762   if (data.GetByteSize() == H.p_filesz)
3763     return data;
3764   if (IsInMemory()) {
3765     // We have a ELF file in process memory, read the program header data from
3766     // the process.
3767     if (ProcessSP process_sp = m_process_wp.lock()) {
3768       const lldb::offset_t base_file_addr = GetBaseAddress().GetFileAddress();
3769       const addr_t load_bias = m_memory_addr - base_file_addr;
3770       const addr_t data_addr = H.p_vaddr + load_bias;
3771       if (DataBufferSP data_sp = ReadMemory(process_sp, data_addr, H.p_memsz))
3772         return DataExtractor(data_sp, GetByteOrder(), GetAddressByteSize());
3773     }
3774   }
3775   return DataExtractor();
3776 }
3777 
AnySegmentHasPhysicalAddress()3778 bool ObjectFileELF::AnySegmentHasPhysicalAddress() {
3779   for (const ELFProgramHeader &H : ProgramHeaders()) {
3780     if (H.p_paddr != 0)
3781       return true;
3782   }
3783   return false;
3784 }
3785 
3786 std::vector<ObjectFile::LoadableData>
GetLoadableData(Target & target)3787 ObjectFileELF::GetLoadableData(Target &target) {
3788   // Create a list of loadable data from loadable segments, using physical
3789   // addresses if they aren't all null
3790   std::vector<LoadableData> loadables;
3791   bool should_use_paddr = AnySegmentHasPhysicalAddress();
3792   for (const ELFProgramHeader &H : ProgramHeaders()) {
3793     LoadableData loadable;
3794     if (H.p_type != llvm::ELF::PT_LOAD)
3795       continue;
3796     loadable.Dest = should_use_paddr ? H.p_paddr : H.p_vaddr;
3797     if (loadable.Dest == LLDB_INVALID_ADDRESS)
3798       continue;
3799     if (H.p_filesz == 0)
3800       continue;
3801     auto segment_data = GetSegmentData(H);
3802     loadable.Contents = llvm::ArrayRef<uint8_t>(segment_data.GetDataStart(),
3803                                                 segment_data.GetByteSize());
3804     loadables.push_back(loadable);
3805   }
3806   return loadables;
3807 }
3808 
3809 lldb::WritableDataBufferSP
MapFileDataWritable(const FileSpec & file,uint64_t Size,uint64_t Offset)3810 ObjectFileELF::MapFileDataWritable(const FileSpec &file, uint64_t Size,
3811                                    uint64_t Offset) {
3812   return FileSystem::Instance().CreateWritableDataBuffer(file.GetPath(), Size,
3813                                                          Offset);
3814 }
3815 
3816 std::optional<DataExtractor>
ReadDataFromDynamic(const ELFDynamic * dyn,uint64_t length,uint64_t offset)3817 ObjectFileELF::ReadDataFromDynamic(const ELFDynamic *dyn, uint64_t length,
3818                                    uint64_t offset) {
3819   // ELFDynamic values contain a "d_ptr" member that will be a load address if
3820   // we have an ELF file read from memory, or it will be a file address if it
3821   // was read from a ELF file. This function will correctly fetch data pointed
3822   // to by the ELFDynamic::d_ptr, or return std::nullopt if the data isn't
3823   // available.
3824   const lldb::addr_t d_ptr_addr = dyn->d_ptr + offset;
3825   if (ProcessSP process_sp = m_process_wp.lock()) {
3826     if (DataBufferSP data_sp = ReadMemory(process_sp, d_ptr_addr, length))
3827       return DataExtractor(data_sp, GetByteOrder(), GetAddressByteSize());
3828   } else {
3829     // We have an ELF file with no section headers or we didn't find the
3830     // .dynamic section. Try and find the .dynstr section.
3831     Address addr;
3832     if (!addr.ResolveAddressUsingFileSections(d_ptr_addr, GetSectionList()))
3833       return std::nullopt;
3834     DataExtractor data;
3835     addr.GetSection()->GetSectionData(data);
3836     return DataExtractor(data, d_ptr_addr - addr.GetSection()->GetFileAddress(),
3837                          length);
3838   }
3839   return std::nullopt;
3840 }
3841 
GetDynstrData()3842 std::optional<DataExtractor> ObjectFileELF::GetDynstrData() {
3843   if (SectionList *section_list = GetSectionList()) {
3844     // Find the SHT_DYNAMIC section.
3845     if (Section *dynamic =
3846             section_list
3847                 ->FindSectionByType(eSectionTypeELFDynamicLinkInfo, true)
3848                 .get()) {
3849       assert(dynamic->GetObjectFile() == this);
3850       if (const ELFSectionHeaderInfo *header =
3851               GetSectionHeaderByIndex(dynamic->GetID())) {
3852         // sh_link: section header index of string table used by entries in
3853         // the section.
3854         if (Section *dynstr =
3855                 section_list->FindSectionByID(header->sh_link).get()) {
3856           DataExtractor data;
3857           if (ReadSectionData(dynstr, data))
3858             return data;
3859         }
3860       }
3861     }
3862   }
3863 
3864   // Every ELF file which represents an executable or shared library has
3865   // mandatory .dynamic entries. Two of these values are DT_STRTAB and DT_STRSZ
3866   // and represent the dynamic symbol tables's string table. These are needed
3867   // by the dynamic loader and we can read them from a process' address space.
3868   //
3869   // When loading and ELF file from memory, only the program headers are
3870   // guaranteed end up being mapped into memory, and we can find these values in
3871   // the PT_DYNAMIC segment.
3872   const ELFDynamic *strtab = FindDynamicSymbol(DT_STRTAB);
3873   const ELFDynamic *strsz = FindDynamicSymbol(DT_STRSZ);
3874   if (strtab == nullptr || strsz == nullptr)
3875     return std::nullopt;
3876 
3877   return ReadDataFromDynamic(strtab, strsz->d_val, /*offset=*/0);
3878 }
3879 
GetDynamicData()3880 std::optional<lldb_private::DataExtractor> ObjectFileELF::GetDynamicData() {
3881   DataExtractor data;
3882   // The PT_DYNAMIC program header describes where the .dynamic section is and
3883   // doesn't require parsing section headers. The PT_DYNAMIC is required by
3884   // executables and shared libraries so it will always be available.
3885   for (const ELFProgramHeader &H : ProgramHeaders()) {
3886     if (H.p_type == llvm::ELF::PT_DYNAMIC) {
3887       data = GetSegmentData(H);
3888       if (data.GetByteSize() > 0) {
3889         m_dynamic_base_addr = H.p_vaddr;
3890         return data;
3891       }
3892     }
3893   }
3894   // Fall back to using section headers.
3895   if (SectionList *section_list = GetSectionList()) {
3896     // Find the SHT_DYNAMIC section.
3897     if (Section *dynamic =
3898             section_list
3899                 ->FindSectionByType(eSectionTypeELFDynamicLinkInfo, true)
3900                 .get()) {
3901       assert(dynamic->GetObjectFile() == this);
3902       if (ReadSectionData(dynamic, data)) {
3903         m_dynamic_base_addr = dynamic->GetFileAddress();
3904         return data;
3905       }
3906     }
3907   }
3908   return std::nullopt;
3909 }
3910 
GetNumSymbolsFromDynamicHash()3911 std::optional<uint32_t> ObjectFileELF::GetNumSymbolsFromDynamicHash() {
3912   const ELFDynamic *hash = FindDynamicSymbol(DT_HASH);
3913   if (hash == nullptr)
3914     return std::nullopt;
3915 
3916   // The DT_HASH header looks like this:
3917   struct DtHashHeader {
3918     uint32_t nbucket;
3919     uint32_t nchain;
3920   };
3921   if (auto data = ReadDataFromDynamic(hash, 8)) {
3922     // We don't need the number of buckets value "nbucket", we just need the
3923     // "nchain" value which contains the number of symbols.
3924     offset_t offset = offsetof(DtHashHeader, nchain);
3925     return data->GetU32(&offset);
3926   }
3927 
3928   return std::nullopt;
3929 }
3930 
GetNumSymbolsFromDynamicGnuHash()3931 std::optional<uint32_t> ObjectFileELF::GetNumSymbolsFromDynamicGnuHash() {
3932   const ELFDynamic *gnu_hash = FindDynamicSymbol(DT_GNU_HASH);
3933   if (gnu_hash == nullptr)
3934     return std::nullopt;
3935 
3936   // Create a DT_GNU_HASH header
3937   // https://flapenguin.me/elf-dt-gnu-hash
3938   struct DtGnuHashHeader {
3939     uint32_t nbuckets = 0;
3940     uint32_t symoffset = 0;
3941     uint32_t bloom_size = 0;
3942     uint32_t bloom_shift = 0;
3943   };
3944   uint32_t num_symbols = 0;
3945   // Read enogh data for the DT_GNU_HASH header so we can extract the values.
3946   if (auto data = ReadDataFromDynamic(gnu_hash, sizeof(DtGnuHashHeader))) {
3947     offset_t offset = 0;
3948     DtGnuHashHeader header;
3949     header.nbuckets = data->GetU32(&offset);
3950     header.symoffset = data->GetU32(&offset);
3951     header.bloom_size = data->GetU32(&offset);
3952     header.bloom_shift = data->GetU32(&offset);
3953     const size_t addr_size = GetAddressByteSize();
3954     const addr_t buckets_offset =
3955         sizeof(DtGnuHashHeader) + addr_size * header.bloom_size;
3956     std::vector<uint32_t> buckets;
3957     if (auto bucket_data = ReadDataFromDynamic(gnu_hash, header.nbuckets * 4,
3958                                                buckets_offset)) {
3959       offset = 0;
3960       for (uint32_t i = 0; i < header.nbuckets; ++i)
3961         buckets.push_back(bucket_data->GetU32(&offset));
3962       // Locate the chain that handles the largest index bucket.
3963       uint32_t last_symbol = 0;
3964       for (uint32_t bucket_value : buckets)
3965         last_symbol = std::max(bucket_value, last_symbol);
3966       if (last_symbol < header.symoffset) {
3967         num_symbols = header.symoffset;
3968       } else {
3969         // Walk the bucket's chain to add the chain length to the total.
3970         const addr_t chains_base_offset = buckets_offset + header.nbuckets * 4;
3971         for (;;) {
3972           if (auto chain_entry_data = ReadDataFromDynamic(
3973                   gnu_hash, 4,
3974                   chains_base_offset + (last_symbol - header.symoffset) * 4)) {
3975             offset = 0;
3976             uint32_t chain_entry = chain_entry_data->GetU32(&offset);
3977             ++last_symbol;
3978             // If the low bit is set, this entry is the end of the chain.
3979             if (chain_entry & 1)
3980               break;
3981           } else {
3982             break;
3983           }
3984         }
3985         num_symbols = last_symbol;
3986       }
3987     }
3988   }
3989   if (num_symbols > 0)
3990     return num_symbols;
3991 
3992   return std::nullopt;
3993 }
3994 
3995 std::optional<DataExtractor>
GetDynsymDataFromDynamic(uint32_t & num_symbols)3996 ObjectFileELF::GetDynsymDataFromDynamic(uint32_t &num_symbols) {
3997   // Every ELF file which represents an executable or shared library has
3998   // mandatory .dynamic entries. The DT_SYMTAB value contains a pointer to the
3999   // symbol table, and DT_SYMENT contains the size of a symbol table entry.
4000   // We then can use either the DT_HASH or DT_GNU_HASH to find the number of
4001   // symbols in the symbol table as the symbol count is not stored in the
4002   // .dynamic section as a key/value pair.
4003   //
4004   // When loading and ELF file from memory, only the program headers end up
4005   // being mapped into memory, and we can find these values in the PT_DYNAMIC
4006   // segment.
4007   num_symbols = 0;
4008   // Get the process in case this is an in memory ELF file.
4009   ProcessSP process_sp(m_process_wp.lock());
4010   const ELFDynamic *symtab = FindDynamicSymbol(DT_SYMTAB);
4011   const ELFDynamic *syment = FindDynamicSymbol(DT_SYMENT);
4012   // DT_SYMTAB and DT_SYMENT are mandatory.
4013   if (symtab == nullptr || syment == nullptr)
4014     return std::nullopt;
4015 
4016   if (std::optional<uint32_t> syms = GetNumSymbolsFromDynamicHash())
4017     num_symbols = *syms;
4018   else if (std::optional<uint32_t> syms = GetNumSymbolsFromDynamicGnuHash())
4019     num_symbols = *syms;
4020   else
4021     return std::nullopt;
4022   if (num_symbols == 0)
4023     return std::nullopt;
4024   return ReadDataFromDynamic(symtab, syment->d_val * num_symbols);
4025 }
4026