xref: /freebsd/contrib/llvm-project/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp (revision b23dbabb7f3edb3f323a64f03e37be2c9a8b2a45)
1 //===-- DWARFUnit.cpp -----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "DWARFUnit.h"
10 
11 #include "lldb/Core/Module.h"
12 #include "lldb/Symbol/ObjectFile.h"
13 #include "lldb/Utility/LLDBAssert.h"
14 #include "lldb/Utility/StreamString.h"
15 #include "lldb/Utility/Timer.h"
16 #include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h"
17 #include "llvm/Object/Error.h"
18 
19 #include "DWARFCompileUnit.h"
20 #include "DWARFDebugAranges.h"
21 #include "DWARFDebugInfo.h"
22 #include "DWARFTypeUnit.h"
23 #include "LogChannelDWARF.h"
24 #include "SymbolFileDWARFDwo.h"
25 #include <optional>
26 
27 using namespace lldb;
28 using namespace lldb_private;
29 using namespace lldb_private::dwarf;
30 
31 extern int g_verbose;
32 
33 DWARFUnit::DWARFUnit(SymbolFileDWARF &dwarf, lldb::user_id_t uid,
34                      const DWARFUnitHeader &header,
35                      const DWARFAbbreviationDeclarationSet &abbrevs,
36                      DIERef::Section section, bool is_dwo)
37     : UserID(uid), m_dwarf(dwarf), m_header(header), m_abbrevs(&abbrevs),
38       m_cancel_scopes(false), m_section(section), m_is_dwo(is_dwo),
39       m_has_parsed_non_skeleton_unit(false), m_dwo_id(header.GetDWOId()) {}
40 
41 DWARFUnit::~DWARFUnit() = default;
42 
43 // Parses first DIE of a compile unit, excluding DWO.
44 void DWARFUnit::ExtractUnitDIENoDwoIfNeeded() {
45   {
46     llvm::sys::ScopedReader lock(m_first_die_mutex);
47     if (m_first_die)
48       return; // Already parsed
49   }
50   llvm::sys::ScopedWriter lock(m_first_die_mutex);
51   if (m_first_die)
52     return; // Already parsed
53 
54   ElapsedTime elapsed(m_dwarf.GetDebugInfoParseTimeRef());
55 
56   // Set the offset to that of the first DIE and calculate the start of the
57   // next compilation unit header.
58   lldb::offset_t offset = GetFirstDIEOffset();
59 
60   // We are in our compile unit, parse starting at the offset we were told to
61   // parse
62   const DWARFDataExtractor &data = GetData();
63   if (offset < GetNextUnitOffset() &&
64       m_first_die.Extract(data, this, &offset)) {
65     AddUnitDIE(m_first_die);
66     return;
67   }
68 }
69 
70 // Parses first DIE of a compile unit including DWO.
71 void DWARFUnit::ExtractUnitDIEIfNeeded() {
72   ExtractUnitDIENoDwoIfNeeded();
73 
74   if (m_has_parsed_non_skeleton_unit)
75     return;
76 
77   m_has_parsed_non_skeleton_unit = true;
78   m_dwo_error.Clear();
79 
80   if (!m_dwo_id)
81     return; // No DWO file.
82 
83   std::shared_ptr<SymbolFileDWARFDwo> dwo_symbol_file =
84       m_dwarf.GetDwoSymbolFileForCompileUnit(*this, m_first_die);
85   if (!dwo_symbol_file)
86     return;
87 
88   DWARFUnit *dwo_cu = dwo_symbol_file->GetDWOCompileUnitForHash(*m_dwo_id);
89 
90   if (!dwo_cu) {
91     SetDwoError(Status::createWithFormat(
92         "unable to load .dwo file from \"{0}\" due to ID ({1:x16}) mismatch "
93         "for skeleton DIE at {2:x8}",
94         dwo_symbol_file->GetObjectFile()->GetFileSpec().GetPath().c_str(),
95         *m_dwo_id, m_first_die.GetOffset()));
96     return; // Can't fetch the compile unit from the dwo file.
97   }
98   dwo_cu->SetUserData(this);
99 
100   DWARFBaseDIE dwo_cu_die = dwo_cu->GetUnitDIEOnly();
101   if (!dwo_cu_die.IsValid()) {
102     // Can't fetch the compile unit DIE from the dwo file.
103     SetDwoError(Status::createWithFormat(
104         "unable to extract compile unit DIE from .dwo file for skeleton "
105         "DIE at {0:x16}",
106         m_first_die.GetOffset()));
107     return;
108   }
109 
110   // Here for DWO CU we want to use the address base set in the skeleton unit
111   // (DW_AT_addr_base) if it is available and use the DW_AT_GNU_addr_base
112   // otherwise. We do that because pre-DWARF v5 could use the DW_AT_GNU_*
113   // attributes which were applicable to the DWO units. The corresponding
114   // DW_AT_* attributes standardized in DWARF v5 are also applicable to the
115   // main unit in contrast.
116   if (m_addr_base)
117     dwo_cu->SetAddrBase(*m_addr_base);
118   else if (m_gnu_addr_base)
119     dwo_cu->SetAddrBase(*m_gnu_addr_base);
120 
121   if (GetVersion() <= 4 && m_gnu_ranges_base)
122     dwo_cu->SetRangesBase(*m_gnu_ranges_base);
123   else if (dwo_symbol_file->GetDWARFContext()
124                .getOrLoadRngListsData()
125                .GetByteSize() > 0)
126     dwo_cu->SetRangesBase(llvm::DWARFListTableHeader::getHeaderSize(DWARF32));
127 
128   if (GetVersion() >= 5 &&
129       dwo_symbol_file->GetDWARFContext().getOrLoadLocListsData().GetByteSize() >
130           0)
131     dwo_cu->SetLoclistsBase(llvm::DWARFListTableHeader::getHeaderSize(DWARF32));
132 
133   dwo_cu->SetBaseAddress(GetBaseAddress());
134 
135   m_dwo = std::shared_ptr<DWARFUnit>(std::move(dwo_symbol_file), dwo_cu);
136 }
137 
138 // Parses a compile unit and indexes its DIEs if it hasn't already been done.
139 // It will leave this compile unit extracted forever.
140 void DWARFUnit::ExtractDIEsIfNeeded() {
141   m_cancel_scopes = true;
142 
143   {
144     llvm::sys::ScopedReader lock(m_die_array_mutex);
145     if (!m_die_array.empty())
146       return; // Already parsed
147   }
148   llvm::sys::ScopedWriter lock(m_die_array_mutex);
149   if (!m_die_array.empty())
150     return; // Already parsed
151 
152   ExtractDIEsRWLocked();
153 }
154 
155 // Parses a compile unit and indexes its DIEs if it hasn't already been done.
156 // It will clear this compile unit after returned instance gets out of scope,
157 // no other ScopedExtractDIEs instance is running for this compile unit
158 // and no ExtractDIEsIfNeeded() has been executed during this ScopedExtractDIEs
159 // lifetime.
160 DWARFUnit::ScopedExtractDIEs DWARFUnit::ExtractDIEsScoped() {
161   ScopedExtractDIEs scoped(*this);
162 
163   {
164     llvm::sys::ScopedReader lock(m_die_array_mutex);
165     if (!m_die_array.empty())
166       return scoped; // Already parsed
167   }
168   llvm::sys::ScopedWriter lock(m_die_array_mutex);
169   if (!m_die_array.empty())
170     return scoped; // Already parsed
171 
172   // Otherwise m_die_array would be already populated.
173   lldbassert(!m_cancel_scopes);
174 
175   ExtractDIEsRWLocked();
176   scoped.m_clear_dies = true;
177   return scoped;
178 }
179 
180 DWARFUnit::ScopedExtractDIEs::ScopedExtractDIEs(DWARFUnit &cu) : m_cu(&cu) {
181   m_cu->m_die_array_scoped_mutex.lock_shared();
182 }
183 
184 DWARFUnit::ScopedExtractDIEs::~ScopedExtractDIEs() {
185   if (!m_cu)
186     return;
187   m_cu->m_die_array_scoped_mutex.unlock_shared();
188   if (!m_clear_dies || m_cu->m_cancel_scopes)
189     return;
190   // Be sure no other ScopedExtractDIEs is running anymore.
191   llvm::sys::ScopedWriter lock_scoped(m_cu->m_die_array_scoped_mutex);
192   llvm::sys::ScopedWriter lock(m_cu->m_die_array_mutex);
193   if (m_cu->m_cancel_scopes)
194     return;
195   m_cu->ClearDIEsRWLocked();
196 }
197 
198 DWARFUnit::ScopedExtractDIEs::ScopedExtractDIEs(ScopedExtractDIEs &&rhs)
199     : m_cu(rhs.m_cu), m_clear_dies(rhs.m_clear_dies) {
200   rhs.m_cu = nullptr;
201 }
202 
203 DWARFUnit::ScopedExtractDIEs &DWARFUnit::ScopedExtractDIEs::operator=(
204     DWARFUnit::ScopedExtractDIEs &&rhs) {
205   m_cu = rhs.m_cu;
206   rhs.m_cu = nullptr;
207   m_clear_dies = rhs.m_clear_dies;
208   return *this;
209 }
210 
211 // Parses a compile unit and indexes its DIEs, m_die_array_mutex must be
212 // held R/W and m_die_array must be empty.
213 void DWARFUnit::ExtractDIEsRWLocked() {
214   llvm::sys::ScopedWriter first_die_lock(m_first_die_mutex);
215 
216   ElapsedTime elapsed(m_dwarf.GetDebugInfoParseTimeRef());
217   LLDB_SCOPED_TIMERF(
218       "%s",
219       llvm::formatv("{0:x16}: DWARFUnit::ExtractDIEsIfNeeded()", GetOffset())
220           .str()
221           .c_str());
222 
223   // Set the offset to that of the first DIE and calculate the start of the
224   // next compilation unit header.
225   lldb::offset_t offset = GetFirstDIEOffset();
226   lldb::offset_t next_cu_offset = GetNextUnitOffset();
227 
228   DWARFDebugInfoEntry die;
229 
230   uint32_t depth = 0;
231   // We are in our compile unit, parse starting at the offset we were told to
232   // parse
233   const DWARFDataExtractor &data = GetData();
234   std::vector<uint32_t> die_index_stack;
235   die_index_stack.reserve(32);
236   die_index_stack.push_back(0);
237   bool prev_die_had_children = false;
238   while (offset < next_cu_offset && die.Extract(data, this, &offset)) {
239     const bool null_die = die.IsNULL();
240     if (depth == 0) {
241       assert(m_die_array.empty() && "Compile unit DIE already added");
242 
243       // The average bytes per DIE entry has been seen to be around 14-20 so
244       // lets pre-reserve half of that since we are now stripping the NULL
245       // tags.
246 
247       // Only reserve the memory if we are adding children of the main
248       // compile unit DIE. The compile unit DIE is always the first entry, so
249       // if our size is 1, then we are adding the first compile unit child
250       // DIE and should reserve the memory.
251       m_die_array.reserve(GetDebugInfoSize() / 24);
252       m_die_array.push_back(die);
253 
254       if (!m_first_die)
255         AddUnitDIE(m_die_array.front());
256 
257       // With -fsplit-dwarf-inlining, clang will emit non-empty skeleton compile
258       // units. We are not able to access these DIE *and* the dwo file
259       // simultaneously. We also don't need to do that as the dwo file will
260       // contain a superset of information. So, we don't even attempt to parse
261       // any remaining DIEs.
262       if (m_dwo) {
263         m_die_array.front().SetHasChildren(false);
264         break;
265       }
266 
267     } else {
268       if (null_die) {
269         if (prev_die_had_children) {
270           // This will only happen if a DIE says is has children but all it
271           // contains is a NULL tag. Since we are removing the NULL DIEs from
272           // the list (saves up to 25% in C++ code), we need a way to let the
273           // DIE know that it actually doesn't have children.
274           if (!m_die_array.empty())
275             m_die_array.back().SetHasChildren(false);
276         }
277       } else {
278         die.SetParentIndex(m_die_array.size() - die_index_stack[depth - 1]);
279 
280         if (die_index_stack.back())
281           m_die_array[die_index_stack.back()].SetSiblingIndex(
282               m_die_array.size() - die_index_stack.back());
283 
284         // Only push the DIE if it isn't a NULL DIE
285         m_die_array.push_back(die);
286       }
287     }
288 
289     if (null_die) {
290       // NULL DIE.
291       if (!die_index_stack.empty())
292         die_index_stack.pop_back();
293 
294       if (depth > 0)
295         --depth;
296       prev_die_had_children = false;
297     } else {
298       die_index_stack.back() = m_die_array.size() - 1;
299       // Normal DIE
300       const bool die_has_children = die.HasChildren();
301       if (die_has_children) {
302         die_index_stack.push_back(0);
303         ++depth;
304       }
305       prev_die_had_children = die_has_children;
306     }
307 
308     if (depth == 0)
309       break; // We are done with this compile unit!
310   }
311 
312   if (!m_die_array.empty()) {
313     // The last die cannot have children (if it did, it wouldn't be the last one).
314     // This only makes a difference for malformed dwarf that does not have a
315     // terminating null die.
316     m_die_array.back().SetHasChildren(false);
317 
318     if (m_first_die) {
319       // Only needed for the assertion.
320       m_first_die.SetHasChildren(m_die_array.front().HasChildren());
321       lldbassert(m_first_die == m_die_array.front());
322     }
323     m_first_die = m_die_array.front();
324   }
325 
326   m_die_array.shrink_to_fit();
327 
328   if (m_dwo)
329     m_dwo->ExtractDIEsIfNeeded();
330 }
331 
332 // This is used when a split dwarf is enabled.
333 // A skeleton compilation unit may contain the DW_AT_str_offsets_base attribute
334 // that points to the first string offset of the CU contribution to the
335 // .debug_str_offsets. At the same time, the corresponding split debug unit also
336 // may use DW_FORM_strx* forms pointing to its own .debug_str_offsets.dwo and
337 // for that case, we should find the offset (skip the section header).
338 void DWARFUnit::SetDwoStrOffsetsBase() {
339   lldb::offset_t baseOffset = 0;
340 
341   if (const llvm::DWARFUnitIndex::Entry *entry = m_header.GetIndexEntry()) {
342     if (const auto *contribution =
343             entry->getContribution(llvm::DW_SECT_STR_OFFSETS))
344       baseOffset = contribution->getOffset32();
345     else
346       return;
347   }
348 
349   if (GetVersion() >= 5) {
350     const DWARFDataExtractor &strOffsets =
351         GetSymbolFileDWARF().GetDWARFContext().getOrLoadStrOffsetsData();
352     uint64_t length = strOffsets.GetU32(&baseOffset);
353     if (length == 0xffffffff)
354       length = strOffsets.GetU64(&baseOffset);
355 
356     // Check version.
357     if (strOffsets.GetU16(&baseOffset) < 5)
358       return;
359 
360     // Skip padding.
361     baseOffset += 2;
362   }
363 
364   SetStrOffsetsBase(baseOffset);
365 }
366 
367 std::optional<uint64_t> DWARFUnit::GetDWOId() {
368   ExtractUnitDIENoDwoIfNeeded();
369   return m_dwo_id;
370 }
371 
372 // m_die_array_mutex must be already held as read/write.
373 void DWARFUnit::AddUnitDIE(const DWARFDebugInfoEntry &cu_die) {
374   DWARFAttributes attributes;
375   size_t num_attributes = cu_die.GetAttributes(this, attributes);
376 
377   // Extract DW_AT_addr_base first, as other attributes may need it.
378   for (size_t i = 0; i < num_attributes; ++i) {
379     if (attributes.AttributeAtIndex(i) != DW_AT_addr_base)
380       continue;
381     DWARFFormValue form_value;
382     if (attributes.ExtractFormValueAtIndex(i, form_value)) {
383       SetAddrBase(form_value.Unsigned());
384       break;
385     }
386   }
387 
388   for (size_t i = 0; i < num_attributes; ++i) {
389     dw_attr_t attr = attributes.AttributeAtIndex(i);
390     DWARFFormValue form_value;
391     if (!attributes.ExtractFormValueAtIndex(i, form_value))
392       continue;
393     switch (attr) {
394     case DW_AT_loclists_base:
395       SetLoclistsBase(form_value.Unsigned());
396       break;
397     case DW_AT_rnglists_base:
398       SetRangesBase(form_value.Unsigned());
399       break;
400     case DW_AT_str_offsets_base:
401       SetStrOffsetsBase(form_value.Unsigned());
402       break;
403     case DW_AT_low_pc:
404       SetBaseAddress(form_value.Address());
405       break;
406     case DW_AT_entry_pc:
407       // If the value was already set by DW_AT_low_pc, don't update it.
408       if (m_base_addr == LLDB_INVALID_ADDRESS)
409         SetBaseAddress(form_value.Address());
410       break;
411     case DW_AT_stmt_list:
412       m_line_table_offset = form_value.Unsigned();
413       break;
414     case DW_AT_GNU_addr_base:
415       m_gnu_addr_base = form_value.Unsigned();
416       break;
417     case DW_AT_GNU_ranges_base:
418       m_gnu_ranges_base = form_value.Unsigned();
419       break;
420     case DW_AT_GNU_dwo_id:
421       m_dwo_id = form_value.Unsigned();
422       break;
423     }
424   }
425 
426   if (m_is_dwo) {
427     m_has_parsed_non_skeleton_unit = true;
428     SetDwoStrOffsetsBase();
429     return;
430   }
431 }
432 
433 size_t DWARFUnit::GetDebugInfoSize() const {
434   return GetLengthByteSize() + GetLength() - GetHeaderByteSize();
435 }
436 
437 const DWARFAbbreviationDeclarationSet *DWARFUnit::GetAbbreviations() const {
438   return m_abbrevs;
439 }
440 
441 dw_offset_t DWARFUnit::GetAbbrevOffset() const {
442   return m_abbrevs ? m_abbrevs->GetOffset() : DW_INVALID_OFFSET;
443 }
444 
445 dw_offset_t DWARFUnit::GetLineTableOffset() {
446   ExtractUnitDIENoDwoIfNeeded();
447   return m_line_table_offset;
448 }
449 
450 void DWARFUnit::SetAddrBase(dw_addr_t addr_base) { m_addr_base = addr_base; }
451 
452 // Parse the rangelist table header, including the optional array of offsets
453 // following it (DWARF v5 and later).
454 template <typename ListTableType>
455 static llvm::Expected<ListTableType>
456 ParseListTableHeader(const llvm::DWARFDataExtractor &data, uint64_t offset,
457                      DwarfFormat format) {
458   // We are expected to be called with Offset 0 or pointing just past the table
459   // header. Correct Offset in the latter case so that it points to the start
460   // of the header.
461   if (offset == 0) {
462     // This means DW_AT_rnglists_base is missing and therefore DW_FORM_rnglistx
463     // cannot be handled. Returning a default-constructed ListTableType allows
464     // DW_FORM_sec_offset to be supported.
465     return ListTableType();
466   }
467 
468   uint64_t HeaderSize = llvm::DWARFListTableHeader::getHeaderSize(format);
469   if (offset < HeaderSize)
470     return llvm::createStringError(std::errc::invalid_argument,
471                                    "did not detect a valid"
472                                    " list table with base = 0x%" PRIx64 "\n",
473                                    offset);
474   offset -= HeaderSize;
475   ListTableType Table;
476   if (llvm::Error E = Table.extractHeaderAndOffsets(data, &offset))
477     return std::move(E);
478   return Table;
479 }
480 
481 void DWARFUnit::SetLoclistsBase(dw_addr_t loclists_base) {
482   uint64_t offset = 0;
483   if (const llvm::DWARFUnitIndex::Entry *entry = m_header.GetIndexEntry()) {
484     const auto *contribution = entry->getContribution(llvm::DW_SECT_LOCLISTS);
485     if (!contribution) {
486       GetSymbolFileDWARF().GetObjectFile()->GetModule()->ReportError(
487           "Failed to find location list contribution for CU with DWO Id "
488           "{0:x16}",
489           *GetDWOId());
490       return;
491     }
492     offset += contribution->getOffset32();
493   }
494   m_loclists_base = loclists_base;
495 
496   uint64_t header_size = llvm::DWARFListTableHeader::getHeaderSize(DWARF32);
497   if (loclists_base < header_size)
498     return;
499 
500   m_loclist_table_header.emplace(".debug_loclists", "locations");
501   offset += loclists_base - header_size;
502   if (llvm::Error E = m_loclist_table_header->extract(
503           m_dwarf.GetDWARFContext().getOrLoadLocListsData().GetAsLLVM(),
504           &offset)) {
505     GetSymbolFileDWARF().GetObjectFile()->GetModule()->ReportError(
506         "Failed to extract location list table at offset {0:x16} (location "
507         "list base: {1:x16}): {2}",
508         offset, loclists_base, toString(std::move(E)).c_str());
509   }
510 }
511 
512 std::unique_ptr<llvm::DWARFLocationTable>
513 DWARFUnit::GetLocationTable(const DataExtractor &data) const {
514   llvm::DWARFDataExtractor llvm_data(
515       data.GetData(), data.GetByteOrder() == lldb::eByteOrderLittle,
516       data.GetAddressByteSize());
517 
518   if (m_is_dwo || GetVersion() >= 5)
519     return std::make_unique<llvm::DWARFDebugLoclists>(llvm_data, GetVersion());
520   return std::make_unique<llvm::DWARFDebugLoc>(llvm_data);
521 }
522 
523 DWARFDataExtractor DWARFUnit::GetLocationData() const {
524   DWARFContext &Ctx = GetSymbolFileDWARF().GetDWARFContext();
525   const DWARFDataExtractor &data =
526       GetVersion() >= 5 ? Ctx.getOrLoadLocListsData() : Ctx.getOrLoadLocData();
527   if (const llvm::DWARFUnitIndex::Entry *entry = m_header.GetIndexEntry()) {
528     if (const auto *contribution = entry->getContribution(
529             GetVersion() >= 5 ? llvm::DW_SECT_LOCLISTS : llvm::DW_SECT_EXT_LOC))
530       return DWARFDataExtractor(data, contribution->getOffset32(),
531                                 contribution->getLength32());
532     return DWARFDataExtractor();
533   }
534   return data;
535 }
536 
537 DWARFDataExtractor DWARFUnit::GetRnglistData() const {
538   DWARFContext &Ctx = GetSymbolFileDWARF().GetDWARFContext();
539   const DWARFDataExtractor &data = Ctx.getOrLoadRngListsData();
540   if (const llvm::DWARFUnitIndex::Entry *entry = m_header.GetIndexEntry()) {
541     if (const auto *contribution =
542             entry->getContribution(llvm::DW_SECT_RNGLISTS))
543       return DWARFDataExtractor(data, contribution->getOffset32(),
544                                 contribution->getLength32());
545     GetSymbolFileDWARF().GetObjectFile()->GetModule()->ReportError(
546         "Failed to find range list contribution for CU with signature {0:x16}",
547         entry->getSignature());
548 
549     return DWARFDataExtractor();
550   }
551   return data;
552 }
553 
554 void DWARFUnit::SetRangesBase(dw_addr_t ranges_base) {
555   lldbassert(!m_rnglist_table_done);
556 
557   m_ranges_base = ranges_base;
558 }
559 
560 const std::optional<llvm::DWARFDebugRnglistTable> &
561 DWARFUnit::GetRnglistTable() {
562   if (GetVersion() >= 5 && !m_rnglist_table_done) {
563     m_rnglist_table_done = true;
564     if (auto table_or_error =
565             ParseListTableHeader<llvm::DWARFDebugRnglistTable>(
566                 GetRnglistData().GetAsLLVM(), m_ranges_base, DWARF32))
567       m_rnglist_table = std::move(table_or_error.get());
568     else
569       GetSymbolFileDWARF().GetObjectFile()->GetModule()->ReportError(
570           "Failed to extract range list table at offset {0:x16}: {1}",
571           m_ranges_base, toString(table_or_error.takeError()).c_str());
572   }
573   return m_rnglist_table;
574 }
575 
576 // This function is called only for DW_FORM_rnglistx.
577 llvm::Expected<uint64_t> DWARFUnit::GetRnglistOffset(uint32_t Index) {
578   if (!GetRnglistTable())
579     return llvm::createStringError(std::errc::invalid_argument,
580                                    "missing or invalid range list table");
581   if (!m_ranges_base)
582     return llvm::createStringError(
583         std::errc::invalid_argument,
584         llvm::formatv("DW_FORM_rnglistx cannot be used without "
585                       "DW_AT_rnglists_base for CU at {0:x16}",
586                       GetOffset())
587             .str()
588             .c_str());
589   if (std::optional<uint64_t> off = GetRnglistTable()->getOffsetEntry(
590           GetRnglistData().GetAsLLVM(), Index))
591     return *off + m_ranges_base;
592   return llvm::createStringError(
593       std::errc::invalid_argument,
594       "invalid range list table index %u; OffsetEntryCount is %u, "
595       "DW_AT_rnglists_base is %" PRIu64,
596       Index, GetRnglistTable()->getOffsetEntryCount(), m_ranges_base);
597 }
598 
599 void DWARFUnit::SetStrOffsetsBase(dw_offset_t str_offsets_base) {
600   m_str_offsets_base = str_offsets_base;
601 }
602 
603 dw_addr_t DWARFUnit::ReadAddressFromDebugAddrSection(uint32_t index) const {
604   uint32_t index_size = GetAddressByteSize();
605   dw_offset_t addr_base = GetAddrBase();
606   dw_addr_t offset = addr_base + static_cast<dw_addr_t>(index) * index_size;
607   const DWARFDataExtractor &data =
608       m_dwarf.GetDWARFContext().getOrLoadAddrData();
609   if (data.ValidOffsetForDataOfSize(offset, index_size))
610     return data.GetMaxU64_unchecked(&offset, index_size);
611   return LLDB_INVALID_ADDRESS;
612 }
613 
614 // It may be called only with m_die_array_mutex held R/W.
615 void DWARFUnit::ClearDIEsRWLocked() {
616   m_die_array.clear();
617   m_die_array.shrink_to_fit();
618 
619   if (m_dwo && !m_dwo->m_cancel_scopes)
620     m_dwo->ClearDIEsRWLocked();
621 }
622 
623 lldb::ByteOrder DWARFUnit::GetByteOrder() const {
624   return m_dwarf.GetObjectFile()->GetByteOrder();
625 }
626 
627 void DWARFUnit::SetBaseAddress(dw_addr_t base_addr) { m_base_addr = base_addr; }
628 
629 // Compare function DWARFDebugAranges::Range structures
630 static bool CompareDIEOffset(const DWARFDebugInfoEntry &die,
631                              const dw_offset_t die_offset) {
632   return die.GetOffset() < die_offset;
633 }
634 
635 // GetDIE()
636 //
637 // Get the DIE (Debug Information Entry) with the specified offset by first
638 // checking if the DIE is contained within this compile unit and grabbing the
639 // DIE from this compile unit. Otherwise we grab the DIE from the DWARF file.
640 DWARFDIE
641 DWARFUnit::GetDIE(dw_offset_t die_offset) {
642   if (die_offset == DW_INVALID_OFFSET)
643     return DWARFDIE(); // Not found
644 
645   if (!ContainsDIEOffset(die_offset)) {
646     GetSymbolFileDWARF().GetObjectFile()->GetModule()->ReportError(
647         "GetDIE for DIE {0:x16} is outside of its CU {0:x16}", die_offset,
648         GetOffset());
649     return DWARFDIE(); // Not found
650   }
651 
652   ExtractDIEsIfNeeded();
653   DWARFDebugInfoEntry::const_iterator end = m_die_array.cend();
654   DWARFDebugInfoEntry::const_iterator pos =
655       lower_bound(m_die_array.cbegin(), end, die_offset, CompareDIEOffset);
656 
657   if (pos != end && die_offset == (*pos).GetOffset())
658     return DWARFDIE(this, &(*pos));
659   return DWARFDIE(); // Not found
660 }
661 
662 DWARFUnit &DWARFUnit::GetNonSkeletonUnit() {
663   ExtractUnitDIEIfNeeded();
664   if (m_dwo)
665     return *m_dwo;
666   return *this;
667 }
668 
669 uint8_t DWARFUnit::GetAddressByteSize(const DWARFUnit *cu) {
670   if (cu)
671     return cu->GetAddressByteSize();
672   return DWARFUnit::GetDefaultAddressSize();
673 }
674 
675 uint8_t DWARFUnit::GetDefaultAddressSize() { return 4; }
676 
677 void *DWARFUnit::GetUserData() const { return m_user_data; }
678 
679 void DWARFUnit::SetUserData(void *d) { m_user_data = d; }
680 
681 bool DWARFUnit::Supports_DW_AT_APPLE_objc_complete_type() {
682   return GetProducer() != eProducerLLVMGCC;
683 }
684 
685 bool DWARFUnit::DW_AT_decl_file_attributes_are_invalid() {
686   // llvm-gcc makes completely invalid decl file attributes and won't ever be
687   // fixed, so we need to know to ignore these.
688   return GetProducer() == eProducerLLVMGCC;
689 }
690 
691 bool DWARFUnit::Supports_unnamed_objc_bitfields() {
692   if (GetProducer() == eProducerClang)
693     return GetProducerVersion() >= llvm::VersionTuple(425, 0, 13);
694   // Assume all other compilers didn't have incorrect ObjC bitfield info.
695   return true;
696 }
697 
698 void DWARFUnit::ParseProducerInfo() {
699   m_producer = eProducerOther;
700   const DWARFDebugInfoEntry *die = GetUnitDIEPtrOnly();
701   if (!die)
702     return;
703 
704   llvm::StringRef producer(
705       die->GetAttributeValueAsString(this, DW_AT_producer, nullptr));
706   if (producer.empty())
707     return;
708 
709   static const RegularExpression g_swiftlang_version_regex(
710       llvm::StringRef(R"(swiftlang-([0-9]+\.[0-9]+\.[0-9]+(\.[0-9]+)?))"));
711   static const RegularExpression g_clang_version_regex(
712       llvm::StringRef(R"(clang-([0-9]+\.[0-9]+\.[0-9]+(\.[0-9]+)?))"));
713   static const RegularExpression g_llvm_gcc_regex(
714       llvm::StringRef(R"(4\.[012]\.[01] )"
715                       R"(\(Based on Apple Inc\. build [0-9]+\) )"
716                       R"(\(LLVM build [\.0-9]+\)$)"));
717 
718   llvm::SmallVector<llvm::StringRef, 3> matches;
719   if (g_swiftlang_version_regex.Execute(producer, &matches)) {
720       m_producer_version.tryParse(matches[1]);
721     m_producer = eProducerSwift;
722   } else if (producer.contains("clang")) {
723     if (g_clang_version_regex.Execute(producer, &matches))
724       m_producer_version.tryParse(matches[1]);
725     m_producer = eProducerClang;
726   } else if (producer.contains("GNU")) {
727     m_producer = eProducerGCC;
728   } else if (g_llvm_gcc_regex.Execute(producer)) {
729     m_producer = eProducerLLVMGCC;
730   }
731 }
732 
733 DWARFProducer DWARFUnit::GetProducer() {
734   if (m_producer == eProducerInvalid)
735     ParseProducerInfo();
736   return m_producer;
737 }
738 
739 llvm::VersionTuple DWARFUnit::GetProducerVersion() {
740   if (m_producer_version.empty())
741     ParseProducerInfo();
742   return m_producer_version;
743 }
744 
745 uint64_t DWARFUnit::GetDWARFLanguageType() {
746   if (m_language_type)
747     return *m_language_type;
748 
749   const DWARFDebugInfoEntry *die = GetUnitDIEPtrOnly();
750   if (!die)
751     m_language_type = 0;
752   else
753     m_language_type = die->GetAttributeValueAsUnsigned(this, DW_AT_language, 0);
754   return *m_language_type;
755 }
756 
757 bool DWARFUnit::GetIsOptimized() {
758   if (m_is_optimized == eLazyBoolCalculate) {
759     const DWARFDebugInfoEntry *die = GetUnitDIEPtrOnly();
760     if (die) {
761       m_is_optimized = eLazyBoolNo;
762       if (die->GetAttributeValueAsUnsigned(this, DW_AT_APPLE_optimized, 0) ==
763           1) {
764         m_is_optimized = eLazyBoolYes;
765       }
766     }
767   }
768   return m_is_optimized == eLazyBoolYes;
769 }
770 
771 FileSpec::Style DWARFUnit::GetPathStyle() {
772   if (!m_comp_dir)
773     ComputeCompDirAndGuessPathStyle();
774   return m_comp_dir->GetPathStyle();
775 }
776 
777 const FileSpec &DWARFUnit::GetCompilationDirectory() {
778   if (!m_comp_dir)
779     ComputeCompDirAndGuessPathStyle();
780   return *m_comp_dir;
781 }
782 
783 const FileSpec &DWARFUnit::GetAbsolutePath() {
784   if (!m_file_spec)
785     ComputeAbsolutePath();
786   return *m_file_spec;
787 }
788 
789 FileSpec DWARFUnit::GetFile(size_t file_idx) {
790   return m_dwarf.GetFile(*this, file_idx);
791 }
792 
793 // DWARF2/3 suggests the form hostname:pathname for compilation directory.
794 // Remove the host part if present.
795 static llvm::StringRef
796 removeHostnameFromPathname(llvm::StringRef path_from_dwarf) {
797   if (!path_from_dwarf.contains(':'))
798     return path_from_dwarf;
799   llvm::StringRef host, path;
800   std::tie(host, path) = path_from_dwarf.split(':');
801 
802   if (host.contains('/'))
803     return path_from_dwarf;
804 
805   // check whether we have a windows path, and so the first character is a
806   // drive-letter not a hostname.
807   if (host.size() == 1 && llvm::isAlpha(host[0]) &&
808       (path.startswith("\\") || path.startswith("/")))
809     return path_from_dwarf;
810 
811   return path;
812 }
813 
814 void DWARFUnit::ComputeCompDirAndGuessPathStyle() {
815   m_comp_dir = FileSpec();
816   const DWARFDebugInfoEntry *die = GetUnitDIEPtrOnly();
817   if (!die)
818     return;
819 
820   llvm::StringRef comp_dir = removeHostnameFromPathname(
821       die->GetAttributeValueAsString(this, DW_AT_comp_dir, nullptr));
822   if (!comp_dir.empty()) {
823     FileSpec::Style comp_dir_style =
824         FileSpec::GuessPathStyle(comp_dir).value_or(FileSpec::Style::native);
825     m_comp_dir = FileSpec(comp_dir, comp_dir_style);
826   } else {
827     // Try to detect the style based on the DW_AT_name attribute, but just store
828     // the detected style in the m_comp_dir field.
829     const char *name =
830         die->GetAttributeValueAsString(this, DW_AT_name, nullptr);
831     m_comp_dir = FileSpec(
832         "", FileSpec::GuessPathStyle(name).value_or(FileSpec::Style::native));
833   }
834 }
835 
836 void DWARFUnit::ComputeAbsolutePath() {
837   m_file_spec = FileSpec();
838   const DWARFDebugInfoEntry *die = GetUnitDIEPtrOnly();
839   if (!die)
840     return;
841 
842   m_file_spec =
843       FileSpec(die->GetAttributeValueAsString(this, DW_AT_name, nullptr),
844                GetPathStyle());
845 
846   if (m_file_spec->IsRelative())
847     m_file_spec->MakeAbsolute(GetCompilationDirectory());
848 }
849 
850 SymbolFileDWARFDwo *DWARFUnit::GetDwoSymbolFile() {
851   ExtractUnitDIEIfNeeded();
852   if (m_dwo)
853     return &llvm::cast<SymbolFileDWARFDwo>(m_dwo->GetSymbolFileDWARF());
854   return nullptr;
855 }
856 
857 const DWARFDebugAranges &DWARFUnit::GetFunctionAranges() {
858   if (m_func_aranges_up == nullptr) {
859     m_func_aranges_up = std::make_unique<DWARFDebugAranges>();
860     const DWARFDebugInfoEntry *die = DIEPtr();
861     if (die)
862       die->BuildFunctionAddressRangeTable(this, m_func_aranges_up.get());
863 
864     if (m_dwo) {
865       const DWARFDebugInfoEntry *dwo_die = m_dwo->DIEPtr();
866       if (dwo_die)
867         dwo_die->BuildFunctionAddressRangeTable(m_dwo.get(),
868                                                 m_func_aranges_up.get());
869     }
870 
871     const bool minimize = false;
872     m_func_aranges_up->Sort(minimize);
873   }
874   return *m_func_aranges_up;
875 }
876 
877 llvm::Expected<DWARFUnitHeader>
878 DWARFUnitHeader::extract(const DWARFDataExtractor &data,
879                          DIERef::Section section,
880                          lldb_private::DWARFContext &context,
881                          lldb::offset_t *offset_ptr) {
882   DWARFUnitHeader header;
883   header.m_offset = *offset_ptr;
884   header.m_length = data.GetDWARFInitialLength(offset_ptr);
885   header.m_version = data.GetU16(offset_ptr);
886   if (header.m_version == 5) {
887     header.m_unit_type = data.GetU8(offset_ptr);
888     header.m_addr_size = data.GetU8(offset_ptr);
889     header.m_abbr_offset = data.GetDWARFOffset(offset_ptr);
890     if (header.m_unit_type == llvm::dwarf::DW_UT_skeleton ||
891         header.m_unit_type == llvm::dwarf::DW_UT_split_compile)
892       header.m_dwo_id = data.GetU64(offset_ptr);
893   } else {
894     header.m_abbr_offset = data.GetDWARFOffset(offset_ptr);
895     header.m_addr_size = data.GetU8(offset_ptr);
896     header.m_unit_type =
897         section == DIERef::Section::DebugTypes ? DW_UT_type : DW_UT_compile;
898   }
899 
900   if (header.IsTypeUnit()) {
901     header.m_type_hash = data.GetU64(offset_ptr);
902     header.m_type_offset = data.GetDWARFOffset(offset_ptr);
903   }
904 
905   if (context.isDwo()) {
906     const llvm::DWARFUnitIndex *Index;
907     if (header.IsTypeUnit()) {
908       Index = &context.GetAsLLVM().getTUIndex();
909       if (*Index)
910         header.m_index_entry = Index->getFromHash(header.m_type_hash);
911     } else {
912       Index = &context.GetAsLLVM().getCUIndex();
913       if (*Index && header.m_version >= 5 && header.m_dwo_id)
914         header.m_index_entry = Index->getFromHash(*header.m_dwo_id);
915     }
916     if (!header.m_index_entry)
917       header.m_index_entry = Index->getFromOffset(header.m_offset);
918   }
919 
920   if (header.m_index_entry) {
921     if (header.m_abbr_offset) {
922       return llvm::createStringError(
923           llvm::inconvertibleErrorCode(),
924           "Package unit with a non-zero abbreviation offset");
925     }
926     auto *unit_contrib = header.m_index_entry->getContribution();
927     if (!unit_contrib || unit_contrib->getLength32() != header.m_length + 4) {
928       return llvm::createStringError(llvm::inconvertibleErrorCode(),
929                                      "Inconsistent DWARF package unit index");
930     }
931     auto *abbr_entry =
932         header.m_index_entry->getContribution(llvm::DW_SECT_ABBREV);
933     if (!abbr_entry) {
934       return llvm::createStringError(
935           llvm::inconvertibleErrorCode(),
936           "DWARF package index missing abbreviation column");
937     }
938     header.m_abbr_offset = abbr_entry->getOffset32();
939   }
940 
941   bool length_OK = data.ValidOffset(header.GetNextUnitOffset() - 1);
942   bool version_OK = SymbolFileDWARF::SupportedVersion(header.m_version);
943   bool addr_size_OK = (header.m_addr_size == 4) || (header.m_addr_size == 8);
944   bool type_offset_OK =
945       !header.IsTypeUnit() || (header.m_type_offset <= header.GetLength());
946 
947   if (!length_OK)
948     return llvm::make_error<llvm::object::GenericBinaryError>(
949         "Invalid unit length");
950   if (!version_OK)
951     return llvm::make_error<llvm::object::GenericBinaryError>(
952         "Unsupported unit version");
953   if (!addr_size_OK)
954     return llvm::make_error<llvm::object::GenericBinaryError>(
955         "Invalid unit address size");
956   if (!type_offset_OK)
957     return llvm::make_error<llvm::object::GenericBinaryError>(
958         "Type offset out of range");
959 
960   return header;
961 }
962 
963 llvm::Expected<DWARFUnitSP>
964 DWARFUnit::extract(SymbolFileDWARF &dwarf, user_id_t uid,
965                    const DWARFDataExtractor &debug_info,
966                    DIERef::Section section, lldb::offset_t *offset_ptr) {
967   assert(debug_info.ValidOffset(*offset_ptr));
968 
969   auto expected_header = DWARFUnitHeader::extract(
970       debug_info, section, dwarf.GetDWARFContext(), offset_ptr);
971   if (!expected_header)
972     return expected_header.takeError();
973 
974   const DWARFDebugAbbrev *abbr = dwarf.DebugAbbrev();
975   if (!abbr)
976     return llvm::make_error<llvm::object::GenericBinaryError>(
977         "No debug_abbrev data");
978 
979   bool abbr_offset_OK =
980       dwarf.GetDWARFContext().getOrLoadAbbrevData().ValidOffset(
981           expected_header->GetAbbrOffset());
982   if (!abbr_offset_OK)
983     return llvm::make_error<llvm::object::GenericBinaryError>(
984         "Abbreviation offset for unit is not valid");
985 
986   const DWARFAbbreviationDeclarationSet *abbrevs =
987       abbr->GetAbbreviationDeclarationSet(expected_header->GetAbbrOffset());
988   if (!abbrevs)
989     return llvm::make_error<llvm::object::GenericBinaryError>(
990         "No abbrev exists at the specified offset.");
991 
992   bool is_dwo = dwarf.GetDWARFContext().isDwo();
993   if (expected_header->IsTypeUnit())
994     return DWARFUnitSP(new DWARFTypeUnit(dwarf, uid, *expected_header, *abbrevs,
995                                          section, is_dwo));
996   return DWARFUnitSP(new DWARFCompileUnit(dwarf, uid, *expected_header,
997                                           *abbrevs, section, is_dwo));
998 }
999 
1000 const lldb_private::DWARFDataExtractor &DWARFUnit::GetData() const {
1001   return m_section == DIERef::Section::DebugTypes
1002              ? m_dwarf.GetDWARFContext().getOrLoadDebugTypesData()
1003              : m_dwarf.GetDWARFContext().getOrLoadDebugInfoData();
1004 }
1005 
1006 uint32_t DWARFUnit::GetHeaderByteSize() const {
1007   switch (m_header.GetUnitType()) {
1008   case llvm::dwarf::DW_UT_compile:
1009   case llvm::dwarf::DW_UT_partial:
1010     return GetVersion() < 5 ? 11 : 12;
1011   case llvm::dwarf::DW_UT_skeleton:
1012   case llvm::dwarf::DW_UT_split_compile:
1013     return 20;
1014   case llvm::dwarf::DW_UT_type:
1015   case llvm::dwarf::DW_UT_split_type:
1016     return GetVersion() < 5 ? 23 : 24;
1017   }
1018   llvm_unreachable("invalid UnitType.");
1019 }
1020 
1021 std::optional<uint64_t>
1022 DWARFUnit::GetStringOffsetSectionItem(uint32_t index) const {
1023   offset_t offset = GetStrOffsetsBase() + index * 4;
1024   return m_dwarf.GetDWARFContext().getOrLoadStrOffsetsData().GetU32(&offset);
1025 }
1026 
1027 llvm::Expected<DWARFRangeList>
1028 DWARFUnit::FindRnglistFromOffset(dw_offset_t offset) {
1029   if (GetVersion() <= 4) {
1030     const DWARFDebugRanges *debug_ranges = m_dwarf.GetDebugRanges();
1031     if (!debug_ranges)
1032       return llvm::make_error<llvm::object::GenericBinaryError>(
1033           "No debug_ranges section");
1034     DWARFRangeList ranges;
1035     debug_ranges->FindRanges(this, offset, ranges);
1036     return ranges;
1037   }
1038 
1039   if (!GetRnglistTable())
1040     return llvm::createStringError(std::errc::invalid_argument,
1041                                    "missing or invalid range list table");
1042 
1043   llvm::DWARFDataExtractor data = GetRnglistData().GetAsLLVM();
1044 
1045   // As DW_AT_rnglists_base may be missing we need to call setAddressSize.
1046   data.setAddressSize(m_header.GetAddressByteSize());
1047   auto range_list_or_error = GetRnglistTable()->findList(data, offset);
1048   if (!range_list_or_error)
1049     return range_list_or_error.takeError();
1050 
1051   llvm::Expected<llvm::DWARFAddressRangesVector> llvm_ranges =
1052       range_list_or_error->getAbsoluteRanges(
1053           llvm::object::SectionedAddress{GetBaseAddress()},
1054           GetAddressByteSize(), [&](uint32_t index) {
1055             uint32_t index_size = GetAddressByteSize();
1056             dw_offset_t addr_base = GetAddrBase();
1057             lldb::offset_t offset =
1058                 addr_base + static_cast<lldb::offset_t>(index) * index_size;
1059             return llvm::object::SectionedAddress{
1060                 m_dwarf.GetDWARFContext().getOrLoadAddrData().GetMaxU64(
1061                     &offset, index_size)};
1062           });
1063   if (!llvm_ranges)
1064     return llvm_ranges.takeError();
1065 
1066   DWARFRangeList ranges;
1067   for (const llvm::DWARFAddressRange &llvm_range : *llvm_ranges) {
1068     ranges.Append(DWARFRangeList::Entry(llvm_range.LowPC,
1069                                         llvm_range.HighPC - llvm_range.LowPC));
1070   }
1071   return ranges;
1072 }
1073 
1074 llvm::Expected<DWARFRangeList>
1075 DWARFUnit::FindRnglistFromIndex(uint32_t index) {
1076   llvm::Expected<uint64_t> maybe_offset = GetRnglistOffset(index);
1077   if (!maybe_offset)
1078     return maybe_offset.takeError();
1079   return FindRnglistFromOffset(*maybe_offset);
1080 }
1081 
1082 
1083 bool DWARFUnit::HasAny(llvm::ArrayRef<dw_tag_t> tags) {
1084   ExtractUnitDIEIfNeeded();
1085   if (m_dwo)
1086     return m_dwo->HasAny(tags);
1087 
1088   for (const auto &die: m_die_array) {
1089     for (const auto tag: tags) {
1090       if (tag == die.Tag())
1091         return true;
1092     }
1093   }
1094   return false;
1095 }
1096