1 //===-- ObjectFileWasm.cpp ------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "ObjectFileWasm.h"
10 #include "lldb/Core/Module.h"
11 #include "lldb/Core/ModuleSpec.h"
12 #include "lldb/Core/PluginManager.h"
13 #include "lldb/Core/Section.h"
14 #include "lldb/Target/Process.h"
15 #include "lldb/Target/SectionLoadList.h"
16 #include "lldb/Target/Target.h"
17 #include "lldb/Utility/DataBufferHeap.h"
18 #include "lldb/Utility/LLDBLog.h"
19 #include "lldb/Utility/Log.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/ADT/SmallVector.h"
22 #include "llvm/ADT/StringRef.h"
23 #include "llvm/BinaryFormat/Magic.h"
24 #include "llvm/BinaryFormat/Wasm.h"
25 #include "llvm/Support/Endian.h"
26 #include "llvm/Support/Format.h"
27 #include <optional>
28
29 using namespace lldb;
30 using namespace lldb_private;
31 using namespace lldb_private::wasm;
32
33 LLDB_PLUGIN_DEFINE(ObjectFileWasm)
34
35 static const uint32_t kWasmHeaderSize =
36 sizeof(llvm::wasm::WasmMagic) + sizeof(llvm::wasm::WasmVersion);
37
38 /// Checks whether the data buffer starts with a valid Wasm module header.
ValidateModuleHeader(const DataBufferSP & data_sp)39 static bool ValidateModuleHeader(const DataBufferSP &data_sp) {
40 if (!data_sp || data_sp->GetByteSize() < kWasmHeaderSize)
41 return false;
42
43 if (llvm::identify_magic(toStringRef(data_sp->GetData())) !=
44 llvm::file_magic::wasm_object)
45 return false;
46
47 const uint8_t *Ptr = data_sp->GetBytes() + sizeof(llvm::wasm::WasmMagic);
48
49 uint32_t version = llvm::support::endian::read32le(Ptr);
50 return version == llvm::wasm::WasmVersion;
51 }
52
53 static std::optional<ConstString>
GetWasmString(llvm::DataExtractor & data,llvm::DataExtractor::Cursor & c)54 GetWasmString(llvm::DataExtractor &data, llvm::DataExtractor::Cursor &c) {
55 // A Wasm string is encoded as a vector of UTF-8 codes.
56 // Vectors are encoded with their u32 length followed by the element
57 // sequence.
58 uint64_t len = data.getULEB128(c);
59 if (!c) {
60 consumeError(c.takeError());
61 return std::nullopt;
62 }
63
64 if (len >= (uint64_t(1) << 32)) {
65 return std::nullopt;
66 }
67
68 llvm::SmallVector<uint8_t, 32> str_storage;
69 data.getU8(c, str_storage, len);
70 if (!c) {
71 consumeError(c.takeError());
72 return std::nullopt;
73 }
74
75 llvm::StringRef str = toStringRef(llvm::ArrayRef(str_storage));
76 return ConstString(str);
77 }
78
79 char ObjectFileWasm::ID;
80
Initialize()81 void ObjectFileWasm::Initialize() {
82 PluginManager::RegisterPlugin(GetPluginNameStatic(),
83 GetPluginDescriptionStatic(), CreateInstance,
84 CreateMemoryInstance, GetModuleSpecifications);
85 }
86
Terminate()87 void ObjectFileWasm::Terminate() {
88 PluginManager::UnregisterPlugin(CreateInstance);
89 }
90
91 ObjectFile *
CreateInstance(const ModuleSP & module_sp,DataBufferSP data_sp,offset_t data_offset,const FileSpec * file,offset_t file_offset,offset_t length)92 ObjectFileWasm::CreateInstance(const ModuleSP &module_sp, DataBufferSP data_sp,
93 offset_t data_offset, const FileSpec *file,
94 offset_t file_offset, offset_t length) {
95 Log *log = GetLog(LLDBLog::Object);
96
97 if (!data_sp) {
98 data_sp = MapFileData(*file, length, file_offset);
99 if (!data_sp) {
100 LLDB_LOGF(log, "Failed to create ObjectFileWasm instance for file %s",
101 file->GetPath().c_str());
102 return nullptr;
103 }
104 data_offset = 0;
105 }
106
107 assert(data_sp);
108 if (!ValidateModuleHeader(data_sp)) {
109 LLDB_LOGF(log,
110 "Failed to create ObjectFileWasm instance: invalid Wasm header");
111 return nullptr;
112 }
113
114 // Update the data to contain the entire file if it doesn't contain it
115 // already.
116 if (data_sp->GetByteSize() < length) {
117 data_sp = MapFileData(*file, length, file_offset);
118 if (!data_sp) {
119 LLDB_LOGF(log,
120 "Failed to create ObjectFileWasm instance: cannot read file %s",
121 file->GetPath().c_str());
122 return nullptr;
123 }
124 data_offset = 0;
125 }
126
127 std::unique_ptr<ObjectFileWasm> objfile_up(new ObjectFileWasm(
128 module_sp, data_sp, data_offset, file, file_offset, length));
129 ArchSpec spec = objfile_up->GetArchitecture();
130 if (spec && objfile_up->SetModulesArchitecture(spec)) {
131 LLDB_LOGF(log,
132 "%p ObjectFileWasm::CreateInstance() module = %p (%s), file = %s",
133 static_cast<void *>(objfile_up.get()),
134 static_cast<void *>(objfile_up->GetModule().get()),
135 objfile_up->GetModule()->GetSpecificationDescription().c_str(),
136 file ? file->GetPath().c_str() : "<NULL>");
137 return objfile_up.release();
138 }
139
140 LLDB_LOGF(log, "Failed to create ObjectFileWasm instance");
141 return nullptr;
142 }
143
CreateMemoryInstance(const ModuleSP & module_sp,WritableDataBufferSP data_sp,const ProcessSP & process_sp,addr_t header_addr)144 ObjectFile *ObjectFileWasm::CreateMemoryInstance(const ModuleSP &module_sp,
145 WritableDataBufferSP data_sp,
146 const ProcessSP &process_sp,
147 addr_t header_addr) {
148 if (!ValidateModuleHeader(data_sp))
149 return nullptr;
150
151 std::unique_ptr<ObjectFileWasm> objfile_up(
152 new ObjectFileWasm(module_sp, data_sp, process_sp, header_addr));
153 ArchSpec spec = objfile_up->GetArchitecture();
154 if (spec && objfile_up->SetModulesArchitecture(spec))
155 return objfile_up.release();
156 return nullptr;
157 }
158
DecodeNextSection(lldb::offset_t * offset_ptr)159 bool ObjectFileWasm::DecodeNextSection(lldb::offset_t *offset_ptr) {
160 // Buffer sufficient to read a section header and find the pointer to the next
161 // section.
162 const uint32_t kBufferSize = 1024;
163 DataExtractor section_header_data = ReadImageData(*offset_ptr, kBufferSize);
164
165 llvm::DataExtractor data = section_header_data.GetAsLLVM();
166 llvm::DataExtractor::Cursor c(0);
167
168 // Each section consists of:
169 // - a one-byte section id,
170 // - the u32 size of the contents, in bytes,
171 // - the actual contents.
172 uint8_t section_id = data.getU8(c);
173 uint64_t payload_len = data.getULEB128(c);
174 if (!c)
175 return !llvm::errorToBool(c.takeError());
176
177 if (payload_len >= (uint64_t(1) << 32))
178 return false;
179
180 if (section_id == llvm::wasm::WASM_SEC_CUSTOM) {
181 // Custom sections have the id 0. Their contents consist of a name
182 // identifying the custom section, followed by an uninterpreted sequence
183 // of bytes.
184 lldb::offset_t prev_offset = c.tell();
185 std::optional<ConstString> sect_name = GetWasmString(data, c);
186 if (!sect_name)
187 return false;
188
189 if (payload_len < c.tell() - prev_offset)
190 return false;
191
192 uint32_t section_length = payload_len - (c.tell() - prev_offset);
193 m_sect_infos.push_back(section_info{*offset_ptr + c.tell(), section_length,
194 section_id, *sect_name});
195 *offset_ptr += (c.tell() + section_length);
196 } else if (section_id <= llvm::wasm::WASM_SEC_LAST_KNOWN) {
197 m_sect_infos.push_back(section_info{*offset_ptr + c.tell(),
198 static_cast<uint32_t>(payload_len),
199 section_id, ConstString()});
200 *offset_ptr += (c.tell() + payload_len);
201 } else {
202 // Invalid section id.
203 return false;
204 }
205 return true;
206 }
207
DecodeSections()208 bool ObjectFileWasm::DecodeSections() {
209 lldb::offset_t offset = kWasmHeaderSize;
210 if (IsInMemory()) {
211 offset += m_memory_addr;
212 }
213
214 while (DecodeNextSection(&offset))
215 ;
216 return true;
217 }
218
GetModuleSpecifications(const FileSpec & file,DataBufferSP & data_sp,offset_t data_offset,offset_t file_offset,offset_t length,ModuleSpecList & specs)219 size_t ObjectFileWasm::GetModuleSpecifications(
220 const FileSpec &file, DataBufferSP &data_sp, offset_t data_offset,
221 offset_t file_offset, offset_t length, ModuleSpecList &specs) {
222 if (!ValidateModuleHeader(data_sp)) {
223 return 0;
224 }
225
226 ModuleSpec spec(file, ArchSpec("wasm32-unknown-unknown-wasm"));
227 specs.Append(spec);
228 return 1;
229 }
230
ObjectFileWasm(const ModuleSP & module_sp,DataBufferSP data_sp,offset_t data_offset,const FileSpec * file,offset_t offset,offset_t length)231 ObjectFileWasm::ObjectFileWasm(const ModuleSP &module_sp, DataBufferSP data_sp,
232 offset_t data_offset, const FileSpec *file,
233 offset_t offset, offset_t length)
234 : ObjectFile(module_sp, file, offset, length, data_sp, data_offset),
235 m_arch("wasm32-unknown-unknown-wasm") {
236 m_data.SetAddressByteSize(4);
237 }
238
ObjectFileWasm(const lldb::ModuleSP & module_sp,lldb::WritableDataBufferSP header_data_sp,const lldb::ProcessSP & process_sp,lldb::addr_t header_addr)239 ObjectFileWasm::ObjectFileWasm(const lldb::ModuleSP &module_sp,
240 lldb::WritableDataBufferSP header_data_sp,
241 const lldb::ProcessSP &process_sp,
242 lldb::addr_t header_addr)
243 : ObjectFile(module_sp, process_sp, header_addr, header_data_sp),
244 m_arch("wasm32-unknown-unknown-wasm") {}
245
ParseHeader()246 bool ObjectFileWasm::ParseHeader() {
247 // We already parsed the header during initialization.
248 return true;
249 }
250
ParseSymtab(Symtab & symtab)251 void ObjectFileWasm::ParseSymtab(Symtab &symtab) {}
252
GetSectionTypeFromName(llvm::StringRef Name)253 static SectionType GetSectionTypeFromName(llvm::StringRef Name) {
254 if (Name.consume_front(".debug_") || Name.consume_front(".zdebug_")) {
255 return llvm::StringSwitch<SectionType>(Name)
256 .Case("abbrev", eSectionTypeDWARFDebugAbbrev)
257 .Case("abbrev.dwo", eSectionTypeDWARFDebugAbbrevDwo)
258 .Case("addr", eSectionTypeDWARFDebugAddr)
259 .Case("aranges", eSectionTypeDWARFDebugAranges)
260 .Case("cu_index", eSectionTypeDWARFDebugCuIndex)
261 .Case("frame", eSectionTypeDWARFDebugFrame)
262 .Case("info", eSectionTypeDWARFDebugInfo)
263 .Case("info.dwo", eSectionTypeDWARFDebugInfoDwo)
264 .Cases("line", "line.dwo", eSectionTypeDWARFDebugLine)
265 .Cases("line_str", "line_str.dwo", eSectionTypeDWARFDebugLineStr)
266 .Case("loc", eSectionTypeDWARFDebugLoc)
267 .Case("loc.dwo", eSectionTypeDWARFDebugLocDwo)
268 .Case("loclists", eSectionTypeDWARFDebugLocLists)
269 .Case("loclists.dwo", eSectionTypeDWARFDebugLocListsDwo)
270 .Case("macinfo", eSectionTypeDWARFDebugMacInfo)
271 .Cases("macro", "macro.dwo", eSectionTypeDWARFDebugMacro)
272 .Case("names", eSectionTypeDWARFDebugNames)
273 .Case("pubnames", eSectionTypeDWARFDebugPubNames)
274 .Case("pubtypes", eSectionTypeDWARFDebugPubTypes)
275 .Case("ranges", eSectionTypeDWARFDebugRanges)
276 .Case("rnglists", eSectionTypeDWARFDebugRngLists)
277 .Case("rnglists.dwo", eSectionTypeDWARFDebugRngListsDwo)
278 .Case("str", eSectionTypeDWARFDebugStr)
279 .Case("str.dwo", eSectionTypeDWARFDebugStrDwo)
280 .Case("str_offsets", eSectionTypeDWARFDebugStrOffsets)
281 .Case("str_offsets.dwo", eSectionTypeDWARFDebugStrOffsetsDwo)
282 .Case("tu_index", eSectionTypeDWARFDebugTuIndex)
283 .Case("types", eSectionTypeDWARFDebugTypes)
284 .Case("types.dwo", eSectionTypeDWARFDebugTypesDwo)
285 .Default(eSectionTypeOther);
286 }
287 return eSectionTypeOther;
288 }
289
CreateSections(SectionList & unified_section_list)290 void ObjectFileWasm::CreateSections(SectionList &unified_section_list) {
291 if (m_sections_up)
292 return;
293
294 m_sections_up = std::make_unique<SectionList>();
295
296 if (m_sect_infos.empty()) {
297 DecodeSections();
298 }
299
300 for (const section_info §_info : m_sect_infos) {
301 SectionType section_type = eSectionTypeOther;
302 ConstString section_name;
303 offset_t file_offset = sect_info.offset & 0xffffffff;
304 addr_t vm_addr = file_offset;
305 size_t vm_size = sect_info.size;
306
307 if (llvm::wasm::WASM_SEC_CODE == sect_info.id) {
308 section_type = eSectionTypeCode;
309 section_name = ConstString("code");
310
311 // A code address in DWARF for WebAssembly is the offset of an
312 // instruction relative within the Code section of the WebAssembly file.
313 // For this reason Section::GetFileAddress() must return zero for the
314 // Code section.
315 vm_addr = 0;
316 } else {
317 section_type = GetSectionTypeFromName(sect_info.name.GetStringRef());
318 if (section_type == eSectionTypeOther)
319 continue;
320 section_name = sect_info.name;
321 if (!IsInMemory()) {
322 vm_size = 0;
323 vm_addr = 0;
324 }
325 }
326
327 SectionSP section_sp(
328 new Section(GetModule(), // Module to which this section belongs.
329 this, // ObjectFile to which this section belongs and
330 // should read section data from.
331 section_type, // Section ID.
332 section_name, // Section name.
333 section_type, // Section type.
334 vm_addr, // VM address.
335 vm_size, // VM size in bytes of this section.
336 file_offset, // Offset of this section in the file.
337 sect_info.size, // Size of the section as found in the file.
338 0, // Alignment of the section
339 0, // Flags for this section.
340 1)); // Number of host bytes per target byte
341 m_sections_up->AddSection(section_sp);
342 unified_section_list.AddSection(section_sp);
343 }
344 }
345
SetLoadAddress(Target & target,lldb::addr_t load_address,bool value_is_offset)346 bool ObjectFileWasm::SetLoadAddress(Target &target, lldb::addr_t load_address,
347 bool value_is_offset) {
348 /// In WebAssembly, linear memory is disjointed from code space. The VM can
349 /// load multiple instances of a module, which logically share the same code.
350 /// We represent a wasm32 code address with 64-bits, like:
351 /// 63 32 31 0
352 /// +---------------+---------------+
353 /// + module_id | offset |
354 /// +---------------+---------------+
355 /// where the lower 32 bits represent a module offset (relative to the module
356 /// start not to the beginning of the code section) and the higher 32 bits
357 /// uniquely identify the module in the WebAssembly VM.
358 /// In other words, we assume that each WebAssembly module is loaded by the
359 /// engine at a 64-bit address that starts at the boundary of 4GB pages, like
360 /// 0x0000000400000000 for module_id == 4.
361 /// These 64-bit addresses will be used to request code ranges for a specific
362 /// module from the WebAssembly engine.
363
364 assert(m_memory_addr == LLDB_INVALID_ADDRESS ||
365 m_memory_addr == load_address);
366
367 ModuleSP module_sp = GetModule();
368 if (!module_sp)
369 return false;
370
371 DecodeSections();
372
373 size_t num_loaded_sections = 0;
374 SectionList *section_list = GetSectionList();
375 if (!section_list)
376 return false;
377
378 const size_t num_sections = section_list->GetSize();
379 for (size_t sect_idx = 0; sect_idx < num_sections; ++sect_idx) {
380 SectionSP section_sp(section_list->GetSectionAtIndex(sect_idx));
381 if (target.SetSectionLoadAddress(
382 section_sp, load_address | section_sp->GetFileOffset())) {
383 ++num_loaded_sections;
384 }
385 }
386
387 return num_loaded_sections > 0;
388 }
389
ReadImageData(offset_t offset,uint32_t size)390 DataExtractor ObjectFileWasm::ReadImageData(offset_t offset, uint32_t size) {
391 DataExtractor data;
392 if (m_file) {
393 if (offset < GetByteSize()) {
394 size = std::min(static_cast<uint64_t>(size), GetByteSize() - offset);
395 auto buffer_sp = MapFileData(m_file, size, offset);
396 return DataExtractor(buffer_sp, GetByteOrder(), GetAddressByteSize());
397 }
398 } else {
399 ProcessSP process_sp(m_process_wp.lock());
400 if (process_sp) {
401 auto data_up = std::make_unique<DataBufferHeap>(size, 0);
402 Status readmem_error;
403 size_t bytes_read = process_sp->ReadMemory(
404 offset, data_up->GetBytes(), data_up->GetByteSize(), readmem_error);
405 if (bytes_read > 0) {
406 DataBufferSP buffer_sp(data_up.release());
407 data.SetData(buffer_sp, 0, buffer_sp->GetByteSize());
408 }
409 }
410 }
411
412 data.SetByteOrder(GetByteOrder());
413 return data;
414 }
415
GetExternalDebugInfoFileSpec()416 std::optional<FileSpec> ObjectFileWasm::GetExternalDebugInfoFileSpec() {
417 static ConstString g_sect_name_external_debug_info("external_debug_info");
418
419 for (const section_info §_info : m_sect_infos) {
420 if (g_sect_name_external_debug_info == sect_info.name) {
421 const uint32_t kBufferSize = 1024;
422 DataExtractor section_header_data =
423 ReadImageData(sect_info.offset, kBufferSize);
424 llvm::DataExtractor data = section_header_data.GetAsLLVM();
425 llvm::DataExtractor::Cursor c(0);
426 std::optional<ConstString> symbols_url = GetWasmString(data, c);
427 if (symbols_url)
428 return FileSpec(symbols_url->GetStringRef());
429 }
430 }
431 return std::nullopt;
432 }
433
Dump(Stream * s)434 void ObjectFileWasm::Dump(Stream *s) {
435 ModuleSP module_sp(GetModule());
436 if (!module_sp)
437 return;
438
439 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
440
441 llvm::raw_ostream &ostream = s->AsRawOstream();
442 ostream << static_cast<void *>(this) << ": ";
443 s->Indent();
444 ostream << "ObjectFileWasm, file = '";
445 m_file.Dump(ostream);
446 ostream << "', arch = ";
447 ostream << GetArchitecture().GetArchitectureName() << "\n";
448
449 SectionList *sections = GetSectionList();
450 if (sections) {
451 sections->Dump(s->AsRawOstream(), s->GetIndentLevel(), nullptr, true,
452 UINT32_MAX);
453 }
454 ostream << "\n";
455 DumpSectionHeaders(ostream);
456 ostream << "\n";
457 }
458
DumpSectionHeader(llvm::raw_ostream & ostream,const section_info_t & sh)459 void ObjectFileWasm::DumpSectionHeader(llvm::raw_ostream &ostream,
460 const section_info_t &sh) {
461 ostream << llvm::left_justify(sh.name.GetStringRef(), 16) << " "
462 << llvm::format_hex(sh.offset, 10) << " "
463 << llvm::format_hex(sh.size, 10) << " " << llvm::format_hex(sh.id, 6)
464 << "\n";
465 }
466
DumpSectionHeaders(llvm::raw_ostream & ostream)467 void ObjectFileWasm::DumpSectionHeaders(llvm::raw_ostream &ostream) {
468 ostream << "Section Headers\n";
469 ostream << "IDX name addr size id\n";
470 ostream << "==== ---------------- ---------- ---------- ------\n";
471
472 uint32_t idx = 0;
473 for (auto pos = m_sect_infos.begin(); pos != m_sect_infos.end();
474 ++pos, ++idx) {
475 ostream << "[" << llvm::format_decimal(idx, 2) << "] ";
476 ObjectFileWasm::DumpSectionHeader(ostream, *pos);
477 }
478 }
479