//===- DWARFDebugFrame.h - Parsing of .debug_frame ------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/DWARF/DWARFDebugFrame.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/DataExtractor.h" #include "llvm/Support/Errc.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include #include #include #include #include #include using namespace llvm; using namespace dwarf; // See DWARF standard v3, section 7.23 const uint8_t DWARF_CFI_PRIMARY_OPCODE_MASK = 0xc0; const uint8_t DWARF_CFI_PRIMARY_OPERAND_MASK = 0x3f; Error CFIProgram::parse(DWARFDataExtractor Data, uint64_t *Offset, uint64_t EndOffset) { DataExtractor::Cursor C(*Offset); while (C && C.tell() < EndOffset) { uint8_t Opcode = Data.getRelocatedValue(C, 1); if (!C) break; // Some instructions have a primary opcode encoded in the top bits. if (uint8_t Primary = Opcode & DWARF_CFI_PRIMARY_OPCODE_MASK) { // If it's a primary opcode, the first operand is encoded in the bottom // bits of the opcode itself. uint64_t Op1 = Opcode & DWARF_CFI_PRIMARY_OPERAND_MASK; switch (Primary) { case DW_CFA_advance_loc: case DW_CFA_restore: addInstruction(Primary, Op1); break; case DW_CFA_offset: addInstruction(Primary, Op1, Data.getULEB128(C)); break; default: llvm_unreachable("invalid primary CFI opcode"); } continue; } // Extended opcode - its value is Opcode itself. switch (Opcode) { default: return createStringError(errc::illegal_byte_sequence, "invalid extended CFI opcode 0x%" PRIx8, Opcode); case DW_CFA_nop: case DW_CFA_remember_state: case DW_CFA_restore_state: case DW_CFA_GNU_window_save: // No operands addInstruction(Opcode); break; case DW_CFA_set_loc: // Operands: Address addInstruction(Opcode, Data.getRelocatedAddress(C)); break; case DW_CFA_advance_loc1: // Operands: 1-byte delta addInstruction(Opcode, Data.getRelocatedValue(C, 1)); break; case DW_CFA_advance_loc2: // Operands: 2-byte delta addInstruction(Opcode, Data.getRelocatedValue(C, 2)); break; case DW_CFA_advance_loc4: // Operands: 4-byte delta addInstruction(Opcode, Data.getRelocatedValue(C, 4)); break; case DW_CFA_restore_extended: case DW_CFA_undefined: case DW_CFA_same_value: case DW_CFA_def_cfa_register: case DW_CFA_def_cfa_offset: case DW_CFA_GNU_args_size: // Operands: ULEB128 addInstruction(Opcode, Data.getULEB128(C)); break; case DW_CFA_def_cfa_offset_sf: // Operands: SLEB128 addInstruction(Opcode, Data.getSLEB128(C)); break; case DW_CFA_offset_extended: case DW_CFA_register: case DW_CFA_def_cfa: case DW_CFA_val_offset: { // Operands: ULEB128, ULEB128 // Note: We can not embed getULEB128 directly into function // argument list. getULEB128 changes Offset and order of evaluation // for arguments is unspecified. uint64_t op1 = Data.getULEB128(C); uint64_t op2 = Data.getULEB128(C); addInstruction(Opcode, op1, op2); break; } case DW_CFA_offset_extended_sf: case DW_CFA_def_cfa_sf: case DW_CFA_val_offset_sf: { // Operands: ULEB128, SLEB128 // Note: see comment for the previous case uint64_t op1 = Data.getULEB128(C); uint64_t op2 = (uint64_t)Data.getSLEB128(C); addInstruction(Opcode, op1, op2); break; } case DW_CFA_def_cfa_expression: { uint64_t ExprLength = Data.getULEB128(C); addInstruction(Opcode, 0); StringRef Expression = Data.getBytes(C, ExprLength); DataExtractor Extractor(Expression, Data.isLittleEndian(), Data.getAddressSize()); // Note. We do not pass the DWARF format to DWARFExpression, because // DW_OP_call_ref, the only operation which depends on the format, is // prohibited in call frame instructions, see sec. 6.4.2 in DWARFv5. Instructions.back().Expression = DWARFExpression(Extractor, Data.getAddressSize()); break; } case DW_CFA_expression: case DW_CFA_val_expression: { uint64_t RegNum = Data.getULEB128(C); addInstruction(Opcode, RegNum, 0); uint64_t BlockLength = Data.getULEB128(C); StringRef Expression = Data.getBytes(C, BlockLength); DataExtractor Extractor(Expression, Data.isLittleEndian(), Data.getAddressSize()); // Note. We do not pass the DWARF format to DWARFExpression, because // DW_OP_call_ref, the only operation which depends on the format, is // prohibited in call frame instructions, see sec. 6.4.2 in DWARFv5. Instructions.back().Expression = DWARFExpression(Extractor, Data.getAddressSize()); break; } } } *Offset = C.tell(); return C.takeError(); } namespace { } // end anonymous namespace ArrayRef CFIProgram::getOperandTypes() { static OperandType OpTypes[DW_CFA_restore+1][2]; static bool Initialized = false; if (Initialized) { return ArrayRef(&OpTypes[0], DW_CFA_restore+1); } Initialized = true; #define DECLARE_OP2(OP, OPTYPE0, OPTYPE1) \ do { \ OpTypes[OP][0] = OPTYPE0; \ OpTypes[OP][1] = OPTYPE1; \ } while (false) #define DECLARE_OP1(OP, OPTYPE0) DECLARE_OP2(OP, OPTYPE0, OT_None) #define DECLARE_OP0(OP) DECLARE_OP1(OP, OT_None) DECLARE_OP1(DW_CFA_set_loc, OT_Address); DECLARE_OP1(DW_CFA_advance_loc, OT_FactoredCodeOffset); DECLARE_OP1(DW_CFA_advance_loc1, OT_FactoredCodeOffset); DECLARE_OP1(DW_CFA_advance_loc2, OT_FactoredCodeOffset); DECLARE_OP1(DW_CFA_advance_loc4, OT_FactoredCodeOffset); DECLARE_OP1(DW_CFA_MIPS_advance_loc8, OT_FactoredCodeOffset); DECLARE_OP2(DW_CFA_def_cfa, OT_Register, OT_Offset); DECLARE_OP2(DW_CFA_def_cfa_sf, OT_Register, OT_SignedFactDataOffset); DECLARE_OP1(DW_CFA_def_cfa_register, OT_Register); DECLARE_OP1(DW_CFA_def_cfa_offset, OT_Offset); DECLARE_OP1(DW_CFA_def_cfa_offset_sf, OT_SignedFactDataOffset); DECLARE_OP1(DW_CFA_def_cfa_expression, OT_Expression); DECLARE_OP1(DW_CFA_undefined, OT_Register); DECLARE_OP1(DW_CFA_same_value, OT_Register); DECLARE_OP2(DW_CFA_offset, OT_Register, OT_UnsignedFactDataOffset); DECLARE_OP2(DW_CFA_offset_extended, OT_Register, OT_UnsignedFactDataOffset); DECLARE_OP2(DW_CFA_offset_extended_sf, OT_Register, OT_SignedFactDataOffset); DECLARE_OP2(DW_CFA_val_offset, OT_Register, OT_UnsignedFactDataOffset); DECLARE_OP2(DW_CFA_val_offset_sf, OT_Register, OT_SignedFactDataOffset); DECLARE_OP2(DW_CFA_register, OT_Register, OT_Register); DECLARE_OP2(DW_CFA_expression, OT_Register, OT_Expression); DECLARE_OP2(DW_CFA_val_expression, OT_Register, OT_Expression); DECLARE_OP1(DW_CFA_restore, OT_Register); DECLARE_OP1(DW_CFA_restore_extended, OT_Register); DECLARE_OP0(DW_CFA_remember_state); DECLARE_OP0(DW_CFA_restore_state); DECLARE_OP0(DW_CFA_GNU_window_save); DECLARE_OP1(DW_CFA_GNU_args_size, OT_Offset); DECLARE_OP0(DW_CFA_nop); #undef DECLARE_OP0 #undef DECLARE_OP1 #undef DECLARE_OP2 return ArrayRef(&OpTypes[0], DW_CFA_restore+1); } /// Print \p Opcode's operand number \p OperandIdx which has value \p Operand. void CFIProgram::printOperand(raw_ostream &OS, const MCRegisterInfo *MRI, bool IsEH, const Instruction &Instr, unsigned OperandIdx, uint64_t Operand) const { assert(OperandIdx < 2); uint8_t Opcode = Instr.Opcode; OperandType Type = getOperandTypes()[Opcode][OperandIdx]; switch (Type) { case OT_Unset: { OS << " Unsupported " << (OperandIdx ? "second" : "first") << " operand to"; auto OpcodeName = CallFrameString(Opcode, Arch); if (!OpcodeName.empty()) OS << " " << OpcodeName; else OS << format(" Opcode %x", Opcode); break; } case OT_None: break; case OT_Address: OS << format(" %" PRIx64, Operand); break; case OT_Offset: // The offsets are all encoded in a unsigned form, but in practice // consumers use them signed. It's most certainly legacy due to // the lack of signed variants in the first Dwarf standards. OS << format(" %+" PRId64, int64_t(Operand)); break; case OT_FactoredCodeOffset: // Always Unsigned if (CodeAlignmentFactor) OS << format(" %" PRId64, Operand * CodeAlignmentFactor); else OS << format(" %" PRId64 "*code_alignment_factor" , Operand); break; case OT_SignedFactDataOffset: if (DataAlignmentFactor) OS << format(" %" PRId64, int64_t(Operand) * DataAlignmentFactor); else OS << format(" %" PRId64 "*data_alignment_factor" , int64_t(Operand)); break; case OT_UnsignedFactDataOffset: if (DataAlignmentFactor) OS << format(" %" PRId64, Operand * DataAlignmentFactor); else OS << format(" %" PRId64 "*data_alignment_factor" , Operand); break; case OT_Register: OS << format(" reg%" PRId64, Operand); break; case OT_Expression: assert(Instr.Expression && "missing DWARFExpression object"); OS << " "; Instr.Expression->print(OS, MRI, nullptr, IsEH); break; } } void CFIProgram::dump(raw_ostream &OS, const MCRegisterInfo *MRI, bool IsEH, unsigned IndentLevel) const { for (const auto &Instr : Instructions) { uint8_t Opcode = Instr.Opcode; if (Opcode & DWARF_CFI_PRIMARY_OPCODE_MASK) Opcode &= DWARF_CFI_PRIMARY_OPCODE_MASK; OS.indent(2 * IndentLevel); OS << CallFrameString(Opcode, Arch) << ":"; for (unsigned i = 0; i < Instr.Ops.size(); ++i) printOperand(OS, MRI, IsEH, Instr, i, Instr.Ops[i]); OS << '\n'; } } // Returns the CIE identifier to be used by the requested format. // CIE ids for .debug_frame sections are defined in Section 7.24 of DWARFv5. // For CIE ID in .eh_frame sections see // https://refspecs.linuxfoundation.org/LSB_5.0.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html constexpr uint64_t getCIEId(bool IsDWARF64, bool IsEH) { if (IsEH) return 0; if (IsDWARF64) return DW64_CIE_ID; return DW_CIE_ID; } void CIE::dump(raw_ostream &OS, const MCRegisterInfo *MRI, bool IsEH) const { // A CIE with a zero length is a terminator entry in the .eh_frame section. if (IsEH && Length == 0) { OS << format("%08" PRIx64, Offset) << " ZERO terminator\n"; return; } OS << format("%08" PRIx64, Offset) << format(" %0*" PRIx64, IsDWARF64 ? 16 : 8, Length) << format(" %0*" PRIx64, IsDWARF64 && !IsEH ? 16 : 8, getCIEId(IsDWARF64, IsEH)) << " CIE\n" << " Format: " << FormatString(IsDWARF64) << "\n" << format(" Version: %d\n", Version) << " Augmentation: \"" << Augmentation << "\"\n"; if (Version >= 4) { OS << format(" Address size: %u\n", (uint32_t)AddressSize); OS << format(" Segment desc size: %u\n", (uint32_t)SegmentDescriptorSize); } OS << format(" Code alignment factor: %u\n", (uint32_t)CodeAlignmentFactor); OS << format(" Data alignment factor: %d\n", (int32_t)DataAlignmentFactor); OS << format(" Return address column: %d\n", (int32_t)ReturnAddressRegister); if (Personality) OS << format(" Personality Address: %016" PRIx64 "\n", *Personality); if (!AugmentationData.empty()) { OS << " Augmentation data: "; for (uint8_t Byte : AugmentationData) OS << ' ' << hexdigit(Byte >> 4) << hexdigit(Byte & 0xf); OS << "\n"; } OS << "\n"; CFIs.dump(OS, MRI, IsEH); OS << "\n"; } void FDE::dump(raw_ostream &OS, const MCRegisterInfo *MRI, bool IsEH) const { OS << format("%08" PRIx64, Offset) << format(" %0*" PRIx64, IsDWARF64 ? 16 : 8, Length) << format(" %0*" PRIx64, IsDWARF64 && !IsEH ? 16 : 8, CIEPointer) << " FDE cie="; if (LinkedCIE) OS << format("%08" PRIx64, LinkedCIE->getOffset()); else OS << ""; OS << format(" pc=%08" PRIx64 "...%08" PRIx64 "\n", InitialLocation, InitialLocation + AddressRange); OS << " Format: " << FormatString(IsDWARF64) << "\n"; if (LSDAAddress) OS << format(" LSDA Address: %016" PRIx64 "\n", *LSDAAddress); CFIs.dump(OS, MRI, IsEH); OS << "\n"; } DWARFDebugFrame::DWARFDebugFrame(Triple::ArchType Arch, bool IsEH, uint64_t EHFrameAddress) : Arch(Arch), IsEH(IsEH), EHFrameAddress(EHFrameAddress) {} DWARFDebugFrame::~DWARFDebugFrame() = default; static void LLVM_ATTRIBUTE_UNUSED dumpDataAux(DataExtractor Data, uint64_t Offset, int Length) { errs() << "DUMP: "; for (int i = 0; i < Length; ++i) { uint8_t c = Data.getU8(&Offset); errs().write_hex(c); errs() << " "; } errs() << "\n"; } Error DWARFDebugFrame::parse(DWARFDataExtractor Data) { uint64_t Offset = 0; DenseMap CIEs; while (Data.isValidOffset(Offset)) { uint64_t StartOffset = Offset; uint64_t Length; DwarfFormat Format; std::tie(Length, Format) = Data.getInitialLength(&Offset); bool IsDWARF64 = Format == DWARF64; // If the Length is 0, then this CIE is a terminator. We add it because some // dumper tools might need it to print something special for such entries // (e.g. llvm-objdump --dwarf=frames prints "ZERO terminator"). if (Length == 0) { auto Cie = std::make_unique( IsDWARF64, StartOffset, 0, 0, SmallString<8>(), 0, 0, 0, 0, 0, SmallString<8>(), 0, 0, None, None, Arch); CIEs[StartOffset] = Cie.get(); Entries.push_back(std::move(Cie)); break; } // At this point, Offset points to the next field after Length. // Length is the structure size excluding itself. Compute an offset one // past the end of the structure (needed to know how many instructions to // read). uint64_t StartStructureOffset = Offset; uint64_t EndStructureOffset = Offset + Length; // The Id field's size depends on the DWARF format Error Err = Error::success(); uint64_t Id = Data.getRelocatedValue((IsDWARF64 && !IsEH) ? 8 : 4, &Offset, /*SectionIndex=*/nullptr, &Err); if (Err) return Err; if (Id == getCIEId(IsDWARF64, IsEH)) { uint8_t Version = Data.getU8(&Offset); const char *Augmentation = Data.getCStr(&Offset); StringRef AugmentationString(Augmentation ? Augmentation : ""); // TODO: we should provide a way to report a warning and continue dumping. if (IsEH && Version != 1) return createStringError(errc::not_supported, "unsupported CIE version: %" PRIu8, Version); uint8_t AddressSize = Version < 4 ? Data.getAddressSize() : Data.getU8(&Offset); Data.setAddressSize(AddressSize); uint8_t SegmentDescriptorSize = Version < 4 ? 0 : Data.getU8(&Offset); uint64_t CodeAlignmentFactor = Data.getULEB128(&Offset); int64_t DataAlignmentFactor = Data.getSLEB128(&Offset); uint64_t ReturnAddressRegister = Version == 1 ? Data.getU8(&Offset) : Data.getULEB128(&Offset); // Parse the augmentation data for EH CIEs StringRef AugmentationData(""); uint32_t FDEPointerEncoding = DW_EH_PE_absptr; uint32_t LSDAPointerEncoding = DW_EH_PE_omit; Optional Personality; Optional PersonalityEncoding; if (IsEH) { Optional AugmentationLength; uint64_t StartAugmentationOffset; uint64_t EndAugmentationOffset; // Walk the augmentation string to get all the augmentation data. for (unsigned i = 0, e = AugmentationString.size(); i != e; ++i) { switch (AugmentationString[i]) { default: return createStringError( errc::invalid_argument, "unknown augmentation character in entry at 0x%" PRIx64, StartOffset); case 'L': LSDAPointerEncoding = Data.getU8(&Offset); break; case 'P': { if (Personality) return createStringError( errc::invalid_argument, "duplicate personality in entry at 0x%" PRIx64, StartOffset); PersonalityEncoding = Data.getU8(&Offset); Personality = Data.getEncodedPointer( &Offset, *PersonalityEncoding, EHFrameAddress ? EHFrameAddress + Offset : 0); break; } case 'R': FDEPointerEncoding = Data.getU8(&Offset); break; case 'S': // Current frame is a signal trampoline. break; case 'z': if (i) return createStringError( errc::invalid_argument, "'z' must be the first character at 0x%" PRIx64, StartOffset); // Parse the augmentation length first. We only parse it if // the string contains a 'z'. AugmentationLength = Data.getULEB128(&Offset); StartAugmentationOffset = Offset; EndAugmentationOffset = Offset + *AugmentationLength; break; case 'B': // B-Key is used for signing functions associated with this // augmentation string break; } } if (AugmentationLength.hasValue()) { if (Offset != EndAugmentationOffset) return createStringError(errc::invalid_argument, "parsing augmentation data at 0x%" PRIx64 " failed", StartOffset); AugmentationData = Data.getData().slice(StartAugmentationOffset, EndAugmentationOffset); } } auto Cie = std::make_unique( IsDWARF64, StartOffset, Length, Version, AugmentationString, AddressSize, SegmentDescriptorSize, CodeAlignmentFactor, DataAlignmentFactor, ReturnAddressRegister, AugmentationData, FDEPointerEncoding, LSDAPointerEncoding, Personality, PersonalityEncoding, Arch); CIEs[StartOffset] = Cie.get(); Entries.emplace_back(std::move(Cie)); } else { // FDE uint64_t CIEPointer = Id; uint64_t InitialLocation = 0; uint64_t AddressRange = 0; Optional LSDAAddress; CIE *Cie = CIEs[IsEH ? (StartStructureOffset - CIEPointer) : CIEPointer]; if (IsEH) { // The address size is encoded in the CIE we reference. if (!Cie) return createStringError(errc::invalid_argument, "parsing FDE data at 0x%" PRIx64 " failed due to missing CIE", StartOffset); if (auto Val = Data.getEncodedPointer( &Offset, Cie->getFDEPointerEncoding(), EHFrameAddress ? EHFrameAddress + Offset : 0)) { InitialLocation = *Val; } if (auto Val = Data.getEncodedPointer( &Offset, Cie->getFDEPointerEncoding(), 0)) { AddressRange = *Val; } StringRef AugmentationString = Cie->getAugmentationString(); if (!AugmentationString.empty()) { // Parse the augmentation length and data for this FDE. uint64_t AugmentationLength = Data.getULEB128(&Offset); uint64_t EndAugmentationOffset = Offset + AugmentationLength; // Decode the LSDA if the CIE augmentation string said we should. if (Cie->getLSDAPointerEncoding() != DW_EH_PE_omit) { LSDAAddress = Data.getEncodedPointer( &Offset, Cie->getLSDAPointerEncoding(), EHFrameAddress ? Offset + EHFrameAddress : 0); } if (Offset != EndAugmentationOffset) return createStringError(errc::invalid_argument, "parsing augmentation data at 0x%" PRIx64 " failed", StartOffset); } } else { InitialLocation = Data.getRelocatedAddress(&Offset); AddressRange = Data.getRelocatedAddress(&Offset); } Entries.emplace_back(new FDE(IsDWARF64, StartOffset, Length, CIEPointer, InitialLocation, AddressRange, Cie, LSDAAddress, Arch)); } if (Error E = Entries.back()->cfis().parse(Data, &Offset, EndStructureOffset)) return E; if (Offset != EndStructureOffset) return createStringError( errc::invalid_argument, "parsing entry instructions at 0x%" PRIx64 " failed", StartOffset); } return Error::success(); } FrameEntry *DWARFDebugFrame::getEntryAtOffset(uint64_t Offset) const { auto It = partition_point(Entries, [=](const std::unique_ptr &E) { return E->getOffset() < Offset; }); if (It != Entries.end() && (*It)->getOffset() == Offset) return It->get(); return nullptr; } void DWARFDebugFrame::dump(raw_ostream &OS, const MCRegisterInfo *MRI, Optional Offset) const { if (Offset) { if (auto *Entry = getEntryAtOffset(*Offset)) Entry->dump(OS, MRI, IsEH); return; } OS << "\n"; for (const auto &Entry : Entries) Entry->dump(OS, MRI, IsEH); }