1 //===- DWARFCFIProgram.h ----------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_DEBUGINFO_DWARF_LOWLEVEL_DWARFCFIPROGRAM_H 10 #define LLVM_DEBUGINFO_DWARF_LOWLEVEL_DWARFCFIPROGRAM_H 11 12 #include "llvm/ADT/ArrayRef.h" 13 #include "llvm/ADT/SmallString.h" 14 #include "llvm/ADT/iterator.h" 15 #include "llvm/DebugInfo/DWARF/LowLevel/DWARFDataExtractorSimple.h" 16 #include "llvm/DebugInfo/DWARF/LowLevel/DWARFExpression.h" 17 #include "llvm/Support/Compiler.h" 18 #include "llvm/Support/Error.h" 19 #include "llvm/TargetParser/Triple.h" 20 #include <map> 21 #include <memory> 22 #include <vector> 23 24 namespace llvm { 25 26 namespace dwarf { 27 28 /// Represent a sequence of Call Frame Information instructions that, when read 29 /// in order, construct a table mapping PC to frame state. This can also be 30 /// referred to as "CFI rules" in DWARF literature to avoid confusion with 31 /// computer programs in the broader sense, and in this context each instruction 32 /// would be a rule to establish the mapping. Refer to pg. 172 in the DWARF5 33 /// manual, "6.4.1 Structure of Call Frame Information". 34 class CFIProgram { 35 public: 36 static constexpr size_t MaxOperands = 3; 37 typedef SmallVector<uint64_t, MaxOperands> Operands; 38 39 /// An instruction consists of a DWARF CFI opcode and an optional sequence of 40 /// operands. If it refers to an expression, then this expression has its own 41 /// sequence of operations and operands handled separately by DWARFExpression. 42 struct Instruction { InstructionInstruction43 Instruction(uint8_t Opcode) : Opcode(Opcode) {} 44 45 uint8_t Opcode; 46 Operands Ops; 47 // Associated DWARF expression in case this instruction refers to one 48 std::optional<DWARFExpression> Expression; 49 50 LLVM_ABI Expected<uint64_t> getOperandAsUnsigned(const CFIProgram &CFIP, 51 uint32_t OperandIdx) const; 52 53 LLVM_ABI Expected<int64_t> getOperandAsSigned(const CFIProgram &CFIP, 54 uint32_t OperandIdx) const; 55 }; 56 57 using InstrList = std::vector<Instruction>; 58 using iterator = InstrList::iterator; 59 using const_iterator = InstrList::const_iterator; 60 begin()61 iterator begin() { return Instructions.begin(); } begin()62 const_iterator begin() const { return Instructions.begin(); } end()63 iterator end() { return Instructions.end(); } end()64 const_iterator end() const { return Instructions.end(); } 65 size()66 unsigned size() const { return (unsigned)Instructions.size(); } empty()67 bool empty() const { return Instructions.empty(); } codeAlign()68 uint64_t codeAlign() const { return CodeAlignmentFactor; } dataAlign()69 int64_t dataAlign() const { return DataAlignmentFactor; } triple()70 Triple::ArchType triple() const { return Arch; } 71 CFIProgram(uint64_t CodeAlignmentFactor,int64_t DataAlignmentFactor,Triple::ArchType Arch)72 CFIProgram(uint64_t CodeAlignmentFactor, int64_t DataAlignmentFactor, 73 Triple::ArchType Arch) 74 : CodeAlignmentFactor(CodeAlignmentFactor), 75 DataAlignmentFactor(DataAlignmentFactor), Arch(Arch) {} 76 77 /// Parse and store a sequence of CFI instructions from Data, 78 /// starting at *Offset and ending at EndOffset. *Offset is updated 79 /// to EndOffset upon successful parsing, or indicates the offset 80 /// where a problem occurred in case an error is returned. 81 template <typename T> parse(DWARFDataExtractorBase<T> & Data,uint64_t * Offset,uint64_t EndOffset)82 Error parse(DWARFDataExtractorBase<T> &Data, uint64_t *Offset, 83 uint64_t EndOffset) { 84 // See DWARF standard v3, section 7.23 85 const uint8_t DWARF_CFI_PRIMARY_OPCODE_MASK = 0xc0; 86 const uint8_t DWARF_CFI_PRIMARY_OPERAND_MASK = 0x3f; 87 88 DataExtractor::Cursor C(*Offset); 89 while (C && C.tell() < EndOffset) { 90 uint8_t Opcode = Data.getRelocatedValue(C, 1); 91 if (!C) 92 break; 93 94 // Some instructions have a primary opcode encoded in the top bits. 95 if (uint8_t Primary = Opcode & DWARF_CFI_PRIMARY_OPCODE_MASK) { 96 // If it's a primary opcode, the first operand is encoded in the 97 // bottom bits of the opcode itself. 98 uint64_t Op1 = Opcode & DWARF_CFI_PRIMARY_OPERAND_MASK; 99 switch (Primary) { 100 case DW_CFA_advance_loc: 101 case DW_CFA_restore: 102 addInstruction(Primary, Op1); 103 break; 104 case DW_CFA_offset: 105 addInstruction(Primary, Op1, Data.getULEB128(C)); 106 break; 107 default: 108 llvm_unreachable("invalid primary CFI opcode"); 109 } 110 continue; 111 } 112 113 // Extended opcode - its value is Opcode itself. 114 switch (Opcode) { 115 default: 116 return createStringError(errc::illegal_byte_sequence, 117 "invalid extended CFI opcode 0x%" PRIx8, 118 Opcode); 119 case DW_CFA_nop: 120 case DW_CFA_remember_state: 121 case DW_CFA_restore_state: 122 case DW_CFA_GNU_window_save: 123 case DW_CFA_AARCH64_negate_ra_state_with_pc: 124 // No operands 125 addInstruction(Opcode); 126 break; 127 case DW_CFA_set_loc: 128 // Operands: Address 129 addInstruction(Opcode, Data.getRelocatedAddress(C)); 130 break; 131 case DW_CFA_advance_loc1: 132 // Operands: 1-byte delta 133 addInstruction(Opcode, Data.getRelocatedValue(C, 1)); 134 break; 135 case DW_CFA_advance_loc2: 136 // Operands: 2-byte delta 137 addInstruction(Opcode, Data.getRelocatedValue(C, 2)); 138 break; 139 case DW_CFA_advance_loc4: 140 // Operands: 4-byte delta 141 addInstruction(Opcode, Data.getRelocatedValue(C, 4)); 142 break; 143 case DW_CFA_restore_extended: 144 case DW_CFA_undefined: 145 case DW_CFA_same_value: 146 case DW_CFA_def_cfa_register: 147 case DW_CFA_def_cfa_offset: 148 case DW_CFA_GNU_args_size: 149 // Operands: ULEB128 150 addInstruction(Opcode, Data.getULEB128(C)); 151 break; 152 case DW_CFA_def_cfa_offset_sf: 153 // Operands: SLEB128 154 addInstruction(Opcode, Data.getSLEB128(C)); 155 break; 156 case DW_CFA_LLVM_def_aspace_cfa: 157 case DW_CFA_LLVM_def_aspace_cfa_sf: { 158 auto RegNum = Data.getULEB128(C); 159 auto CfaOffset = Opcode == DW_CFA_LLVM_def_aspace_cfa 160 ? Data.getULEB128(C) 161 : Data.getSLEB128(C); 162 auto AddressSpace = Data.getULEB128(C); 163 addInstruction(Opcode, RegNum, CfaOffset, AddressSpace); 164 break; 165 } 166 case DW_CFA_offset_extended: 167 case DW_CFA_register: 168 case DW_CFA_def_cfa: 169 case DW_CFA_val_offset: { 170 // Operands: ULEB128, ULEB128 171 // Note: We can not embed getULEB128 directly into function 172 // argument list. getULEB128 changes Offset and order of evaluation 173 // for arguments is unspecified. 174 uint64_t op1 = Data.getULEB128(C); 175 uint64_t op2 = Data.getULEB128(C); 176 addInstruction(Opcode, op1, op2); 177 break; 178 } 179 case DW_CFA_offset_extended_sf: 180 case DW_CFA_def_cfa_sf: 181 case DW_CFA_val_offset_sf: { 182 // Operands: ULEB128, SLEB128 183 // Note: see comment for the previous case 184 uint64_t op1 = Data.getULEB128(C); 185 uint64_t op2 = (uint64_t)Data.getSLEB128(C); 186 addInstruction(Opcode, op1, op2); 187 break; 188 } 189 case DW_CFA_def_cfa_expression: { 190 uint64_t ExprLength = Data.getULEB128(C); 191 addInstruction(Opcode, 0); 192 StringRef Expression = Data.getBytes(C, ExprLength); 193 194 DataExtractor Extractor(Expression, Data.isLittleEndian(), 195 Data.getAddressSize()); 196 // Note. We do not pass the DWARF format to DWARFExpression, because 197 // DW_OP_call_ref, the only operation which depends on the format, is 198 // prohibited in call frame instructions, see sec. 6.4.2 in DWARFv5. 199 Instructions.back().Expression = 200 DWARFExpression(Extractor, Data.getAddressSize()); 201 break; 202 } 203 case DW_CFA_expression: 204 case DW_CFA_val_expression: { 205 uint64_t RegNum = Data.getULEB128(C); 206 addInstruction(Opcode, RegNum, 0); 207 208 uint64_t BlockLength = Data.getULEB128(C); 209 StringRef Expression = Data.getBytes(C, BlockLength); 210 DataExtractor Extractor(Expression, Data.isLittleEndian(), 211 Data.getAddressSize()); 212 // Note. We do not pass the DWARF format to DWARFExpression, because 213 // DW_OP_call_ref, the only operation which depends on the format, is 214 // prohibited in call frame instructions, see sec. 6.4.2 in DWARFv5. 215 Instructions.back().Expression = 216 DWARFExpression(Extractor, Data.getAddressSize()); 217 break; 218 } 219 } 220 } 221 222 *Offset = C.tell(); 223 return C.takeError(); 224 } 225 addInstruction(const Instruction & I)226 void addInstruction(const Instruction &I) { Instructions.push_back(I); } 227 228 /// Get a DWARF CFI call frame string for the given DW_CFA opcode. 229 LLVM_ABI StringRef callFrameString(unsigned Opcode) const; 230 231 /// Types of operands to CFI instructions 232 /// In DWARF, this type is implicitly tied to a CFI instruction opcode and 233 /// thus this type doesn't need to be explicitly written to the file (this is 234 /// not a DWARF encoding). The relationship of instrs to operand types can 235 /// be obtained from getOperandTypes() and is only used to simplify 236 /// instruction printing and error messages. 237 enum OperandType { 238 OT_Unset, 239 OT_None, 240 OT_Address, 241 OT_Offset, 242 OT_FactoredCodeOffset, 243 OT_SignedFactDataOffset, 244 OT_UnsignedFactDataOffset, 245 OT_Register, 246 OT_AddressSpace, 247 OT_Expression 248 }; 249 250 /// Get the OperandType as a "const char *". 251 LLVM_ABI static const char *operandTypeString(OperandType OT); 252 253 /// Retrieve the array describing the types of operands according to the enum 254 /// above. This is indexed by opcode. 255 LLVM_ABI static ArrayRef<OperandType[MaxOperands]> getOperandTypes(); 256 257 /// Convenience method to add a new instruction with the given opcode. addInstruction(uint8_t Opcode)258 void addInstruction(uint8_t Opcode) { 259 Instructions.push_back(Instruction(Opcode)); 260 } 261 262 /// Add a new single-operand instruction. addInstruction(uint8_t Opcode,uint64_t Operand1)263 void addInstruction(uint8_t Opcode, uint64_t Operand1) { 264 Instructions.push_back(Instruction(Opcode)); 265 Instructions.back().Ops.push_back(Operand1); 266 } 267 268 /// Add a new instruction that has two operands. addInstruction(uint8_t Opcode,uint64_t Operand1,uint64_t Operand2)269 void addInstruction(uint8_t Opcode, uint64_t Operand1, uint64_t Operand2) { 270 Instructions.push_back(Instruction(Opcode)); 271 Instructions.back().Ops.push_back(Operand1); 272 Instructions.back().Ops.push_back(Operand2); 273 } 274 275 /// Add a new instruction that has three operands. addInstruction(uint8_t Opcode,uint64_t Operand1,uint64_t Operand2,uint64_t Operand3)276 void addInstruction(uint8_t Opcode, uint64_t Operand1, uint64_t Operand2, 277 uint64_t Operand3) { 278 Instructions.push_back(Instruction(Opcode)); 279 Instructions.back().Ops.push_back(Operand1); 280 Instructions.back().Ops.push_back(Operand2); 281 Instructions.back().Ops.push_back(Operand3); 282 } 283 284 private: 285 std::vector<Instruction> Instructions; 286 const uint64_t CodeAlignmentFactor; 287 const int64_t DataAlignmentFactor; 288 Triple::ArchType Arch; 289 }; 290 291 } // end namespace dwarf 292 293 } // end namespace llvm 294 295 #endif // LLVM_DEBUGINFO_DWARF_LOWLEVEL_DWARFCFIPROGRAM_H 296