/**
 * dwarf_eh.h - Defines some helper functions for parsing DWARF exception
 * handling tables.
 *
 * This file contains various helper functions that are independent of the
 * language-specific code.  It can be used in any personality function for the
 * Itanium ABI.
 */
#include <assert.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>

// TODO: Factor out Itanium / ARM differences.  We probably want an itanium.h
// and arm.h that can be included by this file depending on the target ABI.

// _GNU_SOURCE must be defined for unwind.h to expose some of the functions
// that we want.  If it isn't, then we define it and undefine it to make sure
// that it doesn't impact the rest of the program.
#ifndef _GNU_SOURCE
#    define _GNU_SOURCE 1
#    include "unwind.h"
#    undef _GNU_SOURCE
#else
#    include "unwind.h"
#endif

#include <stdint.h>

/// Type used for pointers into DWARF data
typedef unsigned char *dw_eh_ptr_t;

/// Flag indicating a signed quantity
#define DW_EH_PE_signed 0x08

/// DWARF data encoding types.
enum dwarf_data_encoding
{
    /// Unsigned, little-endian, base 128-encoded (variable length).
    DW_EH_PE_uleb128 = 0x01,
    /// Unsigned 16-bit integer.
    DW_EH_PE_udata2  = 0x02,
    /// Unsigned 32-bit integer.
    DW_EH_PE_udata4  = 0x03,
    /// Unsigned 64-bit integer.
    DW_EH_PE_udata8  = 0x04,
    /// Signed, little-endian, base 128-encoded (variable length)
    DW_EH_PE_sleb128 = DW_EH_PE_uleb128 | DW_EH_PE_signed,
    /// Signed 16-bit integer.
    DW_EH_PE_sdata2  = DW_EH_PE_udata2 | DW_EH_PE_signed,
    /// Signed 32-bit integer.
    DW_EH_PE_sdata4  = DW_EH_PE_udata4 | DW_EH_PE_signed,
    /// Signed 64-bit integer.
    DW_EH_PE_sdata8  = DW_EH_PE_udata8 | DW_EH_PE_signed
};

/**
 * Returns the encoding for a DWARF EH table entry.  The encoding is stored in
 * the low four bits of an octet.  The high four bits store the addressing
 * mode.
 */
static inline enum dwarf_data_encoding get_encoding(unsigned char x)
{
    return (enum dwarf_data_encoding)(x & 0xf);
}

/**
 * DWARF addressing mode constants.  When reading a pointer value from a DWARF
 * exception table, you must know how it is stored and what the addressing mode
 * is.  The low four bits tell you the encoding, allowing you to decode a
 * number.  The high four bits tell you the addressing mode, allowing you to
 * turn that number into an address in memory.
 */
enum dwarf_data_relative
{
    /// Value is omitted
    DW_EH_PE_omit     = 0xff,
    /// Absolute pointer value
    DW_EH_PE_absptr   = 0x00,
    /// Value relative to program counter
    DW_EH_PE_pcrel    = 0x10,
    /// Value relative to the text segment
    DW_EH_PE_textrel  = 0x20,
    /// Value relative to the data segment
    DW_EH_PE_datarel  = 0x30,
    /// Value relative to the start of the function
    DW_EH_PE_funcrel  = 0x40,
    /// Aligned pointer (Not supported yet - are they actually used?)
    DW_EH_PE_aligned  = 0x50,
    /// Pointer points to address of real value
    DW_EH_PE_indirect = 0x80
};

/**
 * Returns the addressing mode component of this encoding (bits 4-6; the
 * indirect flag in bit 7 is masked off).
 */
static inline enum dwarf_data_relative get_base(unsigned char x)
{
    return (enum dwarf_data_relative)(x & 0x70);
}

/**
 * Returns whether an encoding represents an indirect address, i.e. whether
 * the DW_EH_PE_indirect bit is set.
 */
static int is_indirect(unsigned char x)
{
    return ((x & DW_EH_PE_indirect) == DW_EH_PE_indirect);
}

/**
 * Returns the size, in bytes, of a fixed-size encoding.  This function will
 * abort if called with a value that is not a fixed-size encoding.
 */
static inline int dwarf_size_of_fixed_size_field(unsigned char type)
{
    // Switch on an int: DW_EH_PE_absptr belongs to the addressing-mode enum
    // but is also the (zero) encoding meaning "native pointer".
    switch ((int)get_encoding(type))
    {
        case DW_EH_PE_sdata2:
        case DW_EH_PE_udata2:
            return 2;
        case DW_EH_PE_sdata4:
        case DW_EH_PE_udata4:
            return 4;
        case DW_EH_PE_sdata8:
        case DW_EH_PE_udata8:
            return 8;
        case DW_EH_PE_absptr:
            return sizeof(void*);
        default:
            abort();
    }
}

/**
 * Read an unsigned, little-endian, base-128, DWARF value.  Updates *data to
 * point to the end of the value.  Stores the number of bits read in the value
 * pointed to by b, allowing you to determine the value of the highest bit, and
 * therefore the sign of a signed value.
 *
 * This function is not intended to be called directly.  Use read_sleb128() or
 * read_uleb128() for reading signed and unsigned versions, respectively.
 */
static uint64_t read_leb128(dw_eh_ptr_t *data, int *b)
{
    uint64_t uleb = 0;
    unsigned int bit = 0;
    unsigned char octet;
    // We have to read at least one octet, and keep reading until we get to
    // one with the high bit unset
    do
    {
        // This check is a bit too strict - we should also check the highest
        // bit of the digit.
        assert(bit < sizeof(uint64_t) * 8);
        octet = **data;
        // Widen the 7-bit digit *before* shifting: shifting the promoted int
        // is undefined (and loses data) once the shift reaches 31 bits.  The
        // range guard keeps the shift defined even when asserts are disabled.
        if (bit < sizeof(uint64_t) * 8)
        {
            uleb |= (uint64_t)(octet & 0x7f) << bit;
        }
        // Increase the shift value
        bit += 7;
        // Proceed to the next octet
        (*data)++;
        // Terminate when we reach an octet that does not have the high
        // (continuation) bit set
    } while (octet & 0x80);
    *b = (int)bit;

    return uleb;
}

/**
 * Reads an unsigned little-endian base-128 value starting at the address
 * pointed to by *data.  Updates *data to point to the next byte after the end
 * of the variable-length value.
 *
 * Note that the result is returned as an int64_t, so values with bit 63 set
 * come back negative.
 */
static int64_t read_uleb128(dw_eh_ptr_t *data)
{
    int bits;
    return (int64_t)read_leb128(data, &bits);
}

/**
 * Reads a signed little-endian base-128 value starting at the address pointed
 * to by *data.  Updates *data to point to the next byte after the end of the
 * variable-length value.
 */
static int64_t read_sleb128(dw_eh_ptr_t *data)
{
    int bits;
    // Read as if it's unsigned
    uint64_t uleb = read_leb128(data, &bits);
    // If the most significant bit read is 1, then we need to sign extend it.
    // When 64 or more bits were read the value already fills the word, and
    // shifting by that amount would be undefined, so nothing is done.
    if (bits < 64 && ((uleb >> (bits - 1)) & 1))
    {
        // Sign extend by setting all bits in front of it to 1.  An unsigned
        // mask is used because left-shifting a negative value is undefined.
        uleb |= ~(uint64_t)0 << bits;
    }
    return (int64_t)uleb;
}

/**
 * Reads a value using the specified encoding from the address pointed to by
 * *data.  Updates the value of *data to point to the next byte after the end
 * of the data.  Only the low four bits of the encoding are examined here.
 *
 * Signed encodings are sign-extended to 64 bits.  Aborts if the encoding is
 * not one of the known types.
 */
static uint64_t read_value(char encoding, dw_eh_ptr_t *data)
{
    uint64_t v = 0;
    switch ((int)get_encoding((unsigned char)encoding))
    {
        // Read fixed-length types.  The table data is a byte stream with no
        // alignment guarantee, so copy the bytes out rather than
        // dereferencing a cast pointer.
#define READ(dwarf, ctype) \
        case dwarf: \
        { \
            ctype raw; \
            memcpy(&raw, *data, sizeof(raw)); \
            v = (uint64_t)raw; \
            *data += sizeof(raw); \
            break; \
        }
        READ(DW_EH_PE_udata2, uint16_t)
        READ(DW_EH_PE_udata4, uint32_t)
        READ(DW_EH_PE_udata8, uint64_t)
        READ(DW_EH_PE_sdata2, int16_t)
        READ(DW_EH_PE_sdata4, int32_t)
        READ(DW_EH_PE_sdata8, int64_t)
        READ(DW_EH_PE_absptr, intptr_t)
#undef READ
        // Read variable-length types
        case DW_EH_PE_sleb128:
            v = (uint64_t)read_sleb128(data);
            break;
        case DW_EH_PE_uleb128:
            v = (uint64_t)read_uleb128(data);
            break;
        default:
            abort();
    }

    return v;
}

/**
 * Resolves an indirect value.  This expects an unwind context, an encoding, a
 * decoded value, and the start of the region as arguments.  The returned value
 * is a pointer to the address identified by the encoded value.
 *
 * The base selected by the addressing mode of the encoding is added to v
 * first.  If the encoding does not specify an indirect value, the result of
 * that addition is returned; otherwise it is treated as the address of a
 * pointer, which is loaded and returned.
 */
static uint64_t resolve_indirect_value(struct _Unwind_Context *c,
                                       unsigned char encoding,
                                       int64_t v,
                                       dw_eh_ptr_t start)
{
    uint64_t address = (uint64_t)v;
    switch (get_base(encoding))
    {
        case DW_EH_PE_pcrel:
            address += (uint64_t)(uintptr_t)start;
            break;
        case DW_EH_PE_textrel:
            address += (uint64_t)_Unwind_GetTextRelBase(c);
            break;
        case DW_EH_PE_datarel:
            address += (uint64_t)_Unwind_GetDataRelBase(c);
            break;
        case DW_EH_PE_funcrel:
            address += (uint64_t)_Unwind_GetRegionStart(c);
            break;
        default:
            break;
    }
    // If this is an indirect value, then it is really the address of the real
    // value
    // TODO: Check whether this should really always be a pointer - it seems to
    // be a GCC extension, so not properly documented...
    if (is_indirect(encoding))
    {
        address = (uint64_t)(uintptr_t)*(void**)(uintptr_t)address;
    }
    return address;
}

/**
 * Reads an encoding octet followed by a value in that encoding, updating
 * *data to point to the next byte.  If the encoding is DW_EH_PE_omit then
 * only the encoding octet is consumed and *out is left untouched.
 */
static inline void read_value_with_encoding(struct _Unwind_Context *context,
                                            dw_eh_ptr_t *data,
                                            uint64_t *out)
{
    // NOTE(review): this is the address of the encoding octet, not of the
    // value that follows it, and it is what gets used as the base for
    // DW_EH_PE_pcrel values - confirm that this is the intended base.
    dw_eh_ptr_t start = *data;
    unsigned char encoding = *((*data)++);
    // If this value is omitted, skip it and don't touch the output value
    if (encoding == DW_EH_PE_omit)
    {
        return;
    }

    *out = read_value(encoding, data);
    *out = resolve_indirect_value(context, encoding, *out, start);
}

/**
 * Structure storing a decoded language-specific data area.  Use parse_lsda()
 * to generate an instance of this structure from the address returned by the
 * generic unwind library.
 *
 * You should not need to inspect the fields of this structure directly if you
 * are just using this header.  The structure stores the locations of the
 * various tables used for unwinding exceptions and is used by the functions
 * for reading values from these tables.
 */
struct dwarf_eh_lsda
{
    /// The start of the region.  This is a cache of the value returned by
    /// _Unwind_GetRegionStart().
    dw_eh_ptr_t region_start;
    /// The start of the landing pads table.
    dw_eh_ptr_t landing_pads;
    /// The start of the type table.
    dw_eh_ptr_t type_table;
    /// The encoding used for entries in the type tables.
    unsigned char type_table_encoding;
    /// The location of the call-site table.
    dw_eh_ptr_t call_site_table;
    /// The location of the action table.
    dw_eh_ptr_t action_table;
    /// The encoding used for entries in the call-site table.
    unsigned char callsite_encoding;
};

/**
 * Parse the header on the language-specific data area and return a structure
 * containing the addresses and encodings of the various tables.
 */
static inline struct dwarf_eh_lsda parse_lsda(struct _Unwind_Context *context,
                                              unsigned char *data)
{
    struct dwarf_eh_lsda lsda;

    lsda.region_start = (dw_eh_ptr_t)(uintptr_t)_Unwind_GetRegionStart(context);

    // If the landing pads are relative to anything other than the start of
    // this region, find out where.  This is @LPStart in the spec, although the
    // encoding that GCC uses does not quite match the spec.
    uint64_t v = (uint64_t)(uintptr_t)lsda.region_start;
    read_value_with_encoding(context, &data, &v);
    lsda.landing_pads = (dw_eh_ptr_t)(uintptr_t)v;

    // If there is a type table, find out where it is.  This is @TTBase in the
    // spec.  Note: we find whether there is a type table pointer by checking
    // whether the leading byte is DW_EH_PE_omit (0xff), which is not what the
    // spec says, but does seem to be how G++ indicates this.
    lsda.type_table = NULL;
    lsda.type_table_encoding = *data++;
    if (lsda.type_table_encoding != DW_EH_PE_omit)
    {
        // The offset is measured from the byte following the offset itself.
        v = (uint64_t)read_uleb128(&data);
        lsda.type_table = data + v;
    }
#if defined(__arm__)
    lsda.type_table_encoding = (DW_EH_PE_pcrel | DW_EH_PE_indirect);
#endif

    lsda.callsite_encoding = *data++;

    // The call site table is immediately after the header, and the action
    // table is immediately after the call site table.
    uintptr_t callsite_size = (uintptr_t)read_uleb128(&data);
    lsda.call_site_table = data;
    lsda.action_table = data + callsite_size;

    return lsda;
}

/**
 * Structure representing an action to be performed while unwinding.  This
 * contains the address that should be unwound to and the action record that
 * provoked this action.
 */
struct dwarf_eh_action
{
    /**
     * The address that this action directs should be the new program counter
     * value after unwinding.
     */
    dw_eh_ptr_t landing_pad;
    /// The address of the action record.
    dw_eh_ptr_t action_record;
};

/**
 * Look up the landing pad that corresponds to the current invoke.
 * Returns true if record exists.  The context is provided by the generic
 * unwind library and the lsda should be the result of a call to parse_lsda().
 *
 * The action record is returned via the result parameter.  Both fields of
 * *result are cleared on entry, and each is only filled in when the matching
 * call-site record has a non-zero value for it.
 */
static bool dwarf_eh_find_callsite(struct _Unwind_Context *context,
                                   struct dwarf_eh_lsda *lsda,
                                   struct dwarf_eh_action *result)
{
    result->action_record = NULL;
    result->landing_pad = NULL;
    // The current instruction pointer offset within the region
    uint64_t ip = _Unwind_GetIP(context) - _Unwind_GetRegionStart(context);
    dw_eh_ptr_t callsite_table = lsda->call_site_table;

    // The call-site table ends where the action table begins, so a cursor
    // equal to action_table means there are no more records to read.
    while (callsite_table < lsda->action_table)
    {
        // Once again, the layout deviates from the spec.
        uint64_t call_site_start =
            read_value(lsda->callsite_encoding, &callsite_table);
        uint64_t call_site_size =
            read_value(lsda->callsite_encoding, &callsite_table);

        // Call site entries are sorted, so if we find a call site that's after
        // the current instruction pointer then there is no action associated
        // with this call and we should unwind straight through this frame
        // without doing anything.
        if (call_site_start > ip)
        {
            break;
        }

        // Read the address of the landing pad and the action from the call
        // site table.
        uint64_t landing_pad =
            read_value(lsda->callsite_encoding, &callsite_table);
        uint64_t action = (uint64_t)read_uleb128(&callsite_table);

        // We should not include the call_site_start (beginning of the region)
        // address in the ip range.  For each call site:
        //
        // address1: call proc
        // address2: next instruction
        //
        // The call stack contains address2 and not address1, address1 can be
        // at the end of another EH region.
        if (call_site_start < ip && ip <= call_site_start + call_site_size)
        {
            if (action)
            {
                // Action records are 1-biased so both no-record and zeroth
                // record can be stored.
                result->action_record = lsda->action_table + action - 1;
            }
            // No landing pad means keep unwinding.
            if (landing_pad)
            {
                // Landing pad is the offset from the value in the header
                result->landing_pad = lsda->landing_pads + landing_pad;
            }
            return true;
        }
    }
    return false;
}

/// Defines an exception class from 8 bytes (endian independent)
#define EXCEPTION_CLASS(a,b,c,d,e,f,g,h) \
    ((((uint64_t)(a)) << 56) +\
     (((uint64_t)(b)) << 48) +\
     (((uint64_t)(c)) << 40) +\
     (((uint64_t)(d)) << 32) +\
     (((uint64_t)(e)) << 24) +\
     (((uint64_t)(f)) << 16) +\
     (((uint64_t)(g)) << 8) +\
     (((uint64_t)(h))))

/// Defines the low 32 bits of an exception class from 4 bytes.  The expansion
/// is fully parenthesized so it can be used inside larger expressions.
#define GENERIC_EXCEPTION_CLASS(e,f,g,h) \
    ((((uint32_t)(e)) << 24) +\
     (((uint32_t)(f)) << 16) +\
     (((uint32_t)(g)) << 8) +\
     (((uint32_t)(h))))