/*
 * Copyright 2010-2011 PathScale, Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS
 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/**
 * dwarf_eh.h - Defines some helper functions for parsing DWARF exception
 * handling tables.
 *
 * This file contains various helper functions that are independent of the
 * language-specific code.  It can be used in any personality function for the
 * Itanium ABI.
 */
#include <assert.h>

// TODO: Factor out Itanium / ARM differences.  We probably want an itanium.h
// and arm.h that can be included by this file depending on the target ABI.

// _GNU_SOURCE must be defined for unwind.h to expose some of the functions
// that we want.  If it isn't, then we define it and undefine it to make sure
// that it doesn't impact the rest of the program.
#ifndef _GNU_SOURCE
#   define _GNU_SOURCE 1
#   include "unwind.h"
#   undef _GNU_SOURCE
#else
#   include "unwind.h"
#endif

#include <stdint.h>
// abort() and memcpy() are used by the helpers below.
#include <stdlib.h>
#include <string.h>

/// Type used for pointers into DWARF data
typedef unsigned char *dw_eh_ptr_t;

/// Flag indicating a signed quantity
#define DW_EH_PE_signed 0x08
/// DWARF data encoding types.
enum dwarf_data_encoding
{
    /// Absolute pointer value
    DW_EH_PE_absptr   = 0x00,
    /// Unsigned, little-endian, base 128-encoded (variable length).
    DW_EH_PE_uleb128 = 0x01,
    /// Unsigned 16-bit integer.
    DW_EH_PE_udata2  = 0x02,
    /// Unsigned 32-bit integer.
    DW_EH_PE_udata4  = 0x03,
    /// Unsigned 64-bit integer.
    DW_EH_PE_udata8  = 0x04,
    /// Signed, little-endian, base 128-encoded (variable length)
    DW_EH_PE_sleb128 = DW_EH_PE_uleb128 | DW_EH_PE_signed,
    /// Signed 16-bit integer.
    DW_EH_PE_sdata2  = DW_EH_PE_udata2 | DW_EH_PE_signed,
    /// Signed 32-bit integer.
    DW_EH_PE_sdata4  = DW_EH_PE_udata4 | DW_EH_PE_signed,
    /// Signed 64-bit integer.
    DW_EH_PE_sdata8  = DW_EH_PE_udata8 | DW_EH_PE_signed
};

/**
 * Returns the encoding for a DWARF EH table entry.  The encoding is stored in
 * the low four bits of an octet.  The high four bits store the addressing
 * mode.
 */
static inline enum dwarf_data_encoding get_encoding(unsigned char x)
{
    return static_cast<enum dwarf_data_encoding>(x & 0xf);
}

/**
 * DWARF addressing mode constants.  When reading a pointer value from a DWARF
 * exception table, you must know how it is stored and what the addressing mode
 * is.  The low four bits tell you the encoding, allowing you to decode a
 * number.  The high four bits tell you the addressing mode, allowing you to
 * turn that number into an address in memory.
 */
enum dwarf_data_relative
{
    /// Value is omitted
    DW_EH_PE_omit     = 0xff,
    /// Value relative to program counter
    DW_EH_PE_pcrel    = 0x10,
    /// Value relative to the text segment
    DW_EH_PE_textrel  = 0x20,
    /// Value relative to the data segment
    DW_EH_PE_datarel  = 0x30,
    /// Value relative to the start of the function
    DW_EH_PE_funcrel  = 0x40,
    /// Aligned pointer (Not supported yet - are they actually used?)
    DW_EH_PE_aligned  = 0x50,
    /// Pointer points to address of real value
    DW_EH_PE_indirect = 0x80
};
/**
 * Returns the addressing mode component of this encoding.
 */
static inline enum dwarf_data_relative get_base(unsigned char x)
{
    return static_cast<enum dwarf_data_relative>(x & 0x70);
}
/**
 * Returns whether an encoding represents an indirect address.
 */
static int is_indirect(unsigned char x)
{
    return ((x & DW_EH_PE_indirect) == DW_EH_PE_indirect);
}

/**
 * Returns the size of a fixed-size encoding.  This function will abort if
 * called with a value that is not a fixed-size encoding.
 */
static inline int dwarf_size_of_fixed_size_field(unsigned char type)
{
    switch (get_encoding(type))
    {
        default: abort();
        case DW_EH_PE_sdata2:
        case DW_EH_PE_udata2: return 2;
        case DW_EH_PE_sdata4:
        case DW_EH_PE_udata4: return 4;
        case DW_EH_PE_sdata8:
        case DW_EH_PE_udata8: return 8;
        case DW_EH_PE_absptr: return sizeof(void*);
    }
}

/**
 * Read an unsigned, little-endian, base-128, DWARF value.  Updates *data to
 * point to the end of the value.  Stores the number of bits read in the value
 * pointed to by b, allowing you to determine the value of the highest bit, and
 * therefore the sign of a signed value.
 *
 * This function is not intended to be called directly.  Use read_sleb128() or
 * read_uleb128() for reading signed and unsigned versions, respectively.
 */
static uint64_t read_leb128(dw_eh_ptr_t *data, int *b)
{
    uint64_t uleb = 0;
    unsigned int bit = 0;
    unsigned char octet = 0;
    // We have to read at least one octet, and keep reading until we get to one
    // with the high bit unset
    do
    {
        // This check is a bit too strict - we should also check the highest
        // bit of the digit.
        assert(bit < sizeof(uint64_t) * 8);
        // Fetch the octet and proceed to the next one
        octet = *(*data)++;
        // Add the base 128 digit to the current value.  The digit must be
        // widened before shifting: shifting it as an int would overflow (or
        // be undefined) for any value that needs more than 31 bits.
        uleb |= static_cast<uint64_t>(octet & 0x7f) << bit;
        // Increase the shift value
        bit += 7;
        // Terminate when we reach an octet that does not have the high bit
        // set
    } while (octet & 0x80);
    *b = bit;

    return uleb;
}

/**
 * Reads an unsigned little-endian base-128 value starting at the address
 * pointed to by *data.  Updates *data to point to the next byte after the end
 * of the variable-length value.
 *
 * Note that the decoded value is converted to int64_t on return.
 */
static int64_t read_uleb128(dw_eh_ptr_t *data)
{
    int b;
    return read_leb128(data, &b);
}

/**
 * Reads a signed little-endian base-128 value starting at the address pointed
 * to by *data.  Updates *data to point to the next byte after the end of the
 * variable-length value.
 */
static int64_t read_sleb128(dw_eh_ptr_t *data)
{
    int bits;
    // Read as if it's unsigned
    uint64_t uleb = read_leb128(data, &bits);
    // If the most significant bit read is 1, then we need to sign extend it.
    // When 64 or more bits were read the value already fills the whole word,
    // so there is nothing to extend (and shifting by >= 64 would be
    // undefined).
    if ((bits < 64) && (((uleb >> (bits - 1)) & 1) != 0))
    {
        // Sign extend by setting all bits in front of it to 1.  An unsigned
        // mask is used because left-shifting a negative value is undefined.
        uleb |= ~static_cast<uint64_t>(0) << bits;
    }
    return static_cast<int64_t>(uleb);
}
/**
 * Reads a value using the specified encoding from the address pointed to by
 * *data.  Updates the value of *data to point to the next byte after the end
 * of the data.
 *
 * Only the low four bits of the encoding (the data format) are examined here.
 * This function aborts if they do not name a known format.
 */
static uint64_t read_value(char encoding, dw_eh_ptr_t *data)
{
    enum dwarf_data_encoding format = get_encoding(encoding);
    switch (format)
    {
        // Read fixed-length types.  memcpy is used because the data is not
        // guaranteed to be suitably aligned for a direct load.
#define READ(dwarf, ctype) \
        case dwarf:\
        {\
            ctype t;\
            memcpy(&t, *data, sizeof t);\
            *data += sizeof t;\
            return static_cast<uint64_t>(t);\
        }
        READ(DW_EH_PE_udata2, uint16_t)
        READ(DW_EH_PE_udata4, uint32_t)
        READ(DW_EH_PE_udata8, uint64_t)
        READ(DW_EH_PE_sdata2, int16_t)
        READ(DW_EH_PE_sdata4, int32_t)
        READ(DW_EH_PE_sdata8, int64_t)
        READ(DW_EH_PE_absptr, intptr_t)
#undef READ
        // Read variable-length types
        case DW_EH_PE_sleb128:
            return read_sleb128(data);
        case DW_EH_PE_uleb128:
            return read_uleb128(data);
        default: abort();
    }
}

/**
 * Resolves an indirect value.  This expects an unwind context, an encoding, a
 * decoded value, and the address that pc-relative values are relative to as
 * arguments.  The returned value is the address identified by the encoded
 * value.
 *
 * The base selected by the addressing mode of the encoding is added to v.  If
 * the encoding additionally specifies an indirect value, the result is
 * treated as the address of a pointer and that pointer is loaded and
 * returned; otherwise the adjusted v is returned.
 */
static uint64_t resolve_indirect_value(_Unwind_Context *c,
                                       unsigned char encoding,
                                       int64_t v,
                                       dw_eh_ptr_t start)
{
    switch (get_base(encoding))
    {
        case DW_EH_PE_pcrel:
            v += reinterpret_cast<uint64_t>(start);
            break;
        case DW_EH_PE_textrel:
            v += static_cast<uint64_t>(static_cast<uintptr_t>(_Unwind_GetTextRelBase(c)));
            break;
        case DW_EH_PE_datarel:
            v += static_cast<uint64_t>(static_cast<uintptr_t>(_Unwind_GetDataRelBase(c)));
            break;
        case DW_EH_PE_funcrel:
            v += static_cast<uint64_t>(static_cast<uintptr_t>(_Unwind_GetRegionStart(c)));
            break;
        default:
            break;
    }
    // If this is an indirect value, then it is really the address of the real
    // value
    // TODO: Check whether this should really always be a pointer - it seems to
    // be a GCC extension, so not properly documented...
    if (is_indirect(encoding))
    {
        v = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(*reinterpret_cast<void**>(v)));
    }
    return v;
}


/**
 * Reads an encoding and a value, updating *data to point to the next byte.
 *
 * The first octet at *data is the encoding.  If it is DW_EH_PE_omit then only
 * that octet is consumed and *out is left untouched.  Otherwise the value that
 * follows is decoded, resolved against its addressing mode, and stored in
 * *out.
 */
static inline void read_value_with_encoding(_Unwind_Context *context,
                                            dw_eh_ptr_t *data,
                                            uint64_t *out)
{
    unsigned char encoding = *((*data)++);
    // If this value is omitted, skip it and don't touch the output value
    if (encoding == DW_EH_PE_omit) { return; }

    // pc-relative values are relative to the address of the encoded value
    // itself, i.e. the byte after the encoding octet.
    dw_eh_ptr_t start = *data;
    *out = read_value(encoding, data);
    *out = resolve_indirect_value(context, encoding, *out, start);
}

/**
 * Structure storing a decoded language-specific data area.  Use parse_lsda()
 * to generate an instance of this structure from the address returned by the
 * generic unwind library.
 *
 * You should not need to inspect the fields of this structure directly if you
 * are just using this header.  The structure stores the locations of the
 * various tables used for unwinding exceptions and is used by the functions
 * for reading values from these tables.
 */
struct dwarf_eh_lsda
{
    /// The start of the region.  This is a cache of the value returned by
    /// _Unwind_GetRegionStart().
    dw_eh_ptr_t region_start;
    /// The start of the landing pads table.
    dw_eh_ptr_t landing_pads;
    /// The start of the type table.
    dw_eh_ptr_t type_table;
    /// The encoding used for entries in the type tables.
    unsigned char type_table_encoding;
    /// The location of the call-site table.
    dw_eh_ptr_t call_site_table;
    /// The location of the action table.
    dw_eh_ptr_t action_table;
    /// The encoding used for entries in the call-site table.
    unsigned char callsite_encoding;
};

/**
 * Parse the header on the language-specific data area and return a structure
 * containing the addresses and encodings of the various tables.
 */
static inline struct dwarf_eh_lsda parse_lsda(_Unwind_Context *context,
                                              unsigned char *data)
{
    struct dwarf_eh_lsda lsda;

    lsda.region_start = reinterpret_cast<dw_eh_ptr_t>(_Unwind_GetRegionStart(context));

    // If the landing pads are relative to anything other than the start of
    // this region, find out where.  This is @LPStart in the spec, although the
    // encoding that GCC uses does not quite match the spec.
    uint64_t v = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(lsda.region_start));
    read_value_with_encoding(context, &data, &v);
    lsda.landing_pads = reinterpret_cast<dw_eh_ptr_t>(static_cast<uintptr_t>(v));

    // If there is a type table, find out where it is.  This is @TTBase in the
    // spec.  Note: we find whether there is a type table pointer by checking
    // whether the leading byte is DW_EH_PE_omit (0xff), which is not what the
    // spec says, but does seem to be how G++ indicates this.
    lsda.type_table = 0;
    lsda.type_table_encoding = *data++;
    if (lsda.type_table_encoding != DW_EH_PE_omit)
    {
        // The offset is relative to the byte following the offset field.
        v = read_uleb128(&data);
        lsda.type_table = data + v;
    }
#if defined(__arm__) && !defined(__ARM_DWARF_EH__)
    lsda.type_table_encoding = (DW_EH_PE_pcrel | DW_EH_PE_indirect);
#endif

    lsda.callsite_encoding = *data++;

    // Call site table is immediately after the header, which ends with the
    // size of the call site table.
    uintptr_t callsite_size = static_cast<uintptr_t>(read_uleb128(&data));
    lsda.call_site_table = data;
    // Action table is immediately after the call site table
    lsda.action_table = data + callsite_size;

    return lsda;
}

/**
 * Structure representing an action to be performed while unwinding.  This
 * contains the address that should be unwound to and the action record that
 * provoked this action.
 */
struct dwarf_eh_action
{
    /**
     * The address that this action directs should be the new program counter
     * value after unwinding.
     */
    dw_eh_ptr_t landing_pad;
    /// The address of the action record.
    dw_eh_ptr_t action_record;
};

/**
 * Look up the landing pad that corresponds to the current invoke.
 * Returns true if record exists.  The context is provided by the generic
 * unwind library and the lsda should be the result of a call to parse_lsda().
 *
 * The action record is returned via the result parameter.  Both fields of
 * result are cleared first and are only set when the matching call-site
 * record provides a non-zero action / landing pad.
 */
static bool dwarf_eh_find_callsite(struct _Unwind_Context *context,
                                   struct dwarf_eh_lsda *lsda,
                                   struct dwarf_eh_action *result)
{
    result->action_record = 0;
    result->landing_pad = 0;
    // The current instruction pointer offset within the region
    uint64_t ip = _Unwind_GetIP(context) - _Unwind_GetRegionStart(context);
    unsigned char *callsite_table = lsda->call_site_table;

    // The action table starts immediately after the last call-site record, so
    // it is the exclusive end of the table that we are scanning.
    while (callsite_table < lsda->action_table)
    {
        // Once again, the layout deviates from the spec.
        uint64_t call_site_start, call_site_size, landing_pad, action;
        call_site_start = read_value(lsda->callsite_encoding, &callsite_table);
        call_site_size = read_value(lsda->callsite_encoding, &callsite_table);

        // Call site entries are sorted, so if we find a call site that's after
        // the current instruction pointer then there is no action associated
        // with this call and we should unwind straight through this frame
        // without doing anything.
        if (call_site_start > ip) { break; }

        // Read the address of the landing pad and the action from the call
        // site table.
        landing_pad = read_value(lsda->callsite_encoding, &callsite_table);
        action = read_uleb128(&callsite_table);

        // We should not include the call_site_start (beginning of the region)
        // address in the ip range.  For each call site:
        //
        // address1: call proc
        // address2: next instruction
        //
        // The call stack contains address2 and not address1, address1 can be
        // at the end of another EH region.
        if (call_site_start < ip && ip <= call_site_start + call_site_size)
        {
            if (action)
            {
                // Action records are 1-biased so both no-record and zeroth
                // record can be stored.
                result->action_record = lsda->action_table + action - 1;
            }
            // No landing pad means keep unwinding.
            if (landing_pad)
            {
                // Landing pad is the offset from the value in the header
                result->landing_pad = lsda->landing_pads + landing_pad;
            }
            return true;
        }
    }
    return false;
}

/// Defines an exception class from 8 bytes (endian independent)
#define EXCEPTION_CLASS(a,b,c,d,e,f,g,h) \
    ((static_cast<uint64_t>(a) << 56) +\
     (static_cast<uint64_t>(b) << 48) +\
     (static_cast<uint64_t>(c) << 40) +\
     (static_cast<uint64_t>(d) << 32) +\
     (static_cast<uint64_t>(e) << 24) +\
     (static_cast<uint64_t>(f) << 16) +\
     (static_cast<uint64_t>(g) << 8) +\
     (static_cast<uint64_t>(h)))

/// Defines the low 32 bits of an exception class from 4 bytes.  The expansion
/// is fully parenthesized so that it can be used inside larger expressions.
#define GENERIC_EXCEPTION_CLASS(e,f,g,h) \
    ((static_cast<uint32_t>(e) << 24) +\
     (static_cast<uint32_t>(f) << 16) +\
     (static_cast<uint32_t>(g) << 8) +\
     (static_cast<uint32_t>(h)))