1 /*
2 * Copyright 2010-2011 PathScale, Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * 1. Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 *
10 * 2. Redistributions in binary form must reproduce the above copyright notice,
11 * this list of conditions and the following disclaimer in the documentation
12 * and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS
15 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
16 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
21 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
22 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
23 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
24 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26 /**
27 * dwarf_eh.h - Defines some helper functions for parsing DWARF exception
28 * handling tables.
29 *
30 * This file contains various helper functions that are independent of the
31 * language-specific code. It can be used in any personality function for the
32 * Itanium ABI.
33 */
34 #include <assert.h>
35
36 // TODO: Factor out Itanium / ARM differences. We probably want an itanium.h
37 // and arm.h that can be included by this file depending on the target ABI.
38
39 // _GNU_SOURCE must be defined for unwind.h to expose some of the functions
40 // that we want. If it isn't, then we define it and undefine it to make sure
41 // that it doesn't impact the rest of the program.
42 #ifndef _GNU_SOURCE
43 # define _GNU_SOURCE 1
44 # include "unwind.h"
45 # undef _GNU_SOURCE
46 #else
47 # include "unwind.h"
48 #endif
49
50 #include <stdint.h>
51
/// Type used for pointers into DWARF data.  The tables are walked one octet
/// at a time, so this is a plain byte pointer.
typedef unsigned char *dw_eh_ptr_t;
54
/// Flag indicating a signed quantity.  OR-ed with one of the unsigned
/// encodings below to form the corresponding signed encoding.
#define DW_EH_PE_signed 0x08
/// DWARF data encoding types.
enum dwarf_data_encoding
{
	/// Absolute pointer value
	DW_EH_PE_absptr   = 0x00,
	/// Unsigned, little-endian, base 128-encoded (variable length).
	DW_EH_PE_uleb128 = 0x01,
	/// Unsigned 16-bit integer.
	DW_EH_PE_udata2  = 0x02,
	/// Unsigned 32-bit integer.
	DW_EH_PE_udata4  = 0x03,
	/// Unsigned 64-bit integer.
	DW_EH_PE_udata8  = 0x04,
	/// Signed, little-endian, base 128-encoded (variable length)
	DW_EH_PE_sleb128 = DW_EH_PE_uleb128 | DW_EH_PE_signed,
	/// Signed 16-bit integer.
	DW_EH_PE_sdata2  = DW_EH_PE_udata2 | DW_EH_PE_signed,
	/// Signed 32-bit integer.
	DW_EH_PE_sdata4  = DW_EH_PE_udata4 | DW_EH_PE_signed,
	/// Signed 64-bit integer.
	DW_EH_PE_sdata8  = DW_EH_PE_udata8 | DW_EH_PE_signed
};
79
80 /**
81 * Returns the encoding for a DWARF EH table entry. The encoding is stored in
82 * the low four of an octet. The high four bits store the addressing mode.
83 */
get_encoding(unsigned char x)84 static inline enum dwarf_data_encoding get_encoding(unsigned char x)
85 {
86 return static_cast<enum dwarf_data_encoding>(x & 0xf);
87 }
88
/**
 * DWARF addressing mode constants.  When reading a pointer value from a DWARF
 * exception table, you must know how it is stored and what the addressing mode
 * is.  The low four bits tell you the encoding, allowing you to decode a
 * number.  The high four bits tell you the addressing mode, allowing you to
 * turn that number into an address in memory.
 *
 * Two enumerators are not addressing modes selected by get_base():
 * DW_EH_PE_omit is a whole-octet sentinel, and DW_EH_PE_indirect is a separate
 * flag bit that is tested with is_indirect().
 */
enum dwarf_data_relative
{
	/// Value is omitted.  This is compared against the entire encoding octet.
	DW_EH_PE_omit     = 0xff,
	/// Value relative to program counter
	DW_EH_PE_pcrel    = 0x10,
	/// Value relative to the text segment
	DW_EH_PE_textrel  = 0x20,
	/// Value relative to the data segment
	DW_EH_PE_datarel  = 0x30,
	/// Value relative to the start of the function
	DW_EH_PE_funcrel  = 0x40,
	/// Aligned pointer (Not supported yet - are they actually used?)
	DW_EH_PE_aligned  = 0x50,
	/// Flag bit: the decoded value is the address of the real value
	DW_EH_PE_indirect = 0x80
};
113 /**
114 * Returns the addressing mode component of this encoding.
115 */
get_base(unsigned char x)116 static inline enum dwarf_data_relative get_base(unsigned char x)
117 {
118 return static_cast<enum dwarf_data_relative>(x & 0x70);
119 }
120 /**
121 * Returns whether an encoding represents an indirect address.
122 */
is_indirect(unsigned char x)123 static int is_indirect(unsigned char x)
124 {
125 return ((x & DW_EH_PE_indirect) == DW_EH_PE_indirect);
126 }
127
128 /**
129 * Returns the size of a fixed-size encoding. This function will abort if
130 * called with a value that is not a fixed-size encoding.
131 */
dwarf_size_of_fixed_size_field(unsigned char type)132 static inline int dwarf_size_of_fixed_size_field(unsigned char type)
133 {
134 switch (get_encoding(type))
135 {
136 default: abort();
137 case DW_EH_PE_sdata2:
138 case DW_EH_PE_udata2: return 2;
139 case DW_EH_PE_sdata4:
140 case DW_EH_PE_udata4: return 4;
141 case DW_EH_PE_sdata8:
142 case DW_EH_PE_udata8: return 8;
143 case DW_EH_PE_absptr: return sizeof(void*);
144 }
145 }
146
147 /**
148 * Read an unsigned, little-endian, base-128, DWARF value. Updates *data to
149 * point to the end of the value. Stores the number of bits read in the value
150 * pointed to by b, allowing you to determine the value of the highest bit, and
151 * therefore the sign of a signed value.
152 *
153 * This function is not intended to be called directly. Use read_sleb128() or
154 * read_uleb128() for reading signed and unsigned versions, respectively.
155 */
read_leb128(dw_eh_ptr_t * data,int * b)156 static uint64_t read_leb128(dw_eh_ptr_t *data, int *b)
157 {
158 uint64_t uleb = 0;
159 unsigned int bit = 0;
160 unsigned char digit = 0;
161 // We have to read at least one octet, and keep reading until we get to one
162 // with the high bit unset
163 do
164 {
165 // This check is a bit too strict - we should also check the highest
166 // bit of the digit.
167 assert(bit < sizeof(uint64_t) * 8);
168 // Get the base 128 digit
169 digit = (**data) & 0x7f;
170 // Add it to the current value
171 uleb += digit << bit;
172 // Increase the shift value
173 bit += 7;
174 // Proceed to the next octet
175 (*data)++;
176 // Terminate when we reach a value that does not have the high bit set
177 // (i.e. which was not modified when we mask it with 0x7f)
178 } while ((*(*data - 1)) != digit);
179 *b = bit;
180
181 return uleb;
182 }
183
184 /**
185 * Reads an unsigned little-endian base-128 value starting at the address
186 * pointed to by *data. Updates *data to point to the next byte after the end
187 * of the variable-length value.
188 */
read_uleb128(dw_eh_ptr_t * data)189 static int64_t read_uleb128(dw_eh_ptr_t *data)
190 {
191 int b;
192 return read_leb128(data, &b);
193 }
194
195 /**
196 * Reads a signed little-endian base-128 value starting at the address pointed
197 * to by *data. Updates *data to point to the next byte after the end of the
198 * variable-length value.
199 */
read_sleb128(dw_eh_ptr_t * data)200 static int64_t read_sleb128(dw_eh_ptr_t *data)
201 {
202 int bits;
203 // Read as if it's signed
204 uint64_t uleb = read_leb128(data, &bits);
205 // If the most significant bit read is 1, then we need to sign extend it
206 if ((uleb >> (bits-1)) == 1)
207 {
208 // Sign extend by setting all bits in front of it to 1
209 uleb |= static_cast<int64_t>(-1) << bits;
210 }
211 return static_cast<int64_t>(uleb);
212 }
213 /**
214 * Reads a value using the specified encoding from the address pointed to by
215 * *data. Updates the value of *data to point to the next byte after the end
216 * of the data.
217 */
read_value(char encoding,dw_eh_ptr_t * data)218 static uint64_t read_value(char encoding, dw_eh_ptr_t *data)
219 {
220 enum dwarf_data_encoding type = get_encoding(encoding);
221 switch (type)
222 {
223 // Read fixed-length types
224 #define READ(dwarf, type) \
225 case dwarf:\
226 {\
227 type t;\
228 memcpy(&t, *data, sizeof t);\
229 *data += sizeof t;\
230 return static_cast<uint64_t>(t);\
231 }
232 READ(DW_EH_PE_udata2, uint16_t)
233 READ(DW_EH_PE_udata4, uint32_t)
234 READ(DW_EH_PE_udata8, uint64_t)
235 READ(DW_EH_PE_sdata2, int16_t)
236 READ(DW_EH_PE_sdata4, int32_t)
237 READ(DW_EH_PE_sdata8, int64_t)
238 READ(DW_EH_PE_absptr, intptr_t)
239 #undef READ
240 // Read variable-length types
241 case DW_EH_PE_sleb128:
242 return read_sleb128(data);
243 case DW_EH_PE_uleb128:
244 return read_uleb128(data);
245 default: abort();
246 }
247 }
248
249 /**
250 * Resolves an indirect value. This expects an unwind context, an encoding, a
251 * decoded value, and the start of the region as arguments. The returned value
252 * is a pointer to the address identified by the encoded value.
253 *
254 * If the encoding does not specify an indirect value, then this returns v.
255 */
resolve_indirect_value(_Unwind_Context * c,unsigned char encoding,int64_t v,dw_eh_ptr_t start)256 static uint64_t resolve_indirect_value(_Unwind_Context *c,
257 unsigned char encoding,
258 int64_t v,
259 dw_eh_ptr_t start)
260 {
261 switch (get_base(encoding))
262 {
263 case DW_EH_PE_pcrel:
264 v += reinterpret_cast<uint64_t>(start);
265 break;
266 case DW_EH_PE_textrel:
267 v += static_cast<uint64_t>(static_cast<uintptr_t>(_Unwind_GetTextRelBase(c)));
268 break;
269 case DW_EH_PE_datarel:
270 v += static_cast<uint64_t>(static_cast<uintptr_t>(_Unwind_GetDataRelBase(c)));
271 break;
272 case DW_EH_PE_funcrel:
273 v += static_cast<uint64_t>(static_cast<uintptr_t>(_Unwind_GetRegionStart(c)));
274 default:
275 break;
276 }
277 // If this is an indirect value, then it is really the address of the real
278 // value
279 // TODO: Check whether this should really always be a pointer - it seems to
280 // be a GCC extensions, so not properly documented...
281 if (is_indirect(encoding))
282 {
283 v = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(*reinterpret_cast<void**>(v)));
284 }
285 return v;
286 }
287
288
289 /**
290 * Reads an encoding and a value, updating *data to point to the next byte.
291 */
read_value_with_encoding(_Unwind_Context * context,dw_eh_ptr_t * data,uint64_t * out)292 static inline void read_value_with_encoding(_Unwind_Context *context,
293 dw_eh_ptr_t *data,
294 uint64_t *out)
295 {
296 dw_eh_ptr_t start = *data;
297 unsigned char encoding = *((*data)++);
298 // If this value is omitted, skip it and don't touch the output value
299 if (encoding == DW_EH_PE_omit) { return; }
300
301 *out = read_value(encoding, data);
302 *out = resolve_indirect_value(context, encoding, *out, start);
303 }
304
/**
 * Structure storing a decoded language-specific data area.  Use parse_lsda()
 * to generate an instance of this structure from the address returned by the
 * generic unwind library.
 *
 * You should not need to inspect the fields of this structure directly if you
 * are just using this header.  The structure stores the locations of the
 * various tables used for unwinding exceptions and is used by the functions
 * for reading values from these tables.
 */
struct dwarf_eh_lsda
{
	/// The start of the region.  This is a cache of the value returned by
	/// _Unwind_GetRegionStart().
	dw_eh_ptr_t region_start;
	/// The base address that landing pad offsets are added to.  parse_lsda()
	/// leaves this equal to region_start unless the header supplies a value.
	dw_eh_ptr_t landing_pads;
	/// The start of the type table, or 0 if the header omits it.
	dw_eh_ptr_t type_table;
	/// The encoding used for entries in the type tables.  parse_lsda()
	/// overrides the value from the header on ARM targets that do not use
	/// DWARF exception handling.
	unsigned char type_table_encoding;
	/// The location of the call-site table.
	dw_eh_ptr_t call_site_table;
	/// The location of the action table, which begins where the call-site
	/// table ends.
	dw_eh_ptr_t action_table;
	/// The encoding used for entries in the call-site table.
	unsigned char callsite_encoding;
};
333
334 /**
335 * Parse the header on the language-specific data area and return a structure
336 * containing the addresses and encodings of the various tables.
337 */
parse_lsda(_Unwind_Context * context,unsigned char * data)338 static inline struct dwarf_eh_lsda parse_lsda(_Unwind_Context *context,
339 unsigned char *data)
340 {
341 struct dwarf_eh_lsda lsda;
342
343 lsda.region_start = reinterpret_cast<dw_eh_ptr_t>(_Unwind_GetRegionStart(context));
344
345 // If the landing pads are relative to anything other than the start of
346 // this region, find out where. This is @LPStart in the spec, although the
347 // encoding that GCC uses does not quite match the spec.
348 uint64_t v = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(lsda.region_start));
349 read_value_with_encoding(context, &data, &v);
350 lsda.landing_pads = reinterpret_cast<dw_eh_ptr_t>(static_cast<uintptr_t>(v));
351
352 // If there is a type table, find out where it is. This is @TTBase in the
353 // spec. Note: we find whether there is a type table pointer by checking
354 // whether the leading byte is DW_EH_PE_omit (0xff), which is not what the
355 // spec says, but does seem to be how G++ indicates this.
356 lsda.type_table = 0;
357 lsda.type_table_encoding = *data++;
358 if (lsda.type_table_encoding != DW_EH_PE_omit)
359 {
360 v = read_uleb128(&data);
361 dw_eh_ptr_t type_table = data;
362 type_table += v;
363 lsda.type_table = type_table;
364 //lsda.type_table = (uintptr_t*)(data + v);
365 }
366 #if defined(__arm__) && !defined(__ARM_DWARF_EH__)
367 lsda.type_table_encoding = (DW_EH_PE_pcrel | DW_EH_PE_indirect);
368 #endif
369
370 lsda.callsite_encoding = static_cast<enum dwarf_data_encoding>(*(data++));
371
372 // Action table is immediately after the call site table
373 lsda.action_table = data;
374 uintptr_t callsite_size = static_cast<uintptr_t>(read_uleb128(&data));
375 lsda.action_table = data + callsite_size;
376 // Call site table is immediately after the header
377 lsda.call_site_table = static_cast<dw_eh_ptr_t>(data);
378
379
380 return lsda;
381 }
382
/**
 * Structure representing an action to be performed while unwinding.  This
 * contains the address that should be unwound to and the action record that
 * provoked this action.
 */
struct dwarf_eh_action
{
	/**
	 * The address that should become the new program counter value after
	 * unwinding.  dwarf_eh_find_callsite() leaves this as 0 when the matching
	 * call site has no landing pad.
	 */
	dw_eh_ptr_t landing_pad;
	/// The address of the action record.  dwarf_eh_find_callsite() leaves
	/// this as 0 when the matching call site has no action.
	dw_eh_ptr_t action_record;
};
398
399 /**
400 * Look up the landing pad that corresponds to the current invoke.
401 * Returns true if record exists. The context is provided by the generic
402 * unwind library and the lsda should be the result of a call to parse_lsda().
403 *
404 * The action record is returned via the result parameter.
405 */
dwarf_eh_find_callsite(struct _Unwind_Context * context,struct dwarf_eh_lsda * lsda,struct dwarf_eh_action * result)406 static bool dwarf_eh_find_callsite(struct _Unwind_Context *context,
407 struct dwarf_eh_lsda *lsda,
408 struct dwarf_eh_action *result)
409 {
410 result->action_record = 0;
411 result->landing_pad = 0;
412 // The current instruction pointer offset within the region
413 uint64_t ip = _Unwind_GetIP(context) - _Unwind_GetRegionStart(context);
414 unsigned char *callsite_table = static_cast<unsigned char*>(lsda->call_site_table);
415
416 while (callsite_table <= lsda->action_table)
417 {
418 // Once again, the layout deviates from the spec.
419 uint64_t call_site_start, call_site_size, landing_pad, action;
420 call_site_start = read_value(lsda->callsite_encoding, &callsite_table);
421 call_site_size = read_value(lsda->callsite_encoding, &callsite_table);
422
423 // Call site entries are sorted, so if we find a call site that's after
424 // the current instruction pointer then there is no action associated
425 // with this call and we should unwind straight through this frame
426 // without doing anything.
427 if (call_site_start > ip) { break; }
428
429 // Read the address of the landing pad and the action from the call
430 // site table.
431 landing_pad = read_value(lsda->callsite_encoding, &callsite_table);
432 action = read_uleb128(&callsite_table);
433
434 // We should not include the call_site_start (beginning of the region)
435 // address in the ip range. For each call site:
436 //
437 // address1: call proc
438 // address2: next instruction
439 //
440 // The call stack contains address2 and not address1, address1 can be
441 // at the end of another EH region.
442 if (call_site_start < ip && ip <= call_site_start + call_site_size)
443 {
444 if (action)
445 {
446 // Action records are 1-biased so both no-record and zeroth
447 // record can be stored.
448 result->action_record = lsda->action_table + action - 1;
449 }
450 // No landing pad means keep unwinding.
451 if (landing_pad)
452 {
453 // Landing pad is the offset from the value in the header
454 result->landing_pad = lsda->landing_pads + landing_pad;
455 }
456 return true;
457 }
458 }
459 return false;
460 }
461
/// Builds a 64-bit exception class from eight byte values (endian
/// independent).  `a` becomes the most significant byte and `h` the least
/// significant one.
#define EXCEPTION_CLASS(a,b,c,d,e,f,g,h) \
	( static_cast<uint64_t>(h)        +\
	 (static_cast<uint64_t>(g) <<  8) +\
	 (static_cast<uint64_t>(f) << 16) +\
	 (static_cast<uint64_t>(e) << 24) +\
	 (static_cast<uint64_t>(d) << 32) +\
	 (static_cast<uint64_t>(c) << 40) +\
	 (static_cast<uint64_t>(b) << 48) +\
	 (static_cast<uint64_t>(a) << 56))
472
/// Builds the 32-bit language-specific half of an exception class from four
/// byte values.  `e` becomes the most significant byte and `h` the least
/// significant one.  The whole expansion is parenthesised so that the macro
/// can be used safely as an operand of a larger expression.
#define GENERIC_EXCEPTION_CLASS(e,f,g,h) \
	((static_cast<uint32_t>(e) << 24) +\
	 (static_cast<uint32_t>(f) << 16) +\
	 (static_cast<uint32_t>(g) << 8) +\
	 (static_cast<uint32_t>(h)))
478