xref: /freebsd/contrib/libcxxrt/dwarf_eh.h (revision 9a14aa017b21c292740c00ee098195cd46642730)
1 /**
2  * dwarf_eh.h - Defines some helper functions for parsing DWARF exception
3  * handling tables.
4  *
5  * This file contains various helper functions that are independent of the
6  * language-specific code.  It can be used in any personality function for the
7  * Itanium ABI.
8  */
9 #include <assert.h>
10 
11 // TODO: Factor out Itanium / ARM differences.  We probably want an itanium.h
12 // and arm.h that can be included by this file depending on the target ABI.
13 
14 // _GNU_SOURCE must be defined for unwind.h to expose some of the functions
15 // that we want.  If it isn't, then we define it and undefine it to make sure
16 // that it doesn't impact the rest of the program.
17 #ifndef _GNU_SOURCE
18 #	define _GNU_SOURCE 1
19 #	include "unwind.h"
20 #	undef _GNU_SOURCE
21 #else
22 #	include "unwind.h"
23 #endif
24 
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
26 
/**
 * Cursor type used to walk raw DWARF exception-handling data one octet at a
 * time.
 */
typedef unsigned char *dw_eh_ptr_t;
29 
/// Bit that, when set in an encoding, marks the value as signed.
#define DW_EH_PE_signed 0x08
/**
 * DWARF data encoding types (the low four bits of an encoding octet).  Each
 * signed encoding is its unsigned counterpart with DW_EH_PE_signed set.
 */
enum dwarf_data_encoding
{
	/// Unsigned, little-endian, base 128-encoded (variable length).
	DW_EH_PE_uleb128 = 0x01,
	/// Signed, little-endian, base 128-encoded (variable length).
	DW_EH_PE_sleb128 = DW_EH_PE_uleb128 | DW_EH_PE_signed,

	/// Unsigned 16-bit integer.
	DW_EH_PE_udata2  = 0x02,
	/// Signed 16-bit integer.
	DW_EH_PE_sdata2  = DW_EH_PE_udata2 | DW_EH_PE_signed,

	/// Unsigned 32-bit integer.
	DW_EH_PE_udata4  = 0x03,
	/// Signed 32-bit integer.
	DW_EH_PE_sdata4  = DW_EH_PE_udata4 | DW_EH_PE_signed,

	/// Unsigned 64-bit integer.
	DW_EH_PE_udata8  = 0x04,
	/// Signed 64-bit integer.
	DW_EH_PE_sdata8  = DW_EH_PE_udata8 | DW_EH_PE_signed
};
52 
53 /**
54  * Returns the encoding for a DWARF EH table entry.  The encoding is stored in
55  * the low four of an octet.  The high four bits store the addressing mode.
56  */
57 static inline enum dwarf_data_encoding get_encoding(unsigned char x)
58 {
59 	return (enum dwarf_data_encoding)(x & 0xf);
60 }
61 
/**
 * DWARF addressing mode constants.  Decoding a pointer from a DWARF exception
 * table takes two pieces of information: how the number is stored, and what
 * it is relative to.  The low four bits of the encoding octet give the
 * storage format; the high four bits, listed here, say how to turn the
 * decoded number into an address in memory.
 */
enum dwarf_data_relative
{
	/// Absolute pointer value
	DW_EH_PE_absptr   = 0x00,
	/// Value relative to program counter
	DW_EH_PE_pcrel    = 0x10,
	/// Value relative to the text segment
	DW_EH_PE_textrel  = 0x20,
	/// Value relative to the data segment
	DW_EH_PE_datarel  = 0x30,
	/// Value relative to the start of the function
	DW_EH_PE_funcrel  = 0x40,
	/// Aligned pointer (not supported yet - are they actually used?)
	DW_EH_PE_aligned  = 0x50,
	/// Pointer points to the address of the real value
	DW_EH_PE_indirect = 0x80,
	/// Value is omitted
	DW_EH_PE_omit     = 0xff
};
88 /**
89  * Returns the addressing mode component of this encoding.
90  */
91 static inline enum dwarf_data_relative get_base(unsigned char x)
92 {
93 	return (enum dwarf_data_relative)(x & 0x70);
94 }
95 /**
96  * Returns whether an encoding represents an indirect address.
97  */
98 static int is_indirect(unsigned char x)
99 {
100 	return ((x & DW_EH_PE_indirect) == DW_EH_PE_indirect);
101 }
102 
103 /**
104  * Returns the size of a fixed-size encoding.  This function will abort if
105  * called with a value that is not a fixed-size encoding.
106  */
107 static inline int dwarf_size_of_fixed_size_field(unsigned char type)
108 {
109 	switch (get_encoding(type))
110 	{
111 		default: abort();
112 		case DW_EH_PE_sdata2:
113 		case DW_EH_PE_udata2: return 2;
114 		case DW_EH_PE_sdata4:
115 		case DW_EH_PE_udata4: return 4;
116 		case DW_EH_PE_sdata8:
117 		case DW_EH_PE_udata8: return 8;
118 		case DW_EH_PE_absptr: return sizeof(void*);
119 	}
120 }
121 
122 /**
123  * Read an unsigned, little-endian, base-128, DWARF value.  Updates *data to
124  * point to the end of the value.  Stores the number of bits read in the value
125  * pointed to by b, allowing you to determine the value of the highest bit, and
126  * therefore the sign of a signed value.
127  *
128  * This function is not intended to be called directly.  Use read_sleb128() or
129  * read_uleb128() for reading signed and unsigned versions, respectively.
130  */
131 static uint64_t read_leb128(dw_eh_ptr_t *data, int *b)
132 {
133 	uint64_t uleb = 0;
134 	unsigned int bit = 0;
135 	unsigned char digit = 0;
136 	// We have to read at least one octet, and keep reading until we get to one
137 	// with the high bit unset
138 	do
139 	{
140 		// This check is a bit too strict - we should also check the highest
141 		// bit of the digit.
142 		assert(bit < sizeof(uint64_t) * 8);
143 		// Get the base 128 digit
144 		digit = (**data) & 0x7f;
145 		// Add it to the current value
146 		uleb += digit << bit;
147 		// Increase the shift value
148 		bit += 7;
149 		// Proceed to the next octet
150 		(*data)++;
151 		// Terminate when we reach a value that does not have the high bit set
152 		// (i.e. which was not modified when we mask it with 0x7f)
153 	} while ((*(*data - 1)) != digit);
154 	*b = bit;
155 
156 	return uleb;
157 }
158 
159 /**
160  * Reads an unsigned little-endian base-128 value starting at the address
161  * pointed to by *data.  Updates *data to point to the next byte after the end
162  * of the variable-length value.
163  */
164 static int64_t read_uleb128(dw_eh_ptr_t *data)
165 {
166 	int b;
167 	return read_leb128(data, &b);
168 }
169 
170 /**
171  * Reads a signed little-endian base-128 value starting at the address pointed
172  * to by *data.  Updates *data to point to the next byte after the end of the
173  * variable-length value.
174  */
175 static int64_t read_sleb128(dw_eh_ptr_t *data)
176 {
177 	int bits;
178 	// Read as if it's signed
179 	uint64_t uleb = read_leb128(data, &bits);
180 	// If the most significant bit read is 1, then we need to sign extend it
181 	if ((uleb >> (bits-1)) == 1)
182 	{
183 		// Sign extend by setting all bits in front of it to 1
184 		uleb |= ((int64_t)-1) << bits;
185 	}
186 	return (int64_t)uleb;
187 }
188 /**
189  * Reads a value using the specified encoding from the address pointed to by
190  * *data.  Updates the value of *data to point to the next byte after the end
191  * of the data.
192  */
193 static uint64_t read_value(char encoding, dw_eh_ptr_t *data)
194 {
195 	enum dwarf_data_encoding type = get_encoding(encoding);
196 	uint64_t v;
197 	switch (type)
198 	{
199 		// Read fixed-length types
200 #define READ(dwarf, type) \
201 		case dwarf:\
202 			v = (uint64_t)(*(type*)(*data));\
203 			*data += sizeof(type);\
204 			break;
205 		READ(DW_EH_PE_udata2, uint16_t)
206 		READ(DW_EH_PE_udata4, uint32_t)
207 		READ(DW_EH_PE_udata8, uint64_t)
208 		READ(DW_EH_PE_sdata2, int16_t)
209 		READ(DW_EH_PE_sdata4, int32_t)
210 		READ(DW_EH_PE_sdata8, int64_t)
211 		READ(DW_EH_PE_absptr, intptr_t)
212 #undef READ
213 		// Read variable-length types
214 		case DW_EH_PE_sleb128:
215 			v = read_sleb128(data);
216 			break;
217 		case DW_EH_PE_uleb128:
218 			v = read_uleb128(data);
219 			break;
220 		default: abort();
221 	}
222 
223 	return v;
224 }
225 
226 /**
227  * Resolves an indirect value.  This expects an unwind context, an encoding, a
228  * decoded value, and the start of the region as arguments.  The returned value
229  * is a pointer to the address identified by the encoded value.
230  *
231  * If the encoding does not specify an indirect value, then this returns v.
232  */
233 static uint64_t resolve_indirect_value(_Unwind_Context *c,
234                                        unsigned char encoding,
235                                        int64_t v,
236                                        dw_eh_ptr_t start)
237 {
238 	switch (get_base(encoding))
239 	{
240 		case DW_EH_PE_pcrel:
241 			v += (uint64_t)start;
242 			break;
243 		case DW_EH_PE_textrel:
244 			v += (uint64_t)_Unwind_GetTextRelBase(c);
245 			break;
246 		case DW_EH_PE_datarel:
247 			v += (uint64_t)_Unwind_GetDataRelBase(c);
248 			break;
249 		case DW_EH_PE_funcrel:
250 			v += (uint64_t)_Unwind_GetRegionStart(c);
251 		default:
252 			break;
253 	}
254 	// If this is an indirect value, then it is really the address of the real
255 	// value
256 	// TODO: Check whether this should really always be a pointer - it seems to
257 	// be a GCC extensions, so not properly documented...
258 	if (is_indirect(encoding))
259 	{
260 		v = (uint64_t)(uintptr_t)*(void**)v;
261 	}
262 	return v;
263 }
264 
265 
266 /**
267  * Reads an encoding and a value, updating *data to point to the next byte.
268  */
269 static inline void read_value_with_encoding(_Unwind_Context *context,
270                                             dw_eh_ptr_t *data,
271                                             uint64_t *out)
272 {
273 	dw_eh_ptr_t start = *data;
274 	unsigned char encoding = *((*data)++);
275 	// If this value is omitted, skip it and don't touch the output value
276 	if (encoding == DW_EH_PE_omit) { return; }
277 
278 	*out = read_value(encoding, data);
279 	*out = resolve_indirect_value(context, encoding, *out, start);
280 }
281 
282 /**
283  * Structure storing a decoded language-specific data area.  Use parse_lsda()
284  * to generate an instance of this structure from the address returned by the
285  * generic unwind library.
286  *
287  * You should not need to inspect the fields of this structure directly if you
288  * are just using this header.  The structure stores the locations of the
289  * various tables used for unwinding exceptions and is used by the functions
290  * for reading values from these tables.
291  */
292 struct dwarf_eh_lsda
293 {
294 	/// The start of the region.  This is a cache of the value returned by
295 	/// _Unwind_GetRegionStart().
296 	dw_eh_ptr_t region_start;
297 	/// The start of the landing pads table.
298 	dw_eh_ptr_t landing_pads;
299 	/// The start of the type table.
300 	dw_eh_ptr_t type_table;
301 	/// The encoding used for entries in the type tables.
302 	unsigned char type_table_encoding;
303 	/// The location of the call-site table.
304 	dw_eh_ptr_t call_site_table;
305 	/// The location of the action table.
306 	dw_eh_ptr_t action_table;
307 	/// The encoding used for entries in the call-site table.
308 	unsigned char callsite_encoding;
309 };
310 
311 /**
312  * Parse the header on the language-specific data area and return a structure
313  * containing the addresses and encodings of the various tables.
314  */
315 static inline struct dwarf_eh_lsda parse_lsda(_Unwind_Context *context,
316                                               unsigned char *data)
317 {
318 	struct dwarf_eh_lsda lsda;
319 
320 	lsda.region_start = (dw_eh_ptr_t)(uintptr_t)_Unwind_GetRegionStart(context);
321 
322 	// If the landing pads are relative to anything other than the start of
323 	// this region, find out where.  This is @LPStart in the spec, although the
324 	// encoding that GCC uses does not quite match the spec.
325 	uint64_t v = (uint64_t)(uintptr_t)lsda.region_start;
326 	read_value_with_encoding(context, &data, &v);
327 	lsda.landing_pads = (dw_eh_ptr_t)(uintptr_t)v;
328 
329 	// If there is a type table, find out where it is.  This is @TTBase in the
330 	// spec.  Note: we find whether there is a type table pointer by checking
331 	// whether the leading byte is DW_EH_PE_omit (0xff), which is not what the
332 	// spec says, but does seem to be how G++ indicates this.
333 	lsda.type_table = 0;
334 	lsda.type_table_encoding = *data++;
335 	if (lsda.type_table_encoding != DW_EH_PE_omit)
336 	{
337 		v = read_uleb128(&data);
338 		dw_eh_ptr_t type_table = data;
339 		type_table += v;
340 		lsda.type_table = type_table;
341 		//lsda.type_table = (uintptr_t*)(data + v);
342 	}
343 #if __arm__
344 	lsda.type_table_encoding = (DW_EH_PE_pcrel | DW_EH_PE_indirect);
345 #endif
346 
347 	lsda.callsite_encoding = (enum dwarf_data_encoding)(*(data++));
348 
349 	// Action table is immediately after the call site table
350 	lsda.action_table = data;
351 	uintptr_t callsite_size = (uintptr_t)read_uleb128(&data);
352 	lsda.action_table = data + callsite_size;
353 	// Call site table is immediately after the header
354 	lsda.call_site_table = (dw_eh_ptr_t)data;
355 
356 
357 	return lsda;
358 }
359 
360 /**
361  * Structure representing an action to be performed while unwinding.  This
362  * contains the address that should be unwound to and the action record that
363  * provoked this action.
364  */
365 struct dwarf_eh_action
366 {
367 	/**
368 	 * The address that this action directs should be the new program counter
369 	 * value after unwinding.
370 	 */
371 	dw_eh_ptr_t landing_pad;
372 	/// The address of the action record.
373 	dw_eh_ptr_t action_record;
374 };
375 
376 /**
377  * Look up the landing pad that corresponds to the current invoke.
378  * Returns true if record exists.  The context is provided by the generic
379  * unwind library and the lsda should be the result of a call to parse_lsda().
380  *
381  * The action record is returned via the result parameter.
382  */
383 static bool dwarf_eh_find_callsite(struct _Unwind_Context *context,
384                                    struct dwarf_eh_lsda *lsda,
385                                    struct dwarf_eh_action *result)
386 {
387 	result->action_record = 0;
388 	result->landing_pad = 0;
389 	// The current instruction pointer offset within the region
390 	uint64_t ip = _Unwind_GetIP(context) - _Unwind_GetRegionStart(context);
391 	unsigned char *callsite_table = (unsigned char*)lsda->call_site_table;
392 
393 	while (callsite_table <= lsda->action_table)
394 	{
395 		// Once again, the layout deviates from the spec.
396 		uint64_t call_site_start, call_site_size, landing_pad, action;
397 		call_site_start = read_value(lsda->callsite_encoding, &callsite_table);
398 		call_site_size = read_value(lsda->callsite_encoding, &callsite_table);
399 
400 		// Call site entries are sorted, so if we find a call site that's after
401 		// the current instruction pointer then there is no action associated
402 		// with this call and we should unwind straight through this frame
403 		// without doing anything.
404 		if (call_site_start > ip) { break; }
405 
406 		// Read the address of the landing pad and the action from the call
407 		// site table.
408 		landing_pad = read_value(lsda->callsite_encoding, &callsite_table);
409 		action = read_uleb128(&callsite_table);
410 
411 		// We should not include the call_site_start (beginning of the region)
412 		// address in the ip range. For each call site:
413 		//
414 		// address1: call proc
415 		// address2: next instruction
416 		//
417 		// The call stack contains address2 and not address1, address1 can be
418 		// at the end of another EH region.
419 		if (call_site_start < ip && ip <= call_site_start + call_site_size)
420 		{
421 			if (action)
422 			{
423 				// Action records are 1-biased so both no-record and zeroth
424 				// record can be stored.
425 				result->action_record = lsda->action_table + action - 1;
426 			}
427 			// No landing pad means keep unwinding.
428 			if (landing_pad)
429 			{
430 				// Landing pad is the offset from the value in the header
431 				result->landing_pad = lsda->landing_pads + landing_pad;
432 			}
433 			return true;
434 		}
435 	}
436 	return false;
437 }
438 
/// Defines a 64-bit exception class from 8 bytes (endian independent).  The
/// first argument becomes the most significant byte.  Each argument is
/// parenthesised so that an expression argument is converted and shifted as a
/// whole.
#define EXCEPTION_CLASS(a,b,c,d,e,f,g,h) \
	(((uint64_t)(a) << 56) +\
	 ((uint64_t)(b) << 48) +\
	 ((uint64_t)(c) << 40) +\
	 ((uint64_t)(d) << 32) +\
	 ((uint64_t)(e) << 24) +\
	 ((uint64_t)(f) << 16) +\
	 ((uint64_t)(g) << 8) +\
	 ((uint64_t)(h)))
449 
/// Defines a 32-bit exception class from 4 bytes (endian independent).  The
/// first argument becomes the most significant byte.  The expansion and each
/// argument are parenthesised so the macro can be used inside larger
/// expressions.
#define GENERIC_EXCEPTION_CLASS(e,f,g,h) \
	(((uint32_t)(e) << 24) +\
	 ((uint32_t)(f) << 16) +\
	 ((uint32_t)(g) << 8) +\
	 ((uint32_t)(h)))
455