xref: /titanic_52/usr/src/cmd/sgs/tools/common/leb128.c (revision 4c1177a46d4d850e30806d4e27d635527bba8e90)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <stdio.h>
28 #include <dwarf.h>
29 #include <sys/types.h>
30 #include <sys/elf.h>
31 
32 /*
33  * Little Endian Base 128 (LEB128) numbers.
34  * ----------------------------------------
35  *
36  * LEB128 is a scheme for encoding integers densely that exploits the
37  * assumption that most integers are small in magnitude. (This encoding
38  * is equally suitable whether the target machine architecture represents
39  * data in big-endian or little-endian format.)
40  *
41  * Unsigned LEB128 numbers are encoded as follows: start at the low order
42  * end of an unsigned integer and chop it into 7-bit chunks. Place each
43  * chunk into the low order 7 bits of a byte. Typically, several of the
44  * high order bytes will be zero; discard them. Emit the remaining bytes in
45  * a stream, starting with the low order byte; set the high order bit on
46  * each byte except the last emitted byte. The high bit of zero on the last
47  * byte indicates to the decoder that it has encountered the last byte.
48  * The integer zero is a special case, consisting of a single zero byte.
49  *
50  * Signed, 2s complement LEB128 numbers are encoded in a similar manner, except
51  * that the criterion for discarding high order bytes is not whether they
52  * are zero, but whether they consist entirely of sign extension bits.
53  * Consider the 32-bit integer -2. The three high level bytes of the number
54  * are sign extension, thus LEB128 would represent it as a single byte
55  * containing the low order 7 bits, with the high order bit cleared to
56  * indicate the end of the byte stream.
57  *
58  * Note that there is nothing within the LEB128 representation that
59  * indicates whether an encoded number is signed or unsigned. The decoder
60  * must know what type of number to expect.
61  *
62  * DWARF Exception Header Encoding
63  * -------------------------------
64  *
65  * The DWARF Exception Header Encoding is used to describe the type of data
66  * used in the .eh_frame_hdr section. The upper 4 bits indicate how the
67  * value is to be applied. The lower 4 bits indicate the format of the data.
68  *
69  * DWARF Exception Header value format
70  *
71  * Name		Value Meaning
72  * DW_EH_PE_omit	    0xff No value is present.
73  * DW_EH_PE_absptr	    0x00 Value is a void*
74  * DW_EH_PE_uleb128	    0x01 Unsigned value is encoded using the
75  *				 Little Endian Base 128 (LEB128)
76  * DW_EH_PE_udata2	    0x02 A 2 bytes unsigned value.
77  * DW_EH_PE_udata4	    0x03 A 4 bytes unsigned value.
78  * DW_EH_PE_udata8	    0x04 An 8 bytes unsigned value.
79  * DW_EH_PE_signed          0x08 bit on for all signed encodings
80  * DW_EH_PE_sleb128	    0x09 Signed value is encoded using the
81  *				 Little Endian Base 128 (LEB128)
82  * DW_EH_PE_sdata2	    0x0A A 2 bytes signed value.
83  * DW_EH_PE_sdata4	    0x0B A 4 bytes signed value.
84  * DW_EH_PE_sdata8	    0x0C An 8 bytes signed value.
85  *
86  * DWARF Exception Header application
87  *
88  * Name	    Value Meaning
89  * DW_EH_PE_absptr	   0x00 Value is used with no modification.
90  * DW_EH_PE_pcrel	   0x10 Value is relative to the location of itself
91  * DW_EH_PE_textrel	   0x20
92  * DW_EH_PE_datarel	   0x30 Value is relative to the beginning of the
93  *				eh_frame_hdr segment ( segment type
94  *			        PT_GNU_EH_FRAME )
95  * DW_EH_PE_funcrel        0x40
96  * DW_EH_PE_aligned        0x50 value is an aligned void*
97  * DW_EH_PE_indirect       0x80 bit to signal indirection after relocation
98  * DW_EH_PE_omit	   0xff No value is present.
99  *
100  */
101 
102 dwarf_error_t
103 uleb_extract(unsigned char *data, uint64_t *dotp, size_t len, uint64_t *ret)
104 {
105 	uint64_t	dot = *dotp;
106 	uint64_t	res = 0;
107 	int		more = 1;
108 	int		shift = 0;
109 	int		val;
110 
111 	data += dot;
112 
113 	while (more) {
114 		if (dot > len)
115 			return (DW_OVERFLOW);
116 
117 		/*
118 		 * Pull off lower 7 bits
119 		 */
120 		val = (*data) & 0x7f;
121 
122 		/*
123 		 * Add prepend value to head of number.
124 		 */
125 		res = res | (val << shift);
126 
127 		/*
128 		 * Increment shift & dot pointer
129 		 */
130 		shift += 7;
131 		dot++;
132 
133 		/*
134 		 * Check to see if hi bit is set - if not, this
135 		 * is the last byte.
136 		 */
137 		more = ((*data++) & 0x80) >> 7;
138 	}
139 	*dotp = dot;
140 	*ret = res;
141 	return (DW_SUCCESS);
142 }
143 
144 dwarf_error_t
145 sleb_extract(unsigned char *data, uint64_t *dotp, size_t len, int64_t *ret)
146 {
147 	uint64_t	dot = *dotp;
148 	int64_t		res = 0;
149 	int		more = 1;
150 	int		shift = 0;
151 	int		val;
152 
153 	data += dot;
154 
155 	while (more) {
156 		if (dot > len)
157 			return (DW_OVERFLOW);
158 
159 		/*
160 		 * Pull off lower 7 bits
161 		 */
162 		val = (*data) & 0x7f;
163 
164 		/*
165 		 * Add prepend value to head of number.
166 		 */
167 		res = res | (val << shift);
168 
169 		/*
170 		 * Increment shift & dot pointer
171 		 */
172 		shift += 7;
173 		dot++;
174 
175 		/*
176 		 * Check to see if hi bit is set - if not, this
177 		 * is the last byte.
178 		 */
179 		more = ((*data++) & 0x80) >> 7;
180 	}
181 	*dotp = dot;
182 
183 	/*
184 	 * Make sure value is properly sign extended.
185 	 */
186 	res = (res << (64 - shift)) >> (64 - shift);
187 	*ret = res;
188 	return (DW_SUCCESS);
189 }
190 
191 /*
192  * Extract a DWARF encoded datum
193  *
194  * entry:
195  *	data - Base of data buffer containing encoded bytes
196  *	dotp - Address of variable containing index within data
197  *		at which the desired datum starts.
198  *	ehe_flags - DWARF encoding
199  *	eident - ELF header e_ident[] array for object being processed
200  *	frame_hdr - Boolean, true if we're extracting from .eh_frame_hdr
201  *	sh_base - Base address of ELF section containing desired datum
202  *	sh_offset - Offset relative to sh_base of desired datum.
203  *	dbase - The base address to which DW_EH_PE_datarel is relative
204  *		(if frame_hdr is false)
205  */
206 dwarf_error_t
207 dwarf_ehe_extract(unsigned char *data, size_t len, uint64_t *dotp,
208     uint64_t *ret, uint_t ehe_flags, unsigned char *eident,
209     boolean_t frame_hdr, uint64_t sh_base, uint64_t sh_offset,
210     uint64_t dbase)
211 {
212 	uint64_t    dot = *dotp;
213 	uint_t	    lsb;
214 	uint_t	    wordsize;
215 	uint_t	    fsize;
216 	uint64_t    result;
217 
218 	if (eident[EI_DATA] == ELFDATA2LSB)
219 		lsb = 1;
220 	else
221 		lsb = 0;
222 
223 	if (eident[EI_CLASS] == ELFCLASS64)
224 		wordsize = 8;
225 	else
226 		wordsize = 4;
227 
228 	switch (ehe_flags & 0x0f) {
229 	case DW_EH_PE_omit:
230 		*ret = 0;
231 		return (DW_SUCCESS);
232 	case DW_EH_PE_absptr:
233 		fsize = wordsize;
234 		break;
235 	case DW_EH_PE_udata8:
236 	case DW_EH_PE_sdata8:
237 		fsize = 8;
238 		break;
239 	case DW_EH_PE_udata4:
240 	case DW_EH_PE_sdata4:
241 		fsize = 4;
242 		break;
243 	case DW_EH_PE_udata2:
244 	case DW_EH_PE_sdata2:
245 		fsize = 2;
246 		break;
247 	case DW_EH_PE_uleb128:
248 		return (uleb_extract(data, dotp, len, ret));
249 	case DW_EH_PE_sleb128:
250 		return (sleb_extract(data, dotp, len, (int64_t *)ret));
251 	default:
252 		*ret = 0;
253 		return (DW_BAD_ENCODING);
254 	}
255 
256 	if (lsb) {
257 		/*
258 		 * Extract unaligned LSB formated data
259 		 */
260 		uint_t	cnt;
261 
262 		result = 0;
263 		for (cnt = 0; cnt < fsize;
264 		    cnt++, dot++) {
265 			uint64_t val;
266 
267 			if (dot > len)
268 				return (DW_OVERFLOW);
269 			val = data[dot];
270 			result |= val << (cnt * 8);
271 		}
272 	} else {
273 		/*
274 		 * Extract unaligned MSB formated data
275 		 */
276 		uint_t	cnt;
277 		result = 0;
278 		for (cnt = 0; cnt < fsize;
279 		    cnt++, dot++) {
280 			uint64_t val;
281 
282 			if (dot > len)
283 				return (DW_OVERFLOW);
284 			val = data[dot];
285 			result |= val << ((fsize - cnt - 1) * 8);
286 		}
287 	}
288 	/*
289 	 * perform sign extension
290 	 */
291 	if ((ehe_flags & DW_EH_PE_signed) &&
292 	    (fsize < sizeof (uint64_t))) {
293 		int64_t	sresult;
294 		uint_t	bitshift;
295 		sresult = result;
296 		bitshift = (sizeof (uint64_t) - fsize) * 8;
297 		sresult = (sresult << bitshift) >> bitshift;
298 		result = sresult;
299 	}
300 
301 	/*
302 	 * If value is relative to a base address, adjust it
303 	 */
304 	switch (ehe_flags & 0xf0) {
305 	case DW_EH_PE_pcrel:
306 		result += sh_base + sh_offset;
307 		break;
308 
309 	/*
310 	 * datarel is relative to .eh_frame_hdr if within .eh_frame,
311 	 * but GOT if not.
312 	 */
313 	case DW_EH_PE_datarel:
314 		if (frame_hdr)
315 			result += sh_base;
316 		else
317 			result += dbase;
318 		break;
319 	}
320 
321 	/* Truncate the result to its specified size */
322 	result = (result << ((sizeof (uint64_t) - fsize) * 8)) >>
323 	    ((sizeof (uint64_t) - fsize) * 8);
324 
325 	*dotp = dot;
326 	*ret = result;
327 	return (DW_SUCCESS);
328 }
329