xref: /titanic_44/usr/src/cmd/sgs/tools/common/leb128.c (revision 18c2aff776a775d34a4c9893a4c72e0434d68e36)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <stdio.h>
30 #include <dwarf.h>
31 #include <sys/types.h>
32 #include <sys/elf.h>
33 
34 /*
35  * Little Endian Base 128 (LEB128) numbers.
36  * ----------------------------------------
37  *
38  * LEB128 is a scheme for encoding integers densely that exploits the
39  * assumption that most integers are small in magnitude. (This encoding
40  * is equally suitable whether the target machine architecture represents
41  * data in big-endian or little-endian format.)
42  *
43  * Unsigned LEB128 numbers are encoded as follows: start at the low order
44  * end of an unsigned integer and chop it into 7-bit chunks. Place each
45  * chunk into the low order 7 bits of a byte. Typically, several of the
46  * high order bytes will be zero; discard them. Emit the remaining bytes in
47  * a stream, starting with the low order byte; set the high order bit on
48  * each byte except the last emitted byte. The high bit of zero on the last
49  * byte indicates to the decoder that it has encountered the last byte.
50  * The integer zero is a special case, consisting of a single zero byte.
51  *
52  * Signed, 2s complement LEB128 numbers are encoded in a similar way, except
53  * that the criterion for discarding high order bytes is not whether they
54  * are zero, but whether they consist entirely of sign extension bits.
55  * Consider the 32-bit integer -2. The three high level bytes of the number
56  * are sign extension, thus LEB128 would represent it as a single byte
57  * containing the low order 7 bits, with the high order bit cleared to
58  * indicate the end of the byte stream.
59  *
60  * Note that there is nothing within the LEB128 representation that
61  * indicates whether an encoded number is signed or unsigned. The decoder
62  * must know what type of number to expect.
63  *
64  * DWARF Exception Header Encoding
65  * -------------------------------
66  *
67  * The DWARF Exception Header Encoding is used to describe the type of data
68  * used in the .eh_frame_hdr section. The upper 4 bits indicate how the
69  * value is to be applied. The lower 4 bits indicate the format of the data.
70  *
71  * DWARF Exception Header value format
72  *
73  * Name		Value Meaning
74  * DW_EH_PE_omit	    0xff No value is present.
75  * DW_EH_PE_absptr	    0x00 Value is a void*
76  * DW_EH_PE_uleb128	    0x01 Unsigned value is encoded using the
77  *				 Little Endian Base 128 (LEB128)
78  * DW_EH_PE_udata2	    0x02 A 2 bytes unsigned value.
79  * DW_EH_PE_udata4	    0x03 A 4 bytes unsigned value.
80  * DW_EH_PE_udata8	    0x04 An 8 bytes unsigned value.
81  * DW_EH_PE_signed          0x08 bit on for all signed encodings
82  * DW_EH_PE_sleb128	    0x09 Signed value is encoded using the
83  *				 Little Endian Base 128 (LEB128)
84  * DW_EH_PE_sdata2	    0x0A A 2 bytes signed value.
85  * DW_EH_PE_sdata4	    0x0B A 4 bytes signed value.
86  * DW_EH_PE_sdata8	    0x0C An 8 bytes signed value.
87  *
88  * DWARF Exception Header application
89  *
90  * Name	    Value Meaning
91  * DW_EH_PE_absptr	   0x00 Value is used with no modification.
92  * DW_EH_PE_pcrel	   0x10 Value is relative to the location of itself
93  * DW_EH_PE_textrel	   0x20
94  * DW_EH_PE_datarel	   0x30 Value is relative to the beginning of the
95  *				eh_frame_hdr segment ( segment type
96  *			        PT_GNU_EH_FRAME )
97  * DW_EH_PE_funcrel        0x40
98  * DW_EH_PE_aligned        0x50 value is an aligned void*
99  * DW_EH_PE_indirect       0x80 bit to signal indirection after relocation
100  * DW_EH_PE_omit	   0xff No value is present.
101  *
102  */
103 
/*
 * Decode one unsigned LEB128 number.
 *
 * entry:
 *	data - base of the buffer that holds the encoded number.
 *	dotp - offset, relative to data, of the first encoded byte.
 *
 * exit:
 *	Returns the decoded value.  *dotp is advanced past the last byte
 *	of the encoding (the first byte whose high bit is clear).
 *
 * note:
 *	No buffer length is available, so the caller must guarantee that a
 *	terminating byte (high bit clear) exists within the buffer.
 */
uint64_t
uleb_extract(unsigned char *data, uint64_t *dotp)
{
	uint64_t	dot = *dotp;
	uint64_t	res = 0;
	unsigned int	shift = 0;
	unsigned char	byte;

	do {
		byte = data[dot++];

		/*
		 * Merge the low 7 bits in at the current position.  The
		 * chunk must be widened to 64 bits before it is shifted:
		 * shifting it as an int overflows once shift reaches 28.
		 * Bits that cannot fit in 64 bits (an over-long encoding)
		 * are dropped, but their bytes are still consumed.
		 */
		if (shift < 64) {
			res |= (uint64_t)(byte & 0x7f) << shift;
			shift += 7;
		}

		/*
		 * A set high bit means another byte follows.
		 */
	} while (byte & 0x80);

	*dotp = dot;
	return (res);
}
141 
/*
 * Decode one signed (2s complement) LEB128 number.
 *
 * entry:
 *	data - base of the buffer that holds the encoded number.
 *	dotp - offset, relative to data, of the first encoded byte.
 *
 * exit:
 *	Returns the decoded, sign extended value.  *dotp is advanced past
 *	the last byte of the encoding (the first byte whose high bit is
 *	clear).
 *
 * note:
 *	No buffer length is available, so the caller must guarantee that a
 *	terminating byte (high bit clear) exists within the buffer.
 */
int64_t
sleb_extract(unsigned char *data, uint64_t *dotp)
{
	uint64_t	dot = *dotp;
	uint64_t	res = 0;
	unsigned int	shift = 0;
	unsigned char	byte;

	do {
		byte = data[dot++];

		/*
		 * Accumulate in an unsigned 64-bit value.  Shifting the
		 * 7-bit chunk as an int overflows once shift reaches 28,
		 * and left shifting a negative signed value is undefined.
		 * Bits beyond 64 (an over-long encoding) are dropped, but
		 * their bytes are still consumed.
		 */
		if (shift < 64) {
			res |= (uint64_t)(byte & 0x7f) << shift;
			shift += 7;
		}

		/*
		 * A set high bit means another byte follows.
		 */
	} while (byte & 0x80);

	/*
	 * Bit 6 of the final byte is the sign bit of the encoded value.
	 * If it is set, and the value did not already fill all 64 bits,
	 * propagate it through the remaining high order bits.
	 */
	if ((shift < 64) && (byte & 0x40))
		res |= ~(uint64_t)0 << shift;

	*dotp = dot;
	return ((int64_t)res);
}
185 
186 uint64_t
187 dwarf_ehe_extract(unsigned char *data, uint64_t *dotp, uint_t ehe_flags,
188     unsigned char *eident, uint64_t pcaddr)
189 {
190 	uint64_t    dot = *dotp;
191 	uint_t	    lsb;
192 	uint_t	    wordsize;
193 	uint_t	    fsize;
194 	uint64_t    result;
195 
196 	if (eident[EI_DATA] == ELFDATA2LSB)
197 		lsb = 1;
198 	else
199 		lsb = 0;
200 
201 	if (eident[EI_CLASS] == ELFCLASS64)
202 		wordsize = 8;
203 	else
204 		wordsize = 4;
205 
206 	switch (ehe_flags & 0x0f) {
207 	case DW_EH_PE_omit:
208 		return (0);
209 	case DW_EH_PE_absptr:
210 		fsize = wordsize;
211 		break;
212 	case DW_EH_PE_udata8:
213 	case DW_EH_PE_sdata8:
214 		fsize = 8;
215 		break;
216 	case DW_EH_PE_udata4:
217 	case DW_EH_PE_sdata4:
218 		fsize = 4;
219 		break;
220 	case DW_EH_PE_udata2:
221 	case DW_EH_PE_sdata2:
222 		fsize = 2;
223 		break;
224 	case DW_EH_PE_uleb128:
225 		return (uleb_extract(data, dotp));
226 	case DW_EH_PE_sleb128:
227 		return ((uint64_t)sleb_extract(data, dotp));
228 	default:
229 		return (0);
230 	}
231 
232 	if (lsb) {
233 		/*
234 		 * Extract unaligned LSB formated data
235 		 */
236 		uint_t	cnt;
237 
238 		result = 0;
239 		for (cnt = 0; cnt < fsize;
240 		    cnt++, dot++) {
241 			uint64_t val;
242 			val = data[dot];
243 			result |= val << (cnt * 8);
244 		}
245 	} else {
246 		/*
247 		 * Extract unaligned MSB formated data
248 		 */
249 		uint_t	cnt;
250 		result = 0;
251 		for (cnt = 0; cnt < fsize;
252 		    cnt++, dot++) {
253 			uint64_t	val;
254 			val = data[dot];
255 			result |= val << ((fsize - cnt - 1) * 8);
256 		}
257 	}
258 	/*
259 	 * perform sign extension
260 	 */
261 	if ((ehe_flags & DW_EH_PE_signed) &&
262 	    (fsize < sizeof (uint64_t))) {
263 		int64_t	sresult;
264 		uint_t	bitshift;
265 		sresult = result;
266 		bitshift = (sizeof (uint64_t) - fsize) * 8;
267 		sresult = (sresult << bitshift) >> bitshift;
268 		result = sresult;
269 	}
270 
271 	/*
272 	 * If pcrel and we have a value (ie: we've been
273 	 * relocated), then adjust the value.
274 	 */
275 	if (result && (ehe_flags & DW_EH_PE_pcrel)) {
276 		result = pcaddr + result;
277 	}
278 	*dotp = dot;
279 	return (result);
280 }
281