xref: /illumos-gate/usr/src/cmd/sgs/libelf/common/getarsym.c (revision 187670a04e7557914566fc449b4d3af38caea282)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 /*	Copyright (c) 1988 AT&T	*/
27 /*	  All Rights Reserved  	*/
28 
29 #include <stdlib.h>
30 #include <errno.h>
31 #include <libelf.h>
32 #include "decl.h"
33 #include "msg.h"
34 
35 
36 /*
37  * Convert archive symbol table to memory format
38  *
39  * This takes a pointer to file's archive symbol table, alignment
40  * unconstrained.  Returns null terminated vector of Elf_Arsym
41  * structures. Elf_Arsym uses size_t to represent offsets, which
42  * will be 32-bit in 32-bit versions, and 64-bits otherwise.
43  *
44  * There are two forms of archive symbol table, the original 32-bit
45  * form, and a 64-bit form originally found in IRIX64. The two formats
46  * differ only in the width of the integer word:
47  *
48  *		# offsets	4/8-byte word
49  *		offset[0...]	4/8-byte word each
50  *		strings		null-terminated, for offset[x]
51  *
52  * By default, the 64-bit form is only used when the archive exceeds
53  * the limits of 32-bits (4GB) in size. However, this is not required,
54  * and the ar -S option can be used to create a 64-bit symbol table in
55  * an archive that is under 4GB.
56  *
57  * Both 32 and 64-bit versions of libelf can read the 32-bit format
58  * without loss of information. Similarly, a 64-bit version of libelf
59  * will have no problem reading a 64-bit symbol table. This leaves the
60  * case where a 32-bit libelf reads a 64-bit symbol table, which requires
61  * some explanation. The offsets in a 64-bit symbol table will have zeros
62  * in the upper half of the words until the size of the archive exceeds 4GB.
63  * However, 32-bit libelf is unable to read any files larger than 2GB
64  * (see comments in update.c). As such, any archive that the 32-bit version
65  * of this code will encounter will be under 4GB in size. The upper 4
66  * bytes of each word will be zero, and can be safely ignored.
67  */
68 
69 
/*
 * Offsets in archive headers are written in MSB (big-endian) order
 * on all platforms, regardless of native byte order. These macros read
 * 4 and 8 byte values from unaligned memory.
 *
 * note:
 * -	The get8() macro for 32-bit code can ignore the first 4 bytes
 *	of the word, because they are known to be 0.
 *
 * -	The innermost value in these macros is cast to an unsigned integer
 *	of the final width in order to prevent the C compiler from doing
 *	unwanted sign extension when the topmost bit of a byte is set.
 */
/*
 * Read a 4 or 8 byte MSB-first word starting at byte pointer p.
 *
 * The argument is parenthesized at every use so that a pointer
 * expression such as (buf + 4) can be passed safely. It is evaluated
 * more than once, so it must not have side effects.
 *
 * In non-_LP64 code, get8() reads only the low-order 4 bytes (p[4..7])
 * of the 8 byte word.
 */
#define	get4(p)	(((((((uint32_t)(p)[0]<<8)+(p)[1])<<8)+(p)[2])<<8)+(p)[3])

#ifdef _LP64
#define	get8(p)	(((((((((((((((uint64_t)(p)[0]<<8)+(p)[1])<<8)+(p)[2])<<8)+ \
    (p)[3])<<8)+(p)[4])<<8)+(p)[5])<<8)+(p)[6])<<8)+(p)[7])
#else
#define	get8(p)	(((((((uint64_t)(p)[4]<<8)+(p)[5])<<8)+(p)[6])<<8)+(p)[7])
#endif
91 
92 
93 static Elf_Void *
94 arsym(Byte *off, size_t sz, size_t *e, int is64)
95 {
96 	char		*endstr = (char *)off + sz;
97 	register char	*str;
98 	Byte		*endoff;
99 	Elf_Void	*oas;
100 	size_t		eltsize = is64 ? 8 : 4;
101 
102 	{
103 		register size_t	n;
104 
105 		if (is64) {
106 			if (sz < 8 || (sz - 8) / 8 < (n = get8(off))) {
107 				_elf_seterr(EFMT_ARSYMSZ, 0);
108 				return (NULL);
109 			}
110 		} else {
111 			if (sz < 4 || (sz - 4) / 4 < (n = get4(off))) {
112 				_elf_seterr(EFMT_ARSYMSZ, 0);
113 				return (NULL);
114 			}
115 		}
116 		off += eltsize;
117 		endoff = off + n * eltsize;
118 
119 		/*
120 		 * If there are symbols in the symbol table, a
121 		 * string table must be present and NULL terminated.
122 		 *
123 		 * The format dictates that the string table must always be
124 		 * present, however in the case of an archive containing no
125 		 * symbols GNU ar will not create one.  We are permissive for
126 		 * the sake of compatibility.
127 		 */
128 		if ((n > 0) && (((str = (char *)endoff) >= endstr) ||
129 		    (*(endstr - 1) != '\0'))) {
130 			_elf_seterr(EFMT_ARSYM, 0);
131 			return (NULL);
132 		}
133 
134 		/*
135 		 * There is always at least one entry returned if a symtab
136 		 * exists since the table's last entry is an artificial one
137 		 * with a NULL as_name, but is included in the count.
138 		 *
139 		 * overflow can occur here, but not likely
140 		 */
141 		*e = n + 1;
142 		if ((oas = calloc(n + 1, sizeof (Elf_Arsym))) == NULL) {
143 			_elf_seterr(EMEM_ARSYM, errno);
144 			return (NULL);
145 		}
146 	}
147 	{
148 		register Elf_Arsym	*as = (Elf_Arsym *)oas;
149 
150 		while (off < endoff) {
151 			if (str >= endstr) {
152 				_elf_seterr(EFMT_ARSYMSTR, 0);
153 				free(oas);
154 				return (NULL);
155 			}
156 			if (is64)
157 				as->as_off = get8(off);
158 			else
159 				as->as_off = get4(off);
160 			as->as_name = str;
161 			as->as_hash = elf_hash(str);
162 			++as;
163 			off += eltsize;
164 			while (*str++ != '\0')
165 				/* LINTED */
166 				;
167 		}
168 		as->as_name = NULL;
169 		as->as_off = 0;
170 		as->as_hash = ~(unsigned long)0L;
171 	}
172 	return (oas);
173 }
174 
175 
176 Elf_Arsym *
177 elf_getarsym(Elf *elf, size_t *ptr)
178 {
179 	Byte		*as;
180 	size_t		sz;
181 	Elf_Arsym	*rc;
182 	int		is64;
183 
184 	if (ptr != 0)
185 		*ptr = 0;
186 	if (elf == NULL)
187 		return (0);
188 	ELFRLOCK(elf);
189 	if (elf->ed_kind != ELF_K_AR) {
190 		ELFUNLOCK(elf);
191 		_elf_seterr(EREQ_AR, 0);
192 		return (0);
193 	}
194 	if ((as = (Byte *)elf->ed_arsym) == 0) {
195 		ELFUNLOCK(elf);
196 		return (0);
197 	}
198 	if (elf->ed_myflags & EDF_ASALLOC) {
199 		if (ptr != 0)
200 			*ptr = elf->ed_arsymsz;
201 		ELFUNLOCK(elf);
202 		/* LINTED */
203 		return ((Elf_Arsym *)as);
204 	}
205 	is64 = (elf->ed_myflags & EDF_ARSYM64) != 0;
206 
207 	/*
208 	 * We're gonna need a write lock.
209 	 */
210 	ELFUNLOCK(elf)
211 	ELFWLOCK(elf)
212 	sz = elf->ed_arsymsz;
213 	if (_elf_vm(elf, (size_t)(as - (Byte *)elf->ed_ident), sz) !=
214 	    OK_YES) {
215 		ELFUNLOCK(elf);
216 		return (0);
217 	}
218 	if ((elf->ed_arsym = arsym(as, sz, &elf->ed_arsymsz, is64)) == 0) {
219 		ELFUNLOCK(elf);
220 		return (0);
221 	}
222 	elf->ed_myflags |= EDF_ASALLOC;
223 	if (ptr != 0)
224 		*ptr = elf->ed_arsymsz;
225 	rc = (Elf_Arsym *)elf->ed_arsym;
226 	ELFUNLOCK(elf);
227 	return (rc);
228 }
229 
230 /*
231  * Private function to obtain the value sizeof() would return
232  * for a word from the symbol table from the given archive. Normally,
233  * this is an unimportant implementation detail hidden within
234  * elf_getarsym(). However, it is useful to elfdump for formatting the
235  * output correctly, and for the file command.
236  *
237  * exit:
238  *	Returns 4 (32-bit) or 8 (64-bit) if a symbol table is present.
239  *	Returns 0 in all other cases.
240  */
241 size_t
242 _elf_getarsymwordsize(Elf *elf)
243 {
244 	size_t	size;
245 
246 	if (elf == NULL)
247 		return (0);
248 
249 	ELFRLOCK(elf);
250 	if ((elf->ed_kind == ELF_K_AR) && (elf->ed_arsym != 0))
251 		size = (elf->ed_myflags & EDF_ARSYM64) ? 8 : 4;
252 	else
253 		size = 0;
254 	ELFUNLOCK(elf);
255 
256 	return (size);
257 }
258