xref: /linux/tools/lib/bpf/elf.c (revision 02091cbe9cc4f18167208eec1d6de636cc731817)
1 // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
2 
3 #include <libelf.h>
4 #include <gelf.h>
5 #include <fcntl.h>
6 #include <linux/kernel.h>
7 
8 #include "libbpf_internal.h"
9 #include "str_error.h"
10 
11 #define STRERR_BUFSIZE  128
12 
13 int elf_open(const char *binary_path, struct elf_fd *elf_fd)
14 {
15 	char errmsg[STRERR_BUFSIZE];
16 	int fd, ret;
17 	Elf *elf;
18 
19 	if (elf_version(EV_CURRENT) == EV_NONE) {
20 		pr_warn("elf: failed to init libelf for %s\n", binary_path);
21 		return -LIBBPF_ERRNO__LIBELF;
22 	}
23 	fd = open(binary_path, O_RDONLY | O_CLOEXEC);
24 	if (fd < 0) {
25 		ret = -errno;
26 		pr_warn("elf: failed to open %s: %s\n", binary_path,
27 			libbpf_strerror_r(ret, errmsg, sizeof(errmsg)));
28 		return ret;
29 	}
30 	elf = elf_begin(fd, ELF_C_READ_MMAP, NULL);
31 	if (!elf) {
32 		pr_warn("elf: could not read elf from %s: %s\n", binary_path, elf_errmsg(-1));
33 		close(fd);
34 		return -LIBBPF_ERRNO__FORMAT;
35 	}
36 	elf_fd->fd = fd;
37 	elf_fd->elf = elf;
38 	return 0;
39 }
40 
41 void elf_close(struct elf_fd *elf_fd)
42 {
43 	if (!elf_fd)
44 		return;
45 	elf_end(elf_fd->elf);
46 	close(elf_fd->fd);
47 }
48 
49 /* Return next ELF section of sh_type after scn, or first of that type if scn is NULL. */
50 static Elf_Scn *elf_find_next_scn_by_type(Elf *elf, int sh_type, Elf_Scn *scn)
51 {
52 	while ((scn = elf_nextscn(elf, scn)) != NULL) {
53 		GElf_Shdr sh;
54 
55 		if (!gelf_getshdr(scn, &sh))
56 			continue;
57 		if (sh.sh_type == sh_type)
58 			return scn;
59 	}
60 	return NULL;
61 }
62 
63 struct elf_sym {
64 	const char *name;
65 	GElf_Sym sym;
66 	GElf_Shdr sh;
67 };
68 
69 struct elf_sym_iter {
70 	Elf *elf;
71 	Elf_Data *syms;
72 	size_t nr_syms;
73 	size_t strtabidx;
74 	size_t next_sym_idx;
75 	struct elf_sym sym;
76 	int st_type;
77 };
78 
79 static int elf_sym_iter_new(struct elf_sym_iter *iter,
80 			    Elf *elf, const char *binary_path,
81 			    int sh_type, int st_type)
82 {
83 	Elf_Scn *scn = NULL;
84 	GElf_Ehdr ehdr;
85 	GElf_Shdr sh;
86 
87 	memset(iter, 0, sizeof(*iter));
88 
89 	if (!gelf_getehdr(elf, &ehdr)) {
90 		pr_warn("elf: failed to get ehdr from %s: %s\n", binary_path, elf_errmsg(-1));
91 		return -EINVAL;
92 	}
93 
94 	scn = elf_find_next_scn_by_type(elf, sh_type, NULL);
95 	if (!scn) {
96 		pr_debug("elf: failed to find symbol table ELF sections in '%s'\n",
97 			 binary_path);
98 		return -ENOENT;
99 	}
100 
101 	if (!gelf_getshdr(scn, &sh))
102 		return -EINVAL;
103 
104 	iter->strtabidx = sh.sh_link;
105 	iter->syms = elf_getdata(scn, 0);
106 	if (!iter->syms) {
107 		pr_warn("elf: failed to get symbols for symtab section in '%s': %s\n",
108 			binary_path, elf_errmsg(-1));
109 		return -EINVAL;
110 	}
111 	iter->nr_syms = iter->syms->d_size / sh.sh_entsize;
112 	iter->elf = elf;
113 	iter->st_type = st_type;
114 	return 0;
115 }
116 
117 static struct elf_sym *elf_sym_iter_next(struct elf_sym_iter *iter)
118 {
119 	struct elf_sym *ret = &iter->sym;
120 	GElf_Sym *sym = &ret->sym;
121 	const char *name = NULL;
122 	Elf_Scn *sym_scn;
123 	size_t idx;
124 
125 	for (idx = iter->next_sym_idx; idx < iter->nr_syms; idx++) {
126 		if (!gelf_getsym(iter->syms, idx, sym))
127 			continue;
128 		if (GELF_ST_TYPE(sym->st_info) != iter->st_type)
129 			continue;
130 		name = elf_strptr(iter->elf, iter->strtabidx, sym->st_name);
131 		if (!name)
132 			continue;
133 		sym_scn = elf_getscn(iter->elf, sym->st_shndx);
134 		if (!sym_scn)
135 			continue;
136 		if (!gelf_getshdr(sym_scn, &ret->sh))
137 			continue;
138 
139 		iter->next_sym_idx = idx + 1;
140 		ret->name = name;
141 		return ret;
142 	}
143 
144 	return NULL;
145 }
146 
147 
148 /* Transform symbol's virtual address (absolute for binaries and relative
149  * for shared libs) into file offset, which is what kernel is expecting
150  * for uprobe/uretprobe attachment.
151  * See Documentation/trace/uprobetracer.rst for more details. This is done
152  * by looking up symbol's containing section's header and using iter's virtual
153  * address (sh_addr) and corresponding file offset (sh_offset) to transform
154  * sym.st_value (virtual address) into desired final file offset.
155  */
156 static unsigned long elf_sym_offset(struct elf_sym *sym)
157 {
158 	return sym->sym.st_value - sym->sh.sh_addr + sym->sh.sh_offset;
159 }
160 
161 /* Find offset of function name in the provided ELF object. "binary_path" is
162  * the path to the ELF binary represented by "elf", and only used for error
163  * reporting matters. "name" matches symbol name or name@@LIB for library
164  * functions.
165  */
166 long elf_find_func_offset(Elf *elf, const char *binary_path, const char *name)
167 {
168 	int i, sh_types[2] = { SHT_DYNSYM, SHT_SYMTAB };
169 	bool is_shared_lib, is_name_qualified;
170 	long ret = -ENOENT;
171 	size_t name_len;
172 	GElf_Ehdr ehdr;
173 
174 	if (!gelf_getehdr(elf, &ehdr)) {
175 		pr_warn("elf: failed to get ehdr from %s: %s\n", binary_path, elf_errmsg(-1));
176 		ret = -LIBBPF_ERRNO__FORMAT;
177 		goto out;
178 	}
179 	/* for shared lib case, we do not need to calculate relative offset */
180 	is_shared_lib = ehdr.e_type == ET_DYN;
181 
182 	name_len = strlen(name);
183 	/* Does name specify "@@LIB"? */
184 	is_name_qualified = strstr(name, "@@") != NULL;
185 
186 	/* Search SHT_DYNSYM, SHT_SYMTAB for symbol. This search order is used because if
187 	 * a binary is stripped, it may only have SHT_DYNSYM, and a fully-statically
188 	 * linked binary may not have SHT_DYMSYM, so absence of a section should not be
189 	 * reported as a warning/error.
190 	 */
191 	for (i = 0; i < ARRAY_SIZE(sh_types); i++) {
192 		struct elf_sym_iter iter;
193 		struct elf_sym *sym;
194 		int last_bind = -1;
195 		int cur_bind;
196 
197 		ret = elf_sym_iter_new(&iter, elf, binary_path, sh_types[i], STT_FUNC);
198 		if (ret == -ENOENT)
199 			continue;
200 		if (ret)
201 			goto out;
202 
203 		while ((sym = elf_sym_iter_next(&iter))) {
204 			/* User can specify func, func@@LIB or func@@LIB_VERSION. */
205 			if (strncmp(sym->name, name, name_len) != 0)
206 				continue;
207 			/* ...but we don't want a search for "foo" to match 'foo2" also, so any
208 			 * additional characters in sname should be of the form "@@LIB".
209 			 */
210 			if (!is_name_qualified && sym->name[name_len] != '\0' && sym->name[name_len] != '@')
211 				continue;
212 
213 			cur_bind = GELF_ST_BIND(sym->sym.st_info);
214 
215 			if (ret > 0) {
216 				/* handle multiple matches */
217 				if (last_bind != STB_WEAK && cur_bind != STB_WEAK) {
218 					/* Only accept one non-weak bind. */
219 					pr_warn("elf: ambiguous match for '%s', '%s' in '%s'\n",
220 						sym->name, name, binary_path);
221 					ret = -LIBBPF_ERRNO__FORMAT;
222 					goto out;
223 				} else if (cur_bind == STB_WEAK) {
224 					/* already have a non-weak bind, and
225 					 * this is a weak bind, so ignore.
226 					 */
227 					continue;
228 				}
229 			}
230 
231 			ret = elf_sym_offset(sym);
232 			last_bind = cur_bind;
233 		}
234 		if (ret > 0)
235 			break;
236 	}
237 
238 	if (ret > 0) {
239 		pr_debug("elf: symbol address match for '%s' in '%s': 0x%lx\n", name, binary_path,
240 			 ret);
241 	} else {
242 		if (ret == 0) {
243 			pr_warn("elf: '%s' is 0 in symtab for '%s': %s\n", name, binary_path,
244 				is_shared_lib ? "should not be 0 in a shared library" :
245 						"try using shared library path instead");
246 			ret = -ENOENT;
247 		} else {
248 			pr_warn("elf: failed to find symbol '%s' in '%s'\n", name, binary_path);
249 		}
250 	}
251 out:
252 	return ret;
253 }
254 
255 /* Find offset of function name in ELF object specified by path. "name" matches
256  * symbol name or name@@LIB for library functions.
257  */
258 long elf_find_func_offset_from_file(const char *binary_path, const char *name)
259 {
260 	struct elf_fd elf_fd;
261 	long ret = -ENOENT;
262 
263 	ret = elf_open(binary_path, &elf_fd);
264 	if (ret)
265 		return ret;
266 	ret = elf_find_func_offset(elf_fd.elf, binary_path, name);
267 	elf_close(&elf_fd);
268 	return ret;
269 }
270 
/* A requested symbol name together with bookkeeping for its lookup. */
struct symbol {
	/* symbol name to look up */
	const char *name;
	/* ELF binding of the match recorded for this name */
	int bind;
	/* position of this name in the caller's original array */
	int idx;
};

/* qsort()/bsearch() comparator: orders struct symbol entries by name. */
static int symbol_cmp(const void *a, const void *b)
{
	const char *lhs = ((const struct symbol *)a)->name;
	const char *rhs = ((const struct symbol *)b)->name;

	return strcmp(lhs, rhs);
}
284 
285 /*
286  * Return offsets in @poffsets for symbols specified in @syms array argument.
287  * On success returns 0 and offsets are returned in allocated array with @cnt
288  * size, that needs to be released by the caller.
289  */
290 int elf_resolve_syms_offsets(const char *binary_path, int cnt,
291 			     const char **syms, unsigned long **poffsets)
292 {
293 	int sh_types[2] = { SHT_DYNSYM, SHT_SYMTAB };
294 	int err = 0, i, cnt_done = 0;
295 	unsigned long *offsets;
296 	struct symbol *symbols;
297 	struct elf_fd elf_fd;
298 
299 	err = elf_open(binary_path, &elf_fd);
300 	if (err)
301 		return err;
302 
303 	offsets = calloc(cnt, sizeof(*offsets));
304 	symbols = calloc(cnt, sizeof(*symbols));
305 
306 	if (!offsets || !symbols) {
307 		err = -ENOMEM;
308 		goto out;
309 	}
310 
311 	for (i = 0; i < cnt; i++) {
312 		symbols[i].name = syms[i];
313 		symbols[i].idx = i;
314 	}
315 
316 	qsort(symbols, cnt, sizeof(*symbols), symbol_cmp);
317 
318 	for (i = 0; i < ARRAY_SIZE(sh_types); i++) {
319 		struct elf_sym_iter iter;
320 		struct elf_sym *sym;
321 
322 		err = elf_sym_iter_new(&iter, elf_fd.elf, binary_path, sh_types[i], STT_FUNC);
323 		if (err == -ENOENT)
324 			continue;
325 		if (err)
326 			goto out;
327 
328 		while ((sym = elf_sym_iter_next(&iter))) {
329 			unsigned long sym_offset = elf_sym_offset(sym);
330 			int bind = GELF_ST_BIND(sym->sym.st_info);
331 			struct symbol *found, tmp = {
332 				.name = sym->name,
333 			};
334 			unsigned long *offset;
335 
336 			found = bsearch(&tmp, symbols, cnt, sizeof(*symbols), symbol_cmp);
337 			if (!found)
338 				continue;
339 
340 			offset = &offsets[found->idx];
341 			if (*offset > 0) {
342 				/* same offset, no problem */
343 				if (*offset == sym_offset)
344 					continue;
345 				/* handle multiple matches */
346 				if (found->bind != STB_WEAK && bind != STB_WEAK) {
347 					/* Only accept one non-weak bind. */
348 					pr_warn("elf: ambiguous match found '%s@%lu' in '%s' previous offset %lu\n",
349 						sym->name, sym_offset, binary_path, *offset);
350 					err = -ESRCH;
351 					goto out;
352 				} else if (bind == STB_WEAK) {
353 					/* already have a non-weak bind, and
354 					 * this is a weak bind, so ignore.
355 					 */
356 					continue;
357 				}
358 			} else {
359 				cnt_done++;
360 			}
361 			*offset = sym_offset;
362 			found->bind = bind;
363 		}
364 	}
365 
366 	if (cnt != cnt_done) {
367 		err = -ENOENT;
368 		goto out;
369 	}
370 
371 	*poffsets = offsets;
372 
373 out:
374 	free(symbols);
375 	if (err)
376 		free(offsets);
377 	elf_close(&elf_fd);
378 	return err;
379 }
380 
381 /*
382  * Return offsets in @poffsets for symbols specified by @pattern argument.
383  * On success returns 0 and offsets are returned in allocated @poffsets
384  * array with the @pctn size, that needs to be released by the caller.
385  */
386 int elf_resolve_pattern_offsets(const char *binary_path, const char *pattern,
387 				unsigned long **poffsets, size_t *pcnt)
388 {
389 	int sh_types[2] = { SHT_SYMTAB, SHT_DYNSYM };
390 	unsigned long *offsets = NULL;
391 	size_t cap = 0, cnt = 0;
392 	struct elf_fd elf_fd;
393 	int err = 0, i;
394 
395 	err = elf_open(binary_path, &elf_fd);
396 	if (err)
397 		return err;
398 
399 	for (i = 0; i < ARRAY_SIZE(sh_types); i++) {
400 		struct elf_sym_iter iter;
401 		struct elf_sym *sym;
402 
403 		err = elf_sym_iter_new(&iter, elf_fd.elf, binary_path, sh_types[i], STT_FUNC);
404 		if (err == -ENOENT)
405 			continue;
406 		if (err)
407 			goto out;
408 
409 		while ((sym = elf_sym_iter_next(&iter))) {
410 			if (!glob_match(sym->name, pattern))
411 				continue;
412 
413 			err = libbpf_ensure_mem((void **) &offsets, &cap, sizeof(*offsets),
414 						cnt + 1);
415 			if (err)
416 				goto out;
417 
418 			offsets[cnt++] = elf_sym_offset(sym);
419 		}
420 
421 		/* If we found anything in the first symbol section,
422 		 * do not search others to avoid duplicates.
423 		 */
424 		if (cnt)
425 			break;
426 	}
427 
428 	if (cnt) {
429 		*poffsets = offsets;
430 		*pcnt = cnt;
431 	} else {
432 		err = -ENOENT;
433 	}
434 
435 out:
436 	if (err)
437 		free(offsets);
438 	elf_close(&elf_fd);
439 	return err;
440 }
441