xref: /illumos-gate/usr/src/lib/libproc/common/Psymtab.c (revision e8c318c6e8009fecb0467195d49b1b331695efa2)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright 2016 Joyent, Inc.
25  * Copyright (c) 2013 by Delphix. All rights reserved.
26  * Copyright 2023 Oxide Computer Company
27  */
28 
29 #include <assert.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <stddef.h>
33 #include <unistd.h>
34 #include <ctype.h>
35 #include <fcntl.h>
36 #include <string.h>
37 #include <strings.h>
38 #include <memory.h>
39 #include <errno.h>
40 #include <dirent.h>
41 #include <signal.h>
42 #include <limits.h>
43 #include <libgen.h>
44 #include <sys/types.h>
45 #include <sys/stat.h>
46 #include <sys/sysmacros.h>
47 #include <sys/crc32.h>
48 
49 #include "libproc.h"
50 #include "Pcontrol.h"
51 #include "Putil.h"
52 #include "Psymtab_machelf.h"
53 
/*
 * Forward declarations of file-local (static) helpers so that they can be
 * called ahead of their definitions.
 */
static file_info_t *build_map_symtab(struct ps_prochandle *, map_info_t *);
static map_info_t *exec_map(struct ps_prochandle *);
static map_info_t *object_to_map(struct ps_prochandle *, Lmid_t, const char *);
static map_info_t *object_name_to_map(struct ps_prochandle *,
	Lmid_t, const char *);
static GElf_Sym *sym_by_name(sym_tbl_t *, const char *, GElf_Sym *, uint_t *);
static int read_ehdr32(struct ps_prochandle *, Elf32_Ehdr *, uint_t *,
    uintptr_t);
#ifdef _LP64
/* The 64-bit ELF header reader is only declared in LP64 builds. */
static int read_ehdr64(struct ps_prochandle *, Elf64_Ehdr *, uint_t *,
    uintptr_t);
#endif
/*
 * Table initialized from the CRC32_TABLE macro.  Its consumers are not part
 * of this section of the file.
 */
static uint32_t psym_crc32[] = { CRC32_TABLE };
67 
/*
 * Bitmask with one bit set for each of the symbol types STT_OBJECT,
 * STT_FUNC, STT_COMMON and STT_TLS; the bit position is the STT_* value.
 */
#define	DATA_TYPES	\
	((1 << STT_OBJECT) | (1 << STT_FUNC) | \
	(1 << STT_COMMON) | (1 << STT_TLS))
/* Evaluates to 1 if symbol type 'tp' is one of the DATA_TYPES, else 0. */
#define	IS_DATA_TYPE(tp)	(((1 << (tp)) & DATA_TYPES) != 0)

/* Union of the read, write and execute mapping flag bits. */
#define	MA_RWX	(MA_READ | MA_WRITE | MA_EXEC)
74 
75 /*
76  * Minimum and maximum length of a build-id that we'll accept. Generally it's a
77  * 20 byte SHA1 and it's expected that the first byte (which is two ascii
78  * characters) indicates a directory and the remaining bytes become the file
79  * name. Therefore, our minimum length is at least 2 bytes (one for the
80  * directory and one for the name) and the max is a bit over the minimum -- 64,
81  * just in case folks do something odd. The string length is three times the max
82  * length. This accounts for the fact that each byte is two characters, a null
83  * terminator, and the directory '/' character.
84  */
#define	MINBUILDID	2	/* shortest build-id accepted, in bytes */
#define	MAXBUILDID	64	/* longest build-id accepted, in bytes */
#define	BUILDID_STRLEN	(3*MAXBUILDID)	/* size of the string form */
/*
 * Name strings; presumably the ELF section names searched for when locating
 * separate debug information.  Their users are not in this section of the
 * file, so confirm against them.
 */
#define	BUILDID_NAME	".note.gnu.build-id"
#define	DBGLINK_NAME	".gnu_debuglink"
90 
/*
 * Ordering selector with three choices.  Judging by the names these are a
 * "natural" order, an order by address and an order by name; the code that
 * consumes pr_order_t is outside this section of the file, so confirm the
 * exact meaning against its users.
 */
typedef enum {
	PRO_NATURAL,
	PRO_BYADDR,
	PRO_BYNAME
} pr_order_t;
96 
/*
 * Comparison callback (used with qsort() in this file) for two uintptr_t
 * values: yields -1, 0 or 1 when the first address is below, equal to or
 * above the second.
 */
static int
addr_cmp(const void *aa, const void *bb)
{
	const uintptr_t lhs = *(const uintptr_t *)aa;
	const uintptr_t rhs = *(const uintptr_t *)bb;

	/* Each comparison is 0 or 1, so the difference is -1, 0 or 1. */
	return ((lhs > rhs) - (lhs < rhs));
}
109 
110 /*
111  * This function creates a list of addresses for a load object's sections.
112  * The list is in ascending address order and alternates start address
113  * then end address for each section we're interested in. The function
114  * returns a pointer to the list, which must be freed by the caller.
115  */
116 static uintptr_t *
get_saddrs(struct ps_prochandle * P,uintptr_t ehdr_start,uint_t * n)117 get_saddrs(struct ps_prochandle *P, uintptr_t ehdr_start, uint_t *n)
118 {
119 	uintptr_t a, addr, *addrs, last = 0;
120 	uint_t i, naddrs = 0, unordered = 0;
121 
122 	if (P->status.pr_dmodel == PR_MODEL_ILP32) {
123 		Elf32_Ehdr ehdr;
124 		Elf32_Phdr phdr;
125 		uint_t phnum;
126 
127 		if (read_ehdr32(P, &ehdr, &phnum, ehdr_start) != 0)
128 			return (NULL);
129 
130 		addrs = malloc(sizeof (uintptr_t) * phnum * 2);
131 		a = ehdr_start + ehdr.e_phoff;
132 		for (i = 0; i < phnum; i++, a += ehdr.e_phentsize) {
133 			if (Pread(P, &phdr, sizeof (phdr), a) !=
134 			    sizeof (phdr)) {
135 				free(addrs);
136 				return (NULL);
137 			}
138 			if (phdr.p_type != PT_LOAD || phdr.p_memsz == 0)
139 				continue;
140 
141 			addr = phdr.p_vaddr;
142 			if (ehdr.e_type == ET_DYN)
143 				addr += ehdr_start;
144 			if (last > addr)
145 				unordered = 1;
146 			addrs[naddrs++] = addr;
147 			addrs[naddrs++] = last = addr + phdr.p_memsz - 1;
148 		}
149 #ifdef _LP64
150 	} else {
151 		Elf64_Ehdr ehdr;
152 		Elf64_Phdr phdr;
153 		uint_t phnum;
154 
155 		if (read_ehdr64(P, &ehdr, &phnum, ehdr_start) != 0)
156 			return (NULL);
157 
158 		addrs = malloc(sizeof (uintptr_t) * phnum * 2);
159 		a = ehdr_start + ehdr.e_phoff;
160 		for (i = 0; i < phnum; i++, a += ehdr.e_phentsize) {
161 			if (Pread(P, &phdr, sizeof (phdr), a) !=
162 			    sizeof (phdr)) {
163 				free(addrs);
164 				return (NULL);
165 			}
166 			if (phdr.p_type != PT_LOAD || phdr.p_memsz == 0)
167 				continue;
168 
169 			addr = phdr.p_vaddr;
170 			if (ehdr.e_type == ET_DYN)
171 				addr += ehdr_start;
172 			if (last > addr)
173 				unordered = 1;
174 			addrs[naddrs++] = addr;
175 			addrs[naddrs++] = last = addr + phdr.p_memsz - 1;
176 		}
177 #endif
178 	}
179 
180 	if (unordered)
181 		qsort(addrs, naddrs, sizeof (uintptr_t), addr_cmp);
182 
183 	*n = naddrs;
184 	return (addrs);
185 }
186 
187 /*
188  * Allocation function for a new file_info_t
189  */
190 file_info_t *
file_info_new(struct ps_prochandle * P,map_info_t * mptr)191 file_info_new(struct ps_prochandle *P, map_info_t *mptr)
192 {
193 	file_info_t *fptr;
194 	map_info_t *mp;
195 	uintptr_t mstart, mend, sstart, send;
196 	uint_t i;
197 
198 	if ((fptr = calloc(1, sizeof (file_info_t))) == NULL)
199 		return (NULL);
200 
201 	list_insert_tail(&P->file_head, fptr);
202 	(void) strcpy(fptr->file_pname, mptr->map_pmap.pr_mapname);
203 	mptr->map_file = fptr;
204 	fptr->file_ref = 1;
205 	fptr->file_fd = -1;
206 	fptr->file_dbgfile = -1;
207 	P->num_files++;
208 
209 	/*
210 	 * To figure out which map_info_t instances correspond to the mappings
211 	 * for this load object we try to obtain the start and end address
212 	 * for each section of our in-memory ELF image. If successful, we
213 	 * walk down the list of addresses and the list of map_info_t
214 	 * instances in lock step to correctly find the mappings that
215 	 * correspond to this load object.
216 	 */
217 	if ((fptr->file_saddrs = get_saddrs(P, mptr->map_pmap.pr_vaddr,
218 	    &fptr->file_nsaddrs)) == NULL)
219 		return (fptr);
220 
221 	mp = P->mappings;
222 	i = 0;
223 	while (mp < P->mappings + P->map_count && i < fptr->file_nsaddrs) {
224 
225 		/* Calculate the start and end of the mapping and section */
226 		mstart = mp->map_pmap.pr_vaddr;
227 		mend = mp->map_pmap.pr_vaddr + mp->map_pmap.pr_size;
228 		sstart = fptr->file_saddrs[i];
229 		send = fptr->file_saddrs[i + 1];
230 
231 		if (mend <= sstart) {
232 			/* This mapping is below the current section */
233 			mp++;
234 		} else if (mstart >= send) {
235 			/* This mapping is above the current section */
236 			i += 2;
237 		} else {
238 			/* This mapping overlaps the current section */
239 			if (mp->map_file == NULL) {
240 				dprintf("file_info_new: associating "
241 				    "segment at %p\n",
242 				    (void *)mp->map_pmap.pr_vaddr);
243 				mp->map_file = fptr;
244 				fptr->file_ref++;
245 			} else {
246 				dprintf("file_info_new: segment at %p "
247 				    "already associated with %s\n",
248 				    (void *)mp->map_pmap.pr_vaddr,
249 				    (mp == mptr ? "this file" :
250 				    mp->map_file->file_pname));
251 			}
252 			mp++;
253 		}
254 	}
255 
256 	return (fptr);
257 }
258 
259 /*
260  * Deallocation function for a file_info_t
261  */
262 static void
file_info_free(struct ps_prochandle * P,file_info_t * fptr)263 file_info_free(struct ps_prochandle *P, file_info_t *fptr)
264 {
265 	if (--fptr->file_ref == 0) {
266 		list_remove(&P->file_head, fptr);
267 		if (fptr->file_symtab.sym_elf) {
268 			(void) elf_end(fptr->file_symtab.sym_elf);
269 			free(fptr->file_symtab.sym_elfmem);
270 		}
271 		if (fptr->file_symtab.sym_byname)
272 			free(fptr->file_symtab.sym_byname);
273 		if (fptr->file_symtab.sym_byaddr)
274 			free(fptr->file_symtab.sym_byaddr);
275 
276 		if (fptr->file_dynsym.sym_elf) {
277 			(void) elf_end(fptr->file_dynsym.sym_elf);
278 			free(fptr->file_dynsym.sym_elfmem);
279 		}
280 		if (fptr->file_dynsym.sym_byname)
281 			free(fptr->file_dynsym.sym_byname);
282 		if (fptr->file_dynsym.sym_byaddr)
283 			free(fptr->file_dynsym.sym_byaddr);
284 
285 		if (fptr->file_lo)
286 			free(fptr->file_lo);
287 		if (fptr->file_lname)
288 			free(fptr->file_lname);
289 		if (fptr->file_rname)
290 			free(fptr->file_rname);
291 		if (fptr->file_elf)
292 			(void) elf_end(fptr->file_elf);
293 		if (fptr->file_elfmem != NULL)
294 			free(fptr->file_elfmem);
295 		if (fptr->file_fd >= 0)
296 			(void) close(fptr->file_fd);
297 		if (fptr->file_dbgelf)
298 			(void) elf_end(fptr->file_dbgelf);
299 		if (fptr->file_dbgfile >= 0)
300 			(void) close(fptr->file_dbgfile);
301 		ctf_close(fptr->file_ctfp);
302 		free(fptr->file_ctf_buf);
303 		if (fptr->file_saddrs)
304 			free(fptr->file_saddrs);
305 		free(fptr);
306 		P->num_files--;
307 	}
308 }
309 
310 /*
311  * Deallocation function for a map_info_t
312  */
313 static void
map_info_free(struct ps_prochandle * P,map_info_t * mptr)314 map_info_free(struct ps_prochandle *P, map_info_t *mptr)
315 {
316 	file_info_t *fptr;
317 
318 	if ((fptr = mptr->map_file) != NULL) {
319 		if (fptr->file_map == mptr)
320 			fptr->file_map = NULL;
321 		file_info_free(P, fptr);
322 	}
323 	if (P->execname && mptr == P->map_exec) {
324 		free(P->execname);
325 		P->execname = NULL;
326 	}
327 	if (P->auxv && (mptr == P->map_exec || mptr == P->map_ldso)) {
328 		free(P->auxv);
329 		P->auxv = NULL;
330 		P->nauxv = 0;
331 	}
332 	if (mptr == P->map_exec)
333 		P->map_exec = NULL;
334 	if (mptr == P->map_ldso)
335 		P->map_ldso = NULL;
336 }
337 
338 /*
339  * Call-back function for librtld_db to iterate through all of its shared
340  * libraries.  We use this to get the load object names for the mappings.
341  */
342 static int
map_iter(const rd_loadobj_t * lop,void * cd)343 map_iter(const rd_loadobj_t *lop, void *cd)
344 {
345 	char buf[PATH_MAX];
346 	struct ps_prochandle *P = cd;
347 	map_info_t *mptr;
348 	file_info_t *fptr;
349 
350 	dprintf("encountered rd object at %p\n", (void *)lop->rl_base);
351 
352 	if ((mptr = Paddr2mptr(P, lop->rl_base)) == NULL) {
353 		dprintf("map_iter: base address doesn't match any mapping\n");
354 		return (1); /* Base address does not match any mapping */
355 	}
356 
357 	if ((fptr = mptr->map_file) == NULL &&
358 	    (fptr = file_info_new(P, mptr)) == NULL) {
359 		dprintf("map_iter: failed to allocate a new file_info_t\n");
360 		return (1); /* Failed to allocate a new file_info_t */
361 	}
362 
363 	if ((fptr->file_lo == NULL) &&
364 	    (fptr->file_lo = malloc(sizeof (rd_loadobj_t))) == NULL) {
365 		dprintf("map_iter: failed to allocate rd_loadobj_t\n");
366 		file_info_free(P, fptr);
367 		return (1); /* Failed to allocate rd_loadobj_t */
368 	}
369 
370 	fptr->file_map = mptr;
371 	*fptr->file_lo = *lop;
372 
373 	fptr->file_lo->rl_plt_base = fptr->file_plt_base;
374 	fptr->file_lo->rl_plt_size = fptr->file_plt_size;
375 
376 	if (fptr->file_lname) {
377 		free(fptr->file_lname);
378 		fptr->file_lname = NULL;
379 		fptr->file_lbase = NULL;
380 	}
381 	if (fptr->file_rname) {
382 		free(fptr->file_rname);
383 		fptr->file_rname = NULL;
384 		fptr->file_rbase = NULL;
385 	}
386 
387 	if (Pread_string(P, buf, sizeof (buf), lop->rl_nameaddr) > 0) {
388 		if ((fptr->file_lname = strdup(buf)) != NULL)
389 			fptr->file_lbase = basename(fptr->file_lname);
390 	} else {
391 		dprintf("map_iter: failed to read string at %p\n",
392 		    (void *)lop->rl_nameaddr);
393 	}
394 
395 	if ((Pfindmap(P, mptr, buf, sizeof (buf)) != NULL) &&
396 	    ((fptr->file_rname = strdup(buf)) != NULL))
397 		fptr->file_rbase = basename(fptr->file_rname);
398 
399 	dprintf("loaded rd object %s lmid %lx\n",
400 	    fptr->file_lname ? buf : "<NULL>", lop->rl_lmident);
401 	return (1);
402 }
403 
404 static void
map_set(struct ps_prochandle * P,map_info_t * mptr,const char * lname)405 map_set(struct ps_prochandle *P, map_info_t *mptr, const char *lname)
406 {
407 	file_info_t *fptr;
408 	char buf[PATH_MAX];
409 
410 	if ((fptr = mptr->map_file) == NULL &&
411 	    (fptr = file_info_new(P, mptr)) == NULL)
412 		return; /* Failed to allocate a new file_info_t */
413 
414 	fptr->file_map = mptr;
415 
416 	if ((fptr->file_lo == NULL) &&
417 	    (fptr->file_lo = malloc(sizeof (rd_loadobj_t))) == NULL) {
418 		file_info_free(P, fptr);
419 		return; /* Failed to allocate rd_loadobj_t */
420 	}
421 
422 	(void) memset(fptr->file_lo, 0, sizeof (rd_loadobj_t));
423 	fptr->file_lo->rl_base = mptr->map_pmap.pr_vaddr;
424 	fptr->file_lo->rl_bend =
425 	    mptr->map_pmap.pr_vaddr + mptr->map_pmap.pr_size;
426 
427 	fptr->file_lo->rl_plt_base = fptr->file_plt_base;
428 	fptr->file_lo->rl_plt_size = fptr->file_plt_size;
429 
430 	if ((fptr->file_lname == NULL) &&
431 	    (fptr->file_lname = strdup(lname)) != NULL)
432 		fptr->file_lbase = basename(fptr->file_lname);
433 
434 	if ((Pfindmap(P, mptr, buf, sizeof (buf)) != NULL) &&
435 	    ((fptr->file_rname = strdup(buf)) != NULL))
436 		fptr->file_rbase = basename(fptr->file_rname);
437 }
438 
439 static void
load_static_maps(struct ps_prochandle * P)440 load_static_maps(struct ps_prochandle *P)
441 {
442 	map_info_t *mptr;
443 
444 	/*
445 	 * Construct the map for the a.out.
446 	 */
447 	if ((mptr = object_name_to_map(P, PR_LMID_EVERY, PR_OBJ_EXEC)) != NULL)
448 		map_set(P, mptr, "a.out");
449 
450 	/*
451 	 * If the dynamic linker exists for this process,
452 	 * construct the map for it.
453 	 */
454 	if (Pgetauxval(P, AT_BASE) != -1L &&
455 	    (mptr = object_name_to_map(P, PR_LMID_EVERY, PR_OBJ_LDSO)) != NULL)
456 		map_set(P, mptr, "ld.so.1");
457 }
458 
459 int
Preadmaps(struct ps_prochandle * P,prmap_t ** Pmapp,ssize_t * nmapp)460 Preadmaps(struct ps_prochandle *P, prmap_t **Pmapp, ssize_t *nmapp)
461 {
462 	return (P->ops.pop_read_maps(P, Pmapp, nmapp, P->data));
463 }
464 
465 /*
466  * Go through all the address space mappings, validating or updating
467  * the information already gathered, or gathering new information.
468  *
469  * This function is only called when we suspect that the mappings have changed
470  * because this is the first time we're calling it or because of rtld activity.
471  */
472 void
Pupdate_maps(struct ps_prochandle * P)473 Pupdate_maps(struct ps_prochandle *P)
474 {
475 	prmap_t *Pmap = NULL;
476 	prmap_t *pmap;
477 	ssize_t nmap;
478 	int i;
479 	uint_t oldmapcount;
480 	map_info_t *newmap, *newp;
481 	map_info_t *mptr;
482 
483 	if (P->info_valid || P->state == PS_UNDEAD)
484 		return;
485 
486 	Preadauxvec(P);
487 
488 	if (Preadmaps(P, &Pmap, &nmap) != 0)
489 		return;
490 
491 	if ((newmap = calloc(1, nmap * sizeof (map_info_t))) == NULL)
492 		return;
493 
494 	/*
495 	 * We try to merge any file information we may have for existing
496 	 * mappings, to avoid having to rebuild the file info.
497 	 */
498 	mptr = P->mappings;
499 	pmap = Pmap;
500 	newp = newmap;
501 	oldmapcount = P->map_count;
502 	for (i = 0; i < nmap; i++, pmap++, newp++) {
503 
504 		if (oldmapcount == 0) {
505 			/*
506 			 * We've exhausted all the old mappings.  Every new
507 			 * mapping should be added.
508 			 */
509 			newp->map_pmap = *pmap;
510 
511 		} else if (pmap->pr_vaddr == mptr->map_pmap.pr_vaddr &&
512 		    pmap->pr_size == mptr->map_pmap.pr_size &&
513 		    pmap->pr_offset == mptr->map_pmap.pr_offset &&
514 		    (pmap->pr_mflags & ~(MA_BREAK | MA_STACK)) ==
515 		    (mptr->map_pmap.pr_mflags & ~(MA_BREAK | MA_STACK)) &&
516 		    pmap->pr_pagesize == mptr->map_pmap.pr_pagesize &&
517 		    pmap->pr_shmid == mptr->map_pmap.pr_shmid &&
518 		    strcmp(pmap->pr_mapname, mptr->map_pmap.pr_mapname) == 0) {
519 
520 			/*
521 			 * This mapping matches exactly.  Copy over the old
522 			 * mapping, taking care to get the latest flags.
523 			 * Make sure the associated file_info_t is updated
524 			 * appropriately.
525 			 */
526 			*newp = *mptr;
527 			if (P->map_exec == mptr)
528 				P->map_exec = newp;
529 			if (P->map_ldso == mptr)
530 				P->map_ldso = newp;
531 			newp->map_pmap.pr_mflags = pmap->pr_mflags;
532 			if (mptr->map_file != NULL &&
533 			    mptr->map_file->file_map == mptr)
534 				mptr->map_file->file_map = newp;
535 			oldmapcount--;
536 			mptr++;
537 
538 		} else if (pmap->pr_vaddr + pmap->pr_size >
539 		    mptr->map_pmap.pr_vaddr) {
540 
541 			/*
542 			 * The old mapping doesn't exist any more, remove it
543 			 * from the list.
544 			 */
545 			map_info_free(P, mptr);
546 			oldmapcount--;
547 			i--;
548 			newp--;
549 			pmap--;
550 			mptr++;
551 
552 		} else {
553 
554 			/*
555 			 * This is a new mapping, add it directly.
556 			 */
557 			newp->map_pmap = *pmap;
558 		}
559 	}
560 
561 	/*
562 	 * Free any old maps
563 	 */
564 	while (oldmapcount) {
565 		map_info_free(P, mptr);
566 		oldmapcount--;
567 		mptr++;
568 	}
569 
570 	free(Pmap);
571 	if (P->mappings != NULL)
572 		free(P->mappings);
573 	P->mappings = newmap;
574 	P->map_count = P->map_alloc = nmap;
575 	P->info_valid = 1;
576 
577 	/*
578 	 * Consult librtld_db to get the load object
579 	 * names for all of the shared libraries.
580 	 */
581 	if (P->rap != NULL)
582 		(void) rd_loadobj_iter(P->rap, map_iter, P);
583 }
584 
585 /*
586  * Update all of the mappings and rtld_db as if by Pupdate_maps(), and then
587  * forcibly cache all of the symbol tables associated with all object files.
588  */
589 void
Pupdate_syms(struct ps_prochandle * P)590 Pupdate_syms(struct ps_prochandle *P)
591 {
592 	file_info_t *fptr;
593 
594 	Pupdate_maps(P);
595 
596 	for (fptr = list_head(&P->file_head); fptr != NULL;
597 	    fptr = list_next(&P->file_head, fptr)) {
598 		Pbuild_file_symtab(P, fptr);
599 		(void) Pbuild_file_ctf(P, fptr);
600 	}
601 }
602 
603 /*
604  * Return the librtld_db agent handle for the victim process.
605  * The handle will become invalid at the next successful exec() and the
606  * client (caller of proc_rd_agent()) must not use it beyond that point.
607  * If the process is already dead, we've already tried our best to
608  * create the agent during core file initialization.
609  */
610 rd_agent_t *
Prd_agent(struct ps_prochandle * P)611 Prd_agent(struct ps_prochandle *P)
612 {
613 	if (P->rap == NULL && P->state != PS_DEAD && P->state != PS_IDLE) {
614 		Pupdate_maps(P);
615 		if (P->num_files == 0)
616 			load_static_maps(P);
617 		rd_log(_libproc_debug);
618 		if ((P->rap = rd_new(P)) != NULL)
619 			(void) rd_loadobj_iter(P->rap, map_iter, P);
620 	}
621 	return (P->rap);
622 }
623 
624 /*
625  * Return the prmap_t structure containing 'addr', but only if it
626  * is in the dynamic linker's link map and is the text section.
627  */
628 const prmap_t *
Paddr_to_text_map(struct ps_prochandle * P,uintptr_t addr)629 Paddr_to_text_map(struct ps_prochandle *P, uintptr_t addr)
630 {
631 	map_info_t *mptr;
632 
633 	if (!P->info_valid)
634 		Pupdate_maps(P);
635 
636 	if ((mptr = Paddr2mptr(P, addr)) != NULL) {
637 		file_info_t *fptr = build_map_symtab(P, mptr);
638 		const prmap_t *pmp = &mptr->map_pmap;
639 
640 		/*
641 		 * Assume that if rl_data_base is NULL, it means that no
642 		 * data section was found for this load object, and that
643 		 * a section must be text. Otherwise, a section will be
644 		 * text unless it ends above the start of the data
645 		 * section.
646 		 */
647 		if (fptr != NULL && fptr->file_lo != NULL &&
648 		    (fptr->file_lo->rl_data_base == (uintptr_t)NULL ||
649 		    pmp->pr_vaddr + pmp->pr_size <=
650 		    fptr->file_lo->rl_data_base))
651 			return (pmp);
652 	}
653 
654 	return (NULL);
655 }
656 
657 /*
658  * Return the prmap_t structure containing 'addr' (no restrictions on
659  * the type of mapping).
660  */
661 const prmap_t *
Paddr_to_map(struct ps_prochandle * P,uintptr_t addr)662 Paddr_to_map(struct ps_prochandle *P, uintptr_t addr)
663 {
664 	map_info_t *mptr;
665 
666 	if (!P->info_valid)
667 		Pupdate_maps(P);
668 
669 	if ((mptr = Paddr2mptr(P, addr)) != NULL)
670 		return (&mptr->map_pmap);
671 
672 	return (NULL);
673 }
674 
675 /*
676  * Convert a full or partial load object name to the prmap_t for its
677  * corresponding primary text mapping.
678  */
679 const prmap_t *
Plmid_to_map(struct ps_prochandle * P,Lmid_t lmid,const char * name)680 Plmid_to_map(struct ps_prochandle *P, Lmid_t lmid, const char *name)
681 {
682 	map_info_t *mptr;
683 
684 	if (name == PR_OBJ_EVERY)
685 		return (NULL); /* A reasonable mistake */
686 
687 	if ((mptr = object_name_to_map(P, lmid, name)) != NULL)
688 		return (&mptr->map_pmap);
689 
690 	return (NULL);
691 }
692 
693 const prmap_t *
Pname_to_map(struct ps_prochandle * P,const char * name)694 Pname_to_map(struct ps_prochandle *P, const char *name)
695 {
696 	return (Plmid_to_map(P, PR_LMID_EVERY, name));
697 }
698 
699 const rd_loadobj_t *
Paddr_to_loadobj(struct ps_prochandle * P,uintptr_t addr)700 Paddr_to_loadobj(struct ps_prochandle *P, uintptr_t addr)
701 {
702 	map_info_t *mptr;
703 
704 	if (!P->info_valid)
705 		Pupdate_maps(P);
706 
707 	if ((mptr = Paddr2mptr(P, addr)) == NULL)
708 		return (NULL);
709 
710 	/*
711 	 * By building the symbol table, we implicitly bring the PLT
712 	 * information up to date in the load object.
713 	 */
714 	(void) build_map_symtab(P, mptr);
715 
716 	return (mptr->map_file->file_lo);
717 }
718 
719 const rd_loadobj_t *
Plmid_to_loadobj(struct ps_prochandle * P,Lmid_t lmid,const char * name)720 Plmid_to_loadobj(struct ps_prochandle *P, Lmid_t lmid, const char *name)
721 {
722 	map_info_t *mptr;
723 
724 	if (name == PR_OBJ_EVERY)
725 		return (NULL);
726 
727 	if ((mptr = object_name_to_map(P, lmid, name)) == NULL)
728 		return (NULL);
729 
730 	/*
731 	 * By building the symbol table, we implicitly bring the PLT
732 	 * information up to date in the load object.
733 	 */
734 	(void) build_map_symtab(P, mptr);
735 
736 	return (mptr->map_file->file_lo);
737 }
738 
739 const rd_loadobj_t *
Pname_to_loadobj(struct ps_prochandle * P,const char * name)740 Pname_to_loadobj(struct ps_prochandle *P, const char *name)
741 {
742 	return (Plmid_to_loadobj(P, PR_LMID_EVERY, name));
743 }
744 
745 ctf_file_t *
Pbuild_file_ctf(struct ps_prochandle * P,file_info_t * fptr)746 Pbuild_file_ctf(struct ps_prochandle *P, file_info_t *fptr)
747 {
748 	ctf_sect_t ctdata, symtab, strtab;
749 	sym_tbl_t *symp;
750 	int err;
751 
752 	if (fptr->file_ctfp != NULL)
753 		return (fptr->file_ctfp);
754 
755 	Pbuild_file_symtab(P, fptr);
756 
757 	if (fptr->file_ctf_size == 0)
758 		return (NULL);
759 
760 	symp = fptr->file_ctf_dyn ? &fptr->file_dynsym : &fptr->file_symtab;
761 	if (symp->sym_data_pri == NULL)
762 		return (NULL);
763 
764 	/*
765 	 * The buffer may alread be allocated if this is a core file that
766 	 * contained CTF data for this file.
767 	 */
768 	if (fptr->file_ctf_buf == NULL) {
769 		fptr->file_ctf_buf = malloc(fptr->file_ctf_size);
770 		if (fptr->file_ctf_buf == NULL) {
771 			dprintf("failed to allocate ctf buffer\n");
772 			return (NULL);
773 		}
774 
775 		if (pread(fptr->file_fd, fptr->file_ctf_buf,
776 		    fptr->file_ctf_size, fptr->file_ctf_off) !=
777 		    fptr->file_ctf_size) {
778 			free(fptr->file_ctf_buf);
779 			fptr->file_ctf_buf = NULL;
780 			dprintf("failed to read ctf data\n");
781 			return (NULL);
782 		}
783 	}
784 
785 	ctdata.cts_name = ".SUNW_ctf";
786 	ctdata.cts_type = SHT_PROGBITS;
787 	ctdata.cts_flags = 0;
788 	ctdata.cts_data = fptr->file_ctf_buf;
789 	ctdata.cts_size = fptr->file_ctf_size;
790 	ctdata.cts_entsize = 1;
791 	ctdata.cts_offset = 0;
792 
793 	symtab.cts_name = fptr->file_ctf_dyn ? ".dynsym" : ".symtab";
794 	symtab.cts_type = symp->sym_hdr_pri.sh_type;
795 	symtab.cts_flags = symp->sym_hdr_pri.sh_flags;
796 	symtab.cts_data = symp->sym_data_pri->d_buf;
797 	symtab.cts_size = symp->sym_hdr_pri.sh_size;
798 	symtab.cts_entsize = symp->sym_hdr_pri.sh_entsize;
799 	symtab.cts_offset = symp->sym_hdr_pri.sh_offset;
800 
801 	strtab.cts_name = fptr->file_ctf_dyn ? ".dynstr" : ".strtab";
802 	strtab.cts_type = symp->sym_strhdr.sh_type;
803 	strtab.cts_flags = symp->sym_strhdr.sh_flags;
804 	strtab.cts_data = symp->sym_strs;
805 	strtab.cts_size = symp->sym_strhdr.sh_size;
806 	strtab.cts_entsize = symp->sym_strhdr.sh_entsize;
807 	strtab.cts_offset = symp->sym_strhdr.sh_offset;
808 
809 	fptr->file_ctfp = ctf_bufopen(&ctdata, &symtab, &strtab, &err);
810 	if (fptr->file_ctfp == NULL) {
811 		dprintf("ctf_bufopen() failed, error code %d\n", err);
812 		free(fptr->file_ctf_buf);
813 		fptr->file_ctf_buf = NULL;
814 		return (NULL);
815 	}
816 
817 	dprintf("loaded %lu bytes of CTF data for %s\n",
818 	    (ulong_t)fptr->file_ctf_size, fptr->file_pname);
819 
820 	return (fptr->file_ctfp);
821 }
822 
823 ctf_file_t *
Paddr_to_ctf(struct ps_prochandle * P,uintptr_t addr)824 Paddr_to_ctf(struct ps_prochandle *P, uintptr_t addr)
825 {
826 	map_info_t *mptr;
827 	file_info_t *fptr;
828 
829 	if (!P->info_valid)
830 		Pupdate_maps(P);
831 
832 	if ((mptr = Paddr2mptr(P, addr)) == NULL ||
833 	    (fptr = mptr->map_file) == NULL)
834 		return (NULL);
835 
836 	return (Pbuild_file_ctf(P, fptr));
837 }
838 
839 ctf_file_t *
Plmid_to_ctf(struct ps_prochandle * P,Lmid_t lmid,const char * name)840 Plmid_to_ctf(struct ps_prochandle *P, Lmid_t lmid, const char *name)
841 {
842 	map_info_t *mptr;
843 	file_info_t *fptr = NULL;
844 
845 	if (name == PR_OBJ_EVERY)
846 		return (NULL);
847 
848 	/*
849 	 * While most idle files are all ELF objects, not all of them have
850 	 * mapping information available. There's nothing which would make
851 	 * sense to fake up for ET_REL. Instead, if we're being asked for their
852 	 * executable object and we know that the information is valid and they
853 	 * only have a single file, we jump straight to that file pointer.
854 	 */
855 	if (P->state == PS_IDLE && name == PR_OBJ_EXEC && P->info_valid == 1 &&
856 	    P->num_files == 1 && P->mappings == NULL) {
857 		fptr = list_head(&P->file_head);
858 	}
859 
860 	if (fptr == NULL) {
861 		if ((mptr = object_name_to_map(P, lmid, name)) == NULL ||
862 		    (fptr = mptr->map_file) == NULL)
863 			return (NULL);
864 	}
865 
866 	return (Pbuild_file_ctf(P, fptr));
867 }
868 
869 ctf_file_t *
Pname_to_ctf(struct ps_prochandle * P,const char * name)870 Pname_to_ctf(struct ps_prochandle *P, const char *name)
871 {
872 	return (Plmid_to_ctf(P, PR_LMID_EVERY, name));
873 }
874 
875 void
Preadauxvec(struct ps_prochandle * P)876 Preadauxvec(struct ps_prochandle *P)
877 {
878 	if (P->auxv != NULL) {
879 		free(P->auxv);
880 		P->auxv = NULL;
881 		P->nauxv = 0;
882 	}
883 
884 	P->ops.pop_read_aux(P, &P->auxv, &P->nauxv, P->data);
885 }
886 
887 /*
888  * Return a requested element from the process's aux vector.
889  * Return -1 on failure (this is adequate for our purposes).
890  */
891 long
Pgetauxval(struct ps_prochandle * P,int type)892 Pgetauxval(struct ps_prochandle *P, int type)
893 {
894 	auxv_t *auxv;
895 
896 	if (P->auxv == NULL)
897 		Preadauxvec(P);
898 
899 	if (P->auxv == NULL)
900 		return (-1);
901 
902 	for (auxv = P->auxv; auxv->a_type != AT_NULL; auxv++) {
903 		if (auxv->a_type == type)
904 			return (auxv->a_un.a_val);
905 	}
906 
907 	return (-1);
908 }
909 
910 /*
911  * Return a pointer to our internal copy of the process's aux vector.
912  * The caller should not hold on to this pointer across any libproc calls.
913  */
914 const auxv_t *
Pgetauxvec(struct ps_prochandle * P)915 Pgetauxvec(struct ps_prochandle *P)
916 {
917 	static const auxv_t empty = { AT_NULL, 0L };
918 
919 	if (P->auxv == NULL)
920 		Preadauxvec(P);
921 
922 	if (P->auxv == NULL)
923 		return (&empty);
924 
925 	return (P->auxv);
926 }
927 
928 /*
929  * Return 1 if the given mapping corresponds to the given file_info_t's
930  * load object; return 0 otherwise.
931  */
932 static int
is_mapping_in_file(struct ps_prochandle * P,map_info_t * mptr,file_info_t * fptr)933 is_mapping_in_file(struct ps_prochandle *P, map_info_t *mptr, file_info_t *fptr)
934 {
935 	prmap_t *pmap = &mptr->map_pmap;
936 	rd_loadobj_t *lop = fptr->file_lo;
937 	uint_t i;
938 	uintptr_t mstart, mend, sstart, send;
939 
940 	/*
941 	 * We can get for free the start address of the text and data
942 	 * sections of the load object. Start by seeing if the mapping
943 	 * encloses either of these.
944 	 */
945 	if ((pmap->pr_vaddr <= lop->rl_base &&
946 	    lop->rl_base < pmap->pr_vaddr + pmap->pr_size) ||
947 	    (pmap->pr_vaddr <= lop->rl_data_base &&
948 	    lop->rl_data_base < pmap->pr_vaddr + pmap->pr_size))
949 		return (1);
950 
951 	/*
952 	 * It's still possible that this mapping correponds to the load
953 	 * object. Consider the example of a mapping whose start and end
954 	 * addresses correspond to those of the load object's text section.
955 	 * If the mapping splits, e.g. as a result of a segment demotion,
956 	 * then although both mappings are still backed by the same section,
957 	 * only one will be seen to enclose that section's start address.
958 	 * Thus, to be rigorous, we ask not whether this mapping encloses
959 	 * the start of a section, but whether there exists a section that
960 	 * overlaps this mapping.
961 	 *
962 	 * If we don't already have the section addresses, and we successfully
963 	 * get them, then we cache them in case we come here again.
964 	 */
965 	if (fptr->file_saddrs == NULL &&
966 	    (fptr->file_saddrs = get_saddrs(P,
967 	    fptr->file_map->map_pmap.pr_vaddr, &fptr->file_nsaddrs)) == NULL)
968 		return (0);
969 
970 	mstart = mptr->map_pmap.pr_vaddr;
971 	mend = mptr->map_pmap.pr_vaddr + mptr->map_pmap.pr_size;
972 	for (i = 0; i < fptr->file_nsaddrs; i += 2) {
973 		/* Does this section overlap the mapping? */
974 		sstart = fptr->file_saddrs[i];
975 		send = fptr->file_saddrs[i + 1];
976 		if (!(mend <= sstart || mstart >= send))
977 			return (1);
978 	}
979 
980 	return (0);
981 }
982 
983 /*
984  * Find or build the symbol table for the given mapping.
985  */
986 static file_info_t *
build_map_symtab(struct ps_prochandle * P,map_info_t * mptr)987 build_map_symtab(struct ps_prochandle *P, map_info_t *mptr)
988 {
989 	prmap_t *pmap = &mptr->map_pmap;
990 	file_info_t *fptr;
991 
992 	if ((fptr = mptr->map_file) != NULL) {
993 		Pbuild_file_symtab(P, fptr);
994 		return (fptr);
995 	}
996 
997 	if (pmap->pr_mapname[0] == '\0')
998 		return (NULL);
999 
1000 	/*
1001 	 * Attempt to find a matching file.
1002 	 * (A file can be mapped at several different addresses.)
1003 	 */
1004 	for (fptr = list_head(&P->file_head); fptr != NULL;
1005 	    fptr = list_next(&P->file_head, fptr)) {
1006 		if (strcmp(fptr->file_pname, pmap->pr_mapname) == 0 &&
1007 		    fptr->file_lo && is_mapping_in_file(P, mptr, fptr)) {
1008 			mptr->map_file = fptr;
1009 			fptr->file_ref++;
1010 			Pbuild_file_symtab(P, fptr);
1011 			return (fptr);
1012 		}
1013 	}
1014 
1015 	/*
1016 	 * If we need to create a new file_info structure, iterate
1017 	 * through the load objects in order to attempt to connect
1018 	 * this new file with its primary text mapping.  We again
1019 	 * need to handle ld.so as a special case because we need
1020 	 * to be able to bootstrap librtld_db.
1021 	 */
1022 	if ((fptr = file_info_new(P, mptr)) == NULL)
1023 		return (NULL);
1024 
1025 	if (P->map_ldso != mptr) {
1026 		if (P->rap != NULL)
1027 			(void) rd_loadobj_iter(P->rap, map_iter, P);
1028 		else
1029 			(void) Prd_agent(P);
1030 	} else {
1031 		fptr->file_map = mptr;
1032 	}
1033 
1034 	/*
1035 	 * If librtld_db wasn't able to help us connect the file to a primary
1036 	 * text mapping, set file_map to the current mapping because we require
1037 	 * fptr->file_map to be set in Pbuild_file_symtab.  librtld_db may be
1038 	 * unaware of what's going on in the rare case that a legitimate ELF
1039 	 * file has been mmap(2)ed into the process address space *without*
1040 	 * the use of dlopen(3x).
1041 	 */
1042 	if (fptr->file_map == NULL)
1043 		fptr->file_map = mptr;
1044 
1045 	Pbuild_file_symtab(P, fptr);
1046 
1047 	return (fptr);
1048 }
1049 
1050 static int
read_ehdr32(struct ps_prochandle * P,Elf32_Ehdr * ehdr,uint_t * phnum,uintptr_t addr)1051 read_ehdr32(struct ps_prochandle *P, Elf32_Ehdr *ehdr, uint_t *phnum,
1052     uintptr_t addr)
1053 {
1054 	if (Pread(P, ehdr, sizeof (*ehdr), addr) != sizeof (*ehdr))
1055 		return (-1);
1056 
1057 	if (ehdr->e_ident[EI_MAG0] != ELFMAG0 ||
1058 	    ehdr->e_ident[EI_MAG1] != ELFMAG1 ||
1059 	    ehdr->e_ident[EI_MAG2] != ELFMAG2 ||
1060 	    ehdr->e_ident[EI_MAG3] != ELFMAG3 ||
1061 	    ehdr->e_ident[EI_CLASS] != ELFCLASS32 ||
1062 #ifdef _BIG_ENDIAN
1063 	    ehdr->e_ident[EI_DATA] != ELFDATA2MSB ||
1064 #else
1065 	    ehdr->e_ident[EI_DATA] != ELFDATA2LSB ||
1066 #endif
1067 	    ehdr->e_ident[EI_VERSION] != EV_CURRENT)
1068 		return (-1);
1069 
1070 	if ((*phnum = ehdr->e_phnum) == PN_XNUM) {
1071 		Elf32_Shdr shdr0;
1072 
1073 		if (ehdr->e_shoff == 0 || ehdr->e_shentsize < sizeof (shdr0) ||
1074 		    Pread(P, &shdr0, sizeof (shdr0), addr + ehdr->e_shoff) !=
1075 		    sizeof (shdr0))
1076 			return (-1);
1077 
1078 		if (shdr0.sh_info != 0)
1079 			*phnum = shdr0.sh_info;
1080 	}
1081 
1082 	return (0);
1083 }
1084 
1085 static int
read_dynamic_phdr32(struct ps_prochandle * P,const Elf32_Ehdr * ehdr,uint_t phnum,Elf32_Phdr * phdr,uintptr_t addr)1086 read_dynamic_phdr32(struct ps_prochandle *P, const Elf32_Ehdr *ehdr,
1087     uint_t phnum, Elf32_Phdr *phdr, uintptr_t addr)
1088 {
1089 	uint_t i;
1090 
1091 	for (i = 0; i < phnum; i++) {
1092 		uintptr_t a = addr + ehdr->e_phoff + i * ehdr->e_phentsize;
1093 		if (Pread(P, phdr, sizeof (*phdr), a) != sizeof (*phdr))
1094 			return (-1);
1095 
1096 		if (phdr->p_type == PT_DYNAMIC)
1097 			return (0);
1098 	}
1099 
1100 	return (-1);
1101 }
1102 
1103 #ifdef _LP64
1104 static int
read_ehdr64(struct ps_prochandle * P,Elf64_Ehdr * ehdr,uint_t * phnum,uintptr_t addr)1105 read_ehdr64(struct ps_prochandle *P, Elf64_Ehdr *ehdr, uint_t *phnum,
1106     uintptr_t addr)
1107 {
1108 	if (Pread(P, ehdr, sizeof (Elf64_Ehdr), addr) != sizeof (Elf64_Ehdr))
1109 		return (-1);
1110 
1111 	if (ehdr->e_ident[EI_MAG0] != ELFMAG0 ||
1112 	    ehdr->e_ident[EI_MAG1] != ELFMAG1 ||
1113 	    ehdr->e_ident[EI_MAG2] != ELFMAG2 ||
1114 	    ehdr->e_ident[EI_MAG3] != ELFMAG3 ||
1115 	    ehdr->e_ident[EI_CLASS] != ELFCLASS64 ||
1116 #ifdef _BIG_ENDIAN
1117 	    ehdr->e_ident[EI_DATA] != ELFDATA2MSB ||
1118 #else
1119 	    ehdr->e_ident[EI_DATA] != ELFDATA2LSB ||
1120 #endif
1121 	    ehdr->e_ident[EI_VERSION] != EV_CURRENT)
1122 		return (-1);
1123 
1124 	if ((*phnum = ehdr->e_phnum) == PN_XNUM) {
1125 		Elf64_Shdr shdr0;
1126 
1127 		if (ehdr->e_shoff == 0 || ehdr->e_shentsize < sizeof (shdr0) ||
1128 		    Pread(P, &shdr0, sizeof (shdr0), addr + ehdr->e_shoff) !=
1129 		    sizeof (shdr0))
1130 			return (-1);
1131 
1132 		if (shdr0.sh_info != 0)
1133 			*phnum = shdr0.sh_info;
1134 	}
1135 
1136 	return (0);
1137 }
1138 
1139 static int
read_dynamic_phdr64(struct ps_prochandle * P,const Elf64_Ehdr * ehdr,uint_t phnum,Elf64_Phdr * phdr,uintptr_t addr)1140 read_dynamic_phdr64(struct ps_prochandle *P, const Elf64_Ehdr *ehdr,
1141     uint_t phnum, Elf64_Phdr *phdr, uintptr_t addr)
1142 {
1143 	uint_t i;
1144 
1145 	for (i = 0; i < phnum; i++) {
1146 		uintptr_t a = addr + ehdr->e_phoff + i * ehdr->e_phentsize;
1147 		if (Pread(P, phdr, sizeof (*phdr), a) != sizeof (*phdr))
1148 			return (-1);
1149 
1150 		if (phdr->p_type == PT_DYNAMIC)
1151 			return (0);
1152 	}
1153 
1154 	return (-1);
1155 }
1156 #endif	/* _LP64 */
1157 
/*
 * The text segment for each load object contains the elf header and
 * program headers. We can use this information to determine if the
 * file that corresponds to the load object is the same file that
 * was loaded into the process's address space. There can be a discrepancy
 * if a file is recompiled after the process is started or if the target
 * represents a core file from a differently configured system -- two
 * common examples. The DT_CHECKSUM entry in the dynamic section
 * provides an easy method of comparison. It is important to note that
 * the dynamic section usually lives in the data segment, but the meta
 * data we use to find the dynamic section lives in the text segment so
 * if either of those segments is absent we can't proceed.
 *
 * We're looking through the elf file for several items: the symbol tables
 * (both dynsym and symtab), the procedure linkage table (PLT) base,
 * size, and relocation base, and the CTF information. Most of this can
 * be recovered from the loaded image of the file itself, the exceptions
 * being the symtab and CTF data.
 *
 * First we try to open the file that we think corresponds to the load
 * object; if the DT_CHECKSUM values match, we're all set, and can simply
 * recover all the information we need from the file. If the values of
 * DT_CHECKSUM don't match, or if we can't access the file for whatever
 * reason, we fake up an elf file to use in its stead. If we can't read
 * the elf data in the process's address space, we fall back to using
 * the file even though it may give inaccurate information.
 *
 * The elf file that we fake up has to consist of sections for the
 * dynsym, the PLT and the dynamic section. Note that in the case of a
 * core file, we'll get the CTF data in the file_info_t later on from
 * a section embedded in the core file (if it's present).
 *
 * file_differs() conservatively looks for mismatched files, identifying
 * a match when there is any ambiguity (since that's the legacy behavior).
 */
1193 static int
file_differs(struct ps_prochandle * P,Elf * elf,file_info_t * fptr)1194 file_differs(struct ps_prochandle *P, Elf *elf, file_info_t *fptr)
1195 {
1196 	Elf_Scn *scn;
1197 	GElf_Shdr shdr;
1198 	GElf_Dyn dyn;
1199 	Elf_Data *data;
1200 	uint_t i, ndyn;
1201 	GElf_Xword cksum;
1202 	uintptr_t addr;
1203 
1204 	if (fptr->file_map == NULL)
1205 		return (0);
1206 
1207 	if ((Pcontent(P) & (CC_CONTENT_TEXT | CC_CONTENT_DATA)) !=
1208 	    (CC_CONTENT_TEXT | CC_CONTENT_DATA))
1209 		return (0);
1210 
1211 	/*
1212 	 * First, we find the checksum value in the elf file.
1213 	 */
1214 	scn = NULL;
1215 	while ((scn = elf_nextscn(elf, scn)) != NULL) {
1216 		if (gelf_getshdr(scn, &shdr) != NULL &&
1217 		    shdr.sh_type == SHT_DYNAMIC)
1218 			goto found_shdr;
1219 	}
1220 	return (0);
1221 
1222 found_shdr:
1223 	if ((data = elf_getdata(scn, NULL)) == NULL)
1224 		return (0);
1225 
1226 	if (P->status.pr_dmodel == PR_MODEL_ILP32)
1227 		ndyn = shdr.sh_size / sizeof (Elf32_Dyn);
1228 #ifdef _LP64
1229 	else if (P->status.pr_dmodel == PR_MODEL_LP64)
1230 		ndyn = shdr.sh_size / sizeof (Elf64_Dyn);
1231 #endif
1232 	else
1233 		return (0);
1234 
1235 	for (i = 0; i < ndyn; i++) {
1236 		if (gelf_getdyn(data, i, &dyn) != NULL &&
1237 		    dyn.d_tag == DT_CHECKSUM)
1238 			goto found_cksum;
1239 	}
1240 
1241 	/*
1242 	 * The in-memory ELF has no DT_CHECKSUM section, but we will report it
1243 	 * as matching the file anyhow.
1244 	 */
1245 	return (0);
1246 
1247 found_cksum:
1248 	cksum = dyn.d_un.d_val;
1249 	dprintf("elf cksum value is %llx\n", (u_longlong_t)cksum);
1250 
1251 	/*
1252 	 * Get the base of the text mapping that corresponds to this file.
1253 	 */
1254 	addr = fptr->file_map->map_pmap.pr_vaddr;
1255 
1256 	if (P->status.pr_dmodel == PR_MODEL_ILP32) {
1257 		Elf32_Ehdr ehdr;
1258 		Elf32_Phdr phdr;
1259 		Elf32_Dyn dync, *dynp;
1260 		uint_t phnum, i;
1261 
1262 		if (read_ehdr32(P, &ehdr, &phnum, addr) != 0 ||
1263 		    read_dynamic_phdr32(P, &ehdr, phnum, &phdr, addr) != 0)
1264 			return (0);
1265 
1266 		if (ehdr.e_type == ET_DYN)
1267 			phdr.p_vaddr += addr;
1268 		if ((dynp = malloc(phdr.p_filesz)) == NULL)
1269 			return (0);
1270 		dync.d_tag = DT_NULL;
1271 		if (Pread(P, dynp, phdr.p_filesz, phdr.p_vaddr) !=
1272 		    phdr.p_filesz) {
1273 			free(dynp);
1274 			return (0);
1275 		}
1276 
1277 		for (i = 0; i < phdr.p_filesz / sizeof (Elf32_Dyn); i++) {
1278 			if (dynp[i].d_tag == DT_CHECKSUM)
1279 				dync = dynp[i];
1280 		}
1281 
1282 		free(dynp);
1283 
1284 		if (dync.d_tag != DT_CHECKSUM)
1285 			return (0);
1286 
1287 		dprintf("image cksum value is %llx\n",
1288 		    (u_longlong_t)dync.d_un.d_val);
1289 		return (dync.d_un.d_val != cksum);
1290 #ifdef _LP64
1291 	} else if (P->status.pr_dmodel == PR_MODEL_LP64) {
1292 		Elf64_Ehdr ehdr;
1293 		Elf64_Phdr phdr;
1294 		Elf64_Dyn dync, *dynp;
1295 		uint_t phnum, i;
1296 
1297 		if (read_ehdr64(P, &ehdr, &phnum, addr) != 0 ||
1298 		    read_dynamic_phdr64(P, &ehdr, phnum, &phdr, addr) != 0)
1299 			return (0);
1300 
1301 		if (ehdr.e_type == ET_DYN)
1302 			phdr.p_vaddr += addr;
1303 		if ((dynp = malloc(phdr.p_filesz)) == NULL)
1304 			return (0);
1305 		dync.d_tag = DT_NULL;
1306 		if (Pread(P, dynp, phdr.p_filesz, phdr.p_vaddr) !=
1307 		    phdr.p_filesz) {
1308 			free(dynp);
1309 			return (0);
1310 		}
1311 
1312 		for (i = 0; i < phdr.p_filesz / sizeof (Elf64_Dyn); i++) {
1313 			if (dynp[i].d_tag == DT_CHECKSUM)
1314 				dync = dynp[i];
1315 		}
1316 
1317 		free(dynp);
1318 
1319 		if (dync.d_tag != DT_CHECKSUM)
1320 			return (0);
1321 
1322 		dprintf("image cksum value is %llx\n",
1323 		    (u_longlong_t)dync.d_un.d_val);
1324 		return (dync.d_un.d_val != cksum);
1325 #endif	/* _LP64 */
1326 	}
1327 
1328 	return (0);
1329 }
1330 
1331 /*
1332  * Read data from the specified process and construct an in memory
1333  * image of an ELF file that represents it well enough to let
1334  * us probe it for information.
1335  */
1336 static Elf *
fake_elf(struct ps_prochandle * P,file_info_t * fptr)1337 fake_elf(struct ps_prochandle *P, file_info_t *fptr)
1338 {
1339 	Elf *elf;
1340 	uintptr_t addr;
1341 	uint_t phnum;
1342 
1343 	if (fptr->file_map == NULL)
1344 		return (NULL);
1345 
1346 	if ((Pcontent(P) & (CC_CONTENT_TEXT | CC_CONTENT_DATA)) !=
1347 	    (CC_CONTENT_TEXT | CC_CONTENT_DATA))
1348 		return (NULL);
1349 
1350 	addr = fptr->file_map->map_pmap.pr_vaddr;
1351 
1352 	if (P->status.pr_dmodel == PR_MODEL_ILP32) {
1353 		Elf32_Ehdr ehdr;
1354 		Elf32_Phdr phdr;
1355 
1356 		if ((read_ehdr32(P, &ehdr, &phnum, addr) != 0) ||
1357 		    read_dynamic_phdr32(P, &ehdr, phnum, &phdr, addr) != 0)
1358 			return (NULL);
1359 
1360 		elf = fake_elf32(P, fptr, addr, &ehdr, phnum, &phdr);
1361 #ifdef _LP64
1362 	} else {
1363 		Elf64_Ehdr ehdr;
1364 		Elf64_Phdr phdr;
1365 
1366 		if (read_ehdr64(P, &ehdr, &phnum, addr) != 0 ||
1367 		    read_dynamic_phdr64(P, &ehdr, phnum, &phdr, addr) != 0)
1368 			return (NULL);
1369 
1370 		elf = fake_elf64(P, fptr, addr, &ehdr, phnum, &phdr);
1371 #endif
1372 	}
1373 
1374 	return (elf);
1375 }
1376 
/*
 * We wouldn't need these if qsort(3C) took an argument for the callback...
 *
 * byaddr_cmp() and byname_cmp() are handed only the two index values being
 * compared, so optimize_symtab() publishes the symbol array and string
 * table they should consult in sort_syms and sort_strs around its qsort()
 * calls and resets both to NULL afterwards.  sort_mtx is held for that
 * whole window.
 */
static mutex_t sort_mtx = DEFAULTMUTEX;
static char *sort_strs;
static GElf_Sym *sort_syms;
1383 
1384 int
byaddr_cmp_common(GElf_Sym * a,char * aname,GElf_Sym * b,char * bname)1385 byaddr_cmp_common(GElf_Sym *a, char *aname, GElf_Sym *b, char *bname)
1386 {
1387 	if (a->st_value < b->st_value)
1388 		return (-1);
1389 	if (a->st_value > b->st_value)
1390 		return (1);
1391 
1392 	/*
1393 	 * Prefer the function to the non-function.
1394 	 */
1395 	if (GELF_ST_TYPE(a->st_info) != GELF_ST_TYPE(b->st_info)) {
1396 		if (GELF_ST_TYPE(a->st_info) == STT_FUNC)
1397 			return (-1);
1398 		if (GELF_ST_TYPE(b->st_info) == STT_FUNC)
1399 			return (1);
1400 	}
1401 
1402 	/*
1403 	 * Prefer the weak or strong global symbol to the local symbol.
1404 	 */
1405 	if (GELF_ST_BIND(a->st_info) != GELF_ST_BIND(b->st_info)) {
1406 		if (GELF_ST_BIND(b->st_info) == STB_LOCAL)
1407 			return (-1);
1408 		if (GELF_ST_BIND(a->st_info) == STB_LOCAL)
1409 			return (1);
1410 	}
1411 
1412 	/*
1413 	 * Prefer the symbol that doesn't begin with a '$' since compilers and
1414 	 * other symbol generators often use it as a prefix.
1415 	 */
1416 	if (*bname == '$')
1417 		return (-1);
1418 	if (*aname == '$')
1419 		return (1);
1420 
1421 	/*
1422 	 * Prefer the name with fewer leading underscores in the name.
1423 	 */
1424 	while (*aname == '_' && *bname == '_') {
1425 		aname++;
1426 		bname++;
1427 	}
1428 
1429 	if (*bname == '_')
1430 		return (-1);
1431 	if (*aname == '_')
1432 		return (1);
1433 
1434 	/*
1435 	 * Prefer the symbol with the smaller size.
1436 	 */
1437 	if (a->st_size < b->st_size)
1438 		return (-1);
1439 	if (a->st_size > b->st_size)
1440 		return (1);
1441 
1442 	/*
1443 	 * All other factors being equal, fall back to lexicographic order.
1444 	 */
1445 	return (strcmp(aname, bname));
1446 }
1447 
1448 static int
byaddr_cmp(const void * aa,const void * bb)1449 byaddr_cmp(const void *aa, const void *bb)
1450 {
1451 	GElf_Sym *a = &sort_syms[*(uint_t *)aa];
1452 	GElf_Sym *b = &sort_syms[*(uint_t *)bb];
1453 	char *aname = sort_strs + a->st_name;
1454 	char *bname = sort_strs + b->st_name;
1455 
1456 	return (byaddr_cmp_common(a, aname, b, bname));
1457 }
1458 
1459 static int
byname_cmp(const void * aa,const void * bb)1460 byname_cmp(const void *aa, const void *bb)
1461 {
1462 	GElf_Sym *a = &sort_syms[*(uint_t *)aa];
1463 	GElf_Sym *b = &sort_syms[*(uint_t *)bb];
1464 	char *aname = sort_strs + a->st_name;
1465 	char *bname = sort_strs + b->st_name;
1466 
1467 	return (strcmp(aname, bname));
1468 }
1469 
1470 /*
1471  * Given a symbol index, look up the corresponding symbol from the
1472  * given symbol table.
1473  *
1474  * This function allows the caller to treat the symbol table as a single
1475  * logical entity even though there may be 2 actual ELF symbol tables
1476  * involved. See the comments in Pcontrol.h for details.
1477  */
1478 static GElf_Sym *
symtab_getsym(sym_tbl_t * symtab,int ndx,GElf_Sym * dst)1479 symtab_getsym(sym_tbl_t *symtab, int ndx, GElf_Sym *dst)
1480 {
1481 	/* If index is in range of primary symtab, look it up there */
1482 	if (ndx >= symtab->sym_symn_aux) {
1483 		return (gelf_getsym(symtab->sym_data_pri,
1484 		    ndx - symtab->sym_symn_aux, dst));
1485 	}
1486 
1487 	/* Not in primary: Look it up in the auxiliary symtab */
1488 	return (gelf_getsym(symtab->sym_data_aux, ndx, dst));
1489 }
1490 
1491 void
optimize_symtab(sym_tbl_t * symtab)1492 optimize_symtab(sym_tbl_t *symtab)
1493 {
1494 	GElf_Sym *symp, *syms;
1495 	uint_t i, *indexa, *indexb;
1496 	size_t symn, strsz, count;
1497 
1498 	if (symtab == NULL || symtab->sym_data_pri == NULL ||
1499 	    symtab->sym_byaddr != NULL)
1500 		return;
1501 
1502 	symn = symtab->sym_symn;
1503 	strsz = symtab->sym_strsz;
1504 
1505 	symp = syms = malloc(sizeof (GElf_Sym) * symn);
1506 	if (symp == NULL) {
1507 		dprintf("optimize_symtab: failed to malloc symbol array");
1508 		return;
1509 	}
1510 
1511 	/*
1512 	 * First record all the symbols into a table and count up the ones
1513 	 * that we're interested in. We mark symbols as invalid by setting
1514 	 * the st_name to an illegal value.
1515 	 */
1516 	for (i = 0, count = 0; i < symn; i++, symp++) {
1517 		if (symtab_getsym(symtab, i, symp) != NULL &&
1518 		    symp->st_name < strsz &&
1519 		    IS_DATA_TYPE(GELF_ST_TYPE(symp->st_info)))
1520 			count++;
1521 		else
1522 			symp->st_name = strsz;
1523 	}
1524 
1525 	/*
1526 	 * Allocate sufficient space for both tables and populate them
1527 	 * with the same symbols we just counted.
1528 	 */
1529 	symtab->sym_count = count;
1530 	indexa = symtab->sym_byaddr = calloc(sizeof (uint_t), count);
1531 	indexb = symtab->sym_byname = calloc(sizeof (uint_t), count);
1532 	if (indexa == NULL || indexb == NULL) {
1533 		dprintf(
1534 		    "optimize_symtab: failed to malloc symbol index arrays");
1535 		symtab->sym_count = 0;
1536 		if (indexa != NULL) {	/* First alloc succeeded. Free it */
1537 			free(indexa);
1538 			symtab->sym_byaddr = NULL;
1539 		}
1540 		free(syms);
1541 		return;
1542 	}
1543 	for (i = 0, symp = syms; i < symn; i++, symp++) {
1544 		if (symp->st_name < strsz)
1545 			*indexa++ = *indexb++ = i;
1546 	}
1547 
1548 	/*
1549 	 * Sort the two tables according to the appropriate criteria,
1550 	 * unless the user has overridden this behaviour.
1551 	 *
1552 	 * An example where we might not sort the tables is the relatively
1553 	 * unusual case of a process with very large symbol tables in which
1554 	 * we perform few lookups. In such a case the total time would be
1555 	 * dominated by the sort. It is difficult to determine a priori
1556 	 * how many lookups an arbitrary client will perform, and
1557 	 * hence whether the symbol tables should be sorted. We therefore
1558 	 * sort the tables by default, but provide the user with a
1559 	 * "chicken switch" in the form of the LIBPROC_NO_QSORT
1560 	 * environment variable.
1561 	 */
1562 	if (!_libproc_no_qsort) {
1563 		(void) mutex_lock(&sort_mtx);
1564 		sort_strs = symtab->sym_strs;
1565 		sort_syms = syms;
1566 
1567 		qsort(symtab->sym_byaddr, count, sizeof (uint_t), byaddr_cmp);
1568 		qsort(symtab->sym_byname, count, sizeof (uint_t), byname_cmp);
1569 
1570 		sort_strs = NULL;
1571 		sort_syms = NULL;
1572 		(void) mutex_unlock(&sort_mtx);
1573 	}
1574 
1575 	free(syms);
1576 }
1577 
1578 
1579 static Elf *
build_fake_elf(struct ps_prochandle * P,file_info_t * fptr,GElf_Ehdr * ehdr,size_t * nshdrs,Elf_Data ** shdata)1580 build_fake_elf(struct ps_prochandle *P, file_info_t *fptr, GElf_Ehdr *ehdr,
1581     size_t *nshdrs, Elf_Data **shdata)
1582 {
1583 	size_t shstrndx;
1584 	Elf_Scn *scn;
1585 	Elf *elf;
1586 
1587 	if ((elf = fake_elf(P, fptr)) == NULL ||
1588 	    elf_kind(elf) != ELF_K_ELF ||
1589 	    gelf_getehdr(elf, ehdr) == NULL ||
1590 	    elf_getshdrnum(elf, nshdrs) == -1 ||
1591 	    elf_getshdrstrndx(elf, &shstrndx) == -1 ||
1592 	    (scn = elf_getscn(elf, shstrndx)) == NULL ||
1593 	    (*shdata = elf_getdata(scn, NULL)) == NULL) {
1594 		if (elf != NULL)
1595 			(void) elf_end(elf);
1596 		dprintf("failed to fake up ELF file\n");
1597 		return (NULL);
1598 	}
1599 
1600 	return (elf);
1601 }
1602 
1603 /*
1604  * Try and find the file described by path in the file system and validate that
1605  * it matches our CRC before we try and process it for symbol information. If we
1606  * instead have an ELF data section, then that means we're checking a build-id
1607  * section instead. In that case we just need to find and bcmp the corresponding
1608  * section.
1609  *
1610  * Before we validate if it's a valid CRC or data section, we check to ensure
1611  * that it's a normal file and not anything else.
1612  */
1613 static boolean_t
build_alt_debug(file_info_t * fptr,const char * path,uint32_t crc,Elf_Data * data)1614 build_alt_debug(file_info_t *fptr, const char *path, uint32_t crc,
1615     Elf_Data *data)
1616 {
1617 	int fd;
1618 	struct stat st;
1619 	Elf *elf;
1620 	Elf_Scn *scn;
1621 	GElf_Shdr symshdr, strshdr;
1622 	Elf_Data *symdata, *strdata;
1623 	boolean_t valid;
1624 	uint32_t c = -1U;
1625 
1626 	if ((fd = open(path, O_RDONLY)) < 0)
1627 		return (B_FALSE);
1628 
1629 	if (fstat(fd, &st) != 0) {
1630 		(void) close(fd);
1631 		return (B_FALSE);
1632 	}
1633 
1634 	if (S_ISREG(st.st_mode) == 0) {
1635 		(void) close(fd);
1636 		return (B_FALSE);
1637 	}
1638 
1639 	/*
1640 	 * Only check the CRC if we've come here through a GNU debug link
1641 	 * section as opposed to the build id. This is indicated by having the
1642 	 * value of data be NULL.
1643 	 */
1644 	if (data == NULL) {
1645 		for (;;) {
1646 			char buf[4096];
1647 			ssize_t ret = read(fd, buf, sizeof (buf));
1648 			if (ret == -1) {
1649 				if (ret == EINTR)
1650 					continue;
1651 				(void) close(fd);
1652 				return (B_FALSE);
1653 			}
1654 			if (ret == 0) {
1655 				c = ~c;
1656 				if (c != crc) {
1657 					dprintf("crc mismatch, found: 0x%x "
1658 					    "expected 0x%x\n", c, crc);
1659 					(void) close(fd);
1660 					return (B_FALSE);
1661 				}
1662 				break;
1663 			}
1664 			CRC32(c, buf, ret, c, psym_crc32);
1665 		}
1666 	}
1667 
1668 	elf = elf_begin(fd, ELF_C_READ, NULL);
1669 	if (elf == NULL) {
1670 		(void) close(fd);
1671 		return (B_FALSE);
1672 	}
1673 
1674 	if (elf_kind(elf) != ELF_K_ELF) {
1675 		goto fail;
1676 	}
1677 
1678 	/*
1679 	 * If we have a data section, that indicates we have a build-id which
1680 	 * means we need to find the corresponding build-id section and compare
1681 	 * it.
1682 	 */
1683 	scn = NULL;
1684 	valid = B_FALSE;
1685 	for (scn = elf_nextscn(elf, scn); data != NULL && scn != NULL;
1686 	    scn = elf_nextscn(elf, scn)) {
1687 		GElf_Shdr hdr;
1688 		Elf_Data *ntdata;
1689 
1690 		if (gelf_getshdr(scn, &hdr) == NULL)
1691 			goto fail;
1692 
1693 		if (hdr.sh_type != SHT_NOTE)
1694 			continue;
1695 
1696 		if ((ntdata = elf_getdata(scn, NULL)) == NULL)
1697 			goto fail;
1698 
1699 		/*
1700 		 * First verify the data section sizes are equal, then the
1701 		 * section name. If that's all true, then we can just do a bcmp.
1702 		 */
1703 		if (data->d_size != ntdata->d_size)
1704 			continue;
1705 
1706 		dprintf("found corresponding section in alternate file\n");
1707 		if (bcmp(ntdata->d_buf, data->d_buf, data->d_size) != 0)
1708 			goto fail;
1709 
1710 		valid = B_TRUE;
1711 		break;
1712 	}
1713 	if (data != NULL && valid == B_FALSE) {
1714 		dprintf("failed to find a matching %s section in %s\n",
1715 		    BUILDID_NAME, path);
1716 		goto fail;
1717 	}
1718 
1719 
1720 	/*
1721 	 * Do two passes, first see if we have a symbol header, then see if we
1722 	 * can find the corresponding linked string table.
1723 	 */
1724 	scn = NULL;
1725 	for (scn = elf_nextscn(elf, scn); scn != NULL;
1726 	    scn = elf_nextscn(elf, scn)) {
1727 
1728 		if (gelf_getshdr(scn, &symshdr) == NULL)
1729 			goto fail;
1730 
1731 		if (symshdr.sh_type != SHT_SYMTAB)
1732 			continue;
1733 
1734 		if ((symdata = elf_getdata(scn, NULL)) == NULL)
1735 			goto fail;
1736 
1737 		break;
1738 	}
1739 	if (scn == NULL)
1740 		goto fail;
1741 
1742 	if ((scn = elf_getscn(elf, symshdr.sh_link)) == NULL)
1743 		goto fail;
1744 
1745 	if (gelf_getshdr(scn, &strshdr) == NULL)
1746 		goto fail;
1747 
1748 	if ((strdata = elf_getdata(scn, NULL)) == NULL)
1749 		goto fail;
1750 
1751 	fptr->file_symtab.sym_data_pri = symdata;
1752 	fptr->file_symtab.sym_symn += symshdr.sh_size / symshdr.sh_entsize;
1753 	fptr->file_symtab.sym_strs = strdata->d_buf;
1754 	fptr->file_symtab.sym_strsz = strdata->d_size;
1755 	fptr->file_symtab.sym_hdr_pri = symshdr;
1756 	fptr->file_symtab.sym_strhdr = strshdr;
1757 
1758 	dprintf("successfully loaded additional debug symbols for %s from %s\n",
1759 	    fptr->file_rname, path);
1760 
1761 	fptr->file_dbgfile = fd;
1762 	fptr->file_dbgelf = elf;
1763 	return (B_TRUE);
1764 fail:
1765 	(void) elf_end(elf);
1766 	(void) close(fd);
1767 	return (B_FALSE);
1768 }
1769 
1770 /*
1771  * We're here because the object in question has no symbol information, that's a
1772  * bit unfortunate. However, we've found that there's a .gnu_debuglink sitting
1773  * around. By convention that means that given the current location of the
1774  * object on disk, and the debug name that we found in the binary we need to
1775  * search the following locations for a matching file.
1776  *
1777  * <dirname>/.debug/<debug-name>
1778  * /usr/lib/debug/<dirname>/<debug-name>
1779  *
1780  * In the future, we should consider supporting looking in the prefix's
1781  * lib/debug directory for a matching object or supporting an arbitrary user
1782  * defined set of places to look.
1783  */
1784 static void
find_alt_debuglink(file_info_t * fptr,const char * name,uint32_t crc)1785 find_alt_debuglink(file_info_t *fptr, const char *name, uint32_t crc)
1786 {
1787 	boolean_t r;
1788 	char *dup = NULL, *path = NULL, *dname;
1789 
1790 	dprintf("find_alt_debug: looking for %s, crc 0x%x\n", name, crc);
1791 	if (fptr->file_rname == NULL) {
1792 		dprintf("find_alt_debug: encountered null file_rname\n");
1793 		return;
1794 	}
1795 
1796 	dup = strdup(fptr->file_rname);
1797 	if (dup == NULL)
1798 		return;
1799 
1800 	dname = dirname(dup);
1801 	if (asprintf(&path, "%s/.debug/%s", dname, name) != -1) {
1802 		dprintf("attempting to load alternate debug information "
1803 		    "from %s\n", path);
1804 		r = build_alt_debug(fptr, path, crc, NULL);
1805 		free(path);
1806 		if (r == B_TRUE)
1807 			goto out;
1808 	}
1809 
1810 	if (asprintf(&path, "/usr/lib/debug/%s/%s", dname, name) != -1) {
1811 		dprintf("attempting to load alternate debug information "
1812 		    "from %s\n", path);
1813 		r = build_alt_debug(fptr, path, crc, NULL);
1814 		free(path);
1815 		if (r == B_TRUE)
1816 			goto out;
1817 	}
1818 out:
1819 	free(dup);
1820 }
1821 
1822 /*
1823  * Build the symbol table for the given mapped file.
1824  */
1825 void
Pbuild_file_symtab(struct ps_prochandle * P,file_info_t * fptr)1826 Pbuild_file_symtab(struct ps_prochandle *P, file_info_t *fptr)
1827 {
1828 	char objectfile[PATH_MAX];
1829 	uint_t i;
1830 
1831 	GElf_Ehdr ehdr;
1832 	GElf_Sym s;
1833 
1834 	Elf_Data *shdata;
1835 	Elf_Scn *scn;
1836 	Elf *elf;
1837 	size_t nshdrs, shstrndx;
1838 
1839 	struct {
1840 		GElf_Shdr c_shdr;
1841 		Elf_Data *c_data;
1842 		const char *c_name;
1843 	} *cp, *cache = NULL, *dyn = NULL, *plt = NULL, *ctf = NULL,
1844 	*dbglink = NULL, *buildid = NULL;
1845 
1846 	if (fptr->file_init)
1847 		return;	/* We've already processed this file */
1848 
1849 	/*
1850 	 * Mark the file_info struct as having the symbol table initialized
1851 	 * even if we fail below.  We tried once; we don't try again.
1852 	 */
1853 	fptr->file_init = 1;
1854 
1855 	if (elf_version(EV_CURRENT) == EV_NONE) {
1856 		dprintf("libproc ELF version is more recent than libelf\n");
1857 		return;
1858 	}
1859 
1860 	if (P->state == PS_DEAD || P->state == PS_IDLE) {
1861 		char *name;
1862 		/*
1863 		 * If we're a not live, we can't open files from the /proc
1864 		 * object directory; we have only the mapping and file names
1865 		 * to guide us.  We prefer the file_lname, but need to handle
1866 		 * the case of it being NULL in order to bootstrap: we first
1867 		 * come here during rd_new() when the only information we have
1868 		 * is interpreter name associated with the AT_BASE mapping.
1869 		 *
1870 		 * Also, if the zone associated with the core file seems
1871 		 * to exists on this machine we'll try to open the object
1872 		 * file within the zone.
1873 		 */
1874 		if (fptr->file_rname != NULL)
1875 			name = fptr->file_rname;
1876 		else if (fptr->file_lname != NULL)
1877 			name = fptr->file_lname;
1878 		else
1879 			name = fptr->file_pname;
1880 		(void) strlcpy(objectfile, name, sizeof (objectfile));
1881 	} else {
1882 		(void) snprintf(objectfile, sizeof (objectfile),
1883 		    "%s/%d/object/%s",
1884 		    procfs_path, (int)P->pid, fptr->file_pname);
1885 	}
1886 
1887 	/*
1888 	 * Open the object file, create the elf file, and then get the elf
1889 	 * header and .shstrtab data buffer so we can process sections by
1890 	 * name. If anything goes wrong try to fake up an elf file from
1891 	 * the in-core elf image.
1892 	 */
1893 
1894 	if (_libproc_incore_elf || (P->flags & INCORE)) {
1895 		dprintf("Pbuild_file_symtab: using in-core data for: %s\n",
1896 		    fptr->file_pname);
1897 
1898 		if ((elf = build_fake_elf(P, fptr, &ehdr, &nshdrs, &shdata)) ==
1899 		    NULL)
1900 			return;
1901 
1902 	} else if ((fptr->file_fd = open(objectfile, O_RDONLY)) < 0) {
1903 		dprintf("Pbuild_file_symtab: failed to open %s: %s\n",
1904 		    objectfile, strerror(errno));
1905 
1906 		if ((elf = build_fake_elf(P, fptr, &ehdr, &nshdrs, &shdata)) ==
1907 		    NULL)
1908 			return;
1909 
1910 	} else if ((elf = elf_begin(fptr->file_fd, ELF_C_READ, NULL)) == NULL ||
1911 	    elf_kind(elf) != ELF_K_ELF ||
1912 	    gelf_getehdr(elf, &ehdr) == NULL ||
1913 	    elf_getshdrnum(elf, &nshdrs) == -1 ||
1914 	    elf_getshdrstrndx(elf, &shstrndx) == -1 ||
1915 	    (scn = elf_getscn(elf, shstrndx)) == NULL ||
1916 	    (shdata = elf_getdata(scn, NULL)) == NULL) {
1917 		int err = elf_errno();
1918 
1919 		dprintf("failed to process ELF file %s: %s\n",
1920 		    objectfile, (err == 0) ? "<null>" : elf_errmsg(err));
1921 		(void) elf_end(elf);
1922 
1923 		if ((elf = build_fake_elf(P, fptr, &ehdr, &nshdrs, &shdata)) ==
1924 		    NULL)
1925 			return;
1926 
1927 	} else if (file_differs(P, elf, fptr)) {
1928 		Elf *newelf;
1929 
1930 		/*
1931 		 * Before we get too excited about this elf file, we'll check
1932 		 * its checksum value against the value we have in memory. If
1933 		 * they don't agree, we try to fake up a new elf file and
1934 		 * proceed with that instead.
1935 		 */
1936 		dprintf("ELF file %s (%lx) doesn't match in-core image\n",
1937 		    fptr->file_pname,
1938 		    (ulong_t)fptr->file_map->map_pmap.pr_vaddr);
1939 
1940 		if ((newelf = build_fake_elf(P, fptr, &ehdr, &nshdrs, &shdata))
1941 		    != NULL) {
1942 			(void) elf_end(elf);
1943 			elf = newelf;
1944 			dprintf("switched to faked up ELF file\n");
1945 
1946 			/*
1947 			 * Check to see if the file that we just discovered
1948 			 * to be an imposter matches the execname that was
1949 			 * determined by Pfindexec().  If it does, we (clearly)
1950 			 * don't have the right binary, and we zero out
1951 			 * execname before anyone gets hurt.
1952 			 */
1953 			if (fptr->file_rname != NULL && P->execname != NULL &&
1954 			    strcmp(fptr->file_rname, P->execname) == 0) {
1955 				dprintf("file/in-core image mismatch was "
1956 				    "on P->execname; discarding\n");
1957 				free(P->execname);
1958 				P->execname = NULL;
1959 			}
1960 		}
1961 	}
1962 
1963 	if ((cache = malloc(nshdrs * sizeof (*cache))) == NULL) {
1964 		dprintf("failed to malloc section cache for %s\n", objectfile);
1965 		goto bad;
1966 	}
1967 
1968 	dprintf("processing ELF file %s\n", objectfile);
1969 	fptr->file_class = ehdr.e_ident[EI_CLASS];
1970 	fptr->file_etype = ehdr.e_type;
1971 	fptr->file_elf = elf;
1972 	fptr->file_shstrs = shdata->d_buf;
1973 	fptr->file_shstrsz = shdata->d_size;
1974 
1975 	/*
1976 	 * Iterate through each section, caching its section header, data
1977 	 * pointer, and name.  We use this for handling sh_link values below.
1978 	 */
1979 	for (cp = cache + 1, scn = NULL; scn = elf_nextscn(elf, scn); cp++) {
1980 		if (gelf_getshdr(scn, &cp->c_shdr) == NULL) {
1981 			dprintf("Pbuild_file_symtab: Failed to get section "
1982 			    "header\n");
1983 			goto bad; /* Failed to get section header */
1984 		}
1985 
1986 		if ((cp->c_data = elf_getdata(scn, NULL)) == NULL) {
1987 			dprintf("Pbuild_file_symtab: Failed to get section "
1988 			    "data\n");
1989 			goto bad; /* Failed to get section data */
1990 		}
1991 
1992 		if (cp->c_shdr.sh_name >= shdata->d_size) {
1993 			dprintf("Pbuild_file_symtab: corrupt section name");
1994 			goto bad; /* Corrupt section name */
1995 		}
1996 
1997 		cp->c_name = (const char *)shdata->d_buf + cp->c_shdr.sh_name;
1998 	}
1999 
2000 	/*
2001 	 * Now iterate through the section cache in order to locate info
2002 	 * for the .symtab, .dynsym, .SUNW_ldynsym, .dynamic, .plt,
2003 	 * and .SUNW_ctf sections:
2004 	 */
2005 	for (i = 1, cp = cache + 1; i < nshdrs; i++, cp++) {
2006 		GElf_Shdr *shp = &cp->c_shdr;
2007 
2008 		if (shp->sh_type == SHT_SYMTAB || shp->sh_type == SHT_DYNSYM) {
2009 			sym_tbl_t *symp = shp->sh_type == SHT_SYMTAB ?
2010 			    &fptr->file_symtab : &fptr->file_dynsym;
2011 			/*
2012 			 * It's possible that the we already got the symbol
2013 			 * table from the core file itself. Either the file
2014 			 * differs in which case our faked up elf file will
2015 			 * only contain the dynsym (not the symtab) or the
2016 			 * file matches in which case we'll just be replacing
2017 			 * the symbol table we pulled out of the core file
2018 			 * with an equivalent one. In either case, this
2019 			 * check isn't essential, but it's a good idea.
2020 			 */
2021 			if (symp->sym_data_pri == NULL) {
2022 				dprintf("Symbol table found for %s\n",
2023 				    objectfile);
2024 				symp->sym_data_pri = cp->c_data;
2025 				symp->sym_symn +=
2026 				    shp->sh_size / shp->sh_entsize;
2027 				symp->sym_strs =
2028 				    cache[shp->sh_link].c_data->d_buf;
2029 				symp->sym_strsz =
2030 				    cache[shp->sh_link].c_data->d_size;
2031 				symp->sym_hdr_pri = cp->c_shdr;
2032 				symp->sym_strhdr = cache[shp->sh_link].c_shdr;
2033 			} else {
2034 				dprintf("Symbol table already there for %s\n",
2035 				    objectfile);
2036 			}
2037 		} else if (shp->sh_type == SHT_SUNW_LDYNSYM) {
2038 			/* .SUNW_ldynsym section is auxiliary to .dynsym */
2039 			if (fptr->file_dynsym.sym_data_aux == NULL) {
2040 				dprintf(".SUNW_ldynsym symbol table"
2041 				    " found for %s\n", objectfile);
2042 				fptr->file_dynsym.sym_data_aux = cp->c_data;
2043 				fptr->file_dynsym.sym_symn_aux =
2044 				    shp->sh_size / shp->sh_entsize;
2045 				fptr->file_dynsym.sym_symn +=
2046 				    fptr->file_dynsym.sym_symn_aux;
2047 				fptr->file_dynsym.sym_hdr_aux = cp->c_shdr;
2048 			} else {
2049 				dprintf(".SUNW_ldynsym symbol table already"
2050 				    " there for %s\n", objectfile);
2051 			}
2052 		} else if (shp->sh_type == SHT_DYNAMIC) {
2053 			dyn = cp;
2054 		} else if (strcmp(cp->c_name, ".plt") == 0) {
2055 			plt = cp;
2056 		} else if (strcmp(cp->c_name, ".SUNW_ctf") == 0) {
2057 			/*
2058 			 * Skip over bogus CTF sections so they don't come back
2059 			 * to haunt us later.
2060 			 */
2061 			if (shp->sh_link == 0 ||
2062 			    shp->sh_link >= nshdrs ||
2063 			    (cache[shp->sh_link].c_shdr.sh_type != SHT_DYNSYM &&
2064 			    cache[shp->sh_link].c_shdr.sh_type != SHT_SYMTAB)) {
2065 				dprintf("Bad sh_link %d for "
2066 				    "CTF\n", shp->sh_link);
2067 				continue;
2068 			}
2069 			ctf = cp;
2070 		} else if (strcmp(cp->c_name, BUILDID_NAME) == 0) {
2071 			dprintf("Found a %s section for %s\n", BUILDID_NAME,
2072 			    fptr->file_rname);
2073 			/* The ElfXX_Nhdr is 32/64-bit neutral */
2074 			if (cp->c_shdr.sh_type == SHT_NOTE &&
2075 			    cp->c_data->d_buf != NULL &&
2076 			    cp->c_data->d_size >= sizeof (Elf32_Nhdr)) {
2077 				Elf32_Nhdr *hdr = cp->c_data->d_buf;
2078 				if (hdr->n_type != 3)
2079 					continue;
2080 				if (hdr->n_namesz != 4)
2081 					continue;
2082 				if (hdr->n_descsz < MINBUILDID)
2083 					continue;
2084 				/* Set a reasonable upper bound */
2085 				if (hdr->n_descsz > MAXBUILDID) {
2086 					dprintf("Skipped %s as too large "
2087 					    "(%ld)\n", BUILDID_NAME,
2088 					    (unsigned long)hdr->n_descsz);
2089 					continue;
2090 				}
2091 
2092 				if (cp->c_data->d_size < sizeof (hdr) +
2093 				    hdr->n_namesz + hdr->n_descsz)
2094 					continue;
2095 				buildid = cp;
2096 			}
2097 		} else if (strcmp(cp->c_name, DBGLINK_NAME) == 0) {
2098 			dprintf("found %s section for %s\n", DBGLINK_NAME,
2099 			    fptr->file_rname);
2100 			/*
2101 			 * Let's make sure of a few things before we do this.
2102 			 */
2103 			if (cp->c_shdr.sh_type == SHT_PROGBITS &&
2104 			    cp->c_data->d_buf != NULL &&
2105 			    cp->c_data->d_size) {
2106 				dbglink = cp;
2107 			}
2108 		}
2109 	}
2110 
2111 	/*
2112 	 * If we haven't found any symbol table information and we have found
2113 	 * either a .note.gnu.build-id or a .gnu_debuglink, it's time to try and
2114 	 * figure out where we might find this. Originally, GNU used the
2115 	 * .gnu_debuglink solely, but then they added a .note.gnu.build-id. The
2116 	 * build-id is some size, usually 16 or 20 bytes, often a SHA1 sum of
2117 	 * parts of the original file. This is maintained across all versions of
2118 	 * the subsequent file.
2119 	 *
2120 	 * For the .note.gnu.build-id, we're going to check a few things before
2121 	 * using it, first that the name is 4 bytes, and is GNU and that the
2122 	 * type is 3, which they say is the build-id identifier.
2123 	 *
2124 	 * To verify that the elf data for the .gnu_debuglink seems somewhat
2125 	 * sane, eg. the elf data should be a string, so we want to verify we
2126 	 * have a null-terminator.
2127 	 */
2128 	if (fptr->file_symtab.sym_data_pri == NULL && buildid != NULL) {
2129 		int i, bo;
2130 		uint8_t *dp;
2131 		char buf[BUILDID_STRLEN], *path;
2132 		Elf32_Nhdr *hdr = buildid->c_data->d_buf;
2133 
2134 		/*
2135 		 * This was checked for validity when assigning the buildid
2136 		 * variable.
2137 		 */
2138 		bzero(buf, sizeof (buf));
2139 		dp = (uint8_t *)((uintptr_t)hdr + sizeof (*hdr) +
2140 		    hdr->n_namesz);
2141 		for (i = 0, bo = 0; i < hdr->n_descsz; i++, bo += 2, dp++) {
2142 			assert(sizeof (buf) - bo > 0);
2143 
2144 			/*
2145 			 * Recall that the build-id is structured as a series of
2146 			 * bytes. However, the first two characters are supposed
2147 			 * to represent a directory. Hence, once we reach offset
2148 			 * two, we insert a '/' character.
2149 			 */
2150 			if (bo == 2) {
2151 				buf[bo] = '/';
2152 				bo++;
2153 			}
2154 			(void) snprintf(buf + bo, sizeof (buf) - bo, "%2x",
2155 			    *dp);
2156 		}
2157 
2158 		if (asprintf(&path, "/usr/lib/debug/.build-id/%s.debug",
2159 		    buf) != -1) {
2160 			boolean_t r;
2161 			dprintf("attempting to find build id alternate debug "
2162 			    "file at %s\n", path);
2163 			r = build_alt_debug(fptr, path, 0, buildid->c_data);
2164 			dprintf("attempt %s\n", r == B_TRUE ?
2165 			    "succeeded" : "failed");
2166 			free(path);
2167 		} else {
2168 			dprintf("failed to construct build id path: %s\n",
2169 			    strerror(errno));
2170 		}
2171 	}
2172 
2173 	if (fptr->file_symtab.sym_data_pri == NULL && dbglink != NULL) {
2174 		char *c = dbglink->c_data->d_buf;
2175 		size_t i;
2176 		boolean_t found = B_FALSE;
2177 		Elf_Data *ed = dbglink->c_data;
2178 		uint32_t crc;
2179 
2180 		for (i = 0; i < ed->d_size; i++) {
2181 			if (c[i] == '\0') {
2182 				uintptr_t off;
2183 				dprintf("got .gnu_debuglink terminator at "
2184 				    "offset %lu\n", (unsigned long)i);
2185 				/*
2186 				 * After the null terminator, there should be
2187 				 * padding, followed by a 4 byte CRC of the
2188 				 * file. If we don't see this, we're going to
2189 				 * assume this is bogus.
2190 				 */
2191 				if ((i % sizeof (uint32_t)) == 0) {
2192 					i += 4;
2193 				} else {
2194 					i += sizeof (uint32_t) -
2195 					    (i % sizeof (uint32_t));
2196 				}
2197 				if (i + sizeof (uint32_t) ==
2198 				    dbglink->c_data->d_size) {
2199 					found = B_TRUE;
2200 					off = (uintptr_t)ed->d_buf + i;
2201 					crc = *(uint32_t *)off;
2202 				} else {
2203 					dprintf(".gnu_debuglink size mismatch, "
2204 					    "expected: %lu, found: %lu\n",
2205 					    (unsigned long)i,
2206 					    (unsigned long)ed->d_size);
2207 				}
2208 				break;
2209 			}
2210 		}
2211 
2212 		if (found == B_TRUE)
2213 			find_alt_debuglink(fptr, dbglink->c_data->d_buf, crc);
2214 	}
2215 
2216 	/*
2217 	 * At this point, we've found all the symbol tables we're ever going
2218 	 * to find: the ones in the loop above and possibly the symtab that
2219 	 * was included in the core file. Before we perform any lookups, we
2220 	 * create sorted versions to optimize for lookups.
2221 	 */
2222 	optimize_symtab(&fptr->file_symtab);
2223 	optimize_symtab(&fptr->file_dynsym);
2224 
2225 	/*
2226 	 * Fill in the base address of the text mapping for shared libraries.
2227 	 * This allows us to translate symbols before librtld_db is ready.
2228 	 */
2229 	if (fptr->file_etype == ET_DYN) {
2230 		fptr->file_dyn_base = fptr->file_map->map_pmap.pr_vaddr -
2231 		    fptr->file_map->map_pmap.pr_offset;
2232 		dprintf("setting file_dyn_base for %s to %lx\n",
2233 		    objectfile, (long)fptr->file_dyn_base);
2234 	}
2235 
2236 	/*
2237 	 * Record the CTF section information in the file info structure.
2238 	 */
2239 	if (ctf != NULL) {
2240 		fptr->file_ctf_off = ctf->c_shdr.sh_offset;
2241 		fptr->file_ctf_size = ctf->c_shdr.sh_size;
2242 		if (ctf->c_shdr.sh_link != 0 &&
2243 		    cache[ctf->c_shdr.sh_link].c_shdr.sh_type == SHT_DYNSYM)
2244 			fptr->file_ctf_dyn = 1;
2245 	}
2246 
2247 	if (fptr->file_lo == NULL)
2248 		goto done; /* Nothing else to do if no load object info */
2249 
2250 	/*
2251 	 * If the object is a shared library and we have a different rl_base
2252 	 * value, reset file_dyn_base according to librtld_db's information.
2253 	 */
2254 	if (fptr->file_etype == ET_DYN &&
2255 	    fptr->file_lo->rl_base != fptr->file_dyn_base) {
2256 		dprintf("resetting file_dyn_base for %s to %lx\n",
2257 		    objectfile, (long)fptr->file_lo->rl_base);
2258 		fptr->file_dyn_base = fptr->file_lo->rl_base;
2259 	}
2260 
2261 	/*
2262 	 * Fill in the PLT information for this file if a PLT symbol is found.
2263 	 */
2264 	if (sym_by_name(&fptr->file_dynsym, "_PROCEDURE_LINKAGE_TABLE_", &s,
2265 	    NULL) != NULL) {
2266 		fptr->file_plt_base = s.st_value + fptr->file_dyn_base;
2267 		fptr->file_plt_size = (plt != NULL) ? plt->c_shdr.sh_size : 0;
2268 
2269 		/*
2270 		 * Bring the load object up to date; it is the only way the
2271 		 * user has to access the PLT data. The PLT information in the
2272 		 * rd_loadobj_t is not set in the call to map_iter() (the
2273 		 * callback for rd_loadobj_iter) where we set file_lo.
2274 		 */
2275 		fptr->file_lo->rl_plt_base = fptr->file_plt_base;
2276 		fptr->file_lo->rl_plt_size = fptr->file_plt_size;
2277 
2278 		dprintf("PLT found at %p, size = %lu\n",
2279 		    (void *)fptr->file_plt_base, (ulong_t)fptr->file_plt_size);
2280 	}
2281 
2282 	/*
2283 	 * Fill in the PLT information.
2284 	 */
2285 	if (dyn != NULL) {
2286 		uintptr_t dynaddr = dyn->c_shdr.sh_addr + fptr->file_dyn_base;
2287 		size_t ndyn = dyn->c_shdr.sh_size / dyn->c_shdr.sh_entsize;
2288 		GElf_Dyn d;
2289 
2290 		for (i = 0; i < ndyn; i++) {
2291 			if (gelf_getdyn(dyn->c_data, i, &d) == NULL)
2292 				continue;
2293 
2294 			switch (d.d_tag) {
2295 			case DT_JMPREL:
2296 				dprintf("DT_JMPREL is %p\n",
2297 				    (void *)(uintptr_t)d.d_un.d_ptr);
2298 				fptr->file_jmp_rel =
2299 				    d.d_un.d_ptr + fptr->file_dyn_base;
2300 				break;
2301 			case DT_STRTAB:
2302 				dprintf("DT_STRTAB is %p\n",
2303 				    (void *)(uintptr_t)d.d_un.d_ptr);
2304 				break;
2305 			case DT_PLTGOT:
2306 				dprintf("DT_PLTGOT is %p\n",
2307 				    (void *)(uintptr_t)d.d_un.d_ptr);
2308 				break;
2309 			case DT_SUNW_SYMTAB:
2310 				dprintf("DT_SUNW_SYMTAB is %p\n",
2311 				    (void *)(uintptr_t)d.d_un.d_ptr);
2312 				break;
2313 			case DT_SYMTAB:
2314 				dprintf("DT_SYMTAB is %p\n",
2315 				    (void *)(uintptr_t)d.d_un.d_ptr);
2316 				break;
2317 			case DT_HASH:
2318 				dprintf("DT_HASH is %p\n",
2319 				    (void *)(uintptr_t)d.d_un.d_ptr);
2320 				break;
2321 			}
2322 		}
2323 
2324 		dprintf("_DYNAMIC found at %p, %lu entries, DT_JMPREL = %p\n",
2325 		    (void *)dynaddr, (ulong_t)ndyn, (void *)fptr->file_jmp_rel);
2326 	}
2327 
2328 done:
2329 	free(cache);
2330 	return;
2331 
2332 bad:
2333 	if (cache != NULL)
2334 		free(cache);
2335 
2336 	(void) elf_end(elf);
2337 	fptr->file_elf = NULL;
2338 	if (fptr->file_elfmem != NULL) {
2339 		free(fptr->file_elfmem);
2340 		fptr->file_elfmem = NULL;
2341 	}
2342 	(void) close(fptr->file_fd);
2343 	if (fptr->file_dbgelf != NULL)
2344 		(void) elf_end(fptr->file_dbgelf);
2345 	fptr->file_dbgelf = NULL;
2346 	if (fptr->file_dbgfile >= 0)
2347 		(void) close(fptr->file_dbgfile);
2348 	fptr->file_fd = -1;
2349 	fptr->file_dbgfile = -1;
2350 }
2351 
2352 /*
2353  * Given a process virtual address, return the map_info_t containing it.
2354  * If none found, return NULL.
2355  */
2356 map_info_t *
Paddr2mptr(struct ps_prochandle * P,uintptr_t addr)2357 Paddr2mptr(struct ps_prochandle *P, uintptr_t addr)
2358 {
2359 	int lo = 0;
2360 	int hi = P->map_count - 1;
2361 	int mid;
2362 	map_info_t *mp;
2363 
2364 	while (lo <= hi) {
2365 
2366 		mid = (lo + hi) / 2;
2367 		mp = &P->mappings[mid];
2368 
2369 		/* check that addr is in [vaddr, vaddr + size) */
2370 		if ((addr - mp->map_pmap.pr_vaddr) < mp->map_pmap.pr_size)
2371 			return (mp);
2372 
2373 		if (addr < mp->map_pmap.pr_vaddr)
2374 			hi = mid - 1;
2375 		else
2376 			lo = mid + 1;
2377 	}
2378 
2379 	return (NULL);
2380 }
2381 
2382 /*
2383  * Return the map_info_t for the executable file.
2384  * If not found, return NULL.
2385  */
2386 static map_info_t *
exec_map(struct ps_prochandle * P)2387 exec_map(struct ps_prochandle *P)
2388 {
2389 	uint_t i;
2390 	map_info_t *mptr;
2391 	map_info_t *mold = NULL;
2392 	file_info_t *fptr;
2393 	uintptr_t base;
2394 
2395 	for (i = 0, mptr = P->mappings; i < P->map_count; i++, mptr++) {
2396 		if (mptr->map_pmap.pr_mapname[0] == '\0')
2397 			continue;
2398 		if (strcmp(mptr->map_pmap.pr_mapname, "a.out") == 0) {
2399 			if ((fptr = mptr->map_file) != NULL &&
2400 			    fptr->file_lo != NULL) {
2401 				base = fptr->file_lo->rl_base;
2402 				if (base >= mptr->map_pmap.pr_vaddr &&
2403 				    base < mptr->map_pmap.pr_vaddr +
2404 				    mptr->map_pmap.pr_size)	/* text space */
2405 					return (mptr);
2406 				mold = mptr;	/* must be the data */
2407 				continue;
2408 			}
2409 			/* This is a poor way to test for text space */
2410 			if (!(mptr->map_pmap.pr_mflags & MA_EXEC) ||
2411 			    (mptr->map_pmap.pr_mflags & MA_WRITE)) {
2412 				mold = mptr;
2413 				continue;
2414 			}
2415 			return (mptr);
2416 		}
2417 	}
2418 
2419 	return (mold);
2420 }
2421 
2422 /*
2423  * Given a shared object name, return the map_info_t for it.  If no matching
2424  * object is found, return NULL.  Normally, the link maps contain the full
2425  * object pathname, e.g. /usr/lib/libc.so.1.  We allow the object name to
2426  * take one of the following forms:
2427  *
2428  * 1. An exact match (i.e. a full pathname): "/usr/lib/libc.so.1"
2429  * 2. An exact basename match: "libc.so.1"
2430  * 3. An initial basename match up to a '.' suffix: "libc.so" or "libc"
2431  * 4. The literal string "a.out" is an alias for the executable mapping
2432  *
2433  * The third case is a convenience for callers and may not be necessary.
2434  *
2435  * As the exact same object name may be loaded on different link maps (see
2436  * dlmopen(3DL)), we also allow the caller to resolve the object name by
2437  * specifying a particular link map id.  If lmid is PR_LMID_EVERY, the
2438  * first matching name will be returned, regardless of the link map id.
2439  */
2440 static map_info_t *
object_to_map(struct ps_prochandle * P,Lmid_t lmid,const char * objname)2441 object_to_map(struct ps_prochandle *P, Lmid_t lmid, const char *objname)
2442 {
2443 	map_info_t *mp;
2444 	file_info_t *fp;
2445 	size_t objlen;
2446 	uint_t i;
2447 
2448 	/*
2449 	 * If we have no rtld_db, then always treat a request as one for all
2450 	 * link maps.
2451 	 */
2452 	if (P->rap == NULL)
2453 		lmid = PR_LMID_EVERY;
2454 
2455 	/*
2456 	 * First pass: look for exact matches of the entire pathname or
2457 	 * basename (cases 1 and 2 above):
2458 	 */
2459 	for (i = 0, mp = P->mappings; i < P->map_count; i++, mp++) {
2460 
2461 		if (mp->map_pmap.pr_mapname[0] == '\0' ||
2462 		    (fp = mp->map_file) == NULL ||
2463 		    ((fp->file_lname == NULL) && (fp->file_rname == NULL)))
2464 			continue;
2465 
2466 		if (lmid != PR_LMID_EVERY &&
2467 		    (fp->file_lo == NULL || lmid != fp->file_lo->rl_lmident))
2468 			continue;
2469 
2470 		/*
2471 		 * If we match, return the primary text mapping; otherwise
2472 		 * just return the mapping we matched.
2473 		 */
2474 		if ((fp->file_lbase && strcmp(fp->file_lbase, objname) == 0) ||
2475 		    (fp->file_rbase && strcmp(fp->file_rbase, objname) == 0) ||
2476 		    (fp->file_lname && strcmp(fp->file_lname, objname) == 0) ||
2477 		    (fp->file_rname && strcmp(fp->file_rname, objname) == 0))
2478 			return (fp->file_map ? fp->file_map : mp);
2479 	}
2480 
2481 	objlen = strlen(objname);
2482 
2483 	/*
2484 	 * Second pass: look for partial matches (case 3 above):
2485 	 */
2486 	for (i = 0, mp = P->mappings; i < P->map_count; i++, mp++) {
2487 
2488 		if (mp->map_pmap.pr_mapname[0] == '\0' ||
2489 		    (fp = mp->map_file) == NULL ||
2490 		    ((fp->file_lname == NULL) && (fp->file_rname == NULL)))
2491 			continue;
2492 
2493 		if (lmid != PR_LMID_EVERY &&
2494 		    (fp->file_lo == NULL || lmid != fp->file_lo->rl_lmident))
2495 			continue;
2496 
2497 		/*
2498 		 * If we match, return the primary text mapping; otherwise
2499 		 * just return the mapping we matched.
2500 		 */
2501 		if ((fp->file_lbase != NULL) &&
2502 		    (strncmp(fp->file_lbase, objname, objlen) == 0) &&
2503 		    (fp->file_lbase[objlen] == '.'))
2504 			return (fp->file_map ? fp->file_map : mp);
2505 		if ((fp->file_rbase != NULL) &&
2506 		    (strncmp(fp->file_rbase, objname, objlen) == 0) &&
2507 		    (fp->file_rbase[objlen] == '.'))
2508 			return (fp->file_map ? fp->file_map : mp);
2509 	}
2510 
2511 	/*
2512 	 * One last check: we allow "a.out" to always alias the executable,
2513 	 * assuming this name was not in use for something else.
2514 	 */
2515 	if ((lmid == PR_LMID_EVERY || lmid == LM_ID_BASE) &&
2516 	    (strcmp(objname, "a.out") == 0))
2517 		return (P->map_exec);
2518 
2519 	return (NULL);
2520 }
2521 
2522 static map_info_t *
object_name_to_map(struct ps_prochandle * P,Lmid_t lmid,const char * name)2523 object_name_to_map(struct ps_prochandle *P, Lmid_t lmid, const char *name)
2524 {
2525 	map_info_t *mptr;
2526 
2527 	if (!P->info_valid)
2528 		Pupdate_maps(P);
2529 
2530 	if (P->map_exec == NULL && ((mptr = Paddr2mptr(P,
2531 	    Pgetauxval(P, AT_ENTRY))) != NULL || (mptr = exec_map(P)) != NULL))
2532 		P->map_exec = mptr;
2533 
2534 	if (P->map_ldso == NULL && (mptr = Paddr2mptr(P,
2535 	    Pgetauxval(P, AT_BASE))) != NULL)
2536 		P->map_ldso = mptr;
2537 
2538 	if (name == PR_OBJ_EXEC)
2539 		mptr = P->map_exec;
2540 	else if (name == PR_OBJ_LDSO)
2541 		mptr = P->map_ldso;
2542 	else if (Prd_agent(P) != NULL || P->state == PS_IDLE)
2543 		mptr = object_to_map(P, lmid, name);
2544 	else
2545 		mptr = NULL;
2546 
2547 	return (mptr);
2548 }
2549 
2550 /*
2551  * When two symbols are found by address, decide which one is to be preferred.
2552  */
2553 static GElf_Sym *
sym_prefer(GElf_Sym * sym1,char * name1,GElf_Sym * sym2,char * name2)2554 sym_prefer(GElf_Sym *sym1, char *name1, GElf_Sym *sym2, char *name2)
2555 {
2556 	/*
2557 	 * Prefer the non-NULL symbol.
2558 	 */
2559 	if (sym1 == NULL)
2560 		return (sym2);
2561 	if (sym2 == NULL)
2562 		return (sym1);
2563 
2564 	/*
2565 	 * Defer to the sort ordering...
2566 	 */
2567 	return (byaddr_cmp_common(sym1, name1, sym2, name2) <= 0 ? sym1 : sym2);
2568 }
2569 
2570 /*
2571  * Use a binary search to do the work of sym_by_addr().
2572  */
2573 static GElf_Sym *
sym_by_addr_binary(sym_tbl_t * symtab,GElf_Addr addr,GElf_Sym * symp,uint_t * idp)2574 sym_by_addr_binary(sym_tbl_t *symtab, GElf_Addr addr, GElf_Sym *symp,
2575     uint_t *idp)
2576 {
2577 	GElf_Sym sym, osym;
2578 	uint_t i, oid, *byaddr = symtab->sym_byaddr;
2579 	int min, max, mid, omid, found = 0;
2580 
2581 	if (symtab->sym_data_pri == NULL || symtab->sym_count == 0)
2582 		return (NULL);
2583 
2584 	min = 0;
2585 	max = symtab->sym_count - 1;
2586 	osym.st_value = 0;
2587 
2588 	/*
2589 	 * We can't return when we've found a match, we have to continue
2590 	 * searching for the closest matching symbol.
2591 	 */
2592 	while (min <= max) {
2593 		mid = (max + min) / 2;
2594 
2595 		i = byaddr[mid];
2596 		(void) symtab_getsym(symtab, i, &sym);
2597 
2598 		if (addr >= sym.st_value &&
2599 		    addr < sym.st_value + sym.st_size &&
2600 		    (!found || sym.st_value > osym.st_value)) {
2601 			osym = sym;
2602 			omid = mid;
2603 			oid = i;
2604 			found = 1;
2605 		}
2606 
2607 		if (addr < sym.st_value)
2608 			max = mid - 1;
2609 		else
2610 			min = mid + 1;
2611 	}
2612 
2613 	if (!found)
2614 		return (NULL);
2615 
2616 	/*
2617 	 * There may be many symbols with identical values so we walk
2618 	 * backward in the byaddr table to find the best match.
2619 	 */
2620 	do {
2621 		sym = osym;
2622 		i = oid;
2623 
2624 		if (omid == 0)
2625 			break;
2626 
2627 		oid = byaddr[--omid];
2628 		(void) symtab_getsym(symtab, oid, &osym);
2629 	} while (addr >= osym.st_value &&
2630 	    addr < sym.st_value + osym.st_size &&
2631 	    osym.st_value == sym.st_value);
2632 
2633 	*symp = sym;
2634 	if (idp != NULL)
2635 		*idp = i;
2636 	return (symp);
2637 }
2638 
2639 /*
2640  * Use a linear search to do the work of sym_by_addr().
2641  */
2642 static GElf_Sym *
sym_by_addr_linear(sym_tbl_t * symtab,GElf_Addr addr,GElf_Sym * symbolp,uint_t * idp)2643 sym_by_addr_linear(sym_tbl_t *symtab, GElf_Addr addr, GElf_Sym *symbolp,
2644     uint_t *idp)
2645 {
2646 	size_t symn = symtab->sym_symn;
2647 	char *strs = symtab->sym_strs;
2648 	GElf_Sym sym, *symp = NULL;
2649 	GElf_Sym osym, *osymp = NULL;
2650 	int i, id;
2651 
2652 	if (symtab->sym_data_pri == NULL || symn == 0 || strs == NULL)
2653 		return (NULL);
2654 
2655 	for (i = 0; i < symn; i++) {
2656 		if ((symp = symtab_getsym(symtab, i, &sym)) != NULL) {
2657 			if (addr >= sym.st_value &&
2658 			    addr < sym.st_value + sym.st_size) {
2659 				if (osymp)
2660 					symp = sym_prefer(
2661 					    symp, strs + symp->st_name,
2662 					    osymp, strs + osymp->st_name);
2663 				if (symp != osymp) {
2664 					osym = sym;
2665 					osymp = &osym;
2666 					id = i;
2667 				}
2668 			}
2669 		}
2670 	}
2671 	if (osymp) {
2672 		*symbolp = osym;
2673 		if (idp)
2674 			*idp = id;
2675 		return (symbolp);
2676 	}
2677 	return (NULL);
2678 }
2679 
2680 /*
2681  * Look up a symbol by address in the specified symbol table.
2682  * Adjustment to 'addr' must already have been made for the
2683  * offset of the symbol if this is a dynamic library symbol table.
2684  *
2685  * Use a linear or a binary search depending on whether or not we
2686  * chose to sort the table in optimize_symtab().
2687  */
2688 static GElf_Sym *
sym_by_addr(sym_tbl_t * symtab,GElf_Addr addr,GElf_Sym * symp,uint_t * idp)2689 sym_by_addr(sym_tbl_t *symtab, GElf_Addr addr, GElf_Sym *symp, uint_t *idp)
2690 {
2691 	if (_libproc_no_qsort) {
2692 		return (sym_by_addr_linear(symtab, addr, symp, idp));
2693 	} else {
2694 		return (sym_by_addr_binary(symtab, addr, symp, idp));
2695 	}
2696 }
2697 
2698 /*
2699  * Use a binary search to do the work of sym_by_name().
2700  */
2701 static GElf_Sym *
sym_by_name_binary(sym_tbl_t * symtab,const char * name,GElf_Sym * symp,uint_t * idp)2702 sym_by_name_binary(sym_tbl_t *symtab, const char *name, GElf_Sym *symp,
2703     uint_t *idp)
2704 {
2705 	char *strs = symtab->sym_strs;
2706 	uint_t i, *byname = symtab->sym_byname;
2707 	int min, mid, max, cmp;
2708 
2709 	if (symtab->sym_data_pri == NULL || strs == NULL ||
2710 	    symtab->sym_count == 0)
2711 		return (NULL);
2712 
2713 	min = 0;
2714 	max = symtab->sym_count - 1;
2715 
2716 	while (min <= max) {
2717 		mid = (max + min) / 2;
2718 
2719 		i = byname[mid];
2720 		(void) symtab_getsym(symtab, i, symp);
2721 
2722 		if ((cmp = strcmp(name, strs + symp->st_name)) == 0) {
2723 			if (idp != NULL)
2724 				*idp = i;
2725 			return (symp);
2726 		}
2727 
2728 		if (cmp < 0)
2729 			max = mid - 1;
2730 		else
2731 			min = mid + 1;
2732 	}
2733 
2734 	return (NULL);
2735 }
2736 
2737 /*
2738  * Use a linear search to do the work of sym_by_name().
2739  */
2740 static GElf_Sym *
sym_by_name_linear(sym_tbl_t * symtab,const char * name,GElf_Sym * symp,uint_t * idp)2741 sym_by_name_linear(sym_tbl_t *symtab, const char *name, GElf_Sym *symp,
2742     uint_t *idp)
2743 {
2744 	size_t symn = symtab->sym_symn;
2745 	char *strs = symtab->sym_strs;
2746 	int i;
2747 
2748 	if (symtab->sym_data_pri == NULL || symn == 0 || strs == NULL)
2749 		return (NULL);
2750 
2751 	for (i = 0; i < symn; i++) {
2752 		if (symtab_getsym(symtab, i, symp) &&
2753 		    strcmp(name, strs + symp->st_name) == 0) {
2754 			if (idp)
2755 				*idp = i;
2756 			return (symp);
2757 		}
2758 	}
2759 
2760 	return (NULL);
2761 }
2762 
2763 /*
2764  * Look up a symbol by name in the specified symbol table.
2765  *
2766  * Use a linear or a binary search depending on whether or not we
2767  * chose to sort the table in optimize_symtab().
2768  */
2769 static GElf_Sym *
sym_by_name(sym_tbl_t * symtab,const char * name,GElf_Sym * symp,uint_t * idp)2770 sym_by_name(sym_tbl_t *symtab, const char *name, GElf_Sym *symp, uint_t *idp)
2771 {
2772 	if (_libproc_no_qsort) {
2773 		return (sym_by_name_linear(symtab, name, symp, idp));
2774 	} else {
2775 		return (sym_by_name_binary(symtab, name, symp, idp));
2776 	}
2777 }
2778 
2779 /*
2780  * Search the process symbol tables looking for a symbol whose
2781  * value to value+size contain the address specified by addr.
2782  * Return values are:
2783  *	sym_name_buffer containing the symbol name
2784  *	GElf_Sym symbol table entry
2785  *	prsyminfo_t ancillary symbol information
2786  * Returns 0 on success, -1 on failure.
2787  */
2788 static int
i_Pxlookup_by_addr(struct ps_prochandle * P,int lmresolve,uintptr_t addr,char * sym_name_buffer,size_t bufsize,GElf_Sym * symbolp,prsyminfo_t * sip)2789 i_Pxlookup_by_addr(
2790 	struct ps_prochandle *P,
2791 	int lmresolve,			/* use resolve linker object names */
2792 	uintptr_t addr,			/* process address being sought */
2793 	char *sym_name_buffer,		/* buffer for the symbol name */
2794 	size_t bufsize,			/* size of sym_name_buffer */
2795 	GElf_Sym *symbolp,		/* returned symbol table entry */
2796 	prsyminfo_t *sip)		/* returned symbol info */
2797 {
2798 	GElf_Sym	*symp;
2799 	char		*name;
2800 	GElf_Sym	sym1, *sym1p = NULL;
2801 	GElf_Sym	sym2, *sym2p = NULL;
2802 	char		*name1 = NULL;
2803 	char		*name2 = NULL;
2804 	uint_t		i1;
2805 	uint_t		i2;
2806 	map_info_t	*mptr;
2807 	file_info_t	*fptr;
2808 
2809 	(void) Prd_agent(P);
2810 
2811 	if ((mptr = Paddr2mptr(P, addr)) == NULL ||	/* no such address */
2812 	    (fptr = build_map_symtab(P, mptr)) == NULL || /* no mapped file */
2813 	    fptr->file_elf == NULL)			/* not an ELF file */
2814 		return (-1);
2815 
2816 	/*
2817 	 * Adjust the address by the load object base address in
2818 	 * case the address turns out to be in a shared library.
2819 	 */
2820 	addr -= fptr->file_dyn_base;
2821 
2822 	/*
2823 	 * Search both symbol tables, symtab first, then dynsym.
2824 	 */
2825 	if ((sym1p = sym_by_addr(&fptr->file_symtab, addr, &sym1, &i1)) != NULL)
2826 		name1 = fptr->file_symtab.sym_strs + sym1.st_name;
2827 	if ((sym2p = sym_by_addr(&fptr->file_dynsym, addr, &sym2, &i2)) != NULL)
2828 		name2 = fptr->file_dynsym.sym_strs + sym2.st_name;
2829 
2830 	if ((symp = sym_prefer(sym1p, name1, sym2p, name2)) == NULL)
2831 		return (-1);
2832 
2833 	name = (symp == sym1p) ? name1 : name2;
2834 	if (bufsize > 0) {
2835 		(void) strncpy(sym_name_buffer, name, bufsize);
2836 		sym_name_buffer[bufsize - 1] = '\0';
2837 	}
2838 
2839 	*symbolp = *symp;
2840 	if (sip != NULL) {
2841 		sip->prs_name = bufsize == 0 ? NULL : sym_name_buffer;
2842 		if (lmresolve && (fptr->file_rname != NULL))
2843 			sip->prs_object = fptr->file_rbase;
2844 		else
2845 			sip->prs_object = fptr->file_lbase;
2846 		sip->prs_id = (symp == sym1p) ? i1 : i2;
2847 		sip->prs_table = (symp == sym1p) ? PR_SYMTAB : PR_DYNSYM;
2848 		sip->prs_lmid = (fptr->file_lo == NULL) ? LM_ID_BASE :
2849 		    fptr->file_lo->rl_lmident;
2850 	}
2851 
2852 	if (GELF_ST_TYPE(symbolp->st_info) != STT_TLS)
2853 		symbolp->st_value += fptr->file_dyn_base;
2854 
2855 	return (0);
2856 }
2857 
2858 int
Pxlookup_by_addr(struct ps_prochandle * P,uintptr_t addr,char * buf,size_t bufsize,GElf_Sym * symp,prsyminfo_t * sip)2859 Pxlookup_by_addr(struct ps_prochandle *P, uintptr_t addr, char *buf,
2860     size_t bufsize, GElf_Sym *symp, prsyminfo_t *sip)
2861 {
2862 	return (i_Pxlookup_by_addr(P, B_FALSE, addr, buf, bufsize, symp, sip));
2863 }
2864 
2865 int
Pxlookup_by_addr_resolved(struct ps_prochandle * P,uintptr_t addr,char * buf,size_t bufsize,GElf_Sym * symp,prsyminfo_t * sip)2866 Pxlookup_by_addr_resolved(struct ps_prochandle *P, uintptr_t addr, char *buf,
2867     size_t bufsize, GElf_Sym *symp, prsyminfo_t *sip)
2868 {
2869 	return (i_Pxlookup_by_addr(P, B_TRUE, addr, buf, bufsize, symp, sip));
2870 }
2871 
2872 int
Plookup_by_addr(struct ps_prochandle * P,uintptr_t addr,char * buf,size_t size,GElf_Sym * symp)2873 Plookup_by_addr(struct ps_prochandle *P, uintptr_t addr, char *buf,
2874     size_t size, GElf_Sym *symp)
2875 {
2876 	return (i_Pxlookup_by_addr(P, B_FALSE, addr, buf, size, symp, NULL));
2877 }
2878 
2879 /*
2880  * Search the process symbol tables looking for a symbol whose name matches the
2881  * specified name and whose object and link map optionally match the specified
2882  * parameters.  On success, the function returns 0 and fills in the GElf_Sym
2883  * symbol table entry.  On failure, -1 is returned.
2884  */
2885 int
Pxlookup_by_name(struct ps_prochandle * P,Lmid_t lmid,const char * oname,const char * sname,GElf_Sym * symp,prsyminfo_t * sip)2886 Pxlookup_by_name(
2887 	struct ps_prochandle *P,
2888 	Lmid_t lmid,			/* link map to match, or -1 for any */
2889 	const char *oname,		/* load object name */
2890 	const char *sname,		/* symbol name */
2891 	GElf_Sym *symp,			/* returned symbol table entry */
2892 	prsyminfo_t *sip)		/* returned symbol info */
2893 {
2894 	map_info_t *mptr;
2895 	file_info_t *fptr;
2896 	int cnt;
2897 
2898 	GElf_Sym sym;
2899 	prsyminfo_t si;
2900 	int rv = -1;
2901 	uint_t id;
2902 
2903 	if (oname == PR_OBJ_EVERY) {
2904 		/* create all the file_info_t's for all the mappings */
2905 		(void) Prd_agent(P);
2906 		cnt = P->num_files;
2907 		fptr = list_head(&P->file_head);
2908 	} else {
2909 		cnt = 1;
2910 		if ((mptr = object_name_to_map(P, lmid, oname)) == NULL ||
2911 		    (fptr = build_map_symtab(P, mptr)) == NULL)
2912 			return (-1);
2913 	}
2914 
2915 	/*
2916 	 * Iterate through the loaded object files and look for the symbol
2917 	 * name in the .symtab and .dynsym of each.  If we encounter a match
2918 	 * with SHN_UNDEF, keep looking in hopes of finding a better match.
2919 	 * This means that a name such as "puts" will match the puts function
2920 	 * in libc instead of matching the puts PLT entry in the a.out file.
2921 	 */
2922 	for (; cnt > 0; cnt--, fptr = list_next(&P->file_head, fptr)) {
2923 		Pbuild_file_symtab(P, fptr);
2924 
2925 		if (fptr->file_elf == NULL)
2926 			continue;
2927 
2928 		if (lmid != PR_LMID_EVERY && fptr->file_lo != NULL &&
2929 		    lmid != fptr->file_lo->rl_lmident)
2930 			continue;
2931 
2932 		if (fptr->file_symtab.sym_data_pri != NULL &&
2933 		    sym_by_name(&fptr->file_symtab, sname, symp, &id)) {
2934 			if (sip != NULL) {
2935 				sip->prs_id = id;
2936 				sip->prs_table = PR_SYMTAB;
2937 				sip->prs_object = oname;
2938 				sip->prs_name = sname;
2939 				sip->prs_lmid = fptr->file_lo == NULL ?
2940 				    LM_ID_BASE : fptr->file_lo->rl_lmident;
2941 			}
2942 		} else if (fptr->file_dynsym.sym_data_pri != NULL &&
2943 		    sym_by_name(&fptr->file_dynsym, sname, symp, &id)) {
2944 			if (sip != NULL) {
2945 				sip->prs_id = id;
2946 				sip->prs_table = PR_DYNSYM;
2947 				sip->prs_object = oname;
2948 				sip->prs_name = sname;
2949 				sip->prs_lmid = fptr->file_lo == NULL ?
2950 				    LM_ID_BASE : fptr->file_lo->rl_lmident;
2951 			}
2952 		} else {
2953 			continue;
2954 		}
2955 
2956 		if (GELF_ST_TYPE(symp->st_info) != STT_TLS)
2957 			symp->st_value += fptr->file_dyn_base;
2958 
2959 		if (symp->st_shndx != SHN_UNDEF)
2960 			return (0);
2961 
2962 		if (rv != 0) {
2963 			if (sip != NULL)
2964 				si = *sip;
2965 			sym = *symp;
2966 			rv = 0;
2967 		}
2968 	}
2969 
2970 	if (rv == 0) {
2971 		if (sip != NULL)
2972 			*sip = si;
2973 		*symp = sym;
2974 	}
2975 
2976 	return (rv);
2977 }
2978 
2979 /*
2980  * Search the process symbol tables looking for a symbol whose name matches the
2981  * specified name, but without any restriction on the link map id.
2982  */
2983 int
Plookup_by_name(struct ps_prochandle * P,const char * object,const char * symbol,GElf_Sym * symp)2984 Plookup_by_name(struct ps_prochandle *P, const char *object,
2985     const char *symbol, GElf_Sym *symp)
2986 {
2987 	return (Pxlookup_by_name(P, PR_LMID_EVERY, object, symbol, symp, NULL));
2988 }
2989 
2990 /*
2991  * Iterate over the process's address space mappings.
2992  */
2993 static int
i_Pmapping_iter(struct ps_prochandle * P,boolean_t lmresolve,proc_map_f * func,void * cd)2994 i_Pmapping_iter(struct ps_prochandle *P, boolean_t lmresolve,
2995     proc_map_f *func, void *cd)
2996 {
2997 	map_info_t *mptr;
2998 	file_info_t *fptr;
2999 	char *object_name;
3000 	int rc = 0;
3001 	int i;
3002 
3003 	/* create all the file_info_t's for all the mappings */
3004 	(void) Prd_agent(P);
3005 
3006 	for (i = 0, mptr = P->mappings; i < P->map_count; i++, mptr++) {
3007 		if ((fptr = mptr->map_file) == NULL)
3008 			object_name = NULL;
3009 		else if (lmresolve && (fptr->file_rname != NULL))
3010 			object_name = fptr->file_rname;
3011 		else
3012 			object_name = fptr->file_lname;
3013 		if ((rc = func(cd, &mptr->map_pmap, object_name)) != 0)
3014 			return (rc);
3015 	}
3016 	return (0);
3017 }
3018 
3019 int
Pmapping_iter(struct ps_prochandle * P,proc_map_f * func,void * cd)3020 Pmapping_iter(struct ps_prochandle *P, proc_map_f *func, void *cd)
3021 {
3022 	return (i_Pmapping_iter(P, B_FALSE, func, cd));
3023 }
3024 
3025 int
Pmapping_iter_resolved(struct ps_prochandle * P,proc_map_f * func,void * cd)3026 Pmapping_iter_resolved(struct ps_prochandle *P, proc_map_f *func, void *cd)
3027 {
3028 	return (i_Pmapping_iter(P, B_TRUE, func, cd));
3029 }
3030 
3031 /*
3032  * Iterate over the process's mapped objects.
3033  */
3034 static int
i_Pobject_iter(struct ps_prochandle * P,boolean_t lmresolve,proc_map_f * func,void * cd)3035 i_Pobject_iter(struct ps_prochandle *P, boolean_t lmresolve,
3036     proc_map_f *func, void *cd)
3037 {
3038 	map_info_t *mptr;
3039 	file_info_t *fptr;
3040 	int rc = 0;
3041 
3042 	(void) Prd_agent(P); /* create file_info_t's for all the mappings */
3043 	Pupdate_maps(P);
3044 
3045 	for (fptr = list_head(&P->file_head); fptr != NULL;
3046 	    fptr = list_next(&P->file_head, fptr)) {
3047 		const char *lname;
3048 
3049 		if (lmresolve && (fptr->file_rname != NULL))
3050 			lname = fptr->file_rname;
3051 		else if (fptr->file_lname != NULL)
3052 			lname = fptr->file_lname;
3053 		else
3054 			lname = "";
3055 
3056 		if ((mptr = fptr->file_map) == NULL)
3057 			continue;
3058 
3059 		if ((rc = func(cd, &mptr->map_pmap, lname)) != 0)
3060 			return (rc);
3061 
3062 		if (!P->info_valid)
3063 			Pupdate_maps(P);
3064 	}
3065 	return (0);
3066 }
3067 
3068 int
Pobject_iter(struct ps_prochandle * P,proc_map_f * func,void * cd)3069 Pobject_iter(struct ps_prochandle *P, proc_map_f *func, void *cd)
3070 {
3071 	return (i_Pobject_iter(P, B_FALSE, func, cd));
3072 }
3073 
3074 int
Pobject_iter_resolved(struct ps_prochandle * P,proc_map_f * func,void * cd)3075 Pobject_iter_resolved(struct ps_prochandle *P, proc_map_f *func, void *cd)
3076 {
3077 	return (i_Pobject_iter(P, B_TRUE, func, cd));
3078 }
3079 
3080 static char *
i_Pobjname(struct ps_prochandle * P,boolean_t lmresolve,uintptr_t addr,char * buffer,size_t bufsize)3081 i_Pobjname(struct ps_prochandle *P, boolean_t lmresolve, uintptr_t addr,
3082     char *buffer, size_t bufsize)
3083 {
3084 	map_info_t *mptr;
3085 	file_info_t *fptr;
3086 
3087 	/* create all the file_info_t's for all the mappings */
3088 	(void) Prd_agent(P);
3089 
3090 	if ((mptr = Paddr2mptr(P, addr)) == NULL)
3091 		return (NULL);
3092 
3093 	if (!lmresolve) {
3094 		if (((fptr = mptr->map_file) == NULL) ||
3095 		    (fptr->file_lname == NULL))
3096 			return (NULL);
3097 		(void) strlcpy(buffer, fptr->file_lname, bufsize);
3098 		return (buffer);
3099 	}
3100 
3101 	/* Check for a cached copy of the resolved path */
3102 	if (Pfindmap(P, mptr, buffer, bufsize) != NULL)
3103 		return (buffer);
3104 
3105 	return (NULL);
3106 }
3107 
3108 /*
3109  * Given a virtual address, return the name of the underlying
3110  * mapped object (file) as provided by the dynamic linker.
3111  * Return NULL if we can't find any name information for the object.
3112  */
3113 char *
Pobjname(struct ps_prochandle * P,uintptr_t addr,char * buffer,size_t bufsize)3114 Pobjname(struct ps_prochandle *P, uintptr_t addr,
3115     char *buffer, size_t bufsize)
3116 {
3117 	return (i_Pobjname(P, B_FALSE, addr, buffer, bufsize));
3118 }
3119 
3120 /*
3121  * Given a virtual address, try to return a filesystem path to the
3122  * underlying mapped object (file).  If we're in the global zone,
3123  * this path could resolve to an object in another zone.  If we're
3124  * unable return a valid filesystem path, we'll fall back to providing
3125  * the mapped object (file) name provided by the dynamic linker in
3126  * the target process (ie, the object reported by Pobjname()).
3127  */
3128 char *
Pobjname_resolved(struct ps_prochandle * P,uintptr_t addr,char * buffer,size_t bufsize)3129 Pobjname_resolved(struct ps_prochandle *P, uintptr_t addr,
3130     char *buffer, size_t bufsize)
3131 {
3132 	return (i_Pobjname(P, B_TRUE, addr, buffer, bufsize));
3133 }
3134 
3135 /*
3136  * Given a virtual address, return the link map id of the underlying mapped
3137  * object (file), as provided by the dynamic linker.  Return -1 on failure.
3138  */
3139 int
Plmid(struct ps_prochandle * P,uintptr_t addr,Lmid_t * lmidp)3140 Plmid(struct ps_prochandle *P, uintptr_t addr, Lmid_t *lmidp)
3141 {
3142 	map_info_t *mptr;
3143 	file_info_t *fptr;
3144 
3145 	/* create all the file_info_t's for all the mappings */
3146 	(void) Prd_agent(P);
3147 
3148 	if ((mptr = Paddr2mptr(P, addr)) != NULL &&
3149 	    (fptr = mptr->map_file) != NULL && fptr->file_lo != NULL) {
3150 		*lmidp = fptr->file_lo->rl_lmident;
3151 		return (0);
3152 	}
3153 
3154 	return (-1);
3155 }
3156 
3157 /*
3158  * Given an object name and optional lmid, iterate over the object's symbols.
3159  * If which == PR_SYMTAB, search the normal symbol table.
3160  * If which == PR_DYNSYM, search the dynamic symbol table.
3161  */
3162 static int
Psymbol_iter_com(struct ps_prochandle * P,Lmid_t lmid,const char * object_name,int which,int mask,pr_order_t order,proc_xsym_f * func,void * cd)3163 Psymbol_iter_com(struct ps_prochandle *P, Lmid_t lmid, const char *object_name,
3164     int which, int mask, pr_order_t order, proc_xsym_f *func, void *cd)
3165 {
3166 #if STT_NUM != (STT_TLS + 1)
3167 #error "STT_NUM has grown. update Psymbol_iter_com()"
3168 #endif
3169 
3170 	GElf_Sym sym;
3171 	GElf_Shdr shdr;
3172 	map_info_t *mptr;
3173 	file_info_t *fptr;
3174 	sym_tbl_t *symtab;
3175 	size_t symn;
3176 	const char *strs;
3177 	size_t strsz;
3178 	prsyminfo_t si;
3179 	int rv;
3180 	uint_t *map, i, count, ndx;
3181 
3182 	if ((mptr = object_name_to_map(P, lmid, object_name)) == NULL)
3183 		return (-1);
3184 
3185 	if ((fptr = build_map_symtab(P, mptr)) == NULL || /* no mapped file */
3186 	    fptr->file_elf == NULL)			/* not an ELF file */
3187 		return (-1);
3188 
3189 	/*
3190 	 * Search the specified symbol table.
3191 	 */
3192 	switch (which) {
3193 	case PR_SYMTAB:
3194 		symtab = &fptr->file_symtab;
3195 		si.prs_table = PR_SYMTAB;
3196 		break;
3197 	case PR_DYNSYM:
3198 		symtab = &fptr->file_dynsym;
3199 		si.prs_table = PR_DYNSYM;
3200 		break;
3201 	default:
3202 		return (-1);
3203 	}
3204 
3205 	si.prs_object = object_name;
3206 	si.prs_lmid = fptr->file_lo == NULL ?
3207 	    LM_ID_BASE : fptr->file_lo->rl_lmident;
3208 
3209 	symn = symtab->sym_symn;
3210 	strs = symtab->sym_strs;
3211 	strsz = symtab->sym_strsz;
3212 
3213 	switch (order) {
3214 	case PRO_NATURAL:
3215 		map = NULL;
3216 		count = symn;
3217 		break;
3218 	case PRO_BYNAME:
3219 		map = symtab->sym_byname;
3220 		count = symtab->sym_count;
3221 		break;
3222 	case PRO_BYADDR:
3223 		map = symtab->sym_byaddr;
3224 		count = symtab->sym_count;
3225 		break;
3226 	default:
3227 		return (-1);
3228 	}
3229 
3230 	if (symtab->sym_data_pri == NULL || strs == NULL || count == 0)
3231 		return (-1);
3232 
3233 	rv = 0;
3234 
3235 	for (i = 0; i < count; i++) {
3236 		ndx = map == NULL ? i : map[i];
3237 		if (symtab_getsym(symtab, ndx, &sym) != NULL) {
3238 			uint_t s_bind, s_type, type;
3239 
3240 			if (sym.st_name >= strsz)	/* invalid st_name */
3241 				continue;
3242 
3243 			s_bind = GELF_ST_BIND(sym.st_info);
3244 			s_type = GELF_ST_TYPE(sym.st_info);
3245 
3246 			/*
3247 			 * In case you haven't already guessed, this relies on
3248 			 * the bitmask used in <libproc.h> for encoding symbol
3249 			 * type and binding matching the order of STB and STT
3250 			 * constants in <sys/elf.h>.  Changes to ELF must
3251 			 * maintain binary compatibility, so I think this is
3252 			 * reasonably fair game.
3253 			 */
3254 			if (s_bind < STB_NUM && s_type < STT_NUM) {
3255 				type = (1 << (s_type + 8)) | (1 << s_bind);
3256 				if ((type & ~mask) != 0)
3257 					continue;
3258 			} else
3259 				continue; /* Invalid type or binding */
3260 
3261 			if (GELF_ST_TYPE(sym.st_info) != STT_TLS)
3262 				sym.st_value += fptr->file_dyn_base;
3263 
3264 			si.prs_name = strs + sym.st_name;
3265 
3266 			/*
3267 			 * If symbol's type is STT_SECTION, then try to lookup
3268 			 * the name of the corresponding section.
3269 			 */
3270 			if (GELF_ST_TYPE(sym.st_info) == STT_SECTION &&
3271 			    fptr->file_shstrs != NULL &&
3272 			    gelf_getshdr(elf_getscn(fptr->file_elf,
3273 			    sym.st_shndx), &shdr) != NULL &&
3274 			    shdr.sh_name != 0 &&
3275 			    shdr.sh_name < fptr->file_shstrsz)
3276 				si.prs_name = fptr->file_shstrs + shdr.sh_name;
3277 
3278 			si.prs_id = ndx;
3279 			if ((rv = func(cd, &sym, si.prs_name, &si)) != 0)
3280 				break;
3281 		}
3282 	}
3283 
3284 	return (rv);
3285 }
3286 
3287 int
Pxsymbol_iter(struct ps_prochandle * P,Lmid_t lmid,const char * object_name,int which,int mask,proc_xsym_f * func,void * cd)3288 Pxsymbol_iter(struct ps_prochandle *P, Lmid_t lmid, const char *object_name,
3289     int which, int mask, proc_xsym_f *func, void *cd)
3290 {
3291 	return (Psymbol_iter_com(P, lmid, object_name, which, mask,
3292 	    PRO_NATURAL, func, cd));
3293 }
3294 
3295 int
Psymbol_iter_by_lmid(struct ps_prochandle * P,Lmid_t lmid,const char * object_name,int which,int mask,proc_sym_f * func,void * cd)3296 Psymbol_iter_by_lmid(struct ps_prochandle *P, Lmid_t lmid,
3297     const char *object_name, int which, int mask, proc_sym_f *func, void *cd)
3298 {
3299 	return (Psymbol_iter_com(P, lmid, object_name, which, mask,
3300 	    PRO_NATURAL, (proc_xsym_f *)(uintptr_t)func, cd));
3301 }
3302 
3303 int
Psymbol_iter(struct ps_prochandle * P,const char * object_name,int which,int mask,proc_sym_f * func,void * cd)3304 Psymbol_iter(struct ps_prochandle *P,
3305     const char *object_name, int which, int mask, proc_sym_f *func, void *cd)
3306 {
3307 	return (Psymbol_iter_com(P, PR_LMID_EVERY, object_name, which, mask,
3308 	    PRO_NATURAL, (proc_xsym_f *)(uintptr_t)func, cd));
3309 }
3310 
3311 int
Psymbol_iter_by_addr(struct ps_prochandle * P,const char * object_name,int which,int mask,proc_sym_f * func,void * cd)3312 Psymbol_iter_by_addr(struct ps_prochandle *P,
3313     const char *object_name, int which, int mask, proc_sym_f *func, void *cd)
3314 {
3315 	return (Psymbol_iter_com(P, PR_LMID_EVERY, object_name, which, mask,
3316 	    PRO_BYADDR, (proc_xsym_f *)(uintptr_t)func, cd));
3317 }
3318 
3319 int
Psymbol_iter_by_name(struct ps_prochandle * P,const char * object_name,int which,int mask,proc_sym_f * func,void * cd)3320 Psymbol_iter_by_name(struct ps_prochandle *P,
3321     const char *object_name, int which, int mask, proc_sym_f *func, void *cd)
3322 {
3323 	return (Psymbol_iter_com(P, PR_LMID_EVERY, object_name, which, mask,
3324 	    PRO_BYNAME, (proc_xsym_f *)(uintptr_t)func, cd));
3325 }
3326 
3327 /*
3328  * Get the platform string.
3329  */
3330 char *
Pplatform(struct ps_prochandle * P,char * s,size_t n)3331 Pplatform(struct ps_prochandle *P, char *s, size_t n)
3332 {
3333 	return (P->ops.pop_platform(P, s, n, P->data));
3334 }
3335 
3336 /*
3337  * Get the uname(2) information.
3338  */
3339 int
Puname(struct ps_prochandle * P,struct utsname * u)3340 Puname(struct ps_prochandle *P, struct utsname *u)
3341 {
3342 	return (P->ops.pop_uname(P, u, P->data));
3343 }
3344 
3345 /*
3346  * Called from Pcreate(), Pgrab(), and Pfgrab_core() to initialize
3347  * the symbol table heads in the new ps_prochandle.
3348  */
3349 void
Pinitsym(struct ps_prochandle * P)3350 Pinitsym(struct ps_prochandle *P)
3351 {
3352 	P->num_files = 0;
3353 	list_create(&P->file_head, sizeof (file_info_t),
3354 	    offsetof(file_info_t, file_list));
3355 }
3356 
3357 /*
3358  * Called from Prelease() to destroy the symbol tables.
3359  * Must be called by the client after an exec() in the victim process.
3360  */
3361 void
Preset_maps(struct ps_prochandle * P)3362 Preset_maps(struct ps_prochandle *P)
3363 {
3364 	int i;
3365 
3366 	if (P->rap != NULL) {
3367 		rd_delete(P->rap);
3368 		P->rap = NULL;
3369 	}
3370 
3371 	if (P->execname != NULL) {
3372 		free(P->execname);
3373 		P->execname = NULL;
3374 	}
3375 
3376 	if (P->auxv != NULL) {
3377 		free(P->auxv);
3378 		P->auxv = NULL;
3379 		P->nauxv = 0;
3380 	}
3381 
3382 	for (i = 0; i < P->map_count; i++)
3383 		map_info_free(P, &P->mappings[i]);
3384 
3385 	if (P->mappings != NULL) {
3386 		free(P->mappings);
3387 		P->mappings = NULL;
3388 	}
3389 	P->map_count = P->map_alloc = 0;
3390 
3391 	P->info_valid = 0;
3392 }
3393 
/*
 * State handed by Pgetenv() to its Penv_iter() callback, getenv_func().
 */
typedef struct getenv_data {
	char *buf;		/* receives the matching "NAME=value" string */
	size_t bufsize;		/* size of buf, in bytes */
	const char *search;	/* name of the variable being looked for */
	size_t searchlen;	/* length of search, excluding the NUL */
} getenv_data_t;

/*
 * Penv_iter() callback: test whether the environment string nameval is an
 * assignment to the variable described by data (a getenv_data_t).
 *
 * On a match, as much of nameval as fits is copied into d->buf, which is
 * always NUL-terminated, and 1 is returned to stop the iteration.  Otherwise
 * 0 is returned so that the iteration continues.  A NULL nameval never
 * matches.  P and addr are unused.
 */
/*ARGSUSED*/
static int
getenv_func(void *data, struct ps_prochandle *P, uintptr_t addr,
    const char *nameval)
{
	getenv_data_t *d = data;
	size_t nvlen, len;

	if (nameval == NULL)
		return (0);

	/*
	 * A zero-sized buffer cannot hold even the terminating NUL, and
	 * "bufsize - 1" below would wrap around to SIZE_MAX and overrun it.
	 * Nothing can be stored, so report no match.
	 */
	if (d->bufsize == 0)
		return (0);

	nvlen = strlen(nameval);

	/* The string must be strictly longer than the name: "NAME=...". */
	if (d->searchlen >= nvlen ||
	    strncmp(nameval, d->search, d->searchlen) != 0 ||
	    nameval[d->searchlen] != '=')
		return (0);

	/* Truncate to the space available, leaving room for the NUL. */
	len = nvlen;
	if (len > d->bufsize - 1)
		len = d->bufsize - 1;

	(void) memcpy(d->buf, nameval, len);
	d->buf[len] = '\0';

	return (1);
}
3423 
3424 char *
Pgetenv(struct ps_prochandle * P,const char * name,char * buf,size_t buflen)3425 Pgetenv(struct ps_prochandle *P, const char *name, char *buf, size_t buflen)
3426 {
3427 	getenv_data_t d;
3428 
3429 	d.buf = buf;
3430 	d.bufsize = buflen;
3431 	d.search = name;
3432 	d.searchlen = strlen(name);
3433 
3434 	if (Penv_iter(P, getenv_func, &d) == 1) {
3435 		char *equals = strchr(d.buf, '=');
3436 
3437 		if (equals != NULL) {
3438 			(void) memmove(d.buf, equals + 1,
3439 			    d.buf + buflen - equals - 1);
3440 			d.buf[d.buf + buflen - equals] = '\0';
3441 
3442 			return (buf);
3443 		}
3444 	}
3445 
3446 	return (NULL);
3447 }
3448 
3449 /* number of argument or environment pointers to read all at once */
3450 #define	NARG	100
3451 
3452 int
Penv_iter(struct ps_prochandle * P,proc_env_f * func,void * data)3453 Penv_iter(struct ps_prochandle *P, proc_env_f *func, void *data)
3454 {
3455 	const psinfo_t *psp;
3456 	uintptr_t envpoff;
3457 	GElf_Sym sym;
3458 	int ret;
3459 	char *buf, *nameval;
3460 	size_t buflen;
3461 
3462 	int nenv = NARG;
3463 	long envp[NARG];
3464 
3465 	/*
3466 	 * Attempt to find the "_environ" variable in the process.
3467 	 * Failing that, use the original value provided by Ppsinfo().
3468 	 *
3469 	 * The "_environ" variable is initialized by the CRT. We use a rough
3470 	 * heuristic to try and figure out if we have started running before the
3471 	 * CRT has executed by checking if the _environ pointer points to NULL
3472 	 * or not. Once initialized, it will never point to NULL absent an
3473 	 * application manipulating it directly, libc does not do so, even if
3474 	 * one calls clearenv(). There is a rare chance that an application is
3475 	 * messing with the _environ pointer directly; however, in practice that
3476 	 * is much rarer than this case and if someone is, libc is unlikely to
3477 	 * have a good day.
3478 	 *
3479 	 * While it's tempting to look towards libc variables such as
3480 	 * initenv_done and related, we have to remember that we're here because
3481 	 * we haven't actually called  libc_init() or even loaded it!
3482 	 */
3483 	if ((psp = Ppsinfo(P)) == NULL)
3484 		return (-1);
3485 
3486 	envpoff = psp->pr_envp; /* Default if no _environ found */
3487 
3488 	if (Plookup_by_name(P, PR_OBJ_EXEC, "_environ", &sym) == 0) {
3489 		if (P->status.pr_dmodel == PR_MODEL_NATIVE) {
3490 			if (Pread(P, &envpoff, sizeof (envpoff),
3491 			    sym.st_value) != sizeof (envpoff))
3492 				envpoff = psp->pr_envp;
3493 		} else if (P->status.pr_dmodel == PR_MODEL_ILP32) {
3494 			uint32_t envpoff32;
3495 
3496 			if (Pread(P, &envpoff32, sizeof (envpoff32),
3497 			    sym.st_value) != sizeof (envpoff32))
3498 				envpoff = psp->pr_envp;
3499 			else
3500 				envpoff = envpoff32;
3501 		}
3502 
3503 		if (envpoff == 0) {
3504 			envpoff = psp->pr_envp;
3505 		}
3506 	}
3507 
3508 	buflen = 128;
3509 	buf = malloc(buflen);
3510 
3511 	ret = 0;
3512 	for (;;) {
3513 		uintptr_t envoff;
3514 
3515 		if (nenv == NARG) {
3516 			(void) memset(envp, 0, sizeof (envp));
3517 			if (P->status.pr_dmodel == PR_MODEL_NATIVE) {
3518 				if (Pread(P, envp,
3519 				    sizeof (envp), envpoff) <= 0) {
3520 					ret = -1;
3521 					break;
3522 				}
3523 			} else if (P->status.pr_dmodel == PR_MODEL_ILP32) {
3524 				uint32_t e32[NARG];
3525 				int i;
3526 
3527 				(void) memset(e32, 0, sizeof (e32));
3528 				if (Pread(P, e32, sizeof (e32), envpoff) <= 0) {
3529 					ret = -1;
3530 					break;
3531 				}
3532 				for (i = 0; i < NARG; i++)
3533 					envp[i] = e32[i];
3534 			}
3535 			nenv = 0;
3536 		}
3537 
3538 		if ((envoff = envp[nenv++]) == (uintptr_t)NULL)
3539 			break;
3540 
3541 		/*
3542 		 * Attempt to read the string from the process.
3543 		 */
3544 again:
3545 		ret = Pread_string(P, buf, buflen, envoff);
3546 
3547 		if (ret <= 0) {
3548 			nameval = NULL;
3549 		} else if (ret == buflen - 1) {
3550 			free(buf);
3551 			/*
3552 			 * Bail if we have a corrupted environment
3553 			 */
3554 			if (buflen >= ARG_MAX)
3555 				return (-1);
3556 			buflen *= 2;
3557 			buf = malloc(buflen);
3558 			goto again;
3559 		} else {
3560 			nameval = buf;
3561 		}
3562 
3563 		if ((ret = func(data, P, envoff, nameval)) != 0)
3564 			break;
3565 
3566 		envpoff += (P->status.pr_dmodel == PR_MODEL_LP64)? 8 : 4;
3567 	}
3568 
3569 	free(buf);
3570 
3571 	return (ret);
3572 }
3573