xref: /illumos-gate/usr/src/lib/libproc/common/Psymtab.c (revision 72dae0eb2635f5732d4165158288e85a4b68729d)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright 2016 Joyent, Inc.
25  * Copyright (c) 2013 by Delphix. All rights reserved.
26  * Copyright 2023 Oxide Computer Company
27  */
28 
29 #include <assert.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <stddef.h>
33 #include <unistd.h>
34 #include <ctype.h>
35 #include <fcntl.h>
36 #include <string.h>
37 #include <strings.h>
38 #include <memory.h>
39 #include <errno.h>
40 #include <dirent.h>
41 #include <signal.h>
42 #include <limits.h>
43 #include <libgen.h>
44 #include <sys/types.h>
45 #include <sys/stat.h>
46 #include <sys/sysmacros.h>
47 #include <sys/crc32.h>
48 
49 #include "libproc.h"
50 #include "Pcontrol.h"
51 #include "Putil.h"
52 #include "Psymtab_machelf.h"
53 
54 static file_info_t *build_map_symtab(struct ps_prochandle *, map_info_t *);
55 static map_info_t *exec_map(struct ps_prochandle *);
56 static map_info_t *object_to_map(struct ps_prochandle *, Lmid_t, const char *);
57 static map_info_t *object_name_to_map(struct ps_prochandle *,
58 	Lmid_t, const char *);
59 static GElf_Sym *sym_by_name(sym_tbl_t *, const char *, GElf_Sym *, uint_t *);
60 static int read_ehdr32(struct ps_prochandle *, Elf32_Ehdr *, uint_t *,
61     uintptr_t);
62 #ifdef _LP64
63 static int read_ehdr64(struct ps_prochandle *, Elf64_Ehdr *, uint_t *,
64     uintptr_t);
65 #endif
66 static uint32_t psym_crc32[] = { CRC32_TABLE };
67 
68 #define	DATA_TYPES	\
69 	((1 << STT_OBJECT) | (1 << STT_FUNC) | \
70 	(1 << STT_COMMON) | (1 << STT_TLS))
71 #define	IS_DATA_TYPE(tp)	(((1 << (tp)) & DATA_TYPES) != 0)
72 
73 #define	MA_RWX	(MA_READ | MA_WRITE | MA_EXEC)
74 
75 /*
76  * Minimum and maximum length of a build-id that we'll accept. Generally it's a
77  * 20 byte SHA1 and it's expected that the first byte (which is two ascii
78  * characters) indicates a directory and the remaining bytes become the file
79  * name. Therefore, our minimum length is at least 2 bytes (one for the
80  * directory and one for the name) and the max is a bit over the minimum -- 64,
81  * just in case folks do something odd. The string length is three times the max
82  * length. This accounts for the fact that each byte is two characters, a null
83  * terminator, and the directory '/' character.
84  */
85 #define	MINBUILDID	2
86 #define	MAXBUILDID	64
87 #define	BUILDID_STRLEN	(3*MAXBUILDID)
88 #define	BUILDID_NAME	".note.gnu.build-id"
89 #define	DBGLINK_NAME	".gnu_debuglink"
90 
91 typedef enum {
92 	PRO_NATURAL,
93 	PRO_BYADDR,
94 	PRO_BYNAME
95 } pr_order_t;
96 
97 static int
addr_cmp(const void * aa,const void * bb)98 addr_cmp(const void *aa, const void *bb)
99 {
100 	uintptr_t a = *((uintptr_t *)aa);
101 	uintptr_t b = *((uintptr_t *)bb);
102 
103 	if (a > b)
104 		return (1);
105 	if (a < b)
106 		return (-1);
107 	return (0);
108 }
109 
110 /*
111  * This function creates a list of addresses for a load object's sections.
112  * The list is in ascending address order and alternates start address
113  * then end address for each section we're interested in. The function
114  * returns a pointer to the list, which must be freed by the caller.
115  */
116 static uintptr_t *
get_saddrs(struct ps_prochandle * P,uintptr_t ehdr_start,uint_t * n)117 get_saddrs(struct ps_prochandle *P, uintptr_t ehdr_start, uint_t *n)
118 {
119 	uintptr_t a, addr, *addrs, last = 0;
120 	uint_t i, naddrs = 0, unordered = 0;
121 
122 	if (P->status.pr_dmodel == PR_MODEL_ILP32) {
123 		Elf32_Ehdr ehdr;
124 		Elf32_Phdr phdr;
125 		uint_t phnum;
126 
127 		if (read_ehdr32(P, &ehdr, &phnum, ehdr_start) != 0)
128 			return (NULL);
129 
130 		addrs = malloc(sizeof (uintptr_t) * phnum * 2);
131 		a = ehdr_start + ehdr.e_phoff;
132 		for (i = 0; i < phnum; i++, a += ehdr.e_phentsize) {
133 			if (Pread(P, &phdr, sizeof (phdr), a) !=
134 			    sizeof (phdr)) {
135 				free(addrs);
136 				return (NULL);
137 			}
138 			if (phdr.p_type != PT_LOAD || phdr.p_memsz == 0)
139 				continue;
140 
141 			addr = phdr.p_vaddr;
142 			if (ehdr.e_type == ET_DYN)
143 				addr += ehdr_start;
144 			if (last > addr)
145 				unordered = 1;
146 			addrs[naddrs++] = addr;
147 			addrs[naddrs++] = last = addr + phdr.p_memsz - 1;
148 		}
149 #ifdef _LP64
150 	} else {
151 		Elf64_Ehdr ehdr;
152 		Elf64_Phdr phdr;
153 		uint_t phnum;
154 
155 		if (read_ehdr64(P, &ehdr, &phnum, ehdr_start) != 0)
156 			return (NULL);
157 
158 		addrs = malloc(sizeof (uintptr_t) * phnum * 2);
159 		a = ehdr_start + ehdr.e_phoff;
160 		for (i = 0; i < phnum; i++, a += ehdr.e_phentsize) {
161 			if (Pread(P, &phdr, sizeof (phdr), a) !=
162 			    sizeof (phdr)) {
163 				free(addrs);
164 				return (NULL);
165 			}
166 			if (phdr.p_type != PT_LOAD || phdr.p_memsz == 0)
167 				continue;
168 
169 			addr = phdr.p_vaddr;
170 			if (ehdr.e_type == ET_DYN)
171 				addr += ehdr_start;
172 			if (last > addr)
173 				unordered = 1;
174 			addrs[naddrs++] = addr;
175 			addrs[naddrs++] = last = addr + phdr.p_memsz - 1;
176 		}
177 #endif
178 	}
179 
180 	if (unordered)
181 		qsort(addrs, naddrs, sizeof (uintptr_t), addr_cmp);
182 
183 	*n = naddrs;
184 	return (addrs);
185 }
186 
187 /*
188  * Allocation function for a new file_info_t
189  */
190 file_info_t *
file_info_new(struct ps_prochandle * P,map_info_t * mptr)191 file_info_new(struct ps_prochandle *P, map_info_t *mptr)
192 {
193 	file_info_t *fptr;
194 	map_info_t *mp;
195 	uintptr_t mstart, mend, sstart, send;
196 	uint_t i;
197 
198 	if ((fptr = calloc(1, sizeof (file_info_t))) == NULL)
199 		return (NULL);
200 
201 	list_insert_tail(&P->file_head, fptr);
202 	(void) strcpy(fptr->file_pname, mptr->map_pmap.pr_mapname);
203 	mptr->map_file = fptr;
204 	fptr->file_ref = 1;
205 	fptr->file_fd = -1;
206 	fptr->file_dbgfile = -1;
207 	P->num_files++;
208 
209 	/*
210 	 * To figure out which map_info_t instances correspond to the mappings
211 	 * for this load object we try to obtain the start and end address
212 	 * for each section of our in-memory ELF image. If successful, we
213 	 * walk down the list of addresses and the list of map_info_t
214 	 * instances in lock step to correctly find the mappings that
215 	 * correspond to this load object.
216 	 */
217 	if ((fptr->file_saddrs = get_saddrs(P, mptr->map_pmap.pr_vaddr,
218 	    &fptr->file_nsaddrs)) == NULL)
219 		return (fptr);
220 
221 	mp = P->mappings;
222 	i = 0;
223 	while (mp < P->mappings + P->map_count && i < fptr->file_nsaddrs) {
224 
225 		/* Calculate the start and end of the mapping and section */
226 		mstart = mp->map_pmap.pr_vaddr;
227 		mend = mp->map_pmap.pr_vaddr + mp->map_pmap.pr_size;
228 		sstart = fptr->file_saddrs[i];
229 		send = fptr->file_saddrs[i + 1];
230 
231 		if (mend <= sstart) {
232 			/* This mapping is below the current section */
233 			mp++;
234 		} else if (mstart >= send) {
235 			/* This mapping is above the current section */
236 			i += 2;
237 		} else {
238 			/* This mapping overlaps the current section */
239 			if (mp->map_file == NULL) {
240 				Pdprintf("file_info_new: associating "
241 				    "segment at %p\n",
242 				    (void *)mp->map_pmap.pr_vaddr);
243 				mp->map_file = fptr;
244 				fptr->file_ref++;
245 			} else {
246 				Pdprintf("file_info_new: segment at %p "
247 				    "already associated with %s\n",
248 				    (void *)mp->map_pmap.pr_vaddr,
249 				    (mp == mptr ? "this file" :
250 				    mp->map_file->file_pname));
251 			}
252 			mp++;
253 		}
254 	}
255 
256 	return (fptr);
257 }
258 
259 /*
260  * Deallocation function for a file_info_t
261  */
262 static void
file_info_free(struct ps_prochandle * P,file_info_t * fptr)263 file_info_free(struct ps_prochandle *P, file_info_t *fptr)
264 {
265 	if (--fptr->file_ref == 0) {
266 		list_remove(&P->file_head, fptr);
267 		if (fptr->file_symtab.sym_elf) {
268 			(void) elf_end(fptr->file_symtab.sym_elf);
269 			free(fptr->file_symtab.sym_elfmem);
270 		}
271 		if (fptr->file_symtab.sym_byname)
272 			free(fptr->file_symtab.sym_byname);
273 		if (fptr->file_symtab.sym_byaddr)
274 			free(fptr->file_symtab.sym_byaddr);
275 
276 		if (fptr->file_dynsym.sym_elf) {
277 			(void) elf_end(fptr->file_dynsym.sym_elf);
278 			free(fptr->file_dynsym.sym_elfmem);
279 		}
280 		if (fptr->file_dynsym.sym_byname)
281 			free(fptr->file_dynsym.sym_byname);
282 		if (fptr->file_dynsym.sym_byaddr)
283 			free(fptr->file_dynsym.sym_byaddr);
284 
285 		if (fptr->file_lo)
286 			free(fptr->file_lo);
287 		if (fptr->file_lname)
288 			free(fptr->file_lname);
289 		if (fptr->file_rname)
290 			free(fptr->file_rname);
291 		if (fptr->file_elf)
292 			(void) elf_end(fptr->file_elf);
293 		if (fptr->file_elfmem != NULL)
294 			free(fptr->file_elfmem);
295 		if (fptr->file_fd >= 0)
296 			(void) close(fptr->file_fd);
297 		if (fptr->file_dbgelf)
298 			(void) elf_end(fptr->file_dbgelf);
299 		if (fptr->file_dbgfile >= 0)
300 			(void) close(fptr->file_dbgfile);
301 		ctf_close(fptr->file_ctfp);
302 		free(fptr->file_ctf_buf);
303 		if (fptr->file_saddrs)
304 			free(fptr->file_saddrs);
305 		free(fptr);
306 		P->num_files--;
307 	}
308 }
309 
310 /*
311  * Deallocation function for a map_info_t
312  */
313 static void
map_info_free(struct ps_prochandle * P,map_info_t * mptr)314 map_info_free(struct ps_prochandle *P, map_info_t *mptr)
315 {
316 	file_info_t *fptr;
317 
318 	if ((fptr = mptr->map_file) != NULL) {
319 		if (fptr->file_map == mptr)
320 			fptr->file_map = NULL;
321 		file_info_free(P, fptr);
322 	}
323 	if (P->execname && mptr == P->map_exec) {
324 		free(P->execname);
325 		P->execname = NULL;
326 	}
327 	if (P->auxv && (mptr == P->map_exec || mptr == P->map_ldso)) {
328 		free(P->auxv);
329 		P->auxv = NULL;
330 		P->nauxv = 0;
331 	}
332 	if (mptr == P->map_exec)
333 		P->map_exec = NULL;
334 	if (mptr == P->map_ldso)
335 		P->map_ldso = NULL;
336 }
337 
338 /*
339  * Call-back function for librtld_db to iterate through all of its shared
340  * libraries.  We use this to get the load object names for the mappings.
341  */
342 static int
map_iter(const rd_loadobj_t * lop,void * cd)343 map_iter(const rd_loadobj_t *lop, void *cd)
344 {
345 	char buf[PATH_MAX];
346 	struct ps_prochandle *P = cd;
347 	map_info_t *mptr;
348 	file_info_t *fptr;
349 
350 	Pdprintf("encountered rd object at %p\n", (void *)lop->rl_base);
351 
352 	if ((mptr = Paddr2mptr(P, lop->rl_base)) == NULL) {
353 		Pdprintf("map_iter: base address doesn't match any mapping\n");
354 		return (1); /* Base address does not match any mapping */
355 	}
356 
357 	if ((fptr = mptr->map_file) == NULL &&
358 	    (fptr = file_info_new(P, mptr)) == NULL) {
359 		Pdprintf("map_iter: failed to allocate a new file_info_t\n");
360 		return (1); /* Failed to allocate a new file_info_t */
361 	}
362 
363 	if ((fptr->file_lo == NULL) &&
364 	    (fptr->file_lo = malloc(sizeof (rd_loadobj_t))) == NULL) {
365 		Pdprintf("map_iter: failed to allocate rd_loadobj_t\n");
366 		file_info_free(P, fptr);
367 		return (1); /* Failed to allocate rd_loadobj_t */
368 	}
369 
370 	fptr->file_map = mptr;
371 	*fptr->file_lo = *lop;
372 
373 	fptr->file_lo->rl_plt_base = fptr->file_plt_base;
374 	fptr->file_lo->rl_plt_size = fptr->file_plt_size;
375 
376 	if (fptr->file_lname) {
377 		free(fptr->file_lname);
378 		fptr->file_lname = NULL;
379 		fptr->file_lbase = NULL;
380 	}
381 	if (fptr->file_rname) {
382 		free(fptr->file_rname);
383 		fptr->file_rname = NULL;
384 		fptr->file_rbase = NULL;
385 	}
386 
387 	if (Pread_string(P, buf, sizeof (buf), lop->rl_nameaddr) > 0) {
388 		if ((fptr->file_lname = strdup(buf)) != NULL)
389 			fptr->file_lbase = basename(fptr->file_lname);
390 	} else {
391 		Pdprintf("map_iter: failed to read string at %p\n",
392 		    (void *)lop->rl_nameaddr);
393 	}
394 
395 	if ((Pfindmap(P, mptr, buf, sizeof (buf)) != NULL) &&
396 	    ((fptr->file_rname = strdup(buf)) != NULL))
397 		fptr->file_rbase = basename(fptr->file_rname);
398 
399 	Pdprintf("loaded rd object %s lmid %lx\n",
400 	    fptr->file_lname ? buf : "<NULL>", lop->rl_lmident);
401 	return (1);
402 }
403 
404 static void
map_set(struct ps_prochandle * P,map_info_t * mptr,const char * lname)405 map_set(struct ps_prochandle *P, map_info_t *mptr, const char *lname)
406 {
407 	file_info_t *fptr;
408 	char buf[PATH_MAX];
409 
410 	if ((fptr = mptr->map_file) == NULL &&
411 	    (fptr = file_info_new(P, mptr)) == NULL)
412 		return; /* Failed to allocate a new file_info_t */
413 
414 	fptr->file_map = mptr;
415 
416 	if ((fptr->file_lo == NULL) &&
417 	    (fptr->file_lo = malloc(sizeof (rd_loadobj_t))) == NULL) {
418 		file_info_free(P, fptr);
419 		return; /* Failed to allocate rd_loadobj_t */
420 	}
421 
422 	(void) memset(fptr->file_lo, 0, sizeof (rd_loadobj_t));
423 	fptr->file_lo->rl_base = mptr->map_pmap.pr_vaddr;
424 	fptr->file_lo->rl_bend =
425 	    mptr->map_pmap.pr_vaddr + mptr->map_pmap.pr_size;
426 
427 	fptr->file_lo->rl_plt_base = fptr->file_plt_base;
428 	fptr->file_lo->rl_plt_size = fptr->file_plt_size;
429 
430 	if ((fptr->file_lname == NULL) &&
431 	    (fptr->file_lname = strdup(lname)) != NULL)
432 		fptr->file_lbase = basename(fptr->file_lname);
433 
434 	if ((Pfindmap(P, mptr, buf, sizeof (buf)) != NULL) &&
435 	    ((fptr->file_rname = strdup(buf)) != NULL))
436 		fptr->file_rbase = basename(fptr->file_rname);
437 }
438 
439 static void
load_static_maps(struct ps_prochandle * P)440 load_static_maps(struct ps_prochandle *P)
441 {
442 	map_info_t *mptr;
443 
444 	/*
445 	 * Construct the map for the a.out.
446 	 */
447 	if ((mptr = object_name_to_map(P, PR_LMID_EVERY, PR_OBJ_EXEC)) != NULL)
448 		map_set(P, mptr, "a.out");
449 
450 	/*
451 	 * If the dynamic linker exists for this process,
452 	 * construct the map for it.
453 	 */
454 	if (Pgetauxval(P, AT_BASE) != -1L &&
455 	    (mptr = object_name_to_map(P, PR_LMID_EVERY, PR_OBJ_LDSO)) != NULL)
456 		map_set(P, mptr, "ld.so.1");
457 }
458 
459 int
Preadmaps(struct ps_prochandle * P,prmap_t ** Pmapp,ssize_t * nmapp)460 Preadmaps(struct ps_prochandle *P, prmap_t **Pmapp, ssize_t *nmapp)
461 {
462 	return (P->ops.pop_read_maps(P, Pmapp, nmapp, P->data));
463 }
464 
465 /*
466  * Go through all the address space mappings, validating or updating
467  * the information already gathered, or gathering new information.
468  *
469  * This function is only called when we suspect that the mappings have changed
470  * because this is the first time we're calling it or because of rtld activity.
471  */
472 void
Pupdate_maps(struct ps_prochandle * P)473 Pupdate_maps(struct ps_prochandle *P)
474 {
475 	prmap_t *Pmap = NULL;
476 	prmap_t *pmap;
477 	ssize_t nmap;
478 	int i;
479 	uint_t oldmapcount;
480 	map_info_t *newmap, *newp;
481 	map_info_t *mptr;
482 
483 	if (P->info_valid || P->state == PS_UNDEAD)
484 		return;
485 
486 	Preadauxvec(P);
487 
488 	if (Preadmaps(P, &Pmap, &nmap) != 0)
489 		return;
490 
491 	if ((newmap = calloc(1, nmap * sizeof (map_info_t))) == NULL)
492 		return;
493 
494 	/*
495 	 * We try to merge any file information we may have for existing
496 	 * mappings, to avoid having to rebuild the file info.
497 	 */
498 	mptr = P->mappings;
499 	pmap = Pmap;
500 	newp = newmap;
501 	oldmapcount = P->map_count;
502 	for (i = 0; i < nmap; i++, pmap++, newp++) {
503 
504 		if (oldmapcount == 0) {
505 			/*
506 			 * We've exhausted all the old mappings.  Every new
507 			 * mapping should be added.
508 			 */
509 			newp->map_pmap = *pmap;
510 
511 		} else if (pmap->pr_vaddr == mptr->map_pmap.pr_vaddr &&
512 		    pmap->pr_size == mptr->map_pmap.pr_size &&
513 		    pmap->pr_offset == mptr->map_pmap.pr_offset &&
514 		    (pmap->pr_mflags & ~(MA_BREAK | MA_STACK)) ==
515 		    (mptr->map_pmap.pr_mflags & ~(MA_BREAK | MA_STACK)) &&
516 		    pmap->pr_pagesize == mptr->map_pmap.pr_pagesize &&
517 		    pmap->pr_shmid == mptr->map_pmap.pr_shmid &&
518 		    strcmp(pmap->pr_mapname, mptr->map_pmap.pr_mapname) == 0) {
519 
520 			/*
521 			 * This mapping matches exactly.  Copy over the old
522 			 * mapping, taking care to get the latest flags.
523 			 * Make sure the associated file_info_t is updated
524 			 * appropriately.
525 			 */
526 			*newp = *mptr;
527 			if (P->map_exec == mptr)
528 				P->map_exec = newp;
529 			if (P->map_ldso == mptr)
530 				P->map_ldso = newp;
531 			newp->map_pmap.pr_mflags = pmap->pr_mflags;
532 			if (mptr->map_file != NULL &&
533 			    mptr->map_file->file_map == mptr)
534 				mptr->map_file->file_map = newp;
535 			oldmapcount--;
536 			mptr++;
537 
538 		} else if (pmap->pr_vaddr + pmap->pr_size >
539 		    mptr->map_pmap.pr_vaddr) {
540 
541 			/*
542 			 * The old mapping doesn't exist any more, remove it
543 			 * from the list.
544 			 */
545 			map_info_free(P, mptr);
546 			oldmapcount--;
547 			i--;
548 			newp--;
549 			pmap--;
550 			mptr++;
551 
552 		} else {
553 
554 			/*
555 			 * This is a new mapping, add it directly.
556 			 */
557 			newp->map_pmap = *pmap;
558 		}
559 	}
560 
561 	/*
562 	 * Free any old maps
563 	 */
564 	while (oldmapcount) {
565 		map_info_free(P, mptr);
566 		oldmapcount--;
567 		mptr++;
568 	}
569 
570 	free(Pmap);
571 	if (P->mappings != NULL)
572 		free(P->mappings);
573 	P->mappings = newmap;
574 	P->map_count = P->map_alloc = nmap;
575 	P->info_valid = 1;
576 
577 	/*
578 	 * Consult librtld_db to get the load object
579 	 * names for all of the shared libraries.
580 	 */
581 	if (P->rap != NULL)
582 		(void) rd_loadobj_iter(P->rap, map_iter, P);
583 }
584 
585 /*
586  * Update all of the mappings and rtld_db as if by Pupdate_maps(), and then
587  * forcibly cache all of the symbol tables associated with all object files.
588  */
589 void
Pupdate_syms(struct ps_prochandle * P)590 Pupdate_syms(struct ps_prochandle *P)
591 {
592 	file_info_t *fptr;
593 
594 	Pupdate_maps(P);
595 
596 	for (fptr = list_head(&P->file_head); fptr != NULL;
597 	    fptr = list_next(&P->file_head, fptr)) {
598 		Pbuild_file_symtab(P, fptr);
599 		(void) Pbuild_file_ctf(P, fptr);
600 	}
601 }
602 
603 /*
604  * Return the librtld_db agent handle for the victim process.
605  * The handle will become invalid at the next successful exec() and the
606  * client (caller of proc_rd_agent()) must not use it beyond that point.
607  * If the process is already dead, we've already tried our best to
608  * create the agent during core file initialization.
609  */
610 rd_agent_t *
Prd_agent(struct ps_prochandle * P)611 Prd_agent(struct ps_prochandle *P)
612 {
613 	if (P->rap == NULL && P->state != PS_DEAD && P->state != PS_IDLE) {
614 		Pupdate_maps(P);
615 		if (P->num_files == 0)
616 			load_static_maps(P);
617 		rd_log(_libproc_debug);
618 		if ((P->rap = rd_new(P)) != NULL)
619 			(void) rd_loadobj_iter(P->rap, map_iter, P);
620 	}
621 	return (P->rap);
622 }
623 
624 /*
625  * Return the prmap_t structure containing 'addr', but only if it
626  * is in the dynamic linker's link map and is the text section.
627  */
628 const prmap_t *
Paddr_to_text_map(struct ps_prochandle * P,uintptr_t addr)629 Paddr_to_text_map(struct ps_prochandle *P, uintptr_t addr)
630 {
631 	map_info_t *mptr;
632 
633 	if (!P->info_valid)
634 		Pupdate_maps(P);
635 
636 	if ((mptr = Paddr2mptr(P, addr)) != NULL) {
637 		file_info_t *fptr = build_map_symtab(P, mptr);
638 		const prmap_t *pmp = &mptr->map_pmap;
639 
640 		/*
641 		 * Assume that if rl_data_base is NULL, it means that no
642 		 * data section was found for this load object, and that
643 		 * a section must be text. Otherwise, a section will be
644 		 * text unless it ends above the start of the data
645 		 * section.
646 		 */
647 		if (fptr != NULL && fptr->file_lo != NULL &&
648 		    (fptr->file_lo->rl_data_base == (uintptr_t)NULL ||
649 		    pmp->pr_vaddr + pmp->pr_size <=
650 		    fptr->file_lo->rl_data_base))
651 			return (pmp);
652 	}
653 
654 	return (NULL);
655 }
656 
657 /*
658  * Return the prmap_t structure containing 'addr' (no restrictions on
659  * the type of mapping).
660  */
661 const prmap_t *
Paddr_to_map(struct ps_prochandle * P,uintptr_t addr)662 Paddr_to_map(struct ps_prochandle *P, uintptr_t addr)
663 {
664 	map_info_t *mptr;
665 
666 	if (!P->info_valid)
667 		Pupdate_maps(P);
668 
669 	if ((mptr = Paddr2mptr(P, addr)) != NULL)
670 		return (&mptr->map_pmap);
671 
672 	return (NULL);
673 }
674 
675 /*
676  * Convert a full or partial load object name to the prmap_t for its
677  * corresponding primary text mapping.
678  */
679 const prmap_t *
Plmid_to_map(struct ps_prochandle * P,Lmid_t lmid,const char * name)680 Plmid_to_map(struct ps_prochandle *P, Lmid_t lmid, const char *name)
681 {
682 	map_info_t *mptr;
683 
684 	if (name == PR_OBJ_EVERY)
685 		return (NULL); /* A reasonable mistake */
686 
687 	if ((mptr = object_name_to_map(P, lmid, name)) != NULL)
688 		return (&mptr->map_pmap);
689 
690 	return (NULL);
691 }
692 
693 const prmap_t *
Pname_to_map(struct ps_prochandle * P,const char * name)694 Pname_to_map(struct ps_prochandle *P, const char *name)
695 {
696 	return (Plmid_to_map(P, PR_LMID_EVERY, name));
697 }
698 
699 const rd_loadobj_t *
Paddr_to_loadobj(struct ps_prochandle * P,uintptr_t addr)700 Paddr_to_loadobj(struct ps_prochandle *P, uintptr_t addr)
701 {
702 	map_info_t *mptr;
703 
704 	if (!P->info_valid)
705 		Pupdate_maps(P);
706 
707 	if ((mptr = Paddr2mptr(P, addr)) == NULL)
708 		return (NULL);
709 
710 	/*
711 	 * By building the symbol table, we implicitly bring the PLT
712 	 * information up to date in the load object.
713 	 */
714 	(void) build_map_symtab(P, mptr);
715 
716 	return (mptr->map_file->file_lo);
717 }
718 
719 const rd_loadobj_t *
Plmid_to_loadobj(struct ps_prochandle * P,Lmid_t lmid,const char * name)720 Plmid_to_loadobj(struct ps_prochandle *P, Lmid_t lmid, const char *name)
721 {
722 	map_info_t *mptr;
723 
724 	if (name == PR_OBJ_EVERY)
725 		return (NULL);
726 
727 	if ((mptr = object_name_to_map(P, lmid, name)) == NULL)
728 		return (NULL);
729 
730 	/*
731 	 * By building the symbol table, we implicitly bring the PLT
732 	 * information up to date in the load object.
733 	 */
734 	(void) build_map_symtab(P, mptr);
735 
736 	return (mptr->map_file->file_lo);
737 }
738 
739 const rd_loadobj_t *
Pname_to_loadobj(struct ps_prochandle * P,const char * name)740 Pname_to_loadobj(struct ps_prochandle *P, const char *name)
741 {
742 	return (Plmid_to_loadobj(P, PR_LMID_EVERY, name));
743 }
744 
745 ctf_file_t *
Pbuild_file_ctf(struct ps_prochandle * P,file_info_t * fptr)746 Pbuild_file_ctf(struct ps_prochandle *P, file_info_t *fptr)
747 {
748 	ctf_sect_t ctdata, symtab, strtab;
749 	sym_tbl_t *symp;
750 	int err;
751 
752 	if (fptr->file_ctfp != NULL)
753 		return (fptr->file_ctfp);
754 
755 	Pbuild_file_symtab(P, fptr);
756 
757 	if (fptr->file_ctf_size == 0)
758 		return (NULL);
759 
760 	symp = fptr->file_ctf_dyn ? &fptr->file_dynsym : &fptr->file_symtab;
761 	if (symp->sym_data_pri == NULL)
762 		return (NULL);
763 
764 	/*
765 	 * The buffer may alread be allocated if this is a core file that
766 	 * contained CTF data for this file.
767 	 */
768 	if (fptr->file_ctf_buf == NULL) {
769 		fptr->file_ctf_buf = malloc(fptr->file_ctf_size);
770 		if (fptr->file_ctf_buf == NULL) {
771 			Pdprintf("failed to allocate ctf buffer\n");
772 			return (NULL);
773 		}
774 
775 		if (pread(fptr->file_fd, fptr->file_ctf_buf,
776 		    fptr->file_ctf_size, fptr->file_ctf_off) !=
777 		    fptr->file_ctf_size) {
778 			free(fptr->file_ctf_buf);
779 			fptr->file_ctf_buf = NULL;
780 			Pdprintf("failed to read ctf data\n");
781 			return (NULL);
782 		}
783 	}
784 
785 	ctdata.cts_name = ".SUNW_ctf";
786 	ctdata.cts_type = SHT_PROGBITS;
787 	ctdata.cts_flags = 0;
788 	ctdata.cts_data = fptr->file_ctf_buf;
789 	ctdata.cts_size = fptr->file_ctf_size;
790 	ctdata.cts_entsize = 1;
791 	ctdata.cts_offset = 0;
792 
793 	symtab.cts_name = fptr->file_ctf_dyn ? ".dynsym" : ".symtab";
794 	symtab.cts_type = symp->sym_hdr_pri.sh_type;
795 	symtab.cts_flags = symp->sym_hdr_pri.sh_flags;
796 	symtab.cts_data = symp->sym_data_pri->d_buf;
797 	symtab.cts_size = symp->sym_hdr_pri.sh_size;
798 	symtab.cts_entsize = symp->sym_hdr_pri.sh_entsize;
799 	symtab.cts_offset = symp->sym_hdr_pri.sh_offset;
800 
801 	strtab.cts_name = fptr->file_ctf_dyn ? ".dynstr" : ".strtab";
802 	strtab.cts_type = symp->sym_strhdr.sh_type;
803 	strtab.cts_flags = symp->sym_strhdr.sh_flags;
804 	strtab.cts_data = symp->sym_strs;
805 	strtab.cts_size = symp->sym_strhdr.sh_size;
806 	strtab.cts_entsize = symp->sym_strhdr.sh_entsize;
807 	strtab.cts_offset = symp->sym_strhdr.sh_offset;
808 
809 	fptr->file_ctfp = ctf_bufopen(&ctdata, &symtab, &strtab, &err);
810 	if (fptr->file_ctfp == NULL) {
811 		Pdprintf("ctf_bufopen() failed, error code %d\n", err);
812 		free(fptr->file_ctf_buf);
813 		fptr->file_ctf_buf = NULL;
814 		return (NULL);
815 	}
816 
817 	Pdprintf("loaded %lu bytes of CTF data for %s\n",
818 	    (ulong_t)fptr->file_ctf_size, fptr->file_pname);
819 
820 	return (fptr->file_ctfp);
821 }
822 
823 ctf_file_t *
Paddr_to_ctf(struct ps_prochandle * P,uintptr_t addr)824 Paddr_to_ctf(struct ps_prochandle *P, uintptr_t addr)
825 {
826 	map_info_t *mptr;
827 	file_info_t *fptr;
828 
829 	if (!P->info_valid)
830 		Pupdate_maps(P);
831 
832 	if ((mptr = Paddr2mptr(P, addr)) == NULL ||
833 	    (fptr = mptr->map_file) == NULL)
834 		return (NULL);
835 
836 	return (Pbuild_file_ctf(P, fptr));
837 }
838 
839 ctf_file_t *
Plmid_to_ctf(struct ps_prochandle * P,Lmid_t lmid,const char * name)840 Plmid_to_ctf(struct ps_prochandle *P, Lmid_t lmid, const char *name)
841 {
842 	map_info_t *mptr;
843 	file_info_t *fptr = NULL;
844 
845 	if (name == PR_OBJ_EVERY)
846 		return (NULL);
847 
848 	/*
849 	 * While most idle files are all ELF objects, not all of them have
850 	 * mapping information available. There's nothing which would make
851 	 * sense to fake up for ET_REL. Instead, if we're being asked for their
852 	 * executable object and we know that the information is valid and they
853 	 * only have a single file, we jump straight to that file pointer.
854 	 */
855 	if (P->state == PS_IDLE && name == PR_OBJ_EXEC && P->info_valid == 1 &&
856 	    P->num_files == 1 && P->mappings == NULL) {
857 		fptr = list_head(&P->file_head);
858 	}
859 
860 	if (fptr == NULL) {
861 		if ((mptr = object_name_to_map(P, lmid, name)) == NULL ||
862 		    (fptr = mptr->map_file) == NULL)
863 			return (NULL);
864 	}
865 
866 	return (Pbuild_file_ctf(P, fptr));
867 }
868 
869 ctf_file_t *
Pname_to_ctf(struct ps_prochandle * P,const char * name)870 Pname_to_ctf(struct ps_prochandle *P, const char *name)
871 {
872 	return (Plmid_to_ctf(P, PR_LMID_EVERY, name));
873 }
874 
875 void
Preadauxvec(struct ps_prochandle * P)876 Preadauxvec(struct ps_prochandle *P)
877 {
878 	if (P->auxv != NULL) {
879 		free(P->auxv);
880 		P->auxv = NULL;
881 		P->nauxv = 0;
882 	}
883 
884 	P->ops.pop_read_aux(P, &P->auxv, &P->nauxv, P->data);
885 }
886 
887 /*
888  * Return a requested element from the process's aux vector.
889  * Return -1 on failure (this is adequate for our purposes).
890  */
891 long
Pgetauxval(struct ps_prochandle * P,int type)892 Pgetauxval(struct ps_prochandle *P, int type)
893 {
894 	auxv_t *auxv;
895 
896 	if (P->auxv == NULL)
897 		Preadauxvec(P);
898 
899 	if (P->auxv == NULL)
900 		return (-1);
901 
902 	for (auxv = P->auxv; auxv->a_type != AT_NULL; auxv++) {
903 		if (auxv->a_type == type)
904 			return (auxv->a_un.a_val);
905 	}
906 
907 	return (-1);
908 }
909 
910 /*
911  * Return a pointer to our internal copy of the process's aux vector.
912  * The caller should not hold on to this pointer across any libproc calls.
913  */
914 const auxv_t *
Pgetauxvec(struct ps_prochandle * P)915 Pgetauxvec(struct ps_prochandle *P)
916 {
917 	static const auxv_t empty = { AT_NULL, 0L };
918 
919 	if (P->auxv == NULL)
920 		Preadauxvec(P);
921 
922 	if (P->auxv == NULL)
923 		return (&empty);
924 
925 	return (P->auxv);
926 }
927 
928 /*
929  * Return 1 if the given mapping corresponds to the given file_info_t's
930  * load object; return 0 otherwise.
931  */
932 static int
is_mapping_in_file(struct ps_prochandle * P,map_info_t * mptr,file_info_t * fptr)933 is_mapping_in_file(struct ps_prochandle *P, map_info_t *mptr, file_info_t *fptr)
934 {
935 	prmap_t *pmap = &mptr->map_pmap;
936 	rd_loadobj_t *lop = fptr->file_lo;
937 	uint_t i;
938 	uintptr_t mstart, mend, sstart, send;
939 
940 	/*
941 	 * We can get for free the start address of the text and data
942 	 * sections of the load object. Start by seeing if the mapping
943 	 * encloses either of these.
944 	 */
945 	if ((pmap->pr_vaddr <= lop->rl_base &&
946 	    lop->rl_base < pmap->pr_vaddr + pmap->pr_size) ||
947 	    (pmap->pr_vaddr <= lop->rl_data_base &&
948 	    lop->rl_data_base < pmap->pr_vaddr + pmap->pr_size))
949 		return (1);
950 
951 	/*
952 	 * It's still possible that this mapping correponds to the load
953 	 * object. Consider the example of a mapping whose start and end
954 	 * addresses correspond to those of the load object's text section.
955 	 * If the mapping splits, e.g. as a result of a segment demotion,
956 	 * then although both mappings are still backed by the same section,
957 	 * only one will be seen to enclose that section's start address.
958 	 * Thus, to be rigorous, we ask not whether this mapping encloses
959 	 * the start of a section, but whether there exists a section that
960 	 * overlaps this mapping.
961 	 *
962 	 * If we don't already have the section addresses, and we successfully
963 	 * get them, then we cache them in case we come here again.
964 	 */
965 	if (fptr->file_saddrs == NULL &&
966 	    (fptr->file_saddrs = get_saddrs(P,
967 	    fptr->file_map->map_pmap.pr_vaddr, &fptr->file_nsaddrs)) == NULL)
968 		return (0);
969 
970 	mstart = mptr->map_pmap.pr_vaddr;
971 	mend = mptr->map_pmap.pr_vaddr + mptr->map_pmap.pr_size;
972 	for (i = 0; i < fptr->file_nsaddrs; i += 2) {
973 		/* Does this section overlap the mapping? */
974 		sstart = fptr->file_saddrs[i];
975 		send = fptr->file_saddrs[i + 1];
976 		if (!(mend <= sstart || mstart >= send))
977 			return (1);
978 	}
979 
980 	return (0);
981 }
982 
983 /*
984  * Find or build the symbol table for the given mapping.
985  */
986 static file_info_t *
build_map_symtab(struct ps_prochandle * P,map_info_t * mptr)987 build_map_symtab(struct ps_prochandle *P, map_info_t *mptr)
988 {
989 	prmap_t *pmap = &mptr->map_pmap;
990 	file_info_t *fptr;
991 
992 	if ((fptr = mptr->map_file) != NULL) {
993 		Pbuild_file_symtab(P, fptr);
994 		return (fptr);
995 	}
996 
997 	if (pmap->pr_mapname[0] == '\0')
998 		return (NULL);
999 
1000 	/*
1001 	 * Attempt to find a matching file.
1002 	 * (A file can be mapped at several different addresses.)
1003 	 */
1004 	for (fptr = list_head(&P->file_head); fptr != NULL;
1005 	    fptr = list_next(&P->file_head, fptr)) {
1006 		if (strcmp(fptr->file_pname, pmap->pr_mapname) == 0 &&
1007 		    fptr->file_lo && is_mapping_in_file(P, mptr, fptr)) {
1008 			mptr->map_file = fptr;
1009 			fptr->file_ref++;
1010 			Pbuild_file_symtab(P, fptr);
1011 			return (fptr);
1012 		}
1013 	}
1014 
1015 	/*
1016 	 * If we need to create a new file_info structure, iterate
1017 	 * through the load objects in order to attempt to connect
1018 	 * this new file with its primary text mapping.  We again
1019 	 * need to handle ld.so as a special case because we need
1020 	 * to be able to bootstrap librtld_db.
1021 	 */
1022 	if ((fptr = file_info_new(P, mptr)) == NULL)
1023 		return (NULL);
1024 
1025 	if (P->map_ldso != mptr) {
1026 		if (P->rap != NULL)
1027 			(void) rd_loadobj_iter(P->rap, map_iter, P);
1028 		else
1029 			(void) Prd_agent(P);
1030 	} else {
1031 		fptr->file_map = mptr;
1032 	}
1033 
1034 	/*
1035 	 * If librtld_db wasn't able to help us connect the file to a primary
1036 	 * text mapping, set file_map to the current mapping because we require
1037 	 * fptr->file_map to be set in Pbuild_file_symtab.  librtld_db may be
1038 	 * unaware of what's going on in the rare case that a legitimate ELF
1039 	 * file has been mmap(2)ed into the process address space *without*
1040 	 * the use of dlopen(3x).
1041 	 */
1042 	if (fptr->file_map == NULL)
1043 		fptr->file_map = mptr;
1044 
1045 	Pbuild_file_symtab(P, fptr);
1046 
1047 	return (fptr);
1048 }
1049 
1050 static int
read_ehdr32(struct ps_prochandle * P,Elf32_Ehdr * ehdr,uint_t * phnum,uintptr_t addr)1051 read_ehdr32(struct ps_prochandle *P, Elf32_Ehdr *ehdr, uint_t *phnum,
1052     uintptr_t addr)
1053 {
1054 	if (Pread(P, ehdr, sizeof (*ehdr), addr) != sizeof (*ehdr))
1055 		return (-1);
1056 
1057 	if (ehdr->e_ident[EI_MAG0] != ELFMAG0 ||
1058 	    ehdr->e_ident[EI_MAG1] != ELFMAG1 ||
1059 	    ehdr->e_ident[EI_MAG2] != ELFMAG2 ||
1060 	    ehdr->e_ident[EI_MAG3] != ELFMAG3 ||
1061 	    ehdr->e_ident[EI_CLASS] != ELFCLASS32 ||
1062 #ifdef _BIG_ENDIAN
1063 	    ehdr->e_ident[EI_DATA] != ELFDATA2MSB ||
1064 #else
1065 	    ehdr->e_ident[EI_DATA] != ELFDATA2LSB ||
1066 #endif
1067 	    ehdr->e_ident[EI_VERSION] != EV_CURRENT)
1068 		return (-1);
1069 
1070 	if ((*phnum = ehdr->e_phnum) == PN_XNUM) {
1071 		Elf32_Shdr shdr0;
1072 
1073 		if (ehdr->e_shoff == 0 || ehdr->e_shentsize < sizeof (shdr0) ||
1074 		    Pread(P, &shdr0, sizeof (shdr0), addr + ehdr->e_shoff) !=
1075 		    sizeof (shdr0))
1076 			return (-1);
1077 
1078 		if (shdr0.sh_info != 0)
1079 			*phnum = shdr0.sh_info;
1080 	}
1081 
1082 	return (0);
1083 }
1084 
1085 static int
read_dynamic_phdr32(struct ps_prochandle * P,const Elf32_Ehdr * ehdr,uint_t phnum,Elf32_Phdr * phdr,uintptr_t addr)1086 read_dynamic_phdr32(struct ps_prochandle *P, const Elf32_Ehdr *ehdr,
1087     uint_t phnum, Elf32_Phdr *phdr, uintptr_t addr)
1088 {
1089 	uint_t i;
1090 
1091 	for (i = 0; i < phnum; i++) {
1092 		uintptr_t a = addr + ehdr->e_phoff + i * ehdr->e_phentsize;
1093 		if (Pread(P, phdr, sizeof (*phdr), a) != sizeof (*phdr))
1094 			return (-1);
1095 
1096 		if (phdr->p_type == PT_DYNAMIC)
1097 			return (0);
1098 	}
1099 
1100 	return (-1);
1101 }
1102 
1103 #ifdef _LP64
1104 static int
read_ehdr64(struct ps_prochandle * P,Elf64_Ehdr * ehdr,uint_t * phnum,uintptr_t addr)1105 read_ehdr64(struct ps_prochandle *P, Elf64_Ehdr *ehdr, uint_t *phnum,
1106     uintptr_t addr)
1107 {
1108 	if (Pread(P, ehdr, sizeof (Elf64_Ehdr), addr) != sizeof (Elf64_Ehdr))
1109 		return (-1);
1110 
1111 	if (ehdr->e_ident[EI_MAG0] != ELFMAG0 ||
1112 	    ehdr->e_ident[EI_MAG1] != ELFMAG1 ||
1113 	    ehdr->e_ident[EI_MAG2] != ELFMAG2 ||
1114 	    ehdr->e_ident[EI_MAG3] != ELFMAG3 ||
1115 	    ehdr->e_ident[EI_CLASS] != ELFCLASS64 ||
1116 #ifdef _BIG_ENDIAN
1117 	    ehdr->e_ident[EI_DATA] != ELFDATA2MSB ||
1118 #else
1119 	    ehdr->e_ident[EI_DATA] != ELFDATA2LSB ||
1120 #endif
1121 	    ehdr->e_ident[EI_VERSION] != EV_CURRENT)
1122 		return (-1);
1123 
1124 	if ((*phnum = ehdr->e_phnum) == PN_XNUM) {
1125 		Elf64_Shdr shdr0;
1126 
1127 		if (ehdr->e_shoff == 0 || ehdr->e_shentsize < sizeof (shdr0) ||
1128 		    Pread(P, &shdr0, sizeof (shdr0), addr + ehdr->e_shoff) !=
1129 		    sizeof (shdr0))
1130 			return (-1);
1131 
1132 		if (shdr0.sh_info != 0)
1133 			*phnum = shdr0.sh_info;
1134 	}
1135 
1136 	return (0);
1137 }
1138 
1139 static int
read_dynamic_phdr64(struct ps_prochandle * P,const Elf64_Ehdr * ehdr,uint_t phnum,Elf64_Phdr * phdr,uintptr_t addr)1140 read_dynamic_phdr64(struct ps_prochandle *P, const Elf64_Ehdr *ehdr,
1141     uint_t phnum, Elf64_Phdr *phdr, uintptr_t addr)
1142 {
1143 	uint_t i;
1144 
1145 	for (i = 0; i < phnum; i++) {
1146 		uintptr_t a = addr + ehdr->e_phoff + i * ehdr->e_phentsize;
1147 		if (Pread(P, phdr, sizeof (*phdr), a) != sizeof (*phdr))
1148 			return (-1);
1149 
1150 		if (phdr->p_type == PT_DYNAMIC)
1151 			return (0);
1152 	}
1153 
1154 	return (-1);
1155 }
1156 #endif	/* _LP64 */
1157 
1158 /*
1159  * The text segment for each load object contains the elf header and
1160  * program headers. We can use this information to determine if the
1161  * file that corresponds to the load object is the same file that
1162  * was loaded into the process's address space. There can be a discrepency
1163  * if a file is recompiled after the process is started or if the target
1164  * represents a core file from a differently configured system -- two
1165  * common examples. The DT_CHECKSUM entry in the dynamic section
1166  * provides an easy method of comparison. It is important to note that
1167  * the dynamic section usually lives in the data segment, but the meta
1168  * data we use to find the dynamic section lives in the text segment so
1169  * if either of those segments is absent we can't proceed.
1170  *
1171  * We're looking through the elf file for several items: the symbol tables
1172  * (both dynsym and symtab), the procedure linkage table (PLT) base,
1173  * size, and relocation base, and the CTF information. Most of this can
1174  * be recovered from the loaded image of the file itself, the exceptions
1175  * being the symtab and CTF data.
1176  *
1177  * First we try to open the file that we think corresponds to the load
1178  * object, if the DT_CHECKSUM values match, we're all set, and can simply
1179  * recover all the information we need from the file. If the values of
1180  * DT_CHECKSUM don't match, or if we can't access the file for whatever
1181  * reasaon, we fake up a elf file to use in its stead. If we can't read
1182  * the elf data in the process's address space, we fall back to using
1183  * the file even though it may give inaccurate information.
1184  *
1185  * The elf file that we fake up has to consist of sections for the
1186  * dynsym, the PLT and the dynamic section. Note that in the case of a
1187  * core file, we'll get the CTF data in the file_info_t later on from
1188  * a section embedded the core file (if it's present).
1189  *
1190  * file_differs() conservatively looks for mismatched files, identifying
1191  * a match when there is any ambiguity (since that's the legacy behavior).
1192  */
1193 static int
file_differs(struct ps_prochandle * P,Elf * elf,file_info_t * fptr)1194 file_differs(struct ps_prochandle *P, Elf *elf, file_info_t *fptr)
1195 {
1196 	Elf_Scn *scn;
1197 	GElf_Shdr shdr;
1198 	GElf_Dyn dyn;
1199 	Elf_Data *data;
1200 	uint_t i, ndyn;
1201 	GElf_Xword cksum;
1202 	uintptr_t addr;
1203 
1204 	if (fptr->file_map == NULL)
1205 		return (0);
1206 
1207 	if ((Pcontent(P) & (CC_CONTENT_TEXT | CC_CONTENT_DATA)) !=
1208 	    (CC_CONTENT_TEXT | CC_CONTENT_DATA))
1209 		return (0);
1210 
1211 	/*
1212 	 * First, we find the checksum value in the elf file.
1213 	 */
1214 	scn = NULL;
1215 	while ((scn = elf_nextscn(elf, scn)) != NULL) {
1216 		if (gelf_getshdr(scn, &shdr) != NULL &&
1217 		    shdr.sh_type == SHT_DYNAMIC)
1218 			goto found_shdr;
1219 	}
1220 	return (0);
1221 
1222 found_shdr:
1223 	if ((data = elf_getdata(scn, NULL)) == NULL)
1224 		return (0);
1225 
1226 	if (P->status.pr_dmodel == PR_MODEL_ILP32)
1227 		ndyn = shdr.sh_size / sizeof (Elf32_Dyn);
1228 #ifdef _LP64
1229 	else if (P->status.pr_dmodel == PR_MODEL_LP64)
1230 		ndyn = shdr.sh_size / sizeof (Elf64_Dyn);
1231 #endif
1232 	else
1233 		return (0);
1234 
1235 	for (i = 0; i < ndyn; i++) {
1236 		if (gelf_getdyn(data, i, &dyn) != NULL &&
1237 		    dyn.d_tag == DT_CHECKSUM)
1238 			goto found_cksum;
1239 	}
1240 
1241 	/*
1242 	 * The in-memory ELF has no DT_CHECKSUM section, but we will report it
1243 	 * as matching the file anyhow.
1244 	 */
1245 	return (0);
1246 
1247 found_cksum:
1248 	cksum = dyn.d_un.d_val;
1249 	Pdprintf("elf cksum value is %llx\n", (u_longlong_t)cksum);
1250 
1251 	/*
1252 	 * Get the base of the text mapping that corresponds to this file.
1253 	 */
1254 	addr = fptr->file_map->map_pmap.pr_vaddr;
1255 
1256 	if (P->status.pr_dmodel == PR_MODEL_ILP32) {
1257 		Elf32_Ehdr ehdr;
1258 		Elf32_Phdr phdr;
1259 		Elf32_Dyn dync, *dynp;
1260 		uint_t phnum, i;
1261 
1262 		if (read_ehdr32(P, &ehdr, &phnum, addr) != 0 ||
1263 		    read_dynamic_phdr32(P, &ehdr, phnum, &phdr, addr) != 0)
1264 			return (0);
1265 
1266 		if (ehdr.e_type == ET_DYN)
1267 			phdr.p_vaddr += addr;
1268 		if ((dynp = malloc(phdr.p_filesz)) == NULL)
1269 			return (0);
1270 		dync.d_tag = DT_NULL;
1271 		if (Pread(P, dynp, phdr.p_filesz, phdr.p_vaddr) !=
1272 		    phdr.p_filesz) {
1273 			free(dynp);
1274 			return (0);
1275 		}
1276 
1277 		for (i = 0; i < phdr.p_filesz / sizeof (Elf32_Dyn); i++) {
1278 			if (dynp[i].d_tag == DT_CHECKSUM)
1279 				dync = dynp[i];
1280 		}
1281 
1282 		free(dynp);
1283 
1284 		if (dync.d_tag != DT_CHECKSUM)
1285 			return (0);
1286 
1287 		Pdprintf("image cksum value is %llx\n",
1288 		    (u_longlong_t)dync.d_un.d_val);
1289 		return (dync.d_un.d_val != cksum);
1290 #ifdef _LP64
1291 	} else if (P->status.pr_dmodel == PR_MODEL_LP64) {
1292 		Elf64_Ehdr ehdr;
1293 		Elf64_Phdr phdr;
1294 		Elf64_Dyn dync, *dynp;
1295 		uint_t phnum, i;
1296 
1297 		if (read_ehdr64(P, &ehdr, &phnum, addr) != 0 ||
1298 		    read_dynamic_phdr64(P, &ehdr, phnum, &phdr, addr) != 0)
1299 			return (0);
1300 
1301 		if (ehdr.e_type == ET_DYN)
1302 			phdr.p_vaddr += addr;
1303 		if ((dynp = malloc(phdr.p_filesz)) == NULL)
1304 			return (0);
1305 		dync.d_tag = DT_NULL;
1306 		if (Pread(P, dynp, phdr.p_filesz, phdr.p_vaddr) !=
1307 		    phdr.p_filesz) {
1308 			free(dynp);
1309 			return (0);
1310 		}
1311 
1312 		for (i = 0; i < phdr.p_filesz / sizeof (Elf64_Dyn); i++) {
1313 			if (dynp[i].d_tag == DT_CHECKSUM)
1314 				dync = dynp[i];
1315 		}
1316 
1317 		free(dynp);
1318 
1319 		if (dync.d_tag != DT_CHECKSUM)
1320 			return (0);
1321 
1322 		Pdprintf("image cksum value is %llx\n",
1323 		    (u_longlong_t)dync.d_un.d_val);
1324 		return (dync.d_un.d_val != cksum);
1325 #endif	/* _LP64 */
1326 	}
1327 
1328 	return (0);
1329 }
1330 
1331 /*
1332  * Read data from the specified process and construct an in memory
1333  * image of an ELF file that represents it well enough to let
1334  * us probe it for information.
1335  */
1336 static Elf *
fake_elf(struct ps_prochandle * P,file_info_t * fptr)1337 fake_elf(struct ps_prochandle *P, file_info_t *fptr)
1338 {
1339 	Elf *elf;
1340 	uintptr_t addr;
1341 	uint_t phnum;
1342 
1343 	if (fptr->file_map == NULL)
1344 		return (NULL);
1345 
1346 	if ((Pcontent(P) & (CC_CONTENT_TEXT | CC_CONTENT_DATA)) !=
1347 	    (CC_CONTENT_TEXT | CC_CONTENT_DATA))
1348 		return (NULL);
1349 
1350 	addr = fptr->file_map->map_pmap.pr_vaddr;
1351 
1352 	if (P->status.pr_dmodel == PR_MODEL_ILP32) {
1353 		Elf32_Ehdr ehdr;
1354 		Elf32_Phdr phdr;
1355 
1356 		if ((read_ehdr32(P, &ehdr, &phnum, addr) != 0) ||
1357 		    read_dynamic_phdr32(P, &ehdr, phnum, &phdr, addr) != 0)
1358 			return (NULL);
1359 
1360 		elf = fake_elf32(P, fptr, addr, &ehdr, phnum, &phdr);
1361 #ifdef _LP64
1362 	} else {
1363 		Elf64_Ehdr ehdr;
1364 		Elf64_Phdr phdr;
1365 
1366 		if (read_ehdr64(P, &ehdr, &phnum, addr) != 0 ||
1367 		    read_dynamic_phdr64(P, &ehdr, phnum, &phdr, addr) != 0)
1368 			return (NULL);
1369 
1370 		elf = fake_elf64(P, fptr, addr, &ehdr, phnum, &phdr);
1371 #endif
1372 	}
1373 
1374 	return (elf);
1375 }
1376 
1377 /*
1378  * We wouldn't need these if qsort(3C) took an argument for the callback...
1379  */
1380 static mutex_t sort_mtx = DEFAULTMUTEX;
1381 static char *sort_strs;
1382 static GElf_Sym *sort_syms;
1383 
1384 int
byaddr_cmp_common(GElf_Sym * a,char * aname,GElf_Sym * b,char * bname)1385 byaddr_cmp_common(GElf_Sym *a, char *aname, GElf_Sym *b, char *bname)
1386 {
1387 	if (a->st_value < b->st_value)
1388 		return (-1);
1389 	if (a->st_value > b->st_value)
1390 		return (1);
1391 
1392 	/*
1393 	 * Prefer the function to the non-function.
1394 	 */
1395 	if (GELF_ST_TYPE(a->st_info) != GELF_ST_TYPE(b->st_info)) {
1396 		if (GELF_ST_TYPE(a->st_info) == STT_FUNC)
1397 			return (-1);
1398 		if (GELF_ST_TYPE(b->st_info) == STT_FUNC)
1399 			return (1);
1400 	}
1401 
1402 	/*
1403 	 * Prefer the weak or strong global symbol to the local symbol.
1404 	 */
1405 	if (GELF_ST_BIND(a->st_info) != GELF_ST_BIND(b->st_info)) {
1406 		if (GELF_ST_BIND(b->st_info) == STB_LOCAL)
1407 			return (-1);
1408 		if (GELF_ST_BIND(a->st_info) == STB_LOCAL)
1409 			return (1);
1410 	}
1411 
1412 	/*
1413 	 * Prefer the symbol that doesn't begin with a '$' since compilers and
1414 	 * other symbol generators often use it as a prefix.
1415 	 */
1416 	if (*bname == '$')
1417 		return (-1);
1418 	if (*aname == '$')
1419 		return (1);
1420 
1421 	/*
1422 	 * Prefer the name with fewer leading underscores in the name.
1423 	 */
1424 	while (*aname == '_' && *bname == '_') {
1425 		aname++;
1426 		bname++;
1427 	}
1428 
1429 	if (*bname == '_')
1430 		return (-1);
1431 	if (*aname == '_')
1432 		return (1);
1433 
1434 	/*
1435 	 * Prefer the symbol with the smaller size.
1436 	 */
1437 	if (a->st_size < b->st_size)
1438 		return (-1);
1439 	if (a->st_size > b->st_size)
1440 		return (1);
1441 
1442 	/*
1443 	 * All other factors being equal, fall back to lexicographic order.
1444 	 */
1445 	return (strcmp(aname, bname));
1446 }
1447 
1448 static int
byaddr_cmp(const void * aa,const void * bb)1449 byaddr_cmp(const void *aa, const void *bb)
1450 {
1451 	GElf_Sym *a = &sort_syms[*(uint_t *)aa];
1452 	GElf_Sym *b = &sort_syms[*(uint_t *)bb];
1453 	char *aname = sort_strs + a->st_name;
1454 	char *bname = sort_strs + b->st_name;
1455 
1456 	return (byaddr_cmp_common(a, aname, b, bname));
1457 }
1458 
1459 static int
byname_cmp(const void * aa,const void * bb)1460 byname_cmp(const void *aa, const void *bb)
1461 {
1462 	GElf_Sym *a = &sort_syms[*(uint_t *)aa];
1463 	GElf_Sym *b = &sort_syms[*(uint_t *)bb];
1464 	char *aname = sort_strs + a->st_name;
1465 	char *bname = sort_strs + b->st_name;
1466 
1467 	return (strcmp(aname, bname));
1468 }
1469 
1470 /*
1471  * Given a symbol index, look up the corresponding symbol from the
1472  * given symbol table.
1473  *
1474  * This function allows the caller to treat the symbol table as a single
1475  * logical entity even though there may be 2 actual ELF symbol tables
1476  * involved. See the comments in Pcontrol.h for details.
1477  */
1478 static GElf_Sym *
symtab_getsym(sym_tbl_t * symtab,int ndx,GElf_Sym * dst)1479 symtab_getsym(sym_tbl_t *symtab, int ndx, GElf_Sym *dst)
1480 {
1481 	/* If index is in range of primary symtab, look it up there */
1482 	if (ndx >= symtab->sym_symn_aux) {
1483 		return (gelf_getsym(symtab->sym_data_pri,
1484 		    ndx - symtab->sym_symn_aux, dst));
1485 	}
1486 
1487 	/* Not in primary: Look it up in the auxiliary symtab */
1488 	return (gelf_getsym(symtab->sym_data_aux, ndx, dst));
1489 }
1490 
1491 void
optimize_symtab(sym_tbl_t * symtab)1492 optimize_symtab(sym_tbl_t *symtab)
1493 {
1494 	GElf_Sym *symp, *syms;
1495 	uint_t i, *indexa, *indexb;
1496 	size_t symn, strsz, count;
1497 
1498 	if (symtab == NULL || symtab->sym_data_pri == NULL ||
1499 	    symtab->sym_byaddr != NULL)
1500 		return;
1501 
1502 	symn = symtab->sym_symn;
1503 	strsz = symtab->sym_strsz;
1504 
1505 	symp = syms = malloc(sizeof (GElf_Sym) * symn);
1506 	if (symp == NULL) {
1507 		Pdprintf("optimize_symtab: failed to malloc symbol array");
1508 		return;
1509 	}
1510 
1511 	/*
1512 	 * First record all the symbols into a table and count up the ones
1513 	 * that we're interested in. We mark symbols as invalid by setting
1514 	 * the st_name to an illegal value.
1515 	 */
1516 	for (i = 0, count = 0; i < symn; i++, symp++) {
1517 		if (symtab_getsym(symtab, i, symp) != NULL &&
1518 		    symp->st_name < strsz &&
1519 		    IS_DATA_TYPE(GELF_ST_TYPE(symp->st_info)))
1520 			count++;
1521 		else
1522 			symp->st_name = strsz;
1523 	}
1524 
1525 	/*
1526 	 * Allocate sufficient space for both tables and populate them
1527 	 * with the same symbols we just counted.
1528 	 */
1529 	symtab->sym_count = count;
1530 	indexa = symtab->sym_byaddr = calloc(sizeof (uint_t), count);
1531 	indexb = symtab->sym_byname = calloc(sizeof (uint_t), count);
1532 	if (indexa == NULL || indexb == NULL) {
1533 		Pdprintf(
1534 		    "optimize_symtab: failed to malloc symbol index arrays");
1535 		symtab->sym_count = 0;
1536 		if (indexa != NULL) {	/* First alloc succeeded. Free it */
1537 			free(indexa);
1538 			symtab->sym_byaddr = NULL;
1539 		}
1540 		free(syms);
1541 		return;
1542 	}
1543 	for (i = 0, symp = syms; i < symn; i++, symp++) {
1544 		if (symp->st_name < strsz)
1545 			*indexa++ = *indexb++ = i;
1546 	}
1547 
1548 	/*
1549 	 * Sort the two tables according to the appropriate criteria,
1550 	 * unless the user has overridden this behaviour.
1551 	 *
1552 	 * An example where we might not sort the tables is the relatively
1553 	 * unusual case of a process with very large symbol tables in which
1554 	 * we perform few lookups. In such a case the total time would be
1555 	 * dominated by the sort. It is difficult to determine a priori
1556 	 * how many lookups an arbitrary client will perform, and
1557 	 * hence whether the symbol tables should be sorted. We therefore
1558 	 * sort the tables by default, but provide the user with a
1559 	 * "chicken switch" in the form of the LIBPROC_NO_QSORT
1560 	 * environment variable.
1561 	 */
1562 	if (!_libproc_no_qsort) {
1563 		(void) mutex_lock(&sort_mtx);
1564 		sort_strs = symtab->sym_strs;
1565 		sort_syms = syms;
1566 
1567 		qsort(symtab->sym_byaddr, count, sizeof (uint_t), byaddr_cmp);
1568 		qsort(symtab->sym_byname, count, sizeof (uint_t), byname_cmp);
1569 
1570 		sort_strs = NULL;
1571 		sort_syms = NULL;
1572 		(void) mutex_unlock(&sort_mtx);
1573 	}
1574 
1575 	free(syms);
1576 }
1577 
1578 
1579 static Elf *
build_fake_elf(struct ps_prochandle * P,file_info_t * fptr,GElf_Ehdr * ehdr,size_t * nshdrs,Elf_Data ** shdata)1580 build_fake_elf(struct ps_prochandle *P, file_info_t *fptr, GElf_Ehdr *ehdr,
1581     size_t *nshdrs, Elf_Data **shdata)
1582 {
1583 	size_t shstrndx;
1584 	Elf_Scn *scn;
1585 	Elf *elf;
1586 
1587 	if ((elf = fake_elf(P, fptr)) == NULL ||
1588 	    elf_kind(elf) != ELF_K_ELF ||
1589 	    gelf_getehdr(elf, ehdr) == NULL ||
1590 	    elf_getshdrnum(elf, nshdrs) == -1 ||
1591 	    elf_getshdrstrndx(elf, &shstrndx) == -1 ||
1592 	    (scn = elf_getscn(elf, shstrndx)) == NULL ||
1593 	    (*shdata = elf_getdata(scn, NULL)) == NULL) {
1594 		if (elf != NULL)
1595 			(void) elf_end(elf);
1596 		Pdprintf("failed to fake up ELF file\n");
1597 		return (NULL);
1598 	}
1599 
1600 	return (elf);
1601 }
1602 
1603 /*
1604  * Try and find the file described by path in the file system and validate that
1605  * it matches our CRC before we try and process it for symbol information. If we
1606  * instead have an ELF data section, then that means we're checking a build-id
1607  * section instead. In that case we just need to find and bcmp the corresponding
1608  * section.
1609  *
1610  * Before we validate if it's a valid CRC or data section, we check to ensure
1611  * that it's a normal file and not anything else.
1612  */
1613 static boolean_t
build_alt_debug(file_info_t * fptr,const char * path,uint32_t crc,Elf_Data * data)1614 build_alt_debug(file_info_t *fptr, const char *path, uint32_t crc,
1615     Elf_Data *data)
1616 {
1617 	int fd;
1618 	struct stat st;
1619 	Elf *elf;
1620 	Elf_Scn *scn;
1621 	GElf_Shdr symshdr, strshdr;
1622 	Elf_Data *symdata, *strdata;
1623 	boolean_t valid;
1624 	uint32_t c = -1U;
1625 
1626 	if ((fd = open(path, O_RDONLY)) < 0)
1627 		return (B_FALSE);
1628 
1629 	if (fstat(fd, &st) != 0) {
1630 		(void) close(fd);
1631 		return (B_FALSE);
1632 	}
1633 
1634 	if (S_ISREG(st.st_mode) == 0) {
1635 		(void) close(fd);
1636 		return (B_FALSE);
1637 	}
1638 
1639 	/*
1640 	 * Only check the CRC if we've come here through a GNU debug link
1641 	 * section as opposed to the build id. This is indicated by having the
1642 	 * value of data be NULL.
1643 	 */
1644 	if (data == NULL) {
1645 		for (;;) {
1646 			char buf[4096];
1647 			ssize_t ret = read(fd, buf, sizeof (buf));
1648 			if (ret == -1) {
1649 				if (ret == EINTR)
1650 					continue;
1651 				(void) close(fd);
1652 				return (B_FALSE);
1653 			}
1654 			if (ret == 0) {
1655 				c = ~c;
1656 				if (c != crc) {
1657 					Pdprintf("crc mismatch, found: 0x%x "
1658 					    "expected 0x%x\n", c, crc);
1659 					(void) close(fd);
1660 					return (B_FALSE);
1661 				}
1662 				break;
1663 			}
1664 			CRC32(c, buf, ret, c, psym_crc32);
1665 		}
1666 	}
1667 
1668 	elf = elf_begin(fd, ELF_C_READ, NULL);
1669 	if (elf == NULL) {
1670 		(void) close(fd);
1671 		return (B_FALSE);
1672 	}
1673 
1674 	if (elf_kind(elf) != ELF_K_ELF) {
1675 		goto fail;
1676 	}
1677 
1678 	/*
1679 	 * If we have a data section, that indicates we have a build-id which
1680 	 * means we need to find the corresponding build-id section and compare
1681 	 * it.
1682 	 */
1683 	scn = NULL;
1684 	valid = B_FALSE;
1685 	for (scn = elf_nextscn(elf, scn); data != NULL && scn != NULL;
1686 	    scn = elf_nextscn(elf, scn)) {
1687 		GElf_Shdr hdr;
1688 		Elf_Data *ntdata;
1689 
1690 		if (gelf_getshdr(scn, &hdr) == NULL)
1691 			goto fail;
1692 
1693 		if (hdr.sh_type != SHT_NOTE)
1694 			continue;
1695 
1696 		if ((ntdata = elf_getdata(scn, NULL)) == NULL)
1697 			goto fail;
1698 
1699 		/*
1700 		 * First verify the data section sizes are equal, then the
1701 		 * section name. If that's all true, then we can just do a bcmp.
1702 		 */
1703 		if (data->d_size != ntdata->d_size)
1704 			continue;
1705 
1706 		Pdprintf("found corresponding section in alternate file\n");
1707 		if (bcmp(ntdata->d_buf, data->d_buf, data->d_size) != 0)
1708 			goto fail;
1709 
1710 		valid = B_TRUE;
1711 		break;
1712 	}
1713 	if (data != NULL && valid == B_FALSE) {
1714 		Pdprintf("failed to find a matching %s section in %s\n",
1715 		    BUILDID_NAME, path);
1716 		goto fail;
1717 	}
1718 
1719 
1720 	/*
1721 	 * Do two passes, first see if we have a symbol header, then see if we
1722 	 * can find the corresponding linked string table.
1723 	 */
1724 	scn = NULL;
1725 	for (scn = elf_nextscn(elf, scn); scn != NULL;
1726 	    scn = elf_nextscn(elf, scn)) {
1727 
1728 		if (gelf_getshdr(scn, &symshdr) == NULL)
1729 			goto fail;
1730 
1731 		if (symshdr.sh_type != SHT_SYMTAB)
1732 			continue;
1733 
1734 		if ((symdata = elf_getdata(scn, NULL)) == NULL)
1735 			goto fail;
1736 
1737 		break;
1738 	}
1739 	if (scn == NULL)
1740 		goto fail;
1741 
1742 	if ((scn = elf_getscn(elf, symshdr.sh_link)) == NULL)
1743 		goto fail;
1744 
1745 	if (gelf_getshdr(scn, &strshdr) == NULL)
1746 		goto fail;
1747 
1748 	if ((strdata = elf_getdata(scn, NULL)) == NULL)
1749 		goto fail;
1750 
1751 	fptr->file_symtab.sym_data_pri = symdata;
1752 	fptr->file_symtab.sym_symn += symshdr.sh_size / symshdr.sh_entsize;
1753 	fptr->file_symtab.sym_strs = strdata->d_buf;
1754 	fptr->file_symtab.sym_strsz = strdata->d_size;
1755 	fptr->file_symtab.sym_hdr_pri = symshdr;
1756 	fptr->file_symtab.sym_strhdr = strshdr;
1757 
1758 	Pdprintf(
1759 	    "successfully loaded additional debug symbols for %s from %s\n",
1760 	    fptr->file_rname, path);
1761 
1762 	fptr->file_dbgfile = fd;
1763 	fptr->file_dbgelf = elf;
1764 	return (B_TRUE);
1765 fail:
1766 	(void) elf_end(elf);
1767 	(void) close(fd);
1768 	return (B_FALSE);
1769 }
1770 
1771 /*
1772  * We're here because the object in question has no symbol information, that's a
1773  * bit unfortunate. However, we've found that there's a .gnu_debuglink sitting
1774  * around. By convention that means that given the current location of the
1775  * object on disk, and the debug name that we found in the binary we need to
1776  * search the following locations for a matching file.
1777  *
1778  * <dirname>/.debug/<debug-name>
1779  * /usr/lib/debug/<dirname>/<debug-name>
1780  *
1781  * In the future, we should consider supporting looking in the prefix's
1782  * lib/debug directory for a matching object or supporting an arbitrary user
1783  * defined set of places to look.
1784  */
1785 static void
find_alt_debuglink(file_info_t * fptr,const char * name,uint32_t crc)1786 find_alt_debuglink(file_info_t *fptr, const char *name, uint32_t crc)
1787 {
1788 	boolean_t r;
1789 	char *dup = NULL, *path = NULL, *dname;
1790 
1791 	Pdprintf("find_alt_debug: looking for %s, crc 0x%x\n", name, crc);
1792 	if (fptr->file_rname == NULL) {
1793 		Pdprintf("find_alt_debug: encountered null file_rname\n");
1794 		return;
1795 	}
1796 
1797 	dup = strdup(fptr->file_rname);
1798 	if (dup == NULL)
1799 		return;
1800 
1801 	dname = dirname(dup);
1802 	if (asprintf(&path, "%s/.debug/%s", dname, name) != -1) {
1803 		Pdprintf("attempting to load alternate debug information "
1804 		    "from %s\n", path);
1805 		r = build_alt_debug(fptr, path, crc, NULL);
1806 		free(path);
1807 		if (r == B_TRUE)
1808 			goto out;
1809 	}
1810 
1811 	if (asprintf(&path, "/usr/lib/debug/%s/%s", dname, name) != -1) {
1812 		Pdprintf("attempting to load alternate debug information "
1813 		    "from %s\n", path);
1814 		r = build_alt_debug(fptr, path, crc, NULL);
1815 		free(path);
1816 		if (r == B_TRUE)
1817 			goto out;
1818 	}
1819 out:
1820 	free(dup);
1821 }
1822 
1823 /*
1824  * Build the symbol table for the given mapped file.
1825  */
1826 void
Pbuild_file_symtab(struct ps_prochandle * P,file_info_t * fptr)1827 Pbuild_file_symtab(struct ps_prochandle *P, file_info_t *fptr)
1828 {
1829 	char objectfile[PATH_MAX];
1830 	uint_t i;
1831 
1832 	GElf_Ehdr ehdr;
1833 	GElf_Sym s;
1834 
1835 	Elf_Data *shdata;
1836 	Elf_Scn *scn;
1837 	Elf *elf;
1838 	size_t nshdrs, shstrndx;
1839 
1840 	struct {
1841 		GElf_Shdr c_shdr;
1842 		Elf_Data *c_data;
1843 		const char *c_name;
1844 	} *cp, *cache = NULL, *dyn = NULL, *plt = NULL, *ctf = NULL,
1845 	*dbglink = NULL, *buildid = NULL;
1846 
1847 	if (fptr->file_init)
1848 		return;	/* We've already processed this file */
1849 
1850 	/*
1851 	 * Mark the file_info struct as having the symbol table initialized
1852 	 * even if we fail below.  We tried once; we don't try again.
1853 	 */
1854 	fptr->file_init = 1;
1855 
1856 	if (elf_version(EV_CURRENT) == EV_NONE) {
1857 		Pdprintf("libproc ELF version is more recent than libelf\n");
1858 		return;
1859 	}
1860 
1861 	if (P->state == PS_DEAD || P->state == PS_IDLE) {
1862 		char *name;
1863 		/*
1864 		 * If we're a not live, we can't open files from the /proc
1865 		 * object directory; we have only the mapping and file names
1866 		 * to guide us.  We prefer the file_lname, but need to handle
1867 		 * the case of it being NULL in order to bootstrap: we first
1868 		 * come here during rd_new() when the only information we have
1869 		 * is interpreter name associated with the AT_BASE mapping.
1870 		 *
1871 		 * Also, if the zone associated with the core file seems
1872 		 * to exists on this machine we'll try to open the object
1873 		 * file within the zone.
1874 		 */
1875 		if (fptr->file_rname != NULL)
1876 			name = fptr->file_rname;
1877 		else if (fptr->file_lname != NULL)
1878 			name = fptr->file_lname;
1879 		else
1880 			name = fptr->file_pname;
1881 		(void) strlcpy(objectfile, name, sizeof (objectfile));
1882 	} else {
1883 		(void) snprintf(objectfile, sizeof (objectfile),
1884 		    "%s/%d/object/%s",
1885 		    procfs_path, (int)P->pid, fptr->file_pname);
1886 	}
1887 
1888 	/*
1889 	 * Open the object file, create the elf file, and then get the elf
1890 	 * header and .shstrtab data buffer so we can process sections by
1891 	 * name. If anything goes wrong try to fake up an elf file from
1892 	 * the in-core elf image.
1893 	 */
1894 
1895 	if (_libproc_incore_elf || (P->flags & INCORE)) {
1896 		Pdprintf("Pbuild_file_symtab: using in-core data for: %s\n",
1897 		    fptr->file_pname);
1898 
1899 		if ((elf = build_fake_elf(P, fptr, &ehdr, &nshdrs, &shdata)) ==
1900 		    NULL)
1901 			return;
1902 
1903 	} else if ((fptr->file_fd = open(objectfile, O_RDONLY)) < 0) {
1904 		Pdprintf("Pbuild_file_symtab: failed to open %s: %s\n",
1905 		    objectfile, strerror(errno));
1906 
1907 		if ((elf = build_fake_elf(P, fptr, &ehdr, &nshdrs, &shdata)) ==
1908 		    NULL)
1909 			return;
1910 
1911 	} else if ((elf = elf_begin(fptr->file_fd, ELF_C_READ, NULL)) == NULL ||
1912 	    elf_kind(elf) != ELF_K_ELF ||
1913 	    gelf_getehdr(elf, &ehdr) == NULL ||
1914 	    elf_getshdrnum(elf, &nshdrs) == -1 ||
1915 	    elf_getshdrstrndx(elf, &shstrndx) == -1 ||
1916 	    (scn = elf_getscn(elf, shstrndx)) == NULL ||
1917 	    (shdata = elf_getdata(scn, NULL)) == NULL) {
1918 		int err = elf_errno();
1919 
1920 		Pdprintf("failed to process ELF file %s: %s\n",
1921 		    objectfile, (err == 0) ? "<null>" : elf_errmsg(err));
1922 		(void) elf_end(elf);
1923 
1924 		if ((elf = build_fake_elf(P, fptr, &ehdr, &nshdrs, &shdata)) ==
1925 		    NULL)
1926 			return;
1927 
1928 	} else if (file_differs(P, elf, fptr)) {
1929 		Elf *newelf;
1930 
1931 		/*
1932 		 * Before we get too excited about this elf file, we'll check
1933 		 * its checksum value against the value we have in memory. If
1934 		 * they don't agree, we try to fake up a new elf file and
1935 		 * proceed with that instead.
1936 		 */
1937 		Pdprintf("ELF file %s (%lx) doesn't match in-core image\n",
1938 		    fptr->file_pname,
1939 		    (ulong_t)fptr->file_map->map_pmap.pr_vaddr);
1940 
1941 		if ((newelf = build_fake_elf(P, fptr, &ehdr, &nshdrs, &shdata))
1942 		    != NULL) {
1943 			(void) elf_end(elf);
1944 			elf = newelf;
1945 			Pdprintf("switched to faked up ELF file\n");
1946 
1947 			/*
1948 			 * Check to see if the file that we just discovered
1949 			 * to be an imposter matches the execname that was
1950 			 * determined by Pfindexec().  If it does, we (clearly)
1951 			 * don't have the right binary, and we zero out
1952 			 * execname before anyone gets hurt.
1953 			 */
1954 			if (fptr->file_rname != NULL && P->execname != NULL &&
1955 			    strcmp(fptr->file_rname, P->execname) == 0) {
1956 				Pdprintf("file/in-core image mismatch was "
1957 				    "on P->execname; discarding\n");
1958 				free(P->execname);
1959 				P->execname = NULL;
1960 			}
1961 		}
1962 	}
1963 
1964 	if ((cache = malloc(nshdrs * sizeof (*cache))) == NULL) {
1965 		Pdprintf("failed to malloc section cache for %s\n", objectfile);
1966 		goto bad;
1967 	}
1968 
1969 	Pdprintf("processing ELF file %s\n", objectfile);
1970 	fptr->file_class = ehdr.e_ident[EI_CLASS];
1971 	fptr->file_etype = ehdr.e_type;
1972 	fptr->file_elf = elf;
1973 	fptr->file_shstrs = shdata->d_buf;
1974 	fptr->file_shstrsz = shdata->d_size;
1975 
1976 	/*
1977 	 * Iterate through each section, caching its section header, data
1978 	 * pointer, and name.  We use this for handling sh_link values below.
1979 	 */
1980 	for (cp = cache + 1, scn = NULL; scn = elf_nextscn(elf, scn); cp++) {
1981 		if (gelf_getshdr(scn, &cp->c_shdr) == NULL) {
1982 			Pdprintf("Pbuild_file_symtab: Failed to get section "
1983 			    "header\n");
1984 			goto bad; /* Failed to get section header */
1985 		}
1986 
1987 		if ((cp->c_data = elf_getdata(scn, NULL)) == NULL) {
1988 			Pdprintf("Pbuild_file_symtab: Failed to get section "
1989 			    "data\n");
1990 			goto bad; /* Failed to get section data */
1991 		}
1992 
1993 		if (cp->c_shdr.sh_name >= shdata->d_size) {
1994 			Pdprintf("Pbuild_file_symtab: corrupt section name");
1995 			goto bad; /* Corrupt section name */
1996 		}
1997 
1998 		cp->c_name = (const char *)shdata->d_buf + cp->c_shdr.sh_name;
1999 	}
2000 
2001 	/*
2002 	 * Now iterate through the section cache in order to locate info
2003 	 * for the .symtab, .dynsym, .SUNW_ldynsym, .dynamic, .plt,
2004 	 * and .SUNW_ctf sections:
2005 	 */
2006 	for (i = 1, cp = cache + 1; i < nshdrs; i++, cp++) {
2007 		GElf_Shdr *shp = &cp->c_shdr;
2008 
2009 		if (shp->sh_type == SHT_SYMTAB || shp->sh_type == SHT_DYNSYM) {
2010 			sym_tbl_t *symp = shp->sh_type == SHT_SYMTAB ?
2011 			    &fptr->file_symtab : &fptr->file_dynsym;
2012 			/*
2013 			 * It's possible that the we already got the symbol
2014 			 * table from the core file itself. Either the file
2015 			 * differs in which case our faked up elf file will
2016 			 * only contain the dynsym (not the symtab) or the
2017 			 * file matches in which case we'll just be replacing
2018 			 * the symbol table we pulled out of the core file
2019 			 * with an equivalent one. In either case, this
2020 			 * check isn't essential, but it's a good idea.
2021 			 */
2022 			if (symp->sym_data_pri == NULL) {
2023 				Pdprintf("Symbol table found for %s\n",
2024 				    objectfile);
2025 				symp->sym_data_pri = cp->c_data;
2026 				symp->sym_symn +=
2027 				    shp->sh_size / shp->sh_entsize;
2028 				symp->sym_strs =
2029 				    cache[shp->sh_link].c_data->d_buf;
2030 				symp->sym_strsz =
2031 				    cache[shp->sh_link].c_data->d_size;
2032 				symp->sym_hdr_pri = cp->c_shdr;
2033 				symp->sym_strhdr = cache[shp->sh_link].c_shdr;
2034 			} else {
2035 				Pdprintf("Symbol table already there for %s\n",
2036 				    objectfile);
2037 			}
2038 		} else if (shp->sh_type == SHT_SUNW_LDYNSYM) {
2039 			/* .SUNW_ldynsym section is auxiliary to .dynsym */
2040 			if (fptr->file_dynsym.sym_data_aux == NULL) {
2041 				Pdprintf(".SUNW_ldynsym symbol table"
2042 				    " found for %s\n", objectfile);
2043 				fptr->file_dynsym.sym_data_aux = cp->c_data;
2044 				fptr->file_dynsym.sym_symn_aux =
2045 				    shp->sh_size / shp->sh_entsize;
2046 				fptr->file_dynsym.sym_symn +=
2047 				    fptr->file_dynsym.sym_symn_aux;
2048 				fptr->file_dynsym.sym_hdr_aux = cp->c_shdr;
2049 			} else {
2050 				Pdprintf(".SUNW_ldynsym symbol table already"
2051 				    " there for %s\n", objectfile);
2052 			}
2053 		} else if (shp->sh_type == SHT_DYNAMIC) {
2054 			dyn = cp;
2055 		} else if (strcmp(cp->c_name, ".plt") == 0) {
2056 			plt = cp;
2057 		} else if (strcmp(cp->c_name, ".SUNW_ctf") == 0) {
2058 			/*
2059 			 * Skip over bogus CTF sections so they don't come back
2060 			 * to haunt us later.
2061 			 */
2062 			if (shp->sh_link == 0 ||
2063 			    shp->sh_link >= nshdrs ||
2064 			    (cache[shp->sh_link].c_shdr.sh_type != SHT_DYNSYM &&
2065 			    cache[shp->sh_link].c_shdr.sh_type != SHT_SYMTAB)) {
2066 				Pdprintf("Bad sh_link %d for "
2067 				    "CTF\n", shp->sh_link);
2068 				continue;
2069 			}
2070 			ctf = cp;
2071 		} else if (strcmp(cp->c_name, BUILDID_NAME) == 0) {
2072 			Pdprintf("Found a %s section for %s\n", BUILDID_NAME,
2073 			    fptr->file_rname);
2074 			/* The ElfXX_Nhdr is 32/64-bit neutral */
2075 			if (cp->c_shdr.sh_type == SHT_NOTE &&
2076 			    cp->c_data->d_buf != NULL &&
2077 			    cp->c_data->d_size >= sizeof (Elf32_Nhdr)) {
2078 				Elf32_Nhdr *hdr = cp->c_data->d_buf;
2079 				if (hdr->n_type != 3)
2080 					continue;
2081 				if (hdr->n_namesz != 4)
2082 					continue;
2083 				if (hdr->n_descsz < MINBUILDID)
2084 					continue;
2085 				/* Set a reasonable upper bound */
2086 				if (hdr->n_descsz > MAXBUILDID) {
2087 					Pdprintf("Skipped %s as too large "
2088 					    "(%ld)\n", BUILDID_NAME,
2089 					    (unsigned long)hdr->n_descsz);
2090 					continue;
2091 				}
2092 
2093 				if (cp->c_data->d_size < sizeof (hdr) +
2094 				    hdr->n_namesz + hdr->n_descsz)
2095 					continue;
2096 				buildid = cp;
2097 			}
2098 		} else if (strcmp(cp->c_name, DBGLINK_NAME) == 0) {
2099 			Pdprintf("found %s section for %s\n", DBGLINK_NAME,
2100 			    fptr->file_rname);
2101 			/*
2102 			 * Let's make sure of a few things before we do this.
2103 			 */
2104 			if (cp->c_shdr.sh_type == SHT_PROGBITS &&
2105 			    cp->c_data->d_buf != NULL &&
2106 			    cp->c_data->d_size) {
2107 				dbglink = cp;
2108 			}
2109 		}
2110 	}
2111 
2112 	/*
2113 	 * If we haven't found any symbol table information and we have found
2114 	 * either a .note.gnu.build-id or a .gnu_debuglink, it's time to try and
2115 	 * figure out where we might find this. Originally, GNU used the
2116 	 * .gnu_debuglink solely, but then they added a .note.gnu.build-id. The
2117 	 * build-id is some size, usually 16 or 20 bytes, often a SHA1 sum of
2118 	 * parts of the original file. This is maintained across all versions of
2119 	 * the subsequent file.
2120 	 *
2121 	 * For the .note.gnu.build-id, we're going to check a few things before
2122 	 * using it, first that the name is 4 bytes, and is GNU and that the
2123 	 * type is 3, which they say is the build-id identifier.
2124 	 *
2125 	 * To verify that the elf data for the .gnu_debuglink seems somewhat
2126 	 * sane, eg. the elf data should be a string, so we want to verify we
2127 	 * have a null-terminator.
2128 	 */
2129 	if (fptr->file_symtab.sym_data_pri == NULL && buildid != NULL) {
2130 		int i, bo;
2131 		uint8_t *dp;
2132 		char buf[BUILDID_STRLEN], *path;
2133 		Elf32_Nhdr *hdr = buildid->c_data->d_buf;
2134 
2135 		/*
2136 		 * This was checked for validity when assigning the buildid
2137 		 * variable.
2138 		 */
2139 		bzero(buf, sizeof (buf));
2140 		dp = (uint8_t *)((uintptr_t)hdr + sizeof (*hdr) +
2141 		    hdr->n_namesz);
2142 		for (i = 0, bo = 0; i < hdr->n_descsz; i++, bo += 2, dp++) {
2143 			assert(sizeof (buf) - bo > 0);
2144 
2145 			/*
2146 			 * Recall that the build-id is structured as a series of
2147 			 * bytes. However, the first two characters are supposed
2148 			 * to represent a directory. Hence, once we reach offset
2149 			 * two, we insert a '/' character.
2150 			 */
2151 			if (bo == 2) {
2152 				buf[bo] = '/';
2153 				bo++;
2154 			}
2155 			(void) snprintf(buf + bo, sizeof (buf) - bo, "%2x",
2156 			    *dp);
2157 		}
2158 
2159 		if (asprintf(&path, "/usr/lib/debug/.build-id/%s.debug",
2160 		    buf) != -1) {
2161 			boolean_t r;
2162 			Pdprintf("attempting to find build id alternate debug "
2163 			    "file at %s\n", path);
2164 			r = build_alt_debug(fptr, path, 0, buildid->c_data);
2165 			Pdprintf("attempt %s\n", r == B_TRUE ?
2166 			    "succeeded" : "failed");
2167 			free(path);
2168 		} else {
2169 			Pdprintf("failed to construct build id path: %s\n",
2170 			    strerror(errno));
2171 		}
2172 	}
2173 
2174 	if (fptr->file_symtab.sym_data_pri == NULL && dbglink != NULL) {
2175 		char *c = dbglink->c_data->d_buf;
2176 		size_t i;
2177 		boolean_t found = B_FALSE;
2178 		Elf_Data *ed = dbglink->c_data;
2179 		uint32_t crc;
2180 
2181 		for (i = 0; i < ed->d_size; i++) {
2182 			if (c[i] == '\0') {
2183 				uintptr_t off;
2184 				Pdprintf("got .gnu_debuglink terminator at "
2185 				    "offset %lu\n", (unsigned long)i);
2186 				/*
2187 				 * After the null terminator, there should be
2188 				 * padding, followed by a 4 byte CRC of the
2189 				 * file. If we don't see this, we're going to
2190 				 * assume this is bogus.
2191 				 */
2192 				if ((i % sizeof (uint32_t)) == 0) {
2193 					i += 4;
2194 				} else {
2195 					i += sizeof (uint32_t) -
2196 					    (i % sizeof (uint32_t));
2197 				}
2198 				if (i + sizeof (uint32_t) ==
2199 				    dbglink->c_data->d_size) {
2200 					found = B_TRUE;
2201 					off = (uintptr_t)ed->d_buf + i;
2202 					crc = *(uint32_t *)off;
2203 				} else {
2204 					Pdprintf(".gnu_debuglink size mismatch,"
2205 					    " expected: %lu, found: %lu\n",
2206 					    (unsigned long)i,
2207 					    (unsigned long)ed->d_size);
2208 				}
2209 				break;
2210 			}
2211 		}
2212 
2213 		if (found == B_TRUE)
2214 			find_alt_debuglink(fptr, dbglink->c_data->d_buf, crc);
2215 	}
2216 
2217 	/*
2218 	 * At this point, we've found all the symbol tables we're ever going
2219 	 * to find: the ones in the loop above and possibly the symtab that
2220 	 * was included in the core file. Before we perform any lookups, we
2221 	 * create sorted versions to optimize for lookups.
2222 	 */
2223 	optimize_symtab(&fptr->file_symtab);
2224 	optimize_symtab(&fptr->file_dynsym);
2225 
2226 	/*
2227 	 * Fill in the base address of the text mapping for shared libraries.
2228 	 * This allows us to translate symbols before librtld_db is ready.
2229 	 */
2230 	if (fptr->file_etype == ET_DYN) {
2231 		fptr->file_dyn_base = fptr->file_map->map_pmap.pr_vaddr -
2232 		    fptr->file_map->map_pmap.pr_offset;
2233 		Pdprintf("setting file_dyn_base for %s to %lx\n",
2234 		    objectfile, (long)fptr->file_dyn_base);
2235 	}
2236 
2237 	/*
2238 	 * Record the CTF section information in the file info structure.
2239 	 */
2240 	if (ctf != NULL) {
2241 		fptr->file_ctf_off = ctf->c_shdr.sh_offset;
2242 		fptr->file_ctf_size = ctf->c_shdr.sh_size;
2243 		if (ctf->c_shdr.sh_link != 0 &&
2244 		    cache[ctf->c_shdr.sh_link].c_shdr.sh_type == SHT_DYNSYM)
2245 			fptr->file_ctf_dyn = 1;
2246 	}
2247 
2248 	if (fptr->file_lo == NULL)
2249 		goto done; /* Nothing else to do if no load object info */
2250 
2251 	/*
2252 	 * If the object is a shared library and we have a different rl_base
2253 	 * value, reset file_dyn_base according to librtld_db's information.
2254 	 */
2255 	if (fptr->file_etype == ET_DYN &&
2256 	    fptr->file_lo->rl_base != fptr->file_dyn_base) {
2257 		Pdprintf("resetting file_dyn_base for %s to %lx\n",
2258 		    objectfile, (long)fptr->file_lo->rl_base);
2259 		fptr->file_dyn_base = fptr->file_lo->rl_base;
2260 	}
2261 
2262 	/*
2263 	 * Fill in the PLT information for this file if a PLT symbol is found.
2264 	 */
2265 	if (sym_by_name(&fptr->file_dynsym, "_PROCEDURE_LINKAGE_TABLE_", &s,
2266 	    NULL) != NULL) {
2267 		fptr->file_plt_base = s.st_value + fptr->file_dyn_base;
2268 		fptr->file_plt_size = (plt != NULL) ? plt->c_shdr.sh_size : 0;
2269 
2270 		/*
2271 		 * Bring the load object up to date; it is the only way the
2272 		 * user has to access the PLT data. The PLT information in the
2273 		 * rd_loadobj_t is not set in the call to map_iter() (the
2274 		 * callback for rd_loadobj_iter) where we set file_lo.
2275 		 */
2276 		fptr->file_lo->rl_plt_base = fptr->file_plt_base;
2277 		fptr->file_lo->rl_plt_size = fptr->file_plt_size;
2278 
2279 		Pdprintf("PLT found at %p, size = %lu\n",
2280 		    (void *)fptr->file_plt_base, (ulong_t)fptr->file_plt_size);
2281 	}
2282 
2283 	/*
2284 	 * Fill in the PLT information.
2285 	 */
2286 	if (dyn != NULL) {
2287 		uintptr_t dynaddr = dyn->c_shdr.sh_addr + fptr->file_dyn_base;
2288 		size_t ndyn = dyn->c_shdr.sh_size / dyn->c_shdr.sh_entsize;
2289 		GElf_Dyn d;
2290 
2291 		for (i = 0; i < ndyn; i++) {
2292 			if (gelf_getdyn(dyn->c_data, i, &d) == NULL)
2293 				continue;
2294 
2295 			switch (d.d_tag) {
2296 			case DT_JMPREL:
2297 				Pdprintf("DT_JMPREL is %p\n",
2298 				    (void *)(uintptr_t)d.d_un.d_ptr);
2299 				fptr->file_jmp_rel =
2300 				    d.d_un.d_ptr + fptr->file_dyn_base;
2301 				break;
2302 			case DT_STRTAB:
2303 				Pdprintf("DT_STRTAB is %p\n",
2304 				    (void *)(uintptr_t)d.d_un.d_ptr);
2305 				break;
2306 			case DT_PLTGOT:
2307 				Pdprintf("DT_PLTGOT is %p\n",
2308 				    (void *)(uintptr_t)d.d_un.d_ptr);
2309 				break;
2310 			case DT_SUNW_SYMTAB:
2311 				Pdprintf("DT_SUNW_SYMTAB is %p\n",
2312 				    (void *)(uintptr_t)d.d_un.d_ptr);
2313 				break;
2314 			case DT_SYMTAB:
2315 				Pdprintf("DT_SYMTAB is %p\n",
2316 				    (void *)(uintptr_t)d.d_un.d_ptr);
2317 				break;
2318 			case DT_HASH:
2319 				Pdprintf("DT_HASH is %p\n",
2320 				    (void *)(uintptr_t)d.d_un.d_ptr);
2321 				break;
2322 			}
2323 		}
2324 
2325 		Pdprintf("_DYNAMIC found at %p, %lu entries, DT_JMPREL = %p\n",
2326 		    (void *)dynaddr, (ulong_t)ndyn, (void *)fptr->file_jmp_rel);
2327 	}
2328 
2329 done:
2330 	free(cache);
2331 	return;
2332 
2333 bad:
2334 	if (cache != NULL)
2335 		free(cache);
2336 
2337 	(void) elf_end(elf);
2338 	fptr->file_elf = NULL;
2339 	if (fptr->file_elfmem != NULL) {
2340 		free(fptr->file_elfmem);
2341 		fptr->file_elfmem = NULL;
2342 	}
2343 	(void) close(fptr->file_fd);
2344 	if (fptr->file_dbgelf != NULL)
2345 		(void) elf_end(fptr->file_dbgelf);
2346 	fptr->file_dbgelf = NULL;
2347 	if (fptr->file_dbgfile >= 0)
2348 		(void) close(fptr->file_dbgfile);
2349 	fptr->file_fd = -1;
2350 	fptr->file_dbgfile = -1;
2351 }
2352 
2353 /*
2354  * Given a process virtual address, return the map_info_t containing it.
2355  * If none found, return NULL.
2356  */
2357 map_info_t *
Paddr2mptr(struct ps_prochandle * P,uintptr_t addr)2358 Paddr2mptr(struct ps_prochandle *P, uintptr_t addr)
2359 {
2360 	int lo = 0;
2361 	int hi = P->map_count - 1;
2362 	int mid;
2363 	map_info_t *mp;
2364 
2365 	while (lo <= hi) {
2366 
2367 		mid = (lo + hi) / 2;
2368 		mp = &P->mappings[mid];
2369 
2370 		/* check that addr is in [vaddr, vaddr + size) */
2371 		if ((addr - mp->map_pmap.pr_vaddr) < mp->map_pmap.pr_size)
2372 			return (mp);
2373 
2374 		if (addr < mp->map_pmap.pr_vaddr)
2375 			hi = mid - 1;
2376 		else
2377 			lo = mid + 1;
2378 	}
2379 
2380 	return (NULL);
2381 }
2382 
2383 /*
2384  * Return the map_info_t for the executable file.
2385  * If not found, return NULL.
2386  */
2387 static map_info_t *
exec_map(struct ps_prochandle * P)2388 exec_map(struct ps_prochandle *P)
2389 {
2390 	uint_t i;
2391 	map_info_t *mptr;
2392 	map_info_t *mold = NULL;
2393 	file_info_t *fptr;
2394 	uintptr_t base;
2395 
2396 	for (i = 0, mptr = P->mappings; i < P->map_count; i++, mptr++) {
2397 		if (mptr->map_pmap.pr_mapname[0] == '\0')
2398 			continue;
2399 		if (strcmp(mptr->map_pmap.pr_mapname, "a.out") == 0) {
2400 			if ((fptr = mptr->map_file) != NULL &&
2401 			    fptr->file_lo != NULL) {
2402 				base = fptr->file_lo->rl_base;
2403 				if (base >= mptr->map_pmap.pr_vaddr &&
2404 				    base < mptr->map_pmap.pr_vaddr +
2405 				    mptr->map_pmap.pr_size)	/* text space */
2406 					return (mptr);
2407 				mold = mptr;	/* must be the data */
2408 				continue;
2409 			}
2410 			/* This is a poor way to test for text space */
2411 			if (!(mptr->map_pmap.pr_mflags & MA_EXEC) ||
2412 			    (mptr->map_pmap.pr_mflags & MA_WRITE)) {
2413 				mold = mptr;
2414 				continue;
2415 			}
2416 			return (mptr);
2417 		}
2418 	}
2419 
2420 	return (mold);
2421 }
2422 
2423 /*
2424  * Given a shared object name, return the map_info_t for it.  If no matching
2425  * object is found, return NULL.  Normally, the link maps contain the full
2426  * object pathname, e.g. /usr/lib/libc.so.1.  We allow the object name to
2427  * take one of the following forms:
2428  *
2429  * 1. An exact match (i.e. a full pathname): "/usr/lib/libc.so.1"
2430  * 2. An exact basename match: "libc.so.1"
2431  * 3. An initial basename match up to a '.' suffix: "libc.so" or "libc"
2432  * 4. The literal string "a.out" is an alias for the executable mapping
2433  *
2434  * The third case is a convenience for callers and may not be necessary.
2435  *
2436  * As the exact same object name may be loaded on different link maps (see
2437  * dlmopen(3C)), we also allow the caller to resolve the object name by
2438  * specifying a particular link map id.  If lmid is PR_LMID_EVERY, the
2439  * first matching name will be returned, regardless of the link map id.
2440  */
2441 static map_info_t *
object_to_map(struct ps_prochandle * P,Lmid_t lmid,const char * objname)2442 object_to_map(struct ps_prochandle *P, Lmid_t lmid, const char *objname)
2443 {
2444 	map_info_t *mp;
2445 	file_info_t *fp;
2446 	size_t objlen;
2447 	uint_t i;
2448 
2449 	/*
2450 	 * If we have no rtld_db, then always treat a request as one for all
2451 	 * link maps.
2452 	 */
2453 	if (P->rap == NULL)
2454 		lmid = PR_LMID_EVERY;
2455 
2456 	/*
2457 	 * First pass: look for exact matches of the entire pathname or
2458 	 * basename (cases 1 and 2 above):
2459 	 */
2460 	for (i = 0, mp = P->mappings; i < P->map_count; i++, mp++) {
2461 
2462 		if (mp->map_pmap.pr_mapname[0] == '\0' ||
2463 		    (fp = mp->map_file) == NULL ||
2464 		    ((fp->file_lname == NULL) && (fp->file_rname == NULL)))
2465 			continue;
2466 
2467 		if (lmid != PR_LMID_EVERY &&
2468 		    (fp->file_lo == NULL || lmid != fp->file_lo->rl_lmident))
2469 			continue;
2470 
2471 		/*
2472 		 * If we match, return the primary text mapping; otherwise
2473 		 * just return the mapping we matched.
2474 		 */
2475 		if ((fp->file_lbase && strcmp(fp->file_lbase, objname) == 0) ||
2476 		    (fp->file_rbase && strcmp(fp->file_rbase, objname) == 0) ||
2477 		    (fp->file_lname && strcmp(fp->file_lname, objname) == 0) ||
2478 		    (fp->file_rname && strcmp(fp->file_rname, objname) == 0))
2479 			return (fp->file_map ? fp->file_map : mp);
2480 	}
2481 
2482 	objlen = strlen(objname);
2483 
2484 	/*
2485 	 * Second pass: look for partial matches (case 3 above):
2486 	 */
2487 	for (i = 0, mp = P->mappings; i < P->map_count; i++, mp++) {
2488 
2489 		if (mp->map_pmap.pr_mapname[0] == '\0' ||
2490 		    (fp = mp->map_file) == NULL ||
2491 		    ((fp->file_lname == NULL) && (fp->file_rname == NULL)))
2492 			continue;
2493 
2494 		if (lmid != PR_LMID_EVERY &&
2495 		    (fp->file_lo == NULL || lmid != fp->file_lo->rl_lmident))
2496 			continue;
2497 
2498 		/*
2499 		 * If we match, return the primary text mapping; otherwise
2500 		 * just return the mapping we matched.
2501 		 */
2502 		if ((fp->file_lbase != NULL) &&
2503 		    (strncmp(fp->file_lbase, objname, objlen) == 0) &&
2504 		    (fp->file_lbase[objlen] == '.'))
2505 			return (fp->file_map ? fp->file_map : mp);
2506 		if ((fp->file_rbase != NULL) &&
2507 		    (strncmp(fp->file_rbase, objname, objlen) == 0) &&
2508 		    (fp->file_rbase[objlen] == '.'))
2509 			return (fp->file_map ? fp->file_map : mp);
2510 	}
2511 
2512 	/*
2513 	 * One last check: we allow "a.out" to always alias the executable,
2514 	 * assuming this name was not in use for something else.
2515 	 */
2516 	if ((lmid == PR_LMID_EVERY || lmid == LM_ID_BASE) &&
2517 	    (strcmp(objname, "a.out") == 0))
2518 		return (P->map_exec);
2519 
2520 	return (NULL);
2521 }
2522 
2523 static map_info_t *
object_name_to_map(struct ps_prochandle * P,Lmid_t lmid,const char * name)2524 object_name_to_map(struct ps_prochandle *P, Lmid_t lmid, const char *name)
2525 {
2526 	map_info_t *mptr;
2527 
2528 	if (!P->info_valid)
2529 		Pupdate_maps(P);
2530 
2531 	if (P->map_exec == NULL && ((mptr = Paddr2mptr(P,
2532 	    Pgetauxval(P, AT_ENTRY))) != NULL || (mptr = exec_map(P)) != NULL))
2533 		P->map_exec = mptr;
2534 
2535 	if (P->map_ldso == NULL && (mptr = Paddr2mptr(P,
2536 	    Pgetauxval(P, AT_BASE))) != NULL)
2537 		P->map_ldso = mptr;
2538 
2539 	if (name == PR_OBJ_EXEC)
2540 		mptr = P->map_exec;
2541 	else if (name == PR_OBJ_LDSO)
2542 		mptr = P->map_ldso;
2543 	else if (Prd_agent(P) != NULL || P->state == PS_IDLE)
2544 		mptr = object_to_map(P, lmid, name);
2545 	else
2546 		mptr = NULL;
2547 
2548 	return (mptr);
2549 }
2550 
2551 /*
2552  * When two symbols are found by address, decide which one is to be preferred.
2553  */
2554 static GElf_Sym *
sym_prefer(GElf_Sym * sym1,char * name1,GElf_Sym * sym2,char * name2)2555 sym_prefer(GElf_Sym *sym1, char *name1, GElf_Sym *sym2, char *name2)
2556 {
2557 	/*
2558 	 * Prefer the non-NULL symbol.
2559 	 */
2560 	if (sym1 == NULL)
2561 		return (sym2);
2562 	if (sym2 == NULL)
2563 		return (sym1);
2564 
2565 	/*
2566 	 * Defer to the sort ordering...
2567 	 */
2568 	return (byaddr_cmp_common(sym1, name1, sym2, name2) <= 0 ? sym1 : sym2);
2569 }
2570 
2571 /*
2572  * Use a binary search to do the work of sym_by_addr().
2573  */
2574 static GElf_Sym *
sym_by_addr_binary(sym_tbl_t * symtab,GElf_Addr addr,GElf_Sym * symp,uint_t * idp)2575 sym_by_addr_binary(sym_tbl_t *symtab, GElf_Addr addr, GElf_Sym *symp,
2576     uint_t *idp)
2577 {
2578 	GElf_Sym sym, osym;
2579 	uint_t i, oid, *byaddr = symtab->sym_byaddr;
2580 	int min, max, mid, omid, found = 0;
2581 
2582 	if (symtab->sym_data_pri == NULL || symtab->sym_count == 0)
2583 		return (NULL);
2584 
2585 	min = 0;
2586 	max = symtab->sym_count - 1;
2587 	osym.st_value = 0;
2588 
2589 	/*
2590 	 * We can't return when we've found a match, we have to continue
2591 	 * searching for the closest matching symbol.
2592 	 */
2593 	while (min <= max) {
2594 		mid = (max + min) / 2;
2595 
2596 		i = byaddr[mid];
2597 		(void) symtab_getsym(symtab, i, &sym);
2598 
2599 		if (addr >= sym.st_value &&
2600 		    addr < sym.st_value + sym.st_size &&
2601 		    (!found || sym.st_value > osym.st_value)) {
2602 			osym = sym;
2603 			omid = mid;
2604 			oid = i;
2605 			found = 1;
2606 		}
2607 
2608 		if (addr < sym.st_value)
2609 			max = mid - 1;
2610 		else
2611 			min = mid + 1;
2612 	}
2613 
2614 	if (!found)
2615 		return (NULL);
2616 
2617 	/*
2618 	 * There may be many symbols with identical values so we walk
2619 	 * backward in the byaddr table to find the best match.
2620 	 */
2621 	do {
2622 		sym = osym;
2623 		i = oid;
2624 
2625 		if (omid == 0)
2626 			break;
2627 
2628 		oid = byaddr[--omid];
2629 		(void) symtab_getsym(symtab, oid, &osym);
2630 	} while (addr >= osym.st_value &&
2631 	    addr < sym.st_value + osym.st_size &&
2632 	    osym.st_value == sym.st_value);
2633 
2634 	*symp = sym;
2635 	if (idp != NULL)
2636 		*idp = i;
2637 	return (symp);
2638 }
2639 
2640 /*
2641  * Use a linear search to do the work of sym_by_addr().
2642  */
2643 static GElf_Sym *
sym_by_addr_linear(sym_tbl_t * symtab,GElf_Addr addr,GElf_Sym * symbolp,uint_t * idp)2644 sym_by_addr_linear(sym_tbl_t *symtab, GElf_Addr addr, GElf_Sym *symbolp,
2645     uint_t *idp)
2646 {
2647 	size_t symn = symtab->sym_symn;
2648 	char *strs = symtab->sym_strs;
2649 	GElf_Sym sym, *symp = NULL;
2650 	GElf_Sym osym, *osymp = NULL;
2651 	int i, id;
2652 
2653 	if (symtab->sym_data_pri == NULL || symn == 0 || strs == NULL)
2654 		return (NULL);
2655 
2656 	for (i = 0; i < symn; i++) {
2657 		if ((symp = symtab_getsym(symtab, i, &sym)) != NULL) {
2658 			if (addr >= sym.st_value &&
2659 			    addr < sym.st_value + sym.st_size) {
2660 				if (osymp)
2661 					symp = sym_prefer(
2662 					    symp, strs + symp->st_name,
2663 					    osymp, strs + osymp->st_name);
2664 				if (symp != osymp) {
2665 					osym = sym;
2666 					osymp = &osym;
2667 					id = i;
2668 				}
2669 			}
2670 		}
2671 	}
2672 	if (osymp) {
2673 		*symbolp = osym;
2674 		if (idp)
2675 			*idp = id;
2676 		return (symbolp);
2677 	}
2678 	return (NULL);
2679 }
2680 
2681 /*
2682  * Look up a symbol by address in the specified symbol table.
2683  * Adjustment to 'addr' must already have been made for the
2684  * offset of the symbol if this is a dynamic library symbol table.
2685  *
2686  * Use a linear or a binary search depending on whether or not we
2687  * chose to sort the table in optimize_symtab().
2688  */
2689 static GElf_Sym *
sym_by_addr(sym_tbl_t * symtab,GElf_Addr addr,GElf_Sym * symp,uint_t * idp)2690 sym_by_addr(sym_tbl_t *symtab, GElf_Addr addr, GElf_Sym *symp, uint_t *idp)
2691 {
2692 	if (_libproc_no_qsort) {
2693 		return (sym_by_addr_linear(symtab, addr, symp, idp));
2694 	} else {
2695 		return (sym_by_addr_binary(symtab, addr, symp, idp));
2696 	}
2697 }
2698 
2699 /*
2700  * Use a binary search to do the work of sym_by_name().
2701  */
2702 static GElf_Sym *
sym_by_name_binary(sym_tbl_t * symtab,const char * name,GElf_Sym * symp,uint_t * idp)2703 sym_by_name_binary(sym_tbl_t *symtab, const char *name, GElf_Sym *symp,
2704     uint_t *idp)
2705 {
2706 	char *strs = symtab->sym_strs;
2707 	uint_t i, *byname = symtab->sym_byname;
2708 	int min, mid, max, cmp;
2709 
2710 	if (symtab->sym_data_pri == NULL || strs == NULL ||
2711 	    symtab->sym_count == 0)
2712 		return (NULL);
2713 
2714 	min = 0;
2715 	max = symtab->sym_count - 1;
2716 
2717 	while (min <= max) {
2718 		mid = (max + min) / 2;
2719 
2720 		i = byname[mid];
2721 		(void) symtab_getsym(symtab, i, symp);
2722 
2723 		if ((cmp = strcmp(name, strs + symp->st_name)) == 0) {
2724 			if (idp != NULL)
2725 				*idp = i;
2726 			return (symp);
2727 		}
2728 
2729 		if (cmp < 0)
2730 			max = mid - 1;
2731 		else
2732 			min = mid + 1;
2733 	}
2734 
2735 	return (NULL);
2736 }
2737 
2738 /*
2739  * Use a linear search to do the work of sym_by_name().
2740  */
2741 static GElf_Sym *
sym_by_name_linear(sym_tbl_t * symtab,const char * name,GElf_Sym * symp,uint_t * idp)2742 sym_by_name_linear(sym_tbl_t *symtab, const char *name, GElf_Sym *symp,
2743     uint_t *idp)
2744 {
2745 	size_t symn = symtab->sym_symn;
2746 	char *strs = symtab->sym_strs;
2747 	int i;
2748 
2749 	if (symtab->sym_data_pri == NULL || symn == 0 || strs == NULL)
2750 		return (NULL);
2751 
2752 	for (i = 0; i < symn; i++) {
2753 		if (symtab_getsym(symtab, i, symp) &&
2754 		    strcmp(name, strs + symp->st_name) == 0) {
2755 			if (idp)
2756 				*idp = i;
2757 			return (symp);
2758 		}
2759 	}
2760 
2761 	return (NULL);
2762 }
2763 
2764 /*
2765  * Look up a symbol by name in the specified symbol table.
2766  *
2767  * Use a linear or a binary search depending on whether or not we
2768  * chose to sort the table in optimize_symtab().
2769  */
2770 static GElf_Sym *
sym_by_name(sym_tbl_t * symtab,const char * name,GElf_Sym * symp,uint_t * idp)2771 sym_by_name(sym_tbl_t *symtab, const char *name, GElf_Sym *symp, uint_t *idp)
2772 {
2773 	if (_libproc_no_qsort) {
2774 		return (sym_by_name_linear(symtab, name, symp, idp));
2775 	} else {
2776 		return (sym_by_name_binary(symtab, name, symp, idp));
2777 	}
2778 }
2779 
2780 /*
2781  * Search the process symbol tables looking for a symbol whose
2782  * value to value+size contain the address specified by addr.
2783  * Return values are:
2784  *	sym_name_buffer containing the symbol name
2785  *	GElf_Sym symbol table entry
2786  *	prsyminfo_t ancillary symbol information
2787  * Returns 0 on success, -1 on failure.
2788  */
2789 static int
i_Pxlookup_by_addr(struct ps_prochandle * P,int lmresolve,uintptr_t addr,char * sym_name_buffer,size_t bufsize,GElf_Sym * symbolp,prsyminfo_t * sip)2790 i_Pxlookup_by_addr(
2791 	struct ps_prochandle *P,
2792 	int lmresolve,			/* use resolve linker object names */
2793 	uintptr_t addr,			/* process address being sought */
2794 	char *sym_name_buffer,		/* buffer for the symbol name */
2795 	size_t bufsize,			/* size of sym_name_buffer */
2796 	GElf_Sym *symbolp,		/* returned symbol table entry */
2797 	prsyminfo_t *sip)		/* returned symbol info */
2798 {
2799 	GElf_Sym	*symp;
2800 	char		*name;
2801 	GElf_Sym	sym1, *sym1p = NULL;
2802 	GElf_Sym	sym2, *sym2p = NULL;
2803 	char		*name1 = NULL;
2804 	char		*name2 = NULL;
2805 	uint_t		i1;
2806 	uint_t		i2;
2807 	map_info_t	*mptr;
2808 	file_info_t	*fptr;
2809 
2810 	(void) Prd_agent(P);
2811 
2812 	if ((mptr = Paddr2mptr(P, addr)) == NULL ||	/* no such address */
2813 	    (fptr = build_map_symtab(P, mptr)) == NULL || /* no mapped file */
2814 	    fptr->file_elf == NULL)			/* not an ELF file */
2815 		return (-1);
2816 
2817 	/*
2818 	 * Adjust the address by the load object base address in
2819 	 * case the address turns out to be in a shared library.
2820 	 */
2821 	addr -= fptr->file_dyn_base;
2822 
2823 	/*
2824 	 * Search both symbol tables, symtab first, then dynsym.
2825 	 */
2826 	if ((sym1p = sym_by_addr(&fptr->file_symtab, addr, &sym1, &i1)) != NULL)
2827 		name1 = fptr->file_symtab.sym_strs + sym1.st_name;
2828 	if ((sym2p = sym_by_addr(&fptr->file_dynsym, addr, &sym2, &i2)) != NULL)
2829 		name2 = fptr->file_dynsym.sym_strs + sym2.st_name;
2830 
2831 	if ((symp = sym_prefer(sym1p, name1, sym2p, name2)) == NULL)
2832 		return (-1);
2833 
2834 	name = (symp == sym1p) ? name1 : name2;
2835 	if (bufsize > 0) {
2836 		(void) strncpy(sym_name_buffer, name, bufsize);
2837 		sym_name_buffer[bufsize - 1] = '\0';
2838 	}
2839 
2840 	*symbolp = *symp;
2841 	if (sip != NULL) {
2842 		sip->prs_name = bufsize == 0 ? NULL : sym_name_buffer;
2843 		if (lmresolve && (fptr->file_rname != NULL))
2844 			sip->prs_object = fptr->file_rbase;
2845 		else
2846 			sip->prs_object = fptr->file_lbase;
2847 		sip->prs_id = (symp == sym1p) ? i1 : i2;
2848 		sip->prs_table = (symp == sym1p) ? PR_SYMTAB : PR_DYNSYM;
2849 		sip->prs_lmid = (fptr->file_lo == NULL) ? LM_ID_BASE :
2850 		    fptr->file_lo->rl_lmident;
2851 	}
2852 
2853 	if (GELF_ST_TYPE(symbolp->st_info) != STT_TLS)
2854 		symbolp->st_value += fptr->file_dyn_base;
2855 
2856 	return (0);
2857 }
2858 
2859 int
Pxlookup_by_addr(struct ps_prochandle * P,uintptr_t addr,char * buf,size_t bufsize,GElf_Sym * symp,prsyminfo_t * sip)2860 Pxlookup_by_addr(struct ps_prochandle *P, uintptr_t addr, char *buf,
2861     size_t bufsize, GElf_Sym *symp, prsyminfo_t *sip)
2862 {
2863 	return (i_Pxlookup_by_addr(P, B_FALSE, addr, buf, bufsize, symp, sip));
2864 }
2865 
2866 int
Pxlookup_by_addr_resolved(struct ps_prochandle * P,uintptr_t addr,char * buf,size_t bufsize,GElf_Sym * symp,prsyminfo_t * sip)2867 Pxlookup_by_addr_resolved(struct ps_prochandle *P, uintptr_t addr, char *buf,
2868     size_t bufsize, GElf_Sym *symp, prsyminfo_t *sip)
2869 {
2870 	return (i_Pxlookup_by_addr(P, B_TRUE, addr, buf, bufsize, symp, sip));
2871 }
2872 
2873 int
Plookup_by_addr(struct ps_prochandle * P,uintptr_t addr,char * buf,size_t size,GElf_Sym * symp)2874 Plookup_by_addr(struct ps_prochandle *P, uintptr_t addr, char *buf,
2875     size_t size, GElf_Sym *symp)
2876 {
2877 	return (i_Pxlookup_by_addr(P, B_FALSE, addr, buf, size, symp, NULL));
2878 }
2879 
2880 /*
2881  * Search the process symbol tables looking for a symbol whose name matches the
2882  * specified name and whose object and link map optionally match the specified
2883  * parameters.  On success, the function returns 0 and fills in the GElf_Sym
2884  * symbol table entry.  On failure, -1 is returned.
2885  */
2886 int
Pxlookup_by_name(struct ps_prochandle * P,Lmid_t lmid,const char * oname,const char * sname,GElf_Sym * symp,prsyminfo_t * sip)2887 Pxlookup_by_name(
2888 	struct ps_prochandle *P,
2889 	Lmid_t lmid,			/* link map to match, or -1 for any */
2890 	const char *oname,		/* load object name */
2891 	const char *sname,		/* symbol name */
2892 	GElf_Sym *symp,			/* returned symbol table entry */
2893 	prsyminfo_t *sip)		/* returned symbol info */
2894 {
2895 	map_info_t *mptr;
2896 	file_info_t *fptr;
2897 	int cnt;
2898 
2899 	GElf_Sym sym;
2900 	prsyminfo_t si;
2901 	int rv = -1;
2902 	uint_t id;
2903 
2904 	if (oname == PR_OBJ_EVERY) {
2905 		/* create all the file_info_t's for all the mappings */
2906 		(void) Prd_agent(P);
2907 		cnt = P->num_files;
2908 		fptr = list_head(&P->file_head);
2909 	} else {
2910 		cnt = 1;
2911 		if ((mptr = object_name_to_map(P, lmid, oname)) == NULL ||
2912 		    (fptr = build_map_symtab(P, mptr)) == NULL)
2913 			return (-1);
2914 	}
2915 
2916 	/*
2917 	 * Iterate through the loaded object files and look for the symbol
2918 	 * name in the .symtab and .dynsym of each.  If we encounter a match
2919 	 * with SHN_UNDEF, keep looking in hopes of finding a better match.
2920 	 * This means that a name such as "puts" will match the puts function
2921 	 * in libc instead of matching the puts PLT entry in the a.out file.
2922 	 */
2923 	for (; cnt > 0; cnt--, fptr = list_next(&P->file_head, fptr)) {
2924 		Pbuild_file_symtab(P, fptr);
2925 
2926 		if (fptr->file_elf == NULL)
2927 			continue;
2928 
2929 		if (lmid != PR_LMID_EVERY && fptr->file_lo != NULL &&
2930 		    lmid != fptr->file_lo->rl_lmident)
2931 			continue;
2932 
2933 		if (fptr->file_symtab.sym_data_pri != NULL &&
2934 		    sym_by_name(&fptr->file_symtab, sname, symp, &id)) {
2935 			if (sip != NULL) {
2936 				sip->prs_id = id;
2937 				sip->prs_table = PR_SYMTAB;
2938 				sip->prs_object = oname;
2939 				sip->prs_name = sname;
2940 				sip->prs_lmid = fptr->file_lo == NULL ?
2941 				    LM_ID_BASE : fptr->file_lo->rl_lmident;
2942 			}
2943 		} else if (fptr->file_dynsym.sym_data_pri != NULL &&
2944 		    sym_by_name(&fptr->file_dynsym, sname, symp, &id)) {
2945 			if (sip != NULL) {
2946 				sip->prs_id = id;
2947 				sip->prs_table = PR_DYNSYM;
2948 				sip->prs_object = oname;
2949 				sip->prs_name = sname;
2950 				sip->prs_lmid = fptr->file_lo == NULL ?
2951 				    LM_ID_BASE : fptr->file_lo->rl_lmident;
2952 			}
2953 		} else {
2954 			continue;
2955 		}
2956 
2957 		if (GELF_ST_TYPE(symp->st_info) != STT_TLS)
2958 			symp->st_value += fptr->file_dyn_base;
2959 
2960 		if (symp->st_shndx != SHN_UNDEF)
2961 			return (0);
2962 
2963 		if (rv != 0) {
2964 			if (sip != NULL)
2965 				si = *sip;
2966 			sym = *symp;
2967 			rv = 0;
2968 		}
2969 	}
2970 
2971 	if (rv == 0) {
2972 		if (sip != NULL)
2973 			*sip = si;
2974 		*symp = sym;
2975 	}
2976 
2977 	return (rv);
2978 }
2979 
2980 /*
2981  * Search the process symbol tables looking for a symbol whose name matches the
2982  * specified name, but without any restriction on the link map id.
2983  */
2984 int
Plookup_by_name(struct ps_prochandle * P,const char * object,const char * symbol,GElf_Sym * symp)2985 Plookup_by_name(struct ps_prochandle *P, const char *object,
2986     const char *symbol, GElf_Sym *symp)
2987 {
2988 	return (Pxlookup_by_name(P, PR_LMID_EVERY, object, symbol, symp, NULL));
2989 }
2990 
2991 /*
2992  * Iterate over the process's address space mappings.
2993  */
2994 static int
i_Pmapping_iter(struct ps_prochandle * P,boolean_t lmresolve,proc_map_f * func,void * cd)2995 i_Pmapping_iter(struct ps_prochandle *P, boolean_t lmresolve,
2996     proc_map_f *func, void *cd)
2997 {
2998 	map_info_t *mptr;
2999 	file_info_t *fptr;
3000 	char *object_name;
3001 	int rc = 0;
3002 	int i;
3003 
3004 	/* create all the file_info_t's for all the mappings */
3005 	(void) Prd_agent(P);
3006 
3007 	for (i = 0, mptr = P->mappings; i < P->map_count; i++, mptr++) {
3008 		if ((fptr = mptr->map_file) == NULL)
3009 			object_name = NULL;
3010 		else if (lmresolve && (fptr->file_rname != NULL))
3011 			object_name = fptr->file_rname;
3012 		else
3013 			object_name = fptr->file_lname;
3014 		if ((rc = func(cd, &mptr->map_pmap, object_name)) != 0)
3015 			return (rc);
3016 	}
3017 	return (0);
3018 }
3019 
3020 int
Pmapping_iter(struct ps_prochandle * P,proc_map_f * func,void * cd)3021 Pmapping_iter(struct ps_prochandle *P, proc_map_f *func, void *cd)
3022 {
3023 	return (i_Pmapping_iter(P, B_FALSE, func, cd));
3024 }
3025 
3026 int
Pmapping_iter_resolved(struct ps_prochandle * P,proc_map_f * func,void * cd)3027 Pmapping_iter_resolved(struct ps_prochandle *P, proc_map_f *func, void *cd)
3028 {
3029 	return (i_Pmapping_iter(P, B_TRUE, func, cd));
3030 }
3031 
3032 /*
3033  * Iterate over the process's mapped objects.
3034  */
3035 static int
i_Pobject_iter(struct ps_prochandle * P,boolean_t lmresolve,proc_map_f * func,void * cd)3036 i_Pobject_iter(struct ps_prochandle *P, boolean_t lmresolve,
3037     proc_map_f *func, void *cd)
3038 {
3039 	map_info_t *mptr;
3040 	file_info_t *fptr;
3041 	int rc = 0;
3042 
3043 	(void) Prd_agent(P); /* create file_info_t's for all the mappings */
3044 	Pupdate_maps(P);
3045 
3046 	for (fptr = list_head(&P->file_head); fptr != NULL;
3047 	    fptr = list_next(&P->file_head, fptr)) {
3048 		const char *lname;
3049 
3050 		if (lmresolve && (fptr->file_rname != NULL))
3051 			lname = fptr->file_rname;
3052 		else if (fptr->file_lname != NULL)
3053 			lname = fptr->file_lname;
3054 		else
3055 			lname = "";
3056 
3057 		if ((mptr = fptr->file_map) == NULL)
3058 			continue;
3059 
3060 		if ((rc = func(cd, &mptr->map_pmap, lname)) != 0)
3061 			return (rc);
3062 
3063 		if (!P->info_valid)
3064 			Pupdate_maps(P);
3065 	}
3066 	return (0);
3067 }
3068 
3069 int
Pobject_iter(struct ps_prochandle * P,proc_map_f * func,void * cd)3070 Pobject_iter(struct ps_prochandle *P, proc_map_f *func, void *cd)
3071 {
3072 	return (i_Pobject_iter(P, B_FALSE, func, cd));
3073 }
3074 
3075 int
Pobject_iter_resolved(struct ps_prochandle * P,proc_map_f * func,void * cd)3076 Pobject_iter_resolved(struct ps_prochandle *P, proc_map_f *func, void *cd)
3077 {
3078 	return (i_Pobject_iter(P, B_TRUE, func, cd));
3079 }
3080 
3081 static char *
i_Pobjname(struct ps_prochandle * P,boolean_t lmresolve,uintptr_t addr,char * buffer,size_t bufsize)3082 i_Pobjname(struct ps_prochandle *P, boolean_t lmresolve, uintptr_t addr,
3083     char *buffer, size_t bufsize)
3084 {
3085 	map_info_t *mptr;
3086 	file_info_t *fptr;
3087 
3088 	/* create all the file_info_t's for all the mappings */
3089 	(void) Prd_agent(P);
3090 
3091 	if ((mptr = Paddr2mptr(P, addr)) == NULL)
3092 		return (NULL);
3093 
3094 	if (!lmresolve) {
3095 		if (((fptr = mptr->map_file) == NULL) ||
3096 		    (fptr->file_lname == NULL))
3097 			return (NULL);
3098 		(void) strlcpy(buffer, fptr->file_lname, bufsize);
3099 		return (buffer);
3100 	}
3101 
3102 	/* Check for a cached copy of the resolved path */
3103 	if (Pfindmap(P, mptr, buffer, bufsize) != NULL)
3104 		return (buffer);
3105 
3106 	return (NULL);
3107 }
3108 
3109 /*
3110  * Given a virtual address, return the name of the underlying
3111  * mapped object (file) as provided by the dynamic linker.
3112  * Return NULL if we can't find any name information for the object.
3113  */
3114 char *
Pobjname(struct ps_prochandle * P,uintptr_t addr,char * buffer,size_t bufsize)3115 Pobjname(struct ps_prochandle *P, uintptr_t addr,
3116     char *buffer, size_t bufsize)
3117 {
3118 	return (i_Pobjname(P, B_FALSE, addr, buffer, bufsize));
3119 }
3120 
3121 /*
3122  * Given a virtual address, try to return a filesystem path to the
3123  * underlying mapped object (file).  If we're in the global zone,
3124  * this path could resolve to an object in another zone.  If we're
3125  * unable return a valid filesystem path, we'll fall back to providing
3126  * the mapped object (file) name provided by the dynamic linker in
3127  * the target process (ie, the object reported by Pobjname()).
3128  */
3129 char *
Pobjname_resolved(struct ps_prochandle * P,uintptr_t addr,char * buffer,size_t bufsize)3130 Pobjname_resolved(struct ps_prochandle *P, uintptr_t addr,
3131     char *buffer, size_t bufsize)
3132 {
3133 	return (i_Pobjname(P, B_TRUE, addr, buffer, bufsize));
3134 }
3135 
3136 /*
3137  * Given a virtual address, return the link map id of the underlying mapped
3138  * object (file), as provided by the dynamic linker.  Return -1 on failure.
3139  */
3140 int
Plmid(struct ps_prochandle * P,uintptr_t addr,Lmid_t * lmidp)3141 Plmid(struct ps_prochandle *P, uintptr_t addr, Lmid_t *lmidp)
3142 {
3143 	map_info_t *mptr;
3144 	file_info_t *fptr;
3145 
3146 	/* create all the file_info_t's for all the mappings */
3147 	(void) Prd_agent(P);
3148 
3149 	if ((mptr = Paddr2mptr(P, addr)) != NULL &&
3150 	    (fptr = mptr->map_file) != NULL && fptr->file_lo != NULL) {
3151 		*lmidp = fptr->file_lo->rl_lmident;
3152 		return (0);
3153 	}
3154 
3155 	return (-1);
3156 }
3157 
3158 /*
3159  * Given an object name and optional lmid, iterate over the object's symbols.
3160  * If which == PR_SYMTAB, search the normal symbol table.
3161  * If which == PR_DYNSYM, search the dynamic symbol table.
3162  */
3163 static int
Psymbol_iter_com(struct ps_prochandle * P,Lmid_t lmid,const char * object_name,int which,int mask,pr_order_t order,proc_xsym_f * func,void * cd)3164 Psymbol_iter_com(struct ps_prochandle *P, Lmid_t lmid, const char *object_name,
3165     int which, int mask, pr_order_t order, proc_xsym_f *func, void *cd)
3166 {
3167 #if STT_NUM != (STT_TLS + 1)
3168 #error "STT_NUM has grown. update Psymbol_iter_com()"
3169 #endif
3170 
3171 	GElf_Sym sym;
3172 	GElf_Shdr shdr;
3173 	map_info_t *mptr;
3174 	file_info_t *fptr;
3175 	sym_tbl_t *symtab;
3176 	size_t symn;
3177 	const char *strs;
3178 	size_t strsz;
3179 	prsyminfo_t si;
3180 	int rv;
3181 	uint_t *map, i, count, ndx;
3182 
3183 	if ((mptr = object_name_to_map(P, lmid, object_name)) == NULL)
3184 		return (-1);
3185 
3186 	if ((fptr = build_map_symtab(P, mptr)) == NULL || /* no mapped file */
3187 	    fptr->file_elf == NULL)			/* not an ELF file */
3188 		return (-1);
3189 
3190 	/*
3191 	 * Search the specified symbol table.
3192 	 */
3193 	switch (which) {
3194 	case PR_SYMTAB:
3195 		symtab = &fptr->file_symtab;
3196 		si.prs_table = PR_SYMTAB;
3197 		break;
3198 	case PR_DYNSYM:
3199 		symtab = &fptr->file_dynsym;
3200 		si.prs_table = PR_DYNSYM;
3201 		break;
3202 	default:
3203 		return (-1);
3204 	}
3205 
3206 	si.prs_object = object_name;
3207 	si.prs_lmid = fptr->file_lo == NULL ?
3208 	    LM_ID_BASE : fptr->file_lo->rl_lmident;
3209 
3210 	symn = symtab->sym_symn;
3211 	strs = symtab->sym_strs;
3212 	strsz = symtab->sym_strsz;
3213 
3214 	switch (order) {
3215 	case PRO_NATURAL:
3216 		map = NULL;
3217 		count = symn;
3218 		break;
3219 	case PRO_BYNAME:
3220 		map = symtab->sym_byname;
3221 		count = symtab->sym_count;
3222 		break;
3223 	case PRO_BYADDR:
3224 		map = symtab->sym_byaddr;
3225 		count = symtab->sym_count;
3226 		break;
3227 	default:
3228 		return (-1);
3229 	}
3230 
3231 	if (symtab->sym_data_pri == NULL || strs == NULL || count == 0)
3232 		return (-1);
3233 
3234 	rv = 0;
3235 
3236 	for (i = 0; i < count; i++) {
3237 		ndx = map == NULL ? i : map[i];
3238 		if (symtab_getsym(symtab, ndx, &sym) != NULL) {
3239 			uint_t s_bind, s_type, type;
3240 
3241 			if (sym.st_name >= strsz)	/* invalid st_name */
3242 				continue;
3243 
3244 			s_bind = GELF_ST_BIND(sym.st_info);
3245 			s_type = GELF_ST_TYPE(sym.st_info);
3246 
3247 			/*
3248 			 * In case you haven't already guessed, this relies on
3249 			 * the bitmask used in <libproc.h> for encoding symbol
3250 			 * type and binding matching the order of STB and STT
3251 			 * constants in <sys/elf.h>.  Changes to ELF must
3252 			 * maintain binary compatibility, so I think this is
3253 			 * reasonably fair game.
3254 			 */
3255 			if (s_bind < STB_NUM && s_type < STT_NUM) {
3256 				type = (1 << (s_type + 8)) | (1 << s_bind);
3257 				if ((type & ~mask) != 0)
3258 					continue;
3259 			} else
3260 				continue; /* Invalid type or binding */
3261 
3262 			if (GELF_ST_TYPE(sym.st_info) != STT_TLS)
3263 				sym.st_value += fptr->file_dyn_base;
3264 
3265 			si.prs_name = strs + sym.st_name;
3266 
3267 			/*
3268 			 * If symbol's type is STT_SECTION, then try to lookup
3269 			 * the name of the corresponding section.
3270 			 */
3271 			if (GELF_ST_TYPE(sym.st_info) == STT_SECTION &&
3272 			    fptr->file_shstrs != NULL &&
3273 			    gelf_getshdr(elf_getscn(fptr->file_elf,
3274 			    sym.st_shndx), &shdr) != NULL &&
3275 			    shdr.sh_name != 0 &&
3276 			    shdr.sh_name < fptr->file_shstrsz)
3277 				si.prs_name = fptr->file_shstrs + shdr.sh_name;
3278 
3279 			si.prs_id = ndx;
3280 			if ((rv = func(cd, &sym, si.prs_name, &si)) != 0)
3281 				break;
3282 		}
3283 	}
3284 
3285 	return (rv);
3286 }
3287 
3288 int
Pxsymbol_iter(struct ps_prochandle * P,Lmid_t lmid,const char * object_name,int which,int mask,proc_xsym_f * func,void * cd)3289 Pxsymbol_iter(struct ps_prochandle *P, Lmid_t lmid, const char *object_name,
3290     int which, int mask, proc_xsym_f *func, void *cd)
3291 {
3292 	return (Psymbol_iter_com(P, lmid, object_name, which, mask,
3293 	    PRO_NATURAL, func, cd));
3294 }
3295 
3296 int
Psymbol_iter_by_lmid(struct ps_prochandle * P,Lmid_t lmid,const char * object_name,int which,int mask,proc_sym_f * func,void * cd)3297 Psymbol_iter_by_lmid(struct ps_prochandle *P, Lmid_t lmid,
3298     const char *object_name, int which, int mask, proc_sym_f *func, void *cd)
3299 {
3300 	return (Psymbol_iter_com(P, lmid, object_name, which, mask,
3301 	    PRO_NATURAL, (proc_xsym_f *)(uintptr_t)func, cd));
3302 }
3303 
3304 int
Psymbol_iter(struct ps_prochandle * P,const char * object_name,int which,int mask,proc_sym_f * func,void * cd)3305 Psymbol_iter(struct ps_prochandle *P,
3306     const char *object_name, int which, int mask, proc_sym_f *func, void *cd)
3307 {
3308 	return (Psymbol_iter_com(P, PR_LMID_EVERY, object_name, which, mask,
3309 	    PRO_NATURAL, (proc_xsym_f *)(uintptr_t)func, cd));
3310 }
3311 
3312 int
Psymbol_iter_by_addr(struct ps_prochandle * P,const char * object_name,int which,int mask,proc_sym_f * func,void * cd)3313 Psymbol_iter_by_addr(struct ps_prochandle *P,
3314     const char *object_name, int which, int mask, proc_sym_f *func, void *cd)
3315 {
3316 	return (Psymbol_iter_com(P, PR_LMID_EVERY, object_name, which, mask,
3317 	    PRO_BYADDR, (proc_xsym_f *)(uintptr_t)func, cd));
3318 }
3319 
3320 int
Psymbol_iter_by_name(struct ps_prochandle * P,const char * object_name,int which,int mask,proc_sym_f * func,void * cd)3321 Psymbol_iter_by_name(struct ps_prochandle *P,
3322     const char *object_name, int which, int mask, proc_sym_f *func, void *cd)
3323 {
3324 	return (Psymbol_iter_com(P, PR_LMID_EVERY, object_name, which, mask,
3325 	    PRO_BYNAME, (proc_xsym_f *)(uintptr_t)func, cd));
3326 }
3327 
3328 /*
3329  * Get the platform string.
3330  */
3331 char *
Pplatform(struct ps_prochandle * P,char * s,size_t n)3332 Pplatform(struct ps_prochandle *P, char *s, size_t n)
3333 {
3334 	return (P->ops.pop_platform(P, s, n, P->data));
3335 }
3336 
3337 /*
3338  * Get the uname(2) information.
3339  */
3340 int
Puname(struct ps_prochandle * P,struct utsname * u)3341 Puname(struct ps_prochandle *P, struct utsname *u)
3342 {
3343 	return (P->ops.pop_uname(P, u, P->data));
3344 }
3345 
3346 /*
3347  * Called from Pcreate(), Pgrab(), and Pfgrab_core() to initialize
3348  * the symbol table heads in the new ps_prochandle.
3349  */
3350 void
Pinitsym(struct ps_prochandle * P)3351 Pinitsym(struct ps_prochandle *P)
3352 {
3353 	P->num_files = 0;
3354 	list_create(&P->file_head, sizeof (file_info_t),
3355 	    offsetof(file_info_t, file_list));
3356 }
3357 
3358 /*
3359  * Called from Prelease() to destroy the symbol tables.
3360  * Must be called by the client after an exec() in the victim process.
3361  */
3362 void
Preset_maps(struct ps_prochandle * P)3363 Preset_maps(struct ps_prochandle *P)
3364 {
3365 	int i;
3366 
3367 	if (P->rap != NULL) {
3368 		rd_delete(P->rap);
3369 		P->rap = NULL;
3370 	}
3371 
3372 	if (P->execname != NULL) {
3373 		free(P->execname);
3374 		P->execname = NULL;
3375 	}
3376 
3377 	if (P->auxv != NULL) {
3378 		free(P->auxv);
3379 		P->auxv = NULL;
3380 		P->nauxv = 0;
3381 	}
3382 
3383 	for (i = 0; i < P->map_count; i++)
3384 		map_info_free(P, &P->mappings[i]);
3385 
3386 	if (P->mappings != NULL) {
3387 		free(P->mappings);
3388 		P->mappings = NULL;
3389 	}
3390 	P->map_count = P->map_alloc = 0;
3391 
3392 	P->info_valid = 0;
3393 }
3394 
3395 typedef struct getenv_data {
3396 	char *buf;
3397 	size_t bufsize;
3398 	const char *search;
3399 	size_t searchlen;
3400 } getenv_data_t;
3401 
3402 /*ARGSUSED*/
3403 static int
getenv_func(void * data,struct ps_prochandle * P,uintptr_t addr,const char * nameval)3404 getenv_func(void *data, struct ps_prochandle *P, uintptr_t addr,
3405     const char *nameval)
3406 {
3407 	getenv_data_t *d = data;
3408 	size_t len;
3409 
3410 	if (nameval == NULL)
3411 		return (0);
3412 
3413 	if (d->searchlen < strlen(nameval) &&
3414 	    strncmp(nameval, d->search, d->searchlen) == 0 &&
3415 	    nameval[d->searchlen] == '=') {
3416 		len = MIN(strlen(nameval), d->bufsize - 1);
3417 		(void) strncpy(d->buf, nameval, len);
3418 		d->buf[len] = '\0';
3419 		return (1);
3420 	}
3421 
3422 	return (0);
3423 }
3424 
3425 char *
Pgetenv(struct ps_prochandle * P,const char * name,char * buf,size_t buflen)3426 Pgetenv(struct ps_prochandle *P, const char *name, char *buf, size_t buflen)
3427 {
3428 	getenv_data_t d;
3429 
3430 	d.buf = buf;
3431 	d.bufsize = buflen;
3432 	d.search = name;
3433 	d.searchlen = strlen(name);
3434 
3435 	if (Penv_iter(P, getenv_func, &d) == 1) {
3436 		char *equals = strchr(d.buf, '=');
3437 
3438 		if (equals != NULL) {
3439 			(void) memmove(d.buf, equals + 1,
3440 			    d.buf + buflen - equals - 1);
3441 			d.buf[d.buf + buflen - equals] = '\0';
3442 
3443 			return (buf);
3444 		}
3445 	}
3446 
3447 	return (NULL);
3448 }
3449 
3450 /* number of argument or environment pointers to read all at once */
3451 #define	NARG	100
3452 
3453 int
Penv_iter(struct ps_prochandle * P,proc_env_f * func,void * data)3454 Penv_iter(struct ps_prochandle *P, proc_env_f *func, void *data)
3455 {
3456 	const psinfo_t *psp;
3457 	uintptr_t envpoff;
3458 	GElf_Sym sym;
3459 	int ret;
3460 	char *buf, *nameval;
3461 	size_t buflen;
3462 
3463 	int nenv = NARG;
3464 	long envp[NARG];
3465 
3466 	/*
3467 	 * Attempt to find the "_environ" variable in the process.
3468 	 * Failing that, use the original value provided by Ppsinfo().
3469 	 *
3470 	 * The "_environ" variable is initialized by the CRT. We use a rough
3471 	 * heuristic to try and figure out if we have started running before the
3472 	 * CRT has executed by checking if the _environ pointer points to NULL
3473 	 * or not. Once initialized, it will never point to NULL absent an
3474 	 * application manipulating it directly, libc does not do so, even if
3475 	 * one calls clearenv(). There is a rare chance that an application is
3476 	 * messing with the _environ pointer directly; however, in practice that
3477 	 * is much rarer than this case and if someone is, libc is unlikely to
3478 	 * have a good day.
3479 	 *
3480 	 * While it's tempting to look towards libc variables such as
3481 	 * initenv_done and related, we have to remember that we're here because
3482 	 * we haven't actually called  libc_init() or even loaded it!
3483 	 */
3484 	if ((psp = Ppsinfo(P)) == NULL)
3485 		return (-1);
3486 
3487 	envpoff = psp->pr_envp; /* Default if no _environ found */
3488 
3489 	if (Plookup_by_name(P, PR_OBJ_EXEC, "_environ", &sym) == 0) {
3490 		if (P->status.pr_dmodel == PR_MODEL_NATIVE) {
3491 			if (Pread(P, &envpoff, sizeof (envpoff),
3492 			    sym.st_value) != sizeof (envpoff))
3493 				envpoff = psp->pr_envp;
3494 		} else if (P->status.pr_dmodel == PR_MODEL_ILP32) {
3495 			uint32_t envpoff32;
3496 
3497 			if (Pread(P, &envpoff32, sizeof (envpoff32),
3498 			    sym.st_value) != sizeof (envpoff32))
3499 				envpoff = psp->pr_envp;
3500 			else
3501 				envpoff = envpoff32;
3502 		}
3503 
3504 		if (envpoff == 0) {
3505 			envpoff = psp->pr_envp;
3506 		}
3507 	}
3508 
3509 	buflen = 128;
3510 	buf = malloc(buflen);
3511 
3512 	ret = 0;
3513 	for (;;) {
3514 		uintptr_t envoff;
3515 
3516 		if (nenv == NARG) {
3517 			(void) memset(envp, 0, sizeof (envp));
3518 			if (P->status.pr_dmodel == PR_MODEL_NATIVE) {
3519 				if (Pread(P, envp,
3520 				    sizeof (envp), envpoff) <= 0) {
3521 					ret = -1;
3522 					break;
3523 				}
3524 			} else if (P->status.pr_dmodel == PR_MODEL_ILP32) {
3525 				uint32_t e32[NARG];
3526 				int i;
3527 
3528 				(void) memset(e32, 0, sizeof (e32));
3529 				if (Pread(P, e32, sizeof (e32), envpoff) <= 0) {
3530 					ret = -1;
3531 					break;
3532 				}
3533 				for (i = 0; i < NARG; i++)
3534 					envp[i] = e32[i];
3535 			}
3536 			nenv = 0;
3537 		}
3538 
3539 		if ((envoff = envp[nenv++]) == (uintptr_t)NULL)
3540 			break;
3541 
3542 		/*
3543 		 * Attempt to read the string from the process.
3544 		 */
3545 again:
3546 		ret = Pread_string(P, buf, buflen, envoff);
3547 
3548 		if (ret <= 0) {
3549 			nameval = NULL;
3550 		} else if (ret == buflen - 1) {
3551 			free(buf);
3552 			/*
3553 			 * Bail if we have a corrupted environment
3554 			 */
3555 			if (buflen >= ARG_MAX)
3556 				return (-1);
3557 			buflen *= 2;
3558 			buf = malloc(buflen);
3559 			goto again;
3560 		} else {
3561 			nameval = buf;
3562 		}
3563 
3564 		if ((ret = func(data, P, envoff, nameval)) != 0)
3565 			break;
3566 
3567 		envpoff += (P->status.pr_dmodel == PR_MODEL_LP64)? 8 : 4;
3568 	}
3569 
3570 	free(buf);
3571 
3572 	return (ret);
3573 }
3574