xref: /illumos-gate/usr/src/cmd/ptools/pmadvise/pmadvise.c (revision e443d926fa6c5807f868cf128c5cc66e3e171630)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * Copyright (c) 2015, Joyent, Inc. All rights reserved.
29  * Copyright 2025 Oxide Computer Company
30  */
31 
32 /*
33  * pmadvise
34  *
35  * ptool wrapper for madvise(3C) to apply memory advice to running processes
36  *
37  * usage:	pmadvise [-o option[,option]] [-Flv] pid ...
38  *  (Give "advice" about a process's memory)
39  *  -o option[,option]: options are
40  *      private=<advice>
41  *      shared=<advice>
42  *      heap=<advice>
43  *      stack=<advice>
44  *      <segaddr>[:<length>]=<advice>
45  *     valid <advice> is one of:
46  *      normal, random, sequential, willneed, dontneed,
47  *      free, purge, access_lwp, access_many, access_default
48  *  -v: verbose output
49  *  -F: force grabbing of the target process(es)
50  *  -l: show unresolved dynamic linker map names
51  *  pid: process id list
52  *
53  *
54  * Advice passed to this tool is organized into the lists described here:
55  *  rawadv_list: includes all specific advice from command line (specific
56  *               advice being those given to a particular address range rather
57  *               than a type like "heap" or "stack".  In contrast, these
58  *               types are referred to as generic advice). Duplicates allowed.
59  *               List ordered by addr, then by size (largest size first).
60  *               Created once per run.
61  *  merged_list: includes all specific advice from the rawadv_list as well as
62  *               all generic advice.  This must be recreated for each process
63  *               as the generic advice will apply to different regions for
64  *               different processes. Duplicates allowed. List ordered by addr,
65  *               then by size (largest size first). Created once per pid.
66  *  chopped_list: used for verbose output only. This list parses the merged
67  *                list such that it eliminates any overlap and combines the
68  *                advice. Easiest to think of this visually: if you take all
69  *                the advice in the merged list and lay them down on a memory
70  *                range of the entire process (laying on top of each other when
71  *                necessary), then flatten them into one layer, combining advice
72  *                in the case of overlap, you get the chopped_list of advice.
73  *                Duplicate entries not allowed (since there is no overlap by
74  *                definition in this list).  List ordered by addr. Created once
75  *                per pid.
76  *
77  *                Example:
78  *                   merged_list:   |-----adv1----|---------adv3---------|
79  *                                       |--adv2--|--adv4--|-----adv5----|
80  *                                                  ||
81  *                                                  \/
82  *                   chopped_list:  |adv1|-adv1,2-|-adv3,4-|----adv3,5---|
83  *
84  *  maplist: list of memory mappings for a particular process. Used to create
85  *           generic advice entries for merged_list and for pmap like verbose
86  *           output. Created once per pid.
87  *
88  * Multiple lists are necessary because the actual advice applied given a set
89  * of generic and specific advice changes from process to process, so for each
90  * pid pmadvise is passed, it must create a new merged_list from which to apply
91  * advice (and a new chopped_list if verbose output is requested).
92  *
93  * Pseudo-code:
94  * I.	Input advice from command line
95  * II.	Create [raw advice list] of specific advice
96  * III.	Iterate through PIDs:
97  *	A.	Create [map list]
98  *	B.	Merge generic advice and [raw advice list] into [merged list]
99  *	C.	Apply advice from [merged list]; upon error:
100  *		i.	output madvise error message
101  *		ii.	remove element from [merged list]
102  *	D.	If verbose output:
103  *		i.	Create [chopped list] from [merged list]
104  *		ii.	Iterate through [map list]:
105  *			a.	output advice as given by [merged list]
106  *		iii.	Delete [chopped list]
107  *	E.	Delete [merged list]
108  *	F.	Delete [map list]
109  */
110 
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <string.h>
#include <dirent.h>
#include <limits.h>
#include <link.h>
#include <libelf.h>
#include <locale.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/mkdev.h>
#include <assert.h>
#include <libproc.h>
#include <libgen.h>
#include <signal.h>

#include "pmap_common.h"
132 
#ifndef	TEXT_DOMAIN			/* should be defined by cc -D */
#define	TEXT_DOMAIN	"SYS_TEST"	/* use this only if it wasn't */
#endif

/*
 * Number of bytes in a kilobyte; the unit used for size suffixes on the
 * command line and for sizes in the verbose output.
 */
#define	KILOBYTE	1024

/*
 * Convert a byte count to a count of kilobytes, rounding up so that any
 * partial kilobyte is counted as a whole one.
 */
#define	ROUNDUP_KB(x)	(((x) + (KILOBYTE - 1)) / KILOBYTE)

/*
 * Sentinel meaning "no address"; comm_page holds this value until a real
 * address has been looked up for the current process.
 */
#define	INVALID_ADDRESS		(uintptr_t)(-1)

/*
 * Empty advice mask: no advice bit is set.
 */
#define	NO_ADVICE		0

/*
 * The following definitions are used as the third argument in insert_addr()
 *   NODUPS = duplicates are not allowed, thus if the addr being inserted
 *   already exists in the list, return without inserting again.
 *
 *   YESDUPS = duplicates are allowed, thus always insert the addr
 *   regardless of whether it already exists in the list or not.
 */
#define	NODUPS	1
#define	YESDUPS	0

/*
 * Advice that can be passed to madvise fits into three groups of mutually
 * exclusive options.  These groups are defined below:
 *   Group 1: normal, random, sequential
 *   Group 2: willneed, dontneed, free, purge
 *   Group 3: access_default, access_lwp, access_many
 * Thus, advice that includes (at most) one from each group is valid.
 *
 * Advice is stored as a bit mask in which bit (1 << MADV_xxx) stands for
 * advice MADV_xxx.  The following #define's are the masks covering each
 * group; they are used to determine which group(s) a particular advice
 * mask falls under.
 */

#define	GRP1_ADV	(1 << MADV_NORMAL | 1 << MADV_RANDOM | \
			1 << MADV_SEQUENTIAL)
#define	GRP2_ADV	(1 << MADV_WILLNEED | 1 << MADV_DONTNEED | \
			1 << MADV_FREE | 1 << MADV_PURGE)
#define	GRP3_ADV	(1 << MADV_ACCESS_DEFAULT | 1 << MADV_ACCESS_LWP | \
			1 << MADV_ACCESS_MANY)
177 
/* Forward declarations for functions defined later in this file */
static	int	create_maplist(void *, const prmap_t *, const char *);
static	int	pr_madvise(struct ps_prochandle *, caddr_t, size_t, int);

static	char	*mflags(uint_t);
static	char	*advtostr(int);

/* Set by -l; handed to make_name() when labelling mappings */
static	int	lflag = 0;

/*
 * Column widths for the verbose output; main() picks them per process
 * according to the target's data model.
 */
static	int	addr_width, size_width;
/* basename of argv[0], used as the prefix of diagnostics */
static	char	*progname;
/* Handle of the process currently grabbed by main() */
static	struct ps_prochandle *Pr;

/*
 * Per-process array of lwp stacks (filled in by getstack()) and the number
 * of entries allocated for it.
 */
static	lwpstack_t *stacks;
static	uint_t	nstacks;

/* AT_SUN_COMMPAGE aux vector value of the current process */
static uintptr_t comm_page = INVALID_ADDRESS;

/*
 * Token table handed to getsubopt() for the generic advice types.  main()
 * compares the index getsubopt() returns against AT_PRIVM, AT_SHARED,
 * AT_HEAP and AT_STACK, so the order here presumably has to match the
 * values of those constants (defined outside this file -- confirm before
 * reordering).
 */
static char	*suboptstr[] = {
	"private",
	"shared",
	"heap",
	"stack",
	NULL
};


/*
 * generic_adv[type] is the advice mask requested for each generic type,
 * indexed by the value getsubopt() returned for it.  at_map has bit
 * (1 << type) set for every type of advice that was given on the command
 * line (including AT_SEG for address-specific advice).
 */
int	generic_adv[] = {NO_ADVICE, NO_ADVICE, NO_ADVICE, NO_ADVICE};
int	at_map = 0;
206 
/*
 * One piece of advice for one address range; the element type of the
 * rawadv_list, merged_list and chopped_list.
 */
typedef struct saddr_struct {
	uintptr_t	addr;		/* start address of the range */
	size_t		length;		/* length in bytes; 0 if not given */
	int		adv;		/* mask of (1 << MADV_xxx) bits, */
					/* NO_ADVICE, or -1 for a gap in */
					/* the chopped list */
	struct saddr_struct	*next;	/* next element, NULL at the end */
} saddr_t;
static int	apply_advice(saddr_t **);
static void	set_advice(int *, int);
static void	create_choplist(saddr_t **, saddr_t *);

/*
 * The segment address advice from the command line
 */
saddr_t	*rawadv_list = NULL;
/*
 * The rawadv_list + list entries for the generic advice (if any).
 * This must be recreated for each PID as the memory maps might be different.
 */
saddr_t *merged_list = NULL;
/*
 * The merged_list cut up so as to remove all overlap
 * e.g. if merged_list contained two entries:
 *
 * [0x38000:0x3e000) = adv1
 * [0x3a000:0x3c000) = adv2
 *
 * the chopped list will contain three entries:
 *
 * [0x38000:0x3a000) = adv1
 * [0x3a000:0x3c000) = adv1,adv2
 * [0x3c000:0x3e000) = adv1
 *
 */
saddr_t *chopped_list = NULL;
241 
/*
 * One memory mapping of the process being examined; the element type of
 * the map list built by create_maplist().
 */
typedef struct mapnode_struct {
	prmap_t			*pmp;	/* private copy of the mapping */
	char			label[PATH_MAX];	/* name to print */
	int			mtypes;	/* mask of (1 << AT_xxx) types */
	struct mapnode_struct	*next;	/* next mapping, NULL at the end */
} mapnode_t;

/* Head and tail of the map list; create_maplist() appends at the tail */
mapnode_t *maplist_head = NULL;
mapnode_t *maplist_tail = NULL;
static void	print_advice(saddr_t *, mapnode_t *);

/* Set by -v: print the mappings and the advice applied to them */
int	opt_verbose;
254 
/*
 * Printable name of each piece of advice.  apply_advice() and advtostr()
 * index this table with every value from MADV_NORMAL through MADV_PURGE,
 * so it must hold one entry per advice value in that range, in the same
 * order as the MADV_* constants.
 */
static char	*advicestr[] = {
	"normal",
	"random",
	"sequential",
	"willneed",
	"dontneed",
	"free",
	"access_default",
	"access_lwp",
	"access_many",
	"purge"
};
266 
/*
 * How many signals caught from terminal
 * We bail out as soon as possible when interrupt is set
 *
 * The counter is written by the signal handler and polled by the main
 * program, hence volatile sig_atomic_t rather than a plain int.
 */
static volatile sig_atomic_t	interrupt = 0;

/*
 * Interrupt handler
 */
static void	intr(int);
277 
278 /*
279  * Iterative function passed to Plwp_iter to
280  * get alt and main stacks for given lwp.
281  */
282 static int
getstack(void * data,const lwpstatus_t * lsp)283 getstack(void *data, const lwpstatus_t *lsp)
284 {
285 	int *np = (int *)data;
286 
287 	if (Plwp_alt_stack(Pr, lsp->pr_lwpid, &stacks[*np].lwps_stack) == 0) {
288 		stacks[*np].lwps_stack.ss_flags |= SS_ONSTACK;
289 		stacks[*np].lwps_lwpid = lsp->pr_lwpid;
290 		(*np)++;
291 	}
292 
293 	if (Plwp_main_stack(Pr, lsp->pr_lwpid, &stacks[*np].lwps_stack) == 0) {
294 		stacks[*np].lwps_lwpid = lsp->pr_lwpid;
295 		(*np)++;
296 	}
297 
298 	return (0);
299 }
300 
301 /*
302  * Prints usage and exits
303  */
304 static void
usage()305 usage()
306 {
307 	(void) fprintf(stderr,
308 	    gettext("usage:\t%s [-o option[,option]] [-Flv] pid ...\n"),
309 	    progname);
310 	(void) fprintf(stderr,
311 	    gettext("    (Give \"advice\" about a process's memory)\n"
312 	    "    -o option[,option]: options are\n"
313 	    "        private=<advice>\n"
314 	    "        shared=<advice>\n"
315 	    "        heap=<advice>\n"
316 	    "        stack=<advice>\n"
317 	    "        <segaddr>[:<length>]=<advice>\n"
318 	    "       valid <advice> is one of:\n"
319 	    "        normal, random, sequential, willneed, dontneed,\n"
320 	    "        free, access_lwp, access_many, access_default\n"
321 	    "    -v: verbose output\n"
322 	    "    -F: force grabbing of the target process(es)\n"
323 	    "    -l: show unresolved dynamic linker map names\n"
324 	    "    pid: process id list\n"));
325 	exit(2);
326 }
327 
328 /*
329  * Function to parse advice from options string
330  */
331 static int
get_advice(char * optarg)332 get_advice(char *optarg)
333 {
334 	/*
335 	 * Determine which advice is given, we use shifted values as
336 	 * multiple pieces of advice may apply for a particular region.
337 	 * (See comment above regarding GRP[1,2,3]_ADV definitions for
338 	 * breakdown of advice groups).
339 	 */
340 	if (strcmp(optarg, "access_default") == 0)
341 		return (1 << MADV_ACCESS_DEFAULT);
342 	else if (strcmp(optarg, "access_many") == 0)
343 		return (1 << MADV_ACCESS_MANY);
344 	else if (strcmp(optarg, "access_lwp") == 0)
345 		return (1 << MADV_ACCESS_LWP);
346 	else if (strcmp(optarg, "sequential") == 0)
347 		return (1 << MADV_SEQUENTIAL);
348 	else if (strcmp(optarg, "willneed") == 0)
349 		return (1 << MADV_WILLNEED);
350 	else if (strcmp(optarg, "dontneed") == 0)
351 		return (1 << MADV_DONTNEED);
352 	else if (strcmp(optarg, "random") == 0)
353 		return (1 << MADV_RANDOM);
354 	else if (strcmp(optarg, "normal") == 0)
355 		return (1 << MADV_NORMAL);
356 	else if (strcmp(optarg, "free") == 0)
357 		return (1 << MADV_FREE);
358 	else if (strcmp(optarg, "purge") == 0)
359 		return (1 << MADV_PURGE);
360 	else {
361 		(void) fprintf(stderr, gettext("%s: invalid advice: %s\n"),
362 		    progname, optarg);
363 		usage();
364 		return (-1);
365 	}
366 }
367 
/*
 * Function to convert character size indicators into actual size
 * (i.e., 123M => sz = 123 * 1024 * 1024)
 *
 * The number is parsed with strtoll() using base 0, so decimal, octal and
 * hexadecimal notations are accepted.  It may be followed by one suffix
 * character, in either case: B (bytes), K, M, G, T, P or E.
 *
 * On return *endptr points just past the number and its suffix, if any.
 * There is no separate error return, so failures are reported through
 * *endptr: if the string is empty, the value is negative, or the result
 * does not fit in a size_t, 0 is returned and *endptr is left pointing at
 * the start of optarg, which callers that check the terminating character
 * will see as a malformed size.  If optarg is NULL, 0 is returned and
 * *endptr is not touched.
 */
static size_t
atosz(char *optarg, char **endptr)
{
	const size_t	szmax = (size_t)-1;
	long long	val;
	size_t		sz;
	int		npow = 0;	/* factors of 1024 still to apply */
	int		suffixed = 1;	/* a suffix character was present */

	if (optarg == NULL)
		return (0);

	if (optarg[0] == '\0') {
		*endptr = optarg;
		return (0);
	}

	val = strtoll(optarg, endptr, 0);

	/* each case adds one factor of 1024 and falls into the next */
	switch (**endptr) {
	case 'E':
	case 'e':
		npow++;
		/* FALLTHRU */
	case 'P':
	case 'p':
		npow++;
		/* FALLTHRU */
	case 'T':
	case 't':
		npow++;
		/* FALLTHRU */
	case 'G':
	case 'g':
		npow++;
		/* FALLTHRU */
	case 'M':
	case 'm':
		npow++;
		/* FALLTHRU */
	case 'K':
	case 'k':
		npow++;
		/* FALLTHRU */
	case 'B':
	case 'b':
		break;
	default:
		suffixed = 0;
		break;
	}

	if (val < 0 || (unsigned long long)val > szmax)
		goto bad;

	sz = (size_t)val;
	while (npow-- > 0) {
		/* multiply by 1024 (1 << 10), refusing to wrap around */
		if (sz > (szmax >> 10))
			goto bad;
		sz <<= 10;
	}

	if (suffixed)
		(*endptr)++;
	return (sz);

bad:
	*endptr = optarg;
	return (0);
}
416 
417 /*
418  * Inserts newaddr into list.  dups indicates whether we allow duplicate
419  * addr entries in the list (valid values are NODUPS and YESDUPS).
420  */
421 static void
insert_addr(saddr_t ** list,saddr_t * newaddr,int dups)422 insert_addr(saddr_t **list, saddr_t *newaddr, int dups)
423 {
424 	saddr_t *prev = *list;
425 	saddr_t *psaddr;
426 
427 	if (*list == NULL) {
428 		newaddr->next = *list;
429 		*list = newaddr;
430 		return;
431 	}
432 
433 	for (psaddr = (*list)->next; psaddr != NULL; psaddr = psaddr->next) {
434 		if ((dups == NODUPS) && (psaddr->addr == newaddr->addr)) {
435 			free(newaddr);
436 			return;
437 		}
438 
439 		/*
440 		 * primary level of comparison is by address; smaller addr 1st
441 		 * secondary level of comparison is by length; bigger length 1st
442 		 */
443 		if ((psaddr->addr > newaddr->addr) ||
444 		    (psaddr->addr == newaddr->addr &&
445 		    psaddr->length < newaddr->length))
446 			break;
447 
448 		prev = psaddr;
449 	}
450 
451 	prev->next = newaddr;
452 	newaddr->next = psaddr;
453 }
454 
455 /*
456  * Deletes given element from list
457  */
458 static void
delete_addr(saddr_t ** list,saddr_t * delme)459 delete_addr(saddr_t **list, saddr_t *delme)
460 {
461 	saddr_t	*prev = *list;
462 
463 	if (delme == *list) {
464 		*list = delme->next;
465 		free(delme);
466 		return;
467 	}
468 
469 	while (prev != NULL && prev->next != delme) {
470 		prev = prev->next;
471 	}
472 
473 	if (prev) {
474 		prev->next = delme->next;
475 		free(delme);
476 	}
477 }
478 
479 /*
480  * Delete entire list
481  */
482 static void
delete_list(saddr_t ** list)483 delete_list(saddr_t **list)
484 {
485 	saddr_t *psaddr = *list;
486 
487 	while (psaddr != NULL) {
488 		saddr_t *temp = psaddr;
489 
490 		psaddr = psaddr->next;
491 		free(temp);
492 	}
493 	*list = NULL;
494 }
495 
496 static saddr_t *
parse_suboptions(char * value)497 parse_suboptions(char *value)
498 {
499 	char	*endptr;
500 	saddr_t *psaddr = malloc(sizeof (saddr_t));
501 
502 	/*
503 	 * This must (better) be a segment addr
504 	 */
505 	psaddr->addr =
506 	    strtoull(value, &endptr, 16);
507 
508 	/*
509 	 * Check to make sure strtoul worked correctly (a properly formatted
510 	 * string will terminate in a ':' (if size is given) or an '=' (if size
511 	 * is not specified). Also check to make sure a 0 addr wasn't returned
512 	 * indicating strtoll was unable to convert).
513 	 */
514 	if ((psaddr->addr == 0) || (*endptr != ':' && *endptr != '=')) {
515 		free(psaddr);
516 		(void) fprintf(stderr,
517 		    gettext("%s: invalid option %s\n"),
518 		    progname, value);
519 		usage();
520 	} else {
521 		/* init other fields */
522 		psaddr->length = 0;
523 		psaddr->adv = NO_ADVICE;
524 		psaddr->next = NULL;
525 
526 		/* skip past address */
527 		value = endptr;
528 
529 		/* check for length */
530 		if (*value == ':') {
531 			/* skip the ":" */
532 			value++;
533 			psaddr->length = atosz(value, &endptr);
534 		}
535 
536 		if (*endptr != '=') {
537 			(void) fprintf(stderr,
538 			    gettext("%s: invalid option %s\n"),
539 			    progname, value);
540 			/*
541 			 * if improperly formatted, free mem, print usage, and
542 			 * exit Note: usage ends with a call to exit()
543 			 */
544 			free(psaddr);
545 			usage();
546 		}
547 		/* skip the "=" */
548 		value = endptr + 1;
549 		at_map |= (1 << AT_SEG);
550 		psaddr->adv =
551 		    get_advice(value);
552 	}
553 
554 	return (psaddr);
555 }
556 
557 /*
558  * Create linked list of mappings for current process
559  * In addition, add generic advice and raw advice
560  * entries to merged_list.
561  */
562 /* ARGSUSED */
563 static int
create_maplist(void * arg,const prmap_t * pmp,const char * object_name)564 create_maplist(void *arg, const prmap_t *pmp, const char *object_name)
565 {
566 	const pstatus_t *Psp = Pstatus(Pr);
567 	mapnode_t *newmap = malloc(sizeof (mapnode_t));
568 	saddr_t	*newaddr;
569 	saddr_t	*psaddr;
570 	char	*lname = NULL;
571 	int	i;
572 
573 	if (interrupt)
574 		return (0);
575 
576 	newmap->pmp = malloc(sizeof (prmap_t));
577 	newmap->label[0] = '\0';
578 	newmap->mtypes = 0;
579 	newmap->next = NULL;
580 	(void) memcpy(newmap->pmp, pmp, sizeof (prmap_t));
581 
582 	/*
583 	 * If the mapping is not anon or not part of the heap, make a name
584 	 * for it.  We don't want to report the heap as a.out's data.
585 	 */
586 	if (!(pmp->pr_mflags & MA_ANON) ||
587 	    (pmp->pr_vaddr + pmp->pr_size <= Psp->pr_brkbase ||
588 	    pmp->pr_vaddr >= Psp->pr_brkbase + Psp->pr_brksize)) {
589 		lname = make_name(Pr, lflag, pmp->pr_vaddr, pmp->pr_mapname,
590 		    newmap->label, sizeof (newmap->label));
591 		if (pmp->pr_mflags & MA_SHARED)
592 			newmap->mtypes |= 1 << AT_SHARED;
593 		else
594 			newmap->mtypes |= 1 << AT_PRIVM;
595 	}
596 
597 	if (lname == NULL && (pmp->pr_mflags & MA_ANON)) {
598 		lname = anon_name(newmap->label, Psp, stacks, nstacks,
599 		    pmp->pr_vaddr, pmp->pr_size, pmp->pr_mflags, pmp->pr_shmid,
600 		    &newmap->mtypes);
601 	}
602 
603 	if (lname == NULL && comm_page != INVALID_ADDRESS &&
604 	    pmp->pr_vaddr == comm_page) {
605 		(void) strlcpy(newmap->label, "  [ comm ]",
606 		    sizeof (newmap->label));
607 		lname = newmap->label;
608 	}
609 
610 	/*
611 	 * Add raw advice that applies to this mapping to the merged_list
612 	 */
613 	psaddr = rawadv_list;
614 	/*
615 	 * Advance to point in rawadv_list that applies to this mapping
616 	 */
617 	while (psaddr && psaddr->addr < pmp->pr_vaddr)
618 		psaddr = psaddr->next;
619 	/*
620 	 * Copy over to merged_list, check to see if size needs to be filled in
621 	 */
622 	while (psaddr && psaddr->addr < (pmp->pr_vaddr + pmp->pr_size)) {
623 		newaddr = malloc(sizeof (saddr_t));
624 		(void) memcpy(newaddr, psaddr, sizeof (saddr_t));
625 		insert_addr(&merged_list, newaddr, YESDUPS);
626 		/*
627 		 * For raw advice that is given without size, try to default
628 		 * size to size of mapping (only allowed if raw adv addr is
629 		 * equal to beginning of mapping). Don't change the entry
630 		 * in rawadv_list, only in the merged_list as the mappings
631 		 * (and thus the default sizes) will be different for
632 		 * different processes.
633 		 */
634 		if ((pmp->pr_vaddr == psaddr->addr) && (psaddr->length == 0))
635 			newaddr->length = pmp->pr_size;
636 		psaddr = psaddr->next;
637 	}
638 
639 	/*
640 	 * Put mapping into merged list with no advice, then
641 	 * check to see if any generic advice applies.
642 	 */
643 	newaddr = malloc(sizeof (saddr_t));
644 	newaddr->addr = pmp->pr_vaddr;
645 	newaddr->length = pmp->pr_size;
646 	newaddr->adv = NO_ADVICE;
647 	insert_addr(&merged_list, newaddr, YESDUPS);
648 
649 	newmap->mtypes &= at_map;
650 	for (i = AT_STACK; i >= AT_PRIVM; i--) {
651 		if (newmap->mtypes & (1 << i)) {
652 			assert(generic_adv[i] != NO_ADVICE);
653 			newaddr->adv = generic_adv[i];
654 			break;
655 		}
656 	}
657 
658 	/*
659 	 * Add to linked list of mappings
660 	 */
661 	if (maplist_tail == NULL) {
662 		maplist_head = maplist_tail = newmap;
663 	} else {
664 		maplist_tail->next = newmap;
665 		maplist_tail = newmap;
666 	}
667 
668 
669 	return (0);
670 }
671 
672 /*
673  * Traverse advice list and apply all applicable advice to each region
674  */
675 static int
apply_advice(saddr_t ** advicelist)676 apply_advice(saddr_t **advicelist)
677 {
678 	saddr_t	*psaddr = *advicelist;
679 	saddr_t	*next;
680 	int	i;
681 
682 
683 	while (!interrupt && psaddr != NULL) {
684 		/*
685 		 * Save next pointer since element may be removed before
686 		 * we get a chance to advance psaddr.
687 		 */
688 		next = psaddr->next;
689 
690 		/*
691 		 * Since mappings have been added to the merged list
692 		 * even if no generic advice was given for the map,
693 		 * check to make sure advice exists before bothering
694 		 * with the for loop.
695 		 */
696 		if (psaddr->adv != NO_ADVICE) {
697 			for (i = MADV_NORMAL; i <= MADV_PURGE; i++) {
698 				if ((psaddr->adv & (1 << i)) &&
699 				    (pr_madvise(Pr, (caddr_t)psaddr->addr,
700 				    psaddr->length, i) < 0)) {
701 					/*
702 					 * madvise(3C) call failed trying to
703 					 * apply advice output error and remove
704 					 * from advice list
705 					 */
706 					(void) fprintf(stderr,
707 					    gettext("Error applying "
708 					    "advice (%s) to memory range "
709 					    "[%lx, %lx):\n"),
710 					    advicestr[i], (ulong_t)psaddr->addr,
711 					    (ulong_t)psaddr->addr +
712 					    psaddr->length);
713 					perror("madvise");
714 					/*
715 					 * Clear this advice from the advice
716 					 * mask. If no more advice is given
717 					 * for this element, remove element
718 					 * from list.
719 					 */
720 					psaddr->adv &= ~(1 << i);
721 					if (psaddr->adv == 0) {
722 						delete_addr(advicelist, psaddr);
723 						break;
724 					}
725 				}
726 			}
727 		}
728 		psaddr = next;
729 	}
730 	return (0);
731 }
732 
733 /*
734  * Set advice but keep mutual exclusive property of advice groupings
735  */
736 static void
set_advice(int * combined_adv,int new_adv)737 set_advice(int *combined_adv, int new_adv)
738 {
739 	/*
740 	 * Since advice falls in 3 groups of mutually exclusive options,
741 	 * clear previous value if new advice overwrites that group.
742 	 */
743 
744 	/*
745 	 * If this is the first advice to be applied, clear invalid value (-1)
746 	 */
747 	if (*combined_adv == -1)
748 		*combined_adv = 0;
749 
750 	if (new_adv & GRP1_ADV)
751 		*combined_adv &= ~GRP1_ADV;
752 	else if (new_adv & GRP2_ADV)
753 		*combined_adv &= ~GRP2_ADV;
754 	else
755 		*combined_adv &= ~GRP3_ADV;
756 
757 	*combined_adv |= new_adv;
758 }
759 
760 /*
761  * Create chopped list from merged list for use with verbose output
762  */
763 static void
create_choplist(saddr_t ** choppedlist,saddr_t * mergedlist)764 create_choplist(saddr_t **choppedlist, saddr_t *mergedlist)
765 {
766 	saddr_t	*mlptr, *clptr;
767 
768 	for (mlptr = mergedlist; mlptr != NULL; mlptr = mlptr->next) {
769 		clptr = malloc(sizeof (saddr_t));
770 		clptr->addr = mlptr->addr;
771 		clptr->length = 0;
772 		/*
773 		 * Initialize the adv to -1 as an indicator for invalid
774 		 * elements in the chopped list (created from gaps between
775 		 * memory maps).
776 		 */
777 		clptr->adv = -1;
778 		clptr->next = NULL;
779 		insert_addr(choppedlist, clptr, NODUPS);
780 
781 		clptr = malloc(sizeof (saddr_t));
782 		clptr->addr = mlptr->addr + mlptr->length;
783 		clptr->length = 0;
784 		/*
785 		 * Again, initialize to -1 as an indicatorfor invalid elements
786 		 */
787 		clptr->adv = -1;
788 		clptr->next = NULL;
789 		insert_addr(choppedlist, clptr, NODUPS);
790 	}
791 
792 	for (clptr = *choppedlist; clptr != NULL; clptr = clptr->next) {
793 		if (clptr->next) {
794 			clptr->length = clptr->next->addr - clptr->addr;
795 		} else {
796 			/*
797 			 * must be last element, now that we've calculated
798 			 * all segment lengths, we can remove this node
799 			 */
800 			delete_addr(choppedlist, clptr);
801 			break;
802 		}
803 	}
804 
805 	for (mlptr = mergedlist; mlptr != NULL; mlptr = mlptr->next) {
806 		for (clptr = *choppedlist; clptr != NULL; clptr = clptr->next) {
807 			if (mlptr->addr <= clptr->addr &&
808 			    mlptr->addr + mlptr->length >=
809 			    clptr->addr + clptr->length)
810 				/*
811 				 * set_advice() will take care of conflicting
812 				 * advice by taking only the last advice
813 				 * applied for each of the 3 groups of advice.
814 				 */
815 				set_advice(&clptr->adv, mlptr->adv);
816 			if (mlptr->addr + mlptr->length <
817 			    clptr->addr)
818 				break;
819 		}
820 	}
821 }
822 
823 /*
824  * Print advice in pmap style for verbose output
825  */
826 static void
print_advice(saddr_t * advlist,mapnode_t * maplist)827 print_advice(saddr_t *advlist, mapnode_t *maplist)
828 {
829 	saddr_t		*psaddr = advlist;
830 	mapnode_t	*pmapnode;
831 	char		*advice;
832 
833 	pmapnode = maplist;
834 
835 	while (psaddr) {
836 		/*
837 		 * Using indicator flag from create_choppedlist, we know
838 		 * which entries in the chopped_list are gaps and should
839 		 * not be printed.
840 		 */
841 		if (psaddr->adv == -1) {
842 			psaddr = psaddr->next;
843 			continue;
844 		}
845 
846 		while (pmapnode && (pmapnode->pmp->pr_vaddr +
847 		    pmapnode->pmp->pr_size <= psaddr->addr))
848 			pmapnode = pmapnode->next;
849 
850 		advice = advtostr(psaddr->adv);
851 
852 		/*
853 		 * Print segment mapping and advice if there is any, or just a
854 		 * segment mapping.
855 		 */
856 		if (strlen(advice) > 0) {
857 			(void) printf("%.*lX %*uK %6s %s\t%s\n",
858 			    addr_width, (ulong_t)psaddr->addr, size_width - 1,
859 			    (int)ROUNDUP_KB(psaddr->length),
860 			    mflags(pmapnode->pmp->pr_mflags), pmapnode->label,
861 			    advice);
862 		} else {
863 			(void) printf("%.*lX %*uK %6s %s\n",
864 			    addr_width, (ulong_t)psaddr->addr, size_width - 1,
865 			    (int)ROUNDUP_KB(psaddr->length),
866 			    mflags(pmapnode->pmp->pr_mflags), pmapnode->label);
867 		}
868 		psaddr = psaddr->next;
869 
870 	}
871 }
872 
873 /*
874  * Call madvise(3c) in the context of the target process
875  */
876 static int
pr_madvise(struct ps_prochandle * Pr,caddr_t addr,size_t len,int advice)877 pr_madvise(struct ps_prochandle *Pr, caddr_t addr, size_t len, int advice)
878 {
879 	return (pr_memcntl(Pr, addr, len, MC_ADVISE,
880 	    (caddr_t)(uintptr_t)advice, 0, 0));
881 }
882 
883 static char *
mflags(uint_t arg)884 mflags(uint_t arg)
885 {
886 	static char code_buf[80];
887 
888 	/*
889 	 * rwxsR
890 	 *
891 	 * r - segment is readable
892 	 * w - segment is writable
893 	 * x - segment is executable
894 	 * s - segment is shared
895 	 * R - segment is mapped MAP_NORESERVE
896 	 *
897 	 */
898 	(void) snprintf(code_buf, sizeof (code_buf), "%c%c%c%c%c ",
899 	    arg & MA_READ ? 'r' : '-',
900 	    arg & MA_WRITE ? 'w' : '-',
901 	    arg & MA_EXEC ? 'x' : '-',
902 	    arg & MA_SHARED ? 's' : '-',
903 	    arg & MA_NORESERVE ? 'R' : '-');
904 
905 	return (code_buf);
906 }
907 
908 /*
909  * Convert advice to a string containing a commented list of applicable advice
910  */
911 static char *
advtostr(int adv)912 advtostr(int adv)
913 {
914 	static char buf[50];
915 	int i;
916 
917 	*buf = '\0';
918 
919 	if (adv != NO_ADVICE) {
920 		for (i = MADV_NORMAL; i <= MADV_PURGE; i++) {
921 			if (adv & (1 << i)) {
922 				/*
923 				 * check if it's the first advice entry
924 				 */
925 				if (*buf == '\0') {
926 					(void) snprintf(buf, sizeof (buf) - 1,
927 					    "<= %s", advicestr[i]);
928 				} else {
929 					(void) strlcat(buf, ",", sizeof (buf));
930 					(void) strlcat(buf, advicestr[i],
931 					    sizeof (buf));
932 				}
933 			}
934 		}
935 	}
936 
937 	return (buf);
938 }
939 
940 /*
941  * Handler for catching signals from terminal
942  */
943 /* ARGSUSED */
944 static void
intr(int sig)945 intr(int sig)
946 {
947 	interrupt++;
948 }
949 
950 int
main(int argc,char ** argv)951 main(int argc, char **argv)
952 {
953 	int Fflag = 0;
954 	int rc = 0;
955 	int opt, subopt;
956 	int tmpadv;
957 	char	*options, *value;
958 	saddr_t	*psaddr;
959 	mapnode_t *pmapnode, *tempmapnode;
960 
961 	(void) setlocale(LC_ALL, "");
962 	(void) textdomain(TEXT_DOMAIN);
963 
964 	/*
965 	 * Get name of program for error messages
966 	 */
967 	progname = basename(argv[0]);
968 
969 	/*
970 	 * Not much to do when only name of program given
971 	 */
972 	if (argc == 1)
973 		usage();
974 
975 	/*
976 	 * Catch signals from terminal, so they can be handled asynchronously
977 	 * when we're ready instead of when we're not (;-)
978 	 */
979 	if (sigset(SIGHUP, SIG_IGN) == SIG_DFL)
980 		(void) sigset(SIGHUP, intr);
981 	if (sigset(SIGINT, SIG_IGN) == SIG_DFL)
982 		(void) sigset(SIGINT, intr);
983 	if (sigset(SIGQUIT, SIG_IGN) == SIG_DFL)
984 		(void) sigset(SIGQUIT, intr);
985 	(void) sigset(SIGPIPE, intr);
986 	(void) sigset(SIGTERM, intr);
987 
988 	/*
989 	 * Parse options, record generic advice if any and create
990 	 * rawadv_list from specific address advice.
991 	 */
992 
993 	while ((opt = getopt(argc, argv, "Flo:v")) != EOF) {
994 		switch (opt) {
995 		case 'o':
996 			options = optarg;
997 			while (*options != '\0') {
998 				subopt = getsubopt(&options, suboptstr,
999 				    &value);
1000 				switch (subopt) {
1001 				case AT_PRIVM:
1002 				case AT_HEAP:
1003 				case AT_SHARED:
1004 				case AT_STACK:
1005 					at_map |= (1 << subopt);
1006 					tmpadv = get_advice(value);
1007 					set_advice(&generic_adv[subopt],
1008 					    tmpadv);
1009 					break;
1010 				default:
1011 					at_map |= (1 << AT_SEG);
1012 					psaddr = parse_suboptions(value);
1013 					if (psaddr == NULL) {
1014 						usage();
1015 					} else {
1016 						insert_addr(&rawadv_list,
1017 						    psaddr, YESDUPS);
1018 					}
1019 					break;
1020 				}
1021 			}
1022 			break;
1023 		case 'v':
1024 			opt_verbose = 1;
1025 			break;
1026 		case 'F':		/* force grabbing (no O_EXCL) */
1027 			Fflag = PGRAB_FORCE;
1028 			break;
1029 		case 'l':		/* show unresolved link map names */
1030 			lflag = 1;
1031 			break;
1032 		default:
1033 			usage();
1034 			break;
1035 		}
1036 	}
1037 
1038 	argc -= optind;
1039 	argv += optind;
1040 
1041 	if (argc <= 0) {
1042 		usage();
1043 	}
1044 
1045 	(void) proc_initstdio();
1046 
1047 	/*
1048 	 * Iterate through all pid arguments, create new merged_list, maplist,
1049 	 * (and chopped_list if using verbose output) based on each process'
1050 	 * memory map.
1051 	 */
1052 
1053 	while (!interrupt && argc-- > 0) {
1054 		char *arg;
1055 		int gcode;
1056 		psinfo_t psinfo;
1057 
1058 		(void) proc_flushstdio();
1059 
1060 		if ((Pr = proc_arg_grab(arg = *argv++, PR_ARG_PIDS,
1061 		    PGRAB_RETAIN | Fflag, &gcode)) == NULL) {
1062 			(void) fprintf(stderr,
1063 			    gettext("%s: cannot examine %s: %s\n"),
1064 			    progname, arg, Pgrab_error(gcode));
1065 			rc++;
1066 			continue;
1067 		}
1068 
1069 
1070 		addr_width =
1071 		    (Pstatus(Pr)->pr_dmodel == PR_MODEL_LP64) ? 16 : 8;
1072 		size_width =
1073 		    (Pstatus(Pr)->pr_dmodel == PR_MODEL_LP64) ? 11 : 8;
1074 		(void) memcpy(&psinfo, Ppsinfo(Pr), sizeof (psinfo_t));
1075 
1076 		if (opt_verbose) {
1077 			proc_unctrl_psinfo(&psinfo);
1078 			(void) printf("%d:\t%.70s\n",
1079 			    (int)psinfo.pr_pid, psinfo.pr_psargs);
1080 		}
1081 
1082 		/*
1083 		 * Get mappings for a process unless it is a system process.
1084 		 */
1085 		if (!(Pstatus(Pr)->pr_flags & PR_ISSYS)) {
1086 			nstacks = psinfo.pr_nlwp * 2;
1087 			stacks = calloc(nstacks, sizeof (stacks[0]));
1088 			if (stacks != NULL) {
1089 				int n = 0;
1090 				(void) Plwp_iter(Pr, getstack, &n);
1091 				qsort(stacks, nstacks, sizeof (stacks[0]),
1092 				    cmpstacks);
1093 			}
1094 
1095 			if (Pgetauxval(Pr, AT_BASE) != -1L &&
1096 			    Prd_agent(Pr) == NULL) {
1097 				(void) fprintf(stderr,
1098 				    gettext("%s: warning: "
1099 				    "librtld_db failed to initialize; "
1100 				    "shared library information will not "
1101 				    "be available\n"),
1102 				    progname);
1103 			}
1104 
1105 			comm_page = Pgetauxval(Pr, AT_SUN_COMMPAGE);
1106 
1107 			/*
1108 			 * Create linked list of mappings for current process
1109 			 * In addition, add generic advice and raw advice
1110 			 * entries to merged_list.
1111 			 * e.g. if rawadv_list contains:
1112 			 *   [0x38000,0x3a000) = adv1
1113 			 *   [0x3a000,0x3c000) = adv2
1114 			 * and there is generic advice:
1115 			 *   heap = adv3
1116 			 * where heap corresponds to 0x38000, then merged_list
1117 			 * will contain:
1118 			 *   ... (include all other mappings from process)
1119 			 *   [0x38000,0x3c000) = adv3
1120 			 *   [0x38000,0x3a000) = adv1
1121 			 *   [0x3a000,0x3c000) = adv2
1122 			 *   ... (include all other mappings from process)
1123 			 */
1124 			assert(merged_list == NULL);
1125 			maplist_head = maplist_tail = NULL;
1126 			rc += Pmapping_iter(Pr, (proc_map_f *)create_maplist,
1127 			    NULL);
1128 
1129 			/*
1130 			 * Apply advice by iterating through merged list
1131 			 */
1132 			(void) apply_advice(&merged_list);
1133 
1134 			if (opt_verbose) {
1135 				assert(chopped_list == NULL);
1136 				/*
1137 				 * Create chopped_list from merged_list
1138 				 */
1139 				create_choplist(&chopped_list, merged_list);
1140 
1141 				/*
1142 				 * Iterate through maplist and output as
1143 				 * given by chopped_list
1144 				 */
1145 				print_advice(chopped_list, maplist_head);
1146 				delete_list(&chopped_list);
1147 			}
1148 
1149 			delete_list(&merged_list);
1150 
1151 			/*
1152 			 * Clear maplist
1153 			 */
1154 			pmapnode = maplist_head;
1155 			while (pmapnode) {
1156 				tempmapnode = pmapnode;
1157 				pmapnode = pmapnode->next;
1158 				free(tempmapnode);
1159 			}
1160 
1161 			if (stacks != NULL) {
1162 				free(stacks);
1163 				stacks = NULL;
1164 			}
1165 		}
1166 
1167 		Prelease(Pr, 0);
1168 	}
1169 
1170 	(void) proc_finistdio();
1171 
1172 	return (rc);
1173 }
1174