xref: /titanic_41/usr/src/cmd/ptools/pmadvise/pmadvise.c (revision e67272d0d0c6a150b2455a71938e8de04a5af292)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * Copyright (c) 2015, Joyent, Inc. All rights reserved.
29  */
30 
31 /*
32  * pmadvise
33  *
34  * ptool wrapper for madvise(3C) to apply memory advice to running processes
35  *
 * usage:	pmadvise [-o option[,option]] [-Flv] pid ...
37  *  (Give "advice" about a process's memory)
38  *  -o option[,option]: options are
39  *      private=<advice>
40  *      shared=<advice>
41  *      heap=<advice>
42  *      stack=<advice>
43  *      <segaddr>[:<length>]=<advice>
 *     valid <advice> is one of:
 *      normal, random, sequential, willneed, dontneed,
 *      free, purge, access_lwp, access_many, access_default
47  *  -v: verbose output
48  *  -F: force grabbing of the target process(es)
49  *  -l: show unresolved dynamic linker map names
50  *  pid: process id list
51  *
52  *
53  * Advice passed to this tool are organized into various lists described here:
54  *  rawadv_list: includes all specific advice from command line (specific
55  *               advice being those given to a particular address range rather
56  *               than a type like "heap" or "stack".  In contrast, these
57  *               types are referred to as generic advice). Duplicates allowed.
58  *               List ordered by addr, then by size (largest size first).
59  *               Created once per run.
60  *  merged_list: includes all specific advice from the rawadv_list as well as
61  *               all generic advice.  This must be recreated for each process
62  *               as the generic advice will apply to different regions for
63  *               different processes. Duplicates allowed. List ordered by addr,
64  *               then by size (largest size first). Created once per pid.
65  *  chopped_list: used for verbose output only. This list parses the merged
66  *                list such that it eliminates any overlap and combines the
67  *                advice. Easiest to think of this visually: if you take all
68  *                the advice in the merged list and lay them down on a memory
69  *                range of the entire process (laying on top of each other when
70  *                necessary), then flatten them into one layer, combining advice
71  *                in the case of overlap, you get the chopped_list of advice.
72  *                Duplicate entries not allowed (since there is no overlap by
73  *                definition in this list).  List ordered by addr. Created once
74  *                per pid.
75  *
76  *                Example:
77  *                   merged_list:   |-----adv1----|---------adv3---------|
78  *                                       |--adv2--|--adv4--|-----adv5----|
79  *                                                  ||
80  *                                                  \/
81  *                   chopped_list:  |adv1|-adv1,2-|-adv3,4-|----adv3,5---|
82  *
83  *  maplist: list of memory mappings for a particular process. Used to create
84  *           generic advice entries for merged_list and for pmap like verbose
85  *           output. Created once per pid.
86  *
87  * Multiple lists are necessary because the actual advice applied given a set
88  * of generic and specific advice changes from process to process, so for each
89  * pid pmadvise is passed, it must create a new merged_list from which to apply
90  * advice (and a new chopped_list if verbose output is requested).
91  *
92  * Pseudo-code:
93  * I.	Input advice from command line
94  * II.	Create [raw advice list] of specific advice
95  * III.	Iterate through PIDs:
96  *	A.	Create [map list]
97  *	B.	Merge generic advice and [raw advice list] into [merged list]
98  *	C.	Apply advice from [merged list]; upon error:
99  *		i.	output madvise error message
100  *		ii.	remove element from [merged list]
101  *	D.	If verbose output:
102  *		i.	Create [chopped list] from [merged list]
103  *		ii.	Iterate through [map list]:
104  *			a.	output advice as given by [merged list]
105  *		iii.	Delete [chopped list]
106  *	E.	Delete [merged list]
107  *	F.	Delete [map list]
108  */
109 
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <string.h>
#include <dirent.h>
#include <limits.h>
#include <stdint.h>
#include <link.h>
#include <libelf.h>
#include <locale.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/mkdev.h>
#include <assert.h>
#include <libproc.h>
#include <libgen.h>
#include <signal.h>

#include "pmap_common.h"
131 
#ifndef	TEXT_DOMAIN			/* normally supplied by cc -D */
#define	TEXT_DOMAIN	"SYS_TEST"	/* fallback when it was not */
#endif

/* Number of bytes in one kilobyte */
#define	KILOBYTE	1024

/*
 * Convert a byte count to kilobytes, rounding a partial kilobyte up.
 */
#define	ROUNDUP_KB(x)	(((x) + (KILOBYTE - 1)) / KILOBYTE)

/* Advice mask value meaning that no advice has been requested */
#define	NO_ADVICE		0

/*
 * Values for the third argument of insert_addr():
 *   NODUPS  = duplicates are not allowed; if the addr being inserted
 *             already exists in the list, return without inserting again.
 *
 *   YESDUPS = duplicates are allowed; always insert the addr regardless
 *             of whether it already exists in the list or not.
 */
#define	NODUPS	1
#define	YESDUPS	0

/*
 * Advice that can be passed to madvise falls into three groups whose
 * members are mutually exclusive within the group:
 *   Group 1: normal, random, sequential
 *   Group 2: willneed, dontneed, free, purge
 *   Group 3: default, accesslwp, accessmany
 * Thus, advice that includes (at most) one from each group is valid.
 *
 * Advice is stored as a bit mask with bit (1 << MADV_xxx) set for each
 * piece of advice; the masks below select all the bits of one group.
 */

#define	GRP1_ADV	((1 << MADV_NORMAL) | (1 << MADV_RANDOM) | \
			(1 << MADV_SEQUENTIAL))
#define	GRP2_ADV	((1 << MADV_WILLNEED) | (1 << MADV_DONTNEED) | \
			(1 << MADV_FREE) | (1 << MADV_PURGE))
#define	GRP3_ADV	((1 << MADV_ACCESS_DEFAULT) | \
			(1 << MADV_ACCESS_LWP) | (1 << MADV_ACCESS_MANY))
174 
175 static	int	create_maplist(void *, const prmap_t *, const char *);
176 static	int	pr_madvise(struct ps_prochandle *, caddr_t, size_t, int);
177 
178 static	char	*mflags(uint_t);
179 static	char	*advtostr(int);
180 
181 static	int	lflag = 0;
182 
183 static	int	addr_width, size_width;
184 static	char	*progname;
185 static	struct ps_prochandle *Pr;
186 
187 static	lwpstack_t *stacks;
188 static	uint_t	nstacks;
189 
190 static char	*suboptstr[] = {
191 	"private",
192 	"shared",
193 	"heap",
194 	"stack",
195 	NULL
196 };
197 
198 
199 int	generic_adv[] = {NO_ADVICE, NO_ADVICE, NO_ADVICE, NO_ADVICE};
200 int	at_map = 0;
201 
/*
 * One piece of advice: the address range [addr, addr + length) and the
 * mask of advice bits (1 << MADV_xxx) that applies to it.  Entries are
 * chained into singly linked lists through next.
 */
typedef struct saddr_struct saddr_t;

struct saddr_struct {
	uintptr_t	addr;
	size_t		length;
	int		adv;
	saddr_t		*next;
};

static int	apply_advice(saddr_t **);
static void	set_advice(int *, int);
static void	create_choplist(saddr_t **, saddr_t *);

/*
 * The segment address advice from the command line
 */
saddr_t	*rawadv_list = NULL;

/*
 * The rawadv_list + list entries for the generic advice (if any).
 * This must be recreated for each PID as the memory maps might be different.
 */
saddr_t	*merged_list = NULL;

/*
 * The merged_list cut up so as to remove all overlap
 * e.g. if merged_list contained two entries:
 *
 * [0x38000:0x3e000) = adv1
 * [0x3a000:0x3c000) = adv2
 *
 * the chopped list will contain three entries:
 *
 * [0x38000:0x3a000) = adv1
 * [0x3a000:0x3c000) = adv1,adv2
 * [0x3c000:0x3e000) = adv1
 *
 */
saddr_t	*chopped_list = NULL;
236 
237 typedef struct mapnode_struct {
238 	prmap_t			*pmp;
239 	char			label[PATH_MAX];
240 	int			mtypes;
241 	struct mapnode_struct	*next;
242 } mapnode_t;
243 
244 mapnode_t *maplist_head = NULL;
245 mapnode_t *maplist_tail = NULL;
246 static void	print_advice(saddr_t *, mapnode_t *);
247 
248 int	opt_verbose;
249 
/*
 * Printable name of each piece of advice, indexed by the MADV_* value.
 * apply_advice() and advtostr() index this table with every value from
 * MADV_NORMAL through MADV_PURGE, so it needs one entry per value.
 *
 * NOTE(review): the position of "purge" assumes MADV_PURGE is the value
 * immediately following MADV_ACCESS_MANY -- confirm against <sys/mman.h>.
 */
static char	*advicestr[] = {
	"normal",
	"random",
	"sequential",
	"willneed",
	"dontneed",
	"free",
	"access_default",
	"access_lwp",
	"access_many",
	"purge"
};
261 
/*
 * How many signals caught from terminal
 * We bail out as soon as possible when interrupt is set
 *
 * The counter is written by the signal handler and polled from the main
 * flow of control, so it is declared volatile sig_atomic_t to keep the
 * compiler from caching it across the polling loops.
 */
static volatile sig_atomic_t	interrupt = 0;

/*
 * Interrupt handler
 */
static void	intr(int);
272 
273 /*
274  * Iterative function passed to Plwp_iter to
275  * get alt and main stacks for given lwp.
276  */
277 static int
getstack(void * data,const lwpstatus_t * lsp)278 getstack(void *data, const lwpstatus_t *lsp)
279 {
280 	int *np = (int *)data;
281 
282 	if (Plwp_alt_stack(Pr, lsp->pr_lwpid, &stacks[*np].lwps_stack) == 0) {
283 		stacks[*np].lwps_stack.ss_flags |= SS_ONSTACK;
284 		stacks[*np].lwps_lwpid = lsp->pr_lwpid;
285 		(*np)++;
286 	}
287 
288 	if (Plwp_main_stack(Pr, lsp->pr_lwpid, &stacks[*np].lwps_stack) == 0) {
289 		stacks[*np].lwps_lwpid = lsp->pr_lwpid;
290 		(*np)++;
291 	}
292 
293 	return (0);
294 }
295 
296 /*
297  * Prints usage and exits
298  */
299 static void
usage()300 usage()
301 {
302 	(void) fprintf(stderr,
303 	    gettext("usage:\t%s [-o option[,option]] [-Flv] pid ...\n"),
304 	    progname);
305 	(void) fprintf(stderr,
306 	    gettext("    (Give \"advice\" about a process's memory)\n"
307 	    "    -o option[,option]: options are\n"
308 	    "        private=<advice>\n"
309 	    "        shared=<advice>\n"
310 	    "        heap=<advice>\n"
311 	    "        stack=<advice>\n"
312 	    "        <segaddr>[:<length>]=<advice>\n"
313 	    "       valid <advice> is one of:\n"
314 	    "        normal, random, sequential, willneed, dontneed,\n"
315 	    "        free, access_lwp, access_many, access_default\n"
316 	    "    -v: verbose output\n"
317 	    "    -F: force grabbing of the target process(es)\n"
318 	    "    -l: show unresolved dynamic linker map names\n"
319 	    "    pid: process id list\n"));
320 	exit(2);
321 }
322 
323 /*
324  * Function to parse advice from options string
325  */
326 static int
get_advice(char * optarg)327 get_advice(char *optarg)
328 {
329 	/*
330 	 * Determine which advice is given, we use shifted values as
331 	 * multiple pieces of advice may apply for a particular region.
332 	 * (See comment above regarding GRP[1,2,3]_ADV definitions for
333 	 * breakdown of advice groups).
334 	 */
335 	if (strcmp(optarg, "access_default") == 0)
336 		return (1 << MADV_ACCESS_DEFAULT);
337 	else if (strcmp(optarg, "access_many") == 0)
338 		return (1 << MADV_ACCESS_MANY);
339 	else if (strcmp(optarg, "access_lwp") == 0)
340 		return (1 << MADV_ACCESS_LWP);
341 	else if (strcmp(optarg, "sequential") == 0)
342 		return (1 << MADV_SEQUENTIAL);
343 	else if (strcmp(optarg, "willneed") == 0)
344 		return (1 << MADV_WILLNEED);
345 	else if (strcmp(optarg, "dontneed") == 0)
346 		return (1 << MADV_DONTNEED);
347 	else if (strcmp(optarg, "random") == 0)
348 		return (1 << MADV_RANDOM);
349 	else if (strcmp(optarg, "normal") == 0)
350 		return (1 << MADV_NORMAL);
351 	else if (strcmp(optarg, "free") == 0)
352 		return (1 << MADV_FREE);
353 	else if (strcmp(optarg, "purge") == 0)
354 		return (1 << MADV_PURGE);
355 	else {
356 		(void) fprintf(stderr, gettext("%s: invalid advice: %s\n"),
357 		    progname, optarg);
358 		usage();
359 		return (-1);
360 	}
361 }
362 
/*
 * Convert a size string with an optional binary unit suffix into a byte
 * count (i.e., 123M => 123 * 1024 * 1024).  The number is parsed with
 * base 0, so decimal, octal (leading 0) and hex (leading 0x) are accepted.
 * Recognized suffixes, in either case: B, K, M, G, T, P, E.
 *
 * On success *endptr is set to the first character after the number and
 * its suffix, and the size in bytes is returned.
 *
 * If optarg is NULL or empty, 0 is returned and *endptr is left untouched.
 *
 * If the string holds no digits, is negative, or describes a size that
 * does not fit in a size_t, 0 is returned and *endptr is set to optarg so
 * that a caller checking the terminating character sees the input as
 * unconsumed.
 */
static size_t
atosz(char *optarg, char **endptr)
{
	unsigned long long	val;
	size_t			sz;
	unsigned int		shift;
	char			*start, *end;

	if (optarg == NULL || optarg[0] == '\0')
		return (0);

	/*
	 * strtoull() accepts a leading '-' and negates the result, which
	 * would turn "-1" into an enormous size; reject it up front.
	 */
	for (start = optarg; isspace((unsigned char)*start); start++)
		;
	if (*start == '-') {
		*endptr = optarg;
		return (0);
	}

	errno = 0;
	val = strtoull(optarg, &end, 0);
	if (end == optarg || errno == ERANGE || val > SIZE_MAX) {
		*endptr = optarg;
		return (0);
	}
	sz = (size_t)val;

	/* Each unit step is a factor of 1024, i.e. a shift by 10 bits */
	switch (*end) {
	case 'E':
	case 'e':
		shift = 60;
		break;
	case 'P':
	case 'p':
		shift = 50;
		break;
	case 'T':
	case 't':
		shift = 40;
		break;
	case 'G':
	case 'g':
		shift = 30;
		break;
	case 'M':
	case 'm':
		shift = 20;
		break;
	case 'K':
	case 'k':
		shift = 10;
		break;
	case 'B':
	case 'b':
		shift = 0;
		break;
	default:
		/* no suffix: the number is already in bytes */
		*endptr = end;
		return (sz);
	}

	if (sz != 0) {
		/* refuse to scale if the result would not fit in a size_t */
		if (shift >= sizeof (size_t) * CHAR_BIT ||
		    sz > (SIZE_MAX >> shift)) {
			*endptr = optarg;
			return (0);
		}
		sz <<= shift;
	}

	/* step over the suffix character */
	*endptr = end + 1;
	return (sz);
}
411 
412 /*
413  * Inserts newaddr into list.  dups indicates whether we allow duplicate
414  * addr entries in the list (valid values are NODUPS and YESDUPS).
415  */
416 static void
insert_addr(saddr_t ** list,saddr_t * newaddr,int dups)417 insert_addr(saddr_t **list, saddr_t *newaddr, int dups)
418 {
419 	saddr_t *prev = *list;
420 	saddr_t *psaddr;
421 
422 	if (*list == NULL) {
423 		newaddr->next = *list;
424 		*list = newaddr;
425 		return;
426 	}
427 
428 	for (psaddr = (*list)->next; psaddr != NULL; psaddr = psaddr->next) {
429 		if ((dups == NODUPS) && (psaddr->addr == newaddr->addr)) {
430 			free(newaddr);
431 			return;
432 		}
433 
434 		/*
435 		 * primary level of comparison is by address; smaller addr 1st
436 		 * secondary level of comparison is by length; bigger length 1st
437 		 */
438 		if ((psaddr->addr > newaddr->addr) ||
439 		    (psaddr->addr == newaddr->addr &&
440 		    psaddr->length < newaddr->length))
441 			break;
442 
443 		prev = psaddr;
444 	}
445 
446 	prev->next = newaddr;
447 	newaddr->next = psaddr;
448 }
449 
450 /*
451  * Deletes given element from list
452  */
453 static void
delete_addr(saddr_t ** list,saddr_t * delme)454 delete_addr(saddr_t **list, saddr_t *delme)
455 {
456 	saddr_t	*prev = *list;
457 
458 	if (delme == *list) {
459 		*list = delme->next;
460 		free(delme);
461 		return;
462 	}
463 
464 	while (prev != NULL && prev->next != delme) {
465 		prev = prev->next;
466 	}
467 
468 	if (prev) {
469 		prev->next = delme->next;
470 		free(delme);
471 	}
472 }
473 
474 /*
475  * Delete entire list
476  */
477 static void
delete_list(saddr_t ** list)478 delete_list(saddr_t **list)
479 {
480 	saddr_t *psaddr = *list;
481 
482 	while (psaddr != NULL) {
483 		saddr_t *temp = psaddr;
484 
485 		psaddr = psaddr->next;
486 		free(temp);
487 	}
488 	*list = NULL;
489 }
490 
491 static saddr_t *
parse_suboptions(char * value)492 parse_suboptions(char *value)
493 {
494 	char	*endptr;
495 	saddr_t *psaddr = malloc(sizeof (saddr_t));
496 
497 	/*
498 	 * This must (better) be a segment addr
499 	 */
500 	psaddr->addr =
501 	    strtoull(value, &endptr, 16);
502 
503 	/*
504 	 * Check to make sure strtoul worked correctly (a properly formatted
505 	 * string will terminate in a ':' (if size is given) or an '=' (if size
506 	 * is not specified). Also check to make sure a 0 addr wasn't returned
507 	 * indicating strtoll was unable to convert).
508 	 */
509 	if ((psaddr->addr == 0) || (*endptr != ':' && *endptr != '=')) {
510 		free(psaddr);
511 		(void) fprintf(stderr,
512 		    gettext("%s: invalid option %s\n"),
513 		    progname, value);
514 		usage();
515 	} else {
516 		/* init other fields */
517 		psaddr->length = 0;
518 		psaddr->adv = NO_ADVICE;
519 		psaddr->next = NULL;
520 
521 		/* skip past address */
522 		value = endptr;
523 
524 		/* check for length */
525 		if (*value == ':') {
526 			/* skip the ":" */
527 			value++;
528 			psaddr->length = atosz(value, &endptr);
529 		}
530 
531 		if (*endptr != '=') {
532 			(void) fprintf(stderr,
533 			    gettext("%s: invalid option %s\n"),
534 			    progname, value);
535 			/*
536 			 * if improperly formatted, free mem, print usage, and
537 			 * exit Note: usage ends with a call to exit()
538 			 */
539 			free(psaddr);
540 			usage();
541 		}
542 		/* skip the "=" */
543 		value = endptr + 1;
544 		at_map |= (1 << AT_SEG);
545 		psaddr->adv =
546 		    get_advice(value);
547 	}
548 
549 	return (psaddr);
550 }
551 
552 /*
553  * Create linked list of mappings for current process
554  * In addition, add generic advice and raw advice
555  * entries to merged_list.
556  */
557 /* ARGSUSED */
558 static int
create_maplist(void * arg,const prmap_t * pmp,const char * object_name)559 create_maplist(void *arg, const prmap_t *pmp, const char *object_name)
560 {
561 	const 		pstatus_t *Psp = Pstatus(Pr);
562 	mapnode_t *newmap = malloc(sizeof (mapnode_t));
563 	saddr_t	*newaddr;
564 	saddr_t	*psaddr;
565 	char	*lname = NULL;
566 	int	i;
567 
568 	if (interrupt)
569 		return (0);
570 
571 	newmap->pmp = malloc(sizeof (prmap_t));
572 	newmap->label[0] = '\0';
573 	newmap->mtypes = 0;
574 	newmap->next = NULL;
575 	(void) memcpy(newmap->pmp, pmp, sizeof (prmap_t));
576 
577 	/*
578 	 * If the mapping is not anon or not part of the heap, make a name
579 	 * for it.  We don't want to report the heap as a.out's data.
580 	 */
581 	if (!(pmp->pr_mflags & MA_ANON) ||
582 	    (pmp->pr_vaddr + pmp->pr_size <= Psp->pr_brkbase ||
583 	    pmp->pr_vaddr >= Psp->pr_brkbase + Psp->pr_brksize)) {
584 		lname = make_name(Pr, lflag, pmp->pr_vaddr, pmp->pr_mapname,
585 		    newmap->label, sizeof (newmap->label));
586 		if (pmp->pr_mflags & MA_SHARED)
587 			newmap->mtypes |= 1 << AT_SHARED;
588 		else
589 			newmap->mtypes |= 1 << AT_PRIVM;
590 	}
591 
592 	if (lname == NULL && (pmp->pr_mflags & MA_ANON)) {
593 		lname = anon_name(newmap->label, Psp, stacks, nstacks,
594 		    pmp->pr_vaddr, pmp->pr_size, pmp->pr_mflags, pmp->pr_shmid,
595 		    &newmap->mtypes);
596 	}
597 
598 	/*
599 	 * Add raw advice that applies to this mapping to the merged_list
600 	 */
601 	psaddr = rawadv_list;
602 	/*
603 	 * Advance to point in rawadv_list that applies to this mapping
604 	 */
605 	while (psaddr && psaddr->addr < pmp->pr_vaddr)
606 		psaddr = psaddr->next;
607 	/*
608 	 * Copy over to merged_list, check to see if size needs to be filled in
609 	 */
610 	while (psaddr && psaddr->addr < (pmp->pr_vaddr + pmp->pr_size)) {
611 		newaddr = malloc(sizeof (saddr_t));
612 		(void) memcpy(newaddr, psaddr, sizeof (saddr_t));
613 		insert_addr(&merged_list, newaddr, YESDUPS);
614 		/*
615 		 * For raw advice that is given without size, try to default
616 		 * size to size of mapping (only allowed if raw adv addr is
617 		 * equal to beginning of mapping). Don't change the entry
618 		 * in rawadv_list, only in the merged_list as the mappings
619 		 * (and thus the default sizes) will be different for
620 		 * different processes.
621 		 */
622 		if ((pmp->pr_vaddr == psaddr->addr) && (psaddr->length == 0))
623 			newaddr->length = pmp->pr_size;
624 		psaddr = psaddr->next;
625 	}
626 
627 	/*
628 	 * Put mapping into merged list with no advice, then
629 	 * check to see if any generic advice applies.
630 	 */
631 	newaddr = malloc(sizeof (saddr_t));
632 	newaddr->addr = pmp->pr_vaddr;
633 	newaddr->length = pmp->pr_size;
634 	newaddr->adv = NO_ADVICE;
635 	insert_addr(&merged_list, newaddr, YESDUPS);
636 
637 	newmap->mtypes &= at_map;
638 	for (i = AT_STACK; i >= AT_PRIVM; i--) {
639 		if (newmap->mtypes & (1 << i)) {
640 			assert(generic_adv[i] != NO_ADVICE);
641 			newaddr->adv = generic_adv[i];
642 			break;
643 		}
644 	}
645 
646 	/*
647 	 * Add to linked list of mappings
648 	 */
649 	if (maplist_tail == NULL) {
650 		maplist_head = maplist_tail = newmap;
651 	} else {
652 		maplist_tail->next = newmap;
653 		maplist_tail = newmap;
654 	}
655 
656 
657 	return (0);
658 }
659 
660 /*
661  * Traverse advice list and apply all applicable advice to each region
662  */
663 static int
apply_advice(saddr_t ** advicelist)664 apply_advice(saddr_t **advicelist)
665 {
666 	saddr_t	*psaddr = *advicelist;
667 	saddr_t	*next;
668 	int	i;
669 
670 
671 	while (!interrupt && psaddr != NULL) {
672 		/*
673 		 * Save next pointer since element may be removed before
674 		 * we get a chance to advance psaddr.
675 		 */
676 		next = psaddr->next;
677 
678 		/*
679 		 * Since mappings have been added to the merged list
680 		 * even if no generic advice was given for the map,
681 		 * check to make sure advice exists before bothering
682 		 * with the for loop.
683 		 */
684 		if (psaddr->adv != NO_ADVICE) {
685 			for (i = MADV_NORMAL; i <= MADV_PURGE; i++) {
686 				if ((psaddr->adv & (1 << i)) &&
687 				    (pr_madvise(Pr, (caddr_t)psaddr->addr,
688 				    psaddr->length, i) < 0)) {
689 					/*
690 					 * madvise(3C) call failed trying to
691 					 * apply advice output error and remove
692 					 * from advice list
693 					 */
694 					(void) fprintf(stderr,
695 					    gettext("Error applying "
696 					    "advice (%s) to memory range "
697 					    "[%lx, %lx):\n"),
698 					    advicestr[i], (ulong_t)psaddr->addr,
699 					    (ulong_t)psaddr->addr +
700 					    psaddr->length);
701 					perror("madvise");
702 					/*
703 					 * Clear this advice from the advice
704 					 * mask. If no more advice is given
705 					 * for this element, remove element
706 					 * from list.
707 					 */
708 					psaddr->adv &= ~(1 << i);
709 					if (psaddr->adv == 0) {
710 						delete_addr(advicelist, psaddr);
711 						break;
712 					}
713 				}
714 			}
715 		}
716 		psaddr = next;
717 	}
718 	return (0);
719 }
720 
721 /*
722  * Set advice but keep mutual exclusive property of advice groupings
723  */
724 static void
set_advice(int * combined_adv,int new_adv)725 set_advice(int *combined_adv, int new_adv) {
726 	/*
727 	 * Since advice falls in 3 groups of mutually exclusive options,
728 	 * clear previous value if new advice overwrites that group.
729 	 */
730 
731 	/*
732 	 * If this is the first advice to be applied, clear invalid value (-1)
733 	 */
734 	if (*combined_adv == -1)
735 		*combined_adv = 0;
736 
737 	if (new_adv & GRP1_ADV)
738 		*combined_adv &= ~GRP1_ADV;
739 	else if (new_adv & GRP2_ADV)
740 		*combined_adv &= ~GRP2_ADV;
741 	else
742 		*combined_adv &= ~GRP3_ADV;
743 
744 	*combined_adv |= new_adv;
745 }
746 
747 /*
748  * Create chopped list from merged list for use with verbose output
749  */
750 static void
create_choplist(saddr_t ** choppedlist,saddr_t * mergedlist)751 create_choplist(saddr_t **choppedlist, saddr_t *mergedlist)
752 {
753 	saddr_t	*mlptr, *clptr;
754 
755 	for (mlptr = mergedlist; mlptr != NULL; mlptr = mlptr->next) {
756 		clptr = malloc(sizeof (saddr_t));
757 		clptr->addr = mlptr->addr;
758 		clptr->length = 0;
759 		/*
760 		 * Initialize the adv to -1 as an indicator for invalid
761 		 * elements in the chopped list (created from gaps between
762 		 * memory maps).
763 		 */
764 		clptr->adv = -1;
765 		clptr->next = NULL;
766 		insert_addr(choppedlist, clptr, NODUPS);
767 
768 		clptr = malloc(sizeof (saddr_t));
769 		clptr->addr = mlptr->addr + mlptr->length;
770 		clptr->length = 0;
771 		/*
772 		 * Again, initialize to -1 as an indicatorfor invalid elements
773 		 */
774 		clptr->adv = -1;
775 		clptr->next = NULL;
776 		insert_addr(choppedlist, clptr, NODUPS);
777 	}
778 
779 	for (clptr = *choppedlist; clptr != NULL; clptr = clptr->next) {
780 		if (clptr->next) {
781 			clptr->length = clptr->next->addr - clptr->addr;
782 		} else {
783 			/*
784 			 * must be last element, now that we've calculated
785 			 * all segment lengths, we can remove this node
786 			 */
787 			delete_addr(choppedlist, clptr);
788 			break;
789 		}
790 	}
791 
792 	for (mlptr = mergedlist; mlptr != NULL; mlptr = mlptr->next) {
793 		for (clptr = *choppedlist; clptr != NULL; clptr = clptr->next) {
794 			if (mlptr->addr <= clptr->addr &&
795 			    mlptr->addr + mlptr->length >=
796 			    clptr->addr + clptr->length)
797 				/*
798 				 * set_advice() will take care of conflicting
799 				 * advice by taking only the last advice
800 				 * applied for each of the 3 groups of advice.
801 				 */
802 				set_advice(&clptr->adv, mlptr->adv);
803 			if (mlptr->addr + mlptr->length <
804 			    clptr->addr)
805 				break;
806 		}
807 	}
808 }
809 
810 /*
811  * Print advice in pmap style for verbose output
812  */
813 static void
print_advice(saddr_t * advlist,mapnode_t * maplist)814 print_advice(saddr_t *advlist, mapnode_t *maplist)
815 {
816 	saddr_t		*psaddr = advlist;
817 	mapnode_t	*pmapnode;
818 	char		*advice;
819 
820 	pmapnode = maplist;
821 
822 	while (psaddr) {
823 		/*
824 		 * Using indicator flag from create_choppedlist, we know
825 		 * which entries in the chopped_list are gaps and should
826 		 * not be printed.
827 		 */
828 		if (psaddr->adv == -1) {
829 			psaddr = psaddr->next;
830 			continue;
831 		}
832 
833 		while (pmapnode && (pmapnode->pmp->pr_vaddr +
834 		    pmapnode->pmp->pr_size <= psaddr->addr))
835 			pmapnode = pmapnode->next;
836 
837 		advice = advtostr(psaddr->adv);
838 
839 		/*
840 		 * Print segment mapping and advice if there is any, or just a
841 		 * segment mapping.
842 		 */
843 		if (strlen(advice) > 0) {
844 			(void) printf("%.*lX %*uK %6s %s\t%s\n",
845 			    addr_width, (ulong_t)psaddr->addr, size_width - 1,
846 			    (int)ROUNDUP_KB(psaddr->length),
847 			    mflags(pmapnode->pmp->pr_mflags), pmapnode->label,
848 			    advice);
849 		} else {
850 			(void) printf("%.*lX %*uK %6s %s\n",
851 			    addr_width, (ulong_t)psaddr->addr, size_width - 1,
852 			    (int)ROUNDUP_KB(psaddr->length),
853 			    mflags(pmapnode->pmp->pr_mflags), pmapnode->label);
854 		}
855 		psaddr = psaddr->next;
856 
857 	}
858 }
859 
860 /*
861  * Call madvise(3c) in the context of the target process
862  */
863 static int
pr_madvise(struct ps_prochandle * Pr,caddr_t addr,size_t len,int advice)864 pr_madvise(struct ps_prochandle *Pr, caddr_t addr, size_t len, int advice)
865 {
866 	return (pr_memcntl(Pr, addr, len, MC_ADVISE,
867 	    (caddr_t)(uintptr_t)advice, 0, 0));
868 }
869 
870 static char *
mflags(uint_t arg)871 mflags(uint_t arg)
872 {
873 	static char code_buf[80];
874 
875 	/*
876 	 * rwxsR
877 	 *
878 	 * r - segment is readable
879 	 * w - segment is writable
880 	 * x - segment is executable
881 	 * s - segment is shared
882 	 * R - segment is mapped MAP_NORESERVE
883 	 *
884 	 */
885 	(void) snprintf(code_buf, sizeof (code_buf), "%c%c%c%c%c ",
886 	    arg & MA_READ ? 'r' : '-',
887 	    arg & MA_WRITE ? 'w' : '-',
888 	    arg & MA_EXEC ? 'x' : '-',
889 	    arg & MA_SHARED ? 's' : '-',
890 	    arg & MA_NORESERVE ? 'R' : '-');
891 
892 	return (code_buf);
893 }
894 
895 /*
896  * Convert advice to a string containing a commented list of applicable advice
897  */
898 static char *
advtostr(int adv)899 advtostr(int adv)
900 {
901 	static char buf[50];
902 	int i;
903 
904 	*buf = '\0';
905 
906 	if (adv != NO_ADVICE) {
907 		for (i = MADV_NORMAL; i <= MADV_PURGE; i++) {
908 			if (adv & (1 << i)) {
909 				/*
910 				 * check if it's the first advice entry
911 				 */
912 				if (*buf == '\0')
913 					(void) snprintf(buf, sizeof (buf) - 1,
914 					    "<= %s", advicestr[i]);
915 				else
916 					(void) snprintf(buf, sizeof (buf) - 1,
917 					    "%s,%s", buf, advicestr[i]);
918 			}
919 		}
920 	}
921 
922 	return (buf);
923 }
924 
925 /*
926  * Handler for catching signals from terminal
927  */
928 /* ARGSUSED */
929 static void
intr(int sig)930 intr(int sig)
931 {
932 	interrupt++;
933 }
934 
935 int
main(int argc,char ** argv)936 main(int argc, char **argv)
937 {
938 	int Fflag = 0;
939 	int rc = 0;
940 	int opt, subopt;
941 	int tmpadv;
942 	char	*options, *value;
943 	saddr_t	*psaddr;
944 	mapnode_t *pmapnode, *tempmapnode;
945 
946 	(void) setlocale(LC_ALL, "");
947 	(void) textdomain(TEXT_DOMAIN);
948 
949 	/*
950 	 * Get name of program for error messages
951 	 */
952 	progname = basename(argv[0]);
953 
954 	/*
955 	 * Not much to do when only name of program given
956 	 */
957 	if (argc == 1)
958 		usage();
959 
960 	/*
961 	 * Catch signals from terminal, so they can be handled asynchronously
962 	 * when we're ready instead of when we're not (;-)
963 	 */
964 	if (sigset(SIGHUP, SIG_IGN) == SIG_DFL)
965 		(void) sigset(SIGHUP, intr);
966 	if (sigset(SIGINT, SIG_IGN) == SIG_DFL)
967 		(void) sigset(SIGINT, intr);
968 	if (sigset(SIGQUIT, SIG_IGN) == SIG_DFL)
969 		(void) sigset(SIGQUIT, intr);
970 	(void) sigset(SIGPIPE, intr);
971 	(void) sigset(SIGTERM, intr);
972 
973 	/*
974 	 * Parse options, record generic advice if any and create
975 	 * rawadv_list from specific address advice.
976 	 */
977 
978 	while ((opt = getopt(argc, argv, "Flo:v")) != EOF) {
979 		switch (opt) {
980 		case 'o':
981 			options = optarg;
982 			while (*options != '\0') {
983 				subopt = getsubopt(&options, suboptstr,
984 				    &value);
985 				switch (subopt) {
986 				case AT_PRIVM:
987 				case AT_HEAP:
988 				case AT_SHARED:
989 				case AT_STACK:
990 					at_map |= (1 << subopt);
991 					tmpadv = get_advice(value);
992 					set_advice(&generic_adv[subopt],
993 					    tmpadv);
994 					break;
995 				default:
996 					at_map |= (1 << AT_SEG);
997 					psaddr = parse_suboptions(value);
998 					if (psaddr == NULL) {
999 						usage();
1000 					} else {
1001 						insert_addr(&rawadv_list,
1002 						    psaddr, YESDUPS);
1003 					}
1004 					break;
1005 				}
1006 			}
1007 			break;
1008 		case 'v':
1009 			opt_verbose = 1;
1010 			break;
1011 		case 'F':		/* force grabbing (no O_EXCL) */
1012 			Fflag = PGRAB_FORCE;
1013 			break;
1014 		case 'l':		/* show unresolved link map names */
1015 			lflag = 1;
1016 			break;
1017 		default:
1018 			usage();
1019 			break;
1020 		}
1021 	}
1022 
1023 	argc -= optind;
1024 	argv += optind;
1025 
1026 	if (argc <= 0) {
1027 		usage();
1028 	}
1029 
1030 	(void) proc_initstdio();
1031 
1032 	/*
1033 	 * Iterate through all pid arguments, create new merged_list, maplist,
1034 	 * (and chopped_list if using verbose output) based on each process'
1035 	 * memory map.
1036 	 */
1037 
1038 	while (!interrupt && argc-- > 0) {
1039 		char *arg;
1040 		int gcode;
1041 		psinfo_t psinfo;
1042 
1043 		(void) proc_flushstdio();
1044 
1045 		if ((Pr = proc_arg_grab(arg = *argv++, PR_ARG_PIDS,
1046 		    PGRAB_RETAIN | Fflag, &gcode)) == NULL) {
1047 			(void) fprintf(stderr,
1048 			    gettext("%s: cannot examine %s: %s\n"),
1049 			    progname, arg, Pgrab_error(gcode));
1050 			rc++;
1051 			continue;
1052 		}
1053 
1054 
1055 		addr_width =
1056 		    (Pstatus(Pr)->pr_dmodel == PR_MODEL_LP64) ? 16 : 8;
1057 		size_width =
1058 		    (Pstatus(Pr)->pr_dmodel == PR_MODEL_LP64) ? 11 : 8;
1059 		(void) memcpy(&psinfo, Ppsinfo(Pr), sizeof (psinfo_t));
1060 
1061 		if (opt_verbose) {
1062 			proc_unctrl_psinfo(&psinfo);
1063 			(void) printf("%d:\t%.70s\n",
1064 			    (int)psinfo.pr_pid, psinfo.pr_psargs);
1065 		}
1066 
1067 		/*
1068 		 * Get mappings for a process unless it is a system process.
1069 		 */
1070 		if (!(Pstatus(Pr)->pr_flags & PR_ISSYS)) {
1071 			nstacks = psinfo.pr_nlwp * 2;
1072 			stacks = calloc(nstacks, sizeof (stacks[0]));
1073 			if (stacks != NULL) {
1074 				int n = 0;
1075 				(void) Plwp_iter(Pr, getstack, &n);
1076 				qsort(stacks, nstacks, sizeof (stacks[0]),
1077 				    cmpstacks);
1078 			}
1079 
1080 			if (Pgetauxval(Pr, AT_BASE) != -1L &&
1081 			    Prd_agent(Pr) == NULL) {
1082 				(void) fprintf(stderr,
1083 				    gettext("%s: warning: "
1084 				    "librtld_db failed to initialize; "
1085 				    "shared library information will not "
1086 				    "be available\n"),
1087 				    progname);
1088 			}
1089 
1090 			/*
1091 			 * Create linked list of mappings for current process
1092 			 * In addition, add generic advice and raw advice
1093 			 * entries to merged_list.
1094 			 * e.g. if rawadv_list contains:
1095 			 *   [0x38000,0x3a000) = adv1
1096 			 *   [0x3a000,0x3c000) = adv2
1097 			 * and there is generic advice:
1098 			 *   heap = adv3
1099 			 * where heap corresponds to 0x38000, then merged_list
1100 			 * will contain:
1101 			 *   ... (include all other mappings from process)
1102 			 *   [0x38000,0x3c000) = adv3
1103 			 *   [0x38000,0x3a000) = adv1
1104 			 *   [0x3a000,0x3c000) = adv2
1105 			 *   ... (include all other mappings from process)
1106 			 */
1107 			assert(merged_list == NULL);
1108 			maplist_head = maplist_tail = NULL;
1109 			rc += Pmapping_iter(Pr, (proc_map_f *)create_maplist,
1110 			    NULL);
1111 
1112 			/*
1113 			 * Apply advice by iterating through merged list
1114 			 */
1115 			(void) apply_advice(&merged_list);
1116 
1117 			if (opt_verbose) {
1118 				assert(chopped_list == NULL);
1119 				/*
1120 				 * Create chopped_list from merged_list
1121 				 */
1122 				create_choplist(&chopped_list, merged_list);
1123 
1124 				/*
1125 				 * Iterate through maplist and output as
1126 				 * given by chopped_list
1127 				 */
1128 				print_advice(chopped_list, maplist_head);
1129 				delete_list(&chopped_list);
1130 			}
1131 
1132 			delete_list(&merged_list);
1133 
1134 			/*
1135 			 * Clear maplist
1136 			 */
1137 			pmapnode = maplist_head;
1138 			while (pmapnode) {
1139 				tempmapnode = pmapnode;
1140 				pmapnode = pmapnode->next;
1141 				free(tempmapnode);
1142 			}
1143 
1144 			if (stacks != NULL) {
1145 				free(stacks);
1146 				stacks = NULL;
1147 			}
1148 		}
1149 
1150 		Prelease(Pr, 0);
1151 	}
1152 
1153 	(void) proc_finistdio();
1154 
1155 	return (rc);
1156 }
1157