xref: /titanic_50/usr/src/cmd/ptools/pmadvise/pmadvise.c (revision 549ec3fff108310966327d1dc9004551b63210b7)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * pmadvise
31  *
32  * ptool wrapper for madvise(3C) to apply memory advice to running processes
33  *
34  * usage:	pmadvise -o option[,option] [-v] [-F] pid ...
35  *  (Give "advice" about a process's memory)
36  *  -o option[,option]: options are
37  *      private=<advice>
38  *      shared=<advice>
39  *      heap=<advice>
40  *      stack=<advice>
41  *      <segaddr>[:<length>]=<advice>
42  *     valid <advice> is one of:
43  *      normal, random, sequential, willneed, dontneed,
44  *      free, access_lwp, access_many, access_default
45  *  -v: verbose output
46  *  -F: force grabbing of the target process(es)
47  *  pid: process id list
48  *
49  *
 * Advice passed to this tool is organized into the lists described here:
51  *  rawadv_list: includes all specific advice from command line (specific
52  *               advice being those given to a particular address range rather
53  *               than a type like "heap" or "stack".  In contrast, these
54  *               types are referred to as generic advice). Duplicates allowed.
55  *               List ordered by addr, then by size (largest size first).
56  *               Created once per run.
57  *  merged_list: includes all specific advice from the rawadv_list as well as
58  *               all generic advice.  This must be recreated for each process
59  *               as the generic advice will apply to different regions for
60  *               different processes. Duplicates allowed. List ordered by addr,
61  *               then by size (largest size first). Created once per pid.
62  *  chopped_list: used for verbose output only. This list parses the merged
63  *                list such that it eliminates any overlap and combines the
64  *                advice. Easiest to think of this visually: if you take all
65  *                the advice in the merged list and lay them down on a memory
66  *                range of the entire process (laying on top of each other when
67  *                necessary), then flatten them into one layer, combining advice
68  *                in the case of overlap, you get the chopped_list of advice.
69  *                Duplicate entries not allowed (since there is no overlap by
70  *                definition in this list).  List ordered by addr. Created once
71  *                per pid.
72  *
73  *                Example:
74  *                   merged_list:   |-----adv1----|---------adv3---------|
75  *                                       |--adv2--|--adv4--|-----adv5----|
76  *                                                  ||
77  *                                                  \/
78  *                   chopped_list:  |adv1|-adv1,2-|-adv3,4-|----adv3,5---|
79  *
80  *  maplist: list of memory mappings for a particular process. Used to create
81  *           generic advice entries for merged_list and for pmap like verbose
82  *           output. Created once per pid.
83  *
84  * Multiple lists are necessary because the actual advice applied given a set
85  * of generic and specific advice changes from process to process, so for each
86  * pid pmadvise is passed, it must create a new merged_list from which to apply
87  * advice (and a new chopped_list if verbose output is requested).
88  *
89  * Pseudo-code:
90  * I.	Input advice from command line
91  * II.	Create [raw advice list] of specific advice
92  * III.	Iterate through PIDs:
93  *	A.	Create [map list]
94  *	B.	Merge generic advice and [raw advice list] into [merged list]
95  *	C.	Apply advice from [merged list]; upon error:
96  *		i.	output madvise error message
97  *		ii.	remove element from [merged list]
98  *	D.	If verbose output:
99  *		i.	Create [chopped list] from [merged list]
100  *		ii.	Iterate through [map list]:
101  *			a.	output advice as given by [merged list]
102  *		iii.	Delete [chopped list]
103  *	E.	Delete [merged list]
104  *	F.	Delete [map list]
105  */
106 
107 #include <stdio.h>
108 #include <stdlib.h>
109 #include <unistd.h>
110 #include <ctype.h>
111 #include <fcntl.h>
112 #include <string.h>
113 #include <dirent.h>
114 #include <limits.h>
115 #include <link.h>
116 #include <libelf.h>
117 #include <locale.h>
118 #include <sys/types.h>
119 #include <sys/mman.h>
120 #include <sys/stat.h>
121 #include <sys/mkdev.h>
122 #include <assert.h>
123 #include <libproc.h>
124 #include <libgen.h>
125 #include <signal.h>
126 
127 #ifndef	TEXT_DOMAIN			/* should be defined by cc -D */
128 #define	TEXT_DOMAIN	"SYS_TEST"	/* use this only if it wasn't */
129 #endif
130 
#define	KILOBYTE	1024

/*
 * Convert a byte count to kilobytes, rounding any partial kilobyte up
 * (e.g. 1 -> 1, 1024 -> 1, 1025 -> 2).
 */
#define	ROUNDUP_KB(x)	(((x) + (KILOBYTE - 1)) / KILOBYTE)
137 
/*
 * Value of an advice mask in which no advice bit is set.
 */
#define	NO_ADVICE		0

/*
 * The following definitions are used as the third argument in insert_addr()
 *   NODUPS = duplicates are not allowed, thus if the addr being inserted
 *   already exists in the list, return without inserting again.
 *
 *   YESDUPS = duplicates are allowed, thus always insert the addr
 *   regardless of whether it already exists in the list or not.
 */
#define	NODUPS	1
#define	YESDUPS	0
150 
/*
 * Advice that can be passed to madvise fit into three groups that each
 * contain 3 mutually exclusive options.  These groups are defined below:
 *   Group 1: normal, random, sequential
 *   Group 2: willneed, dontneed, free
 *   Group 3: default, accesslwp, accessmany
 * Thus, advice that includes (at most) one from each group is valid.
 *
 * The following #define's are used as masks to determine which group(s) a
 * particular advice fall under.  Each advice occupies the bit numbered by
 * its MADV_* value (1 << MADV_xxx), which is the same encoding that
 * get_advice() returns.
 */

#define	GRP1_ADV	(1 << MADV_NORMAL | 1 << MADV_RANDOM | \
			1 << MADV_SEQUENTIAL)
#define	GRP2_ADV	(1 << MADV_WILLNEED | 1 << MADV_DONTNEED | \
			1 << MADV_FREE)
#define	GRP3_ADV	(1 << MADV_ACCESS_DEFAULT | 1 << MADV_ACCESS_LWP | \
			1 << MADV_ACCESS_MANY)
169 
static	int	create_maplist(void *, const prmap_t *, const char *);
static	int	pr_madvise(struct ps_prochandle *, caddr_t, size_t, int);

static	char	*mflags(uint_t);
static	char	*advtostr(int);

/* Field widths used by print_advice() for the address and size columns */
static	int	addr_width, size_width;
/* basename of argv[0], used as the prefix of error messages */
static	char	*progname;
/* Handle of the process currently being worked on */
static	struct ps_prochandle *Pr;

/*
 * One stack (main or alternate) belonging to an lwp of the target process.
 * Alternate stacks are marked by getstack() with SS_ONSTACK in ss_flags.
 */
typedef struct lwpstack {
	lwpid_t	lwps_lwpid;
	stack_t	lwps_stack;
} lwpstack_t;

/*
 * Array of lwp stacks filled in by getstack() and searched by anon_name()
 * with bsearch()/cmpstacks(); nstacks is the number of entries searched.
 */
static	lwpstack_t *stacks;
static	uint_t	nstacks;
187 
/*
 * Used to set the advice type var (at_map) when parsing the arguments to
 * pmadvise.  Later, when creating the map list, at_map is used as a mask
 * to determine if any generic advice applies to each memory mapping.
 *
 * Each enumerator is used as a bit number (1 << AT_xxx) in at_map and in
 * mapnode_t.mtypes.  AT_PRIVM through AT_STACK double as indices into
 * suboptstr[] and generic_adv[], so the three must be kept in the same order.
 */
enum	atype_enum {
	AT_PRIVM,
	AT_SHARED,
	AT_HEAP,
	AT_STACK,
	AT_SEG,
	AT_NTYPES
};

/*
 * Generic suboption keywords handed to getsubopt(); the index of a matching
 * keyword is the corresponding atype_enum value.
 */
static char	*suboptstr[] = {
	"private",
	"shared",
	"heap",
	"stack",
	NULL
};


/* Advice mask requested for each generic type, indexed by atype_enum */
int	generic_adv[] = {NO_ADVICE, NO_ADVICE, NO_ADVICE, NO_ADVICE};
/* Bitmask (1 << AT_xxx) of the advice types seen on the command line */
int	at_map = 0;
213 
/*
 * One piece of advice for an address range; the element type of the
 * rawadv_list, merged_list and chopped_list singly linked lists.
 *   addr   - start address of the range
 *   length - length of the range in bytes (0 in rawadv_list when no length
 *            was given on the command line)
 *   adv    - mask of (1 << MADV_xxx) bits, NO_ADVICE, or -1 to mark a
 *            chopped_list entry that no advice has been applied to
 *   next   - next element, NULL at the end of the list
 */
typedef struct saddr_struct {
	uintptr_t	addr;
	size_t		length;
	int		adv;
	struct saddr_struct	*next;
} saddr_t;
static int	apply_advice(saddr_t **);
static void	set_advice(int *, int);
static void	create_choplist(saddr_t **, saddr_t *);
223 
/*
 * The segment address advice from the command line.
 * Built once while the options are parsed.
 */
saddr_t	*rawadv_list = NULL;
/*
 * The rawadv_list + list entries for the generic advice (if any).
 * This must be recreated for each PID as the memory maps might be different.
 * Every mapping gets an entry here, even when no advice applies to it.
 */
saddr_t *merged_list = NULL;
/*
 * The merged_list cut up so as to remove all overlap
 * e.g. if merged_list contained two entries:
 *
 * [0x38000:0x3e000) = adv1
 * [0x3a000:0x3c000) = adv2
 *
 * the chopped list will contain three entries:
 *
 * [0x38000:0x3a000) = adv1
 * [0x3a000:0x3c000) = adv1,adv2
 * [0x3c000:0x3e000) = adv1
 *
 */
saddr_t *chopped_list = NULL;
248 
/*
 * One memory mapping of the target process, kept in a singly linked list.
 *   pmp    - private heap copy of the prmap_t describing the mapping
 *   label  - printable name of the mapping (file name, "[ heap ]", ...)
 *   mtypes - mask of (1 << AT_xxx) bits saying which generic advice types
 *            the mapping belongs to
 *   next   - next mapping, NULL at the end of the list
 */
typedef struct mapnode_struct {
	prmap_t			*pmp;
	char			label[PATH_MAX];
	int			mtypes;
	struct mapnode_struct	*next;
} mapnode_t;

/* First and last node of the mapping list; create_maplist() appends */
mapnode_t *maplist_head = NULL;
mapnode_t *maplist_tail = NULL;
static void	print_advice(saddr_t *, mapnode_t *);

/* Set to 1 when -v is given on the command line */
int	opt_verbose;
261 
/*
 * Printable advice names.  The table is indexed directly by the MADV_*
 * value (MADV_NORMAL through MADV_ACCESS_MANY), so the order of the entries
 * must follow the numeric order of those constants.
 */
static char	*advicestr[] = {
	"normal",
	"random",
	"sequential",
	"willneed",
	"dontneed",
	"free",
	"access_default",
	"access_lwp",
	"access_many"
};
273 
/*
 * How many signals caught from terminal
 * We bail out as soon as possible when interrupt is set
 *
 * The counter is written by the signal handler and polled by the main-line
 * loops, so it has to be a volatile sig_atomic_t: with a plain int the
 * compiler is free to keep the value in a register and never notice the
 * handler's update.
 */
static volatile sig_atomic_t	interrupt = 0;
279 
280 /*
281  * Interrupt handler
282  */
283 static void	intr(int);
284 
285 /*
286  * Iterative function passed to Plwp_iter to
287  * get alt and main stacks for given lwp.
288  */
289 static int
290 getstack(void *data, const lwpstatus_t *lsp)
291 {
292 	int *np = (int *)data;
293 
294 	if (Plwp_alt_stack(Pr, lsp->pr_lwpid, &stacks[*np].lwps_stack) == 0) {
295 		stacks[*np].lwps_stack.ss_flags |= SS_ONSTACK;
296 		stacks[*np].lwps_lwpid = lsp->pr_lwpid;
297 		(*np)++;
298 	}
299 
300 	if (Plwp_main_stack(Pr, lsp->pr_lwpid, &stacks[*np].lwps_stack) == 0) {
301 		stacks[*np].lwps_lwpid = lsp->pr_lwpid;
302 		(*np)++;
303 	}
304 
305 	return (0);
306 }
307 
308 /*
309  * We compare the high memory addresses since stacks are faulted in from
310  * high memory addresses to low memory addresses, and our prmap_t
311  * structures identify only the range of addresses that have been faulted
312  * in so far.
313  */
314 static int
315 cmpstacks(const void *ap, const void *bp)
316 {
317 	const lwpstack_t *as = ap;
318 	const lwpstack_t *bs = bp;
319 	uintptr_t a = (uintptr_t)as->lwps_stack.ss_sp + as->lwps_stack.ss_size;
320 	uintptr_t b = (uintptr_t)bs->lwps_stack.ss_sp + bs->lwps_stack.ss_size;
321 
322 	if (a < b)
323 		return (1);
324 	if (a > b)
325 		return (-1);
326 	return (0);
327 }
328 
329 /*
330  * Prints usage and exits
331  */
332 static void
333 usage()
334 {
335 	(void) fprintf(stderr,
336 	    gettext("usage:\t%s -o option[,option] [-v] [-F] pid ...\n"),
337 	    progname);
338 	(void) fprintf(stderr,
339 	    gettext("    (Give \"advice\" about a process's memory)\n"
340 		"    -o option[,option]: options are\n"
341 		"        private=<advice>\n"
342 		"        shared=<advice>\n"
343 		"        heap=<advice>\n"
344 		"        stack=<advice>\n"
345 		"        <segaddr>[:<length>]=<advice>\n"
346 		"       valid <advice> is one of:\n"
347 		"        normal, random, sequential, willneed, dontneed,\n"
348 		"        free, access_lwp, access_many, access_default\n"
349 		"    -v: verbose output\n"
350 		"    -F: force grabbing of the target process(es)\n"
351 		"    pid: process id list\n"));
352 	exit(2);
353 }
354 
355 /*
356  * Function to parse advice from options string
357  */
358 static int
359 get_advice(char *optarg)
360 {
361 	/*
362 	 * Determine which advice is given, we use shifted values as
363 	 * multiple pieces of advice may apply for a particular region.
364 	 * (See comment above regarding GRP[1,2,3]_ADV definitions for
365 	 * breakdown of advice groups).
366 	 */
367 	if (strcmp(optarg, "access_default") == 0)
368 		return (1 << MADV_ACCESS_DEFAULT);
369 	else if (strcmp(optarg, "access_many") == 0)
370 		return (1 << MADV_ACCESS_MANY);
371 	else if (strcmp(optarg, "access_lwp") == 0)
372 		return (1 << MADV_ACCESS_LWP);
373 	else if (strcmp(optarg, "sequential") == 0)
374 		return (1 << MADV_SEQUENTIAL);
375 	else if (strcmp(optarg, "willneed") == 0)
376 		return (1 << MADV_WILLNEED);
377 	else if (strcmp(optarg, "dontneed") == 0)
378 		return (1 << MADV_DONTNEED);
379 	else if (strcmp(optarg, "random") == 0)
380 		return (1 << MADV_RANDOM);
381 	else if (strcmp(optarg, "normal") == 0)
382 		return (1 << MADV_NORMAL);
383 	else if (strcmp(optarg, "free") == 0)
384 		return (1 << MADV_FREE);
385 	else {
386 		(void) fprintf(stderr, gettext("%s: invalid advice: %s\n"),
387 		    progname, optarg);
388 		usage();
389 		return (-1);
390 	}
391 }
392 
/*
 * Function to convert character size indicators into actual size
 * (i.e., 123M => sz = 123 * 1024 * 1024)
 *
 * optarg is parsed with strtoll() in base 0 (decimal, 0x hex or 0 octal)
 * and may be followed by one scale suffix, in either case:
 *   B (bytes), K, M, G, T, P, E (powers of 1024).
 *
 * On return *endptr always points somewhere into optarg (or is NULL if
 * optarg is NULL): just past the number and its suffix on success, or at
 * optarg itself when nothing usable was converted.  The latter is the case
 * for an empty string, a negative number, and a value that does not fit in
 * a size_t once scaled; 0 is returned for all of those, so a caller that
 * checks the character at *endptr rejects them.
 *
 * endptr must not be NULL.
 */
static size_t
atosz(char *optarg, char **endptr)
{
	const size_t	szmax = (size_t)-1;
	long long	val;
	size_t		sz;
	int		nmult;		/* how many times to scale by 1024 */
	char		*end;

	if (optarg == NULL || optarg[0] == '\0') {
		*endptr = optarg;
		return (0);
	}

	val = strtoll(optarg, &end, 0);

	/* a length can be neither negative nor wider than a size_t */
	if (val < 0 || (unsigned long long)val > szmax) {
		*endptr = optarg;
		return (0);
	}
	sz = (size_t)val;

	switch (*end) {
	case 'E':
	case 'e':
		nmult = 6;
		break;
	case 'P':
	case 'p':
		nmult = 5;
		break;
	case 'T':
	case 't':
		nmult = 4;
		break;
	case 'G':
	case 'g':
		nmult = 3;
		break;
	case 'M':
	case 'm':
		nmult = 2;
		break;
	case 'K':
	case 'k':
		nmult = 1;
		break;
	case 'B':
	case 'b':
		nmult = 0;
		break;
	default:
		/* no suffix: leave *endptr on the first unparsed character */
		*endptr = end;
		return (sz);
	}

	while (nmult-- > 0) {
		/* refuse to wrap around silently */
		if (sz > szmax / KILOBYTE) {
			*endptr = optarg;
			return (0);
		}
		sz *= KILOBYTE;
	}

	/* step over the suffix character */
	*endptr = end + 1;
	return (sz);
}
441 
442 /*
443  * Inserts newaddr into list.  dups indicates whether we allow duplicate
444  * addr entries in the list (valid values are NODUPS and YESDUPS).
445  */
446 static void
447 insert_addr(saddr_t **list, saddr_t *newaddr, int dups)
448 {
449 	saddr_t *prev = *list;
450 	saddr_t *psaddr;
451 
452 	if (*list == NULL) {
453 		newaddr->next = *list;
454 		*list = newaddr;
455 		return;
456 	}
457 
458 	for (psaddr = (*list)->next; psaddr != NULL; psaddr = psaddr->next) {
459 		if ((dups == NODUPS) && (psaddr->addr == newaddr->addr)) {
460 			free(newaddr);
461 			return;
462 		}
463 
464 		/*
465 		 * primary level of comparison is by address; smaller addr 1st
466 		 * secondary level of comparison is by length; bigger length 1st
467 		 */
468 		if ((psaddr->addr > newaddr->addr) ||
469 		    (psaddr->addr == newaddr->addr &&
470 		    psaddr->length < newaddr->length))
471 			break;
472 
473 		prev = psaddr;
474 	}
475 
476 	prev->next = newaddr;
477 	newaddr->next = psaddr;
478 }
479 
480 /*
481  * Deletes given element from list
482  */
483 static void
484 delete_addr(saddr_t **list, saddr_t *delme)
485 {
486 	saddr_t	*prev = *list;
487 
488 	if (delme == *list) {
489 		*list = delme->next;
490 		free(delme);
491 		return;
492 	}
493 
494 	while (prev != NULL && prev->next != delme) {
495 		prev = prev->next;
496 	}
497 
498 	if (prev) {
499 		prev->next = delme->next;
500 		free(delme);
501 	}
502 }
503 
504 /*
505  * Delete entire list
506  */
507 static void
508 delete_list(saddr_t **list)
509 {
510 	saddr_t *psaddr = *list;
511 
512 	while (psaddr != NULL) {
513 		saddr_t *temp = psaddr;
514 
515 		psaddr = psaddr->next;
516 		free(temp);
517 	}
518 	*list = NULL;
519 }
520 
521 static saddr_t *
522 parse_suboptions(char *value)
523 {
524 	char	*endptr;
525 	saddr_t *psaddr = malloc(sizeof (saddr_t));
526 
527 	/*
528 	 * This must (better) be a segment addr
529 	 */
530 	psaddr->addr =
531 	    strtoull(value, &endptr, 16);
532 
533 	/*
534 	 * Check to make sure strtoul worked correctly (a properly formatted
535 	 * string will terminate in a ':' (if size is given) or an '=' (if size
536 	 * is not specified). Also check to make sure a 0 addr wasn't returned
537 	 * indicating strtoll was unable to convert).
538 	 */
539 	if ((psaddr->addr == 0) || (*endptr != ':' && *endptr != '=')) {
540 		free(psaddr);
541 		(void) fprintf(stderr,
542 		    gettext("%s: invalid option %s\n"),
543 		    progname, value);
544 		usage();
545 	} else {
546 		/* init other fields */
547 		psaddr->length = 0;
548 		psaddr->adv = NO_ADVICE;
549 		psaddr->next = NULL;
550 
551 		/* skip past address */
552 		value = endptr;
553 
554 		/* check for length */
555 		if (*value == ':') {
556 			/* skip the ":" */
557 			value++;
558 			psaddr->length = atosz(value, &endptr);
559 		}
560 
561 		if (*endptr != '=') {
562 			(void) fprintf(stderr,
563 			    gettext("%s: invalid option %s\n"),
564 			    progname, value);
565 			/*
566 			 * if improperly formatted, free mem, print usage, and
567 			 * exit Note: usage ends with a call to exit()
568 			 */
569 			free(psaddr);
570 			usage();
571 		}
572 		/* skip the "=" */
573 		value = endptr + 1;
574 		at_map |= (1 << AT_SEG);
575 		psaddr->adv =
576 		    get_advice(value);
577 	}
578 
579 	return (psaddr);
580 }
581 
582 /*
583  * Create labels for non-anon, non-heap mappings
584  */
585 static char *
586 make_name(struct ps_prochandle *Pr, uintptr_t addr, const char *mapname,
587 	char *buf, size_t bufsz)
588 {
589 	const pstatus_t *Psp = Pstatus(Pr);
590 	char fname[100];
591 	struct stat statb;
592 	int len;
593 
594 	if (strcmp(mapname, "a.out") == 0 &&
595 	    Pexecname(Pr, buf, bufsz) != NULL)
596 		return (buf);
597 
598 	if (Pobjname(Pr, addr, buf, bufsz) != NULL) {
599 		if ((len = resolvepath(buf, buf, bufsz)) > 0) {
600 			buf[len] = '\0';
601 			return (buf);
602 		}
603 	}
604 
605 	if (*mapname != '\0') {
606 		(void) snprintf(fname, sizeof (fname), "/proc/%d/object/%s",
607 			(int)Psp->pr_pid, mapname);
608 		if (stat(fname, &statb) == 0) {
609 			dev_t dev = statb.st_dev;
610 			ino_t ino = statb.st_ino;
611 			(void) snprintf(buf, bufsz, "dev:%lu,%lu ino:%lu",
612 				(ulong_t)major(dev), (ulong_t)minor(dev), ino);
613 			return (buf);
614 		}
615 	}
616 
617 	return (NULL);
618 }
619 
620 /*
621  * Create label for anon mappings
622  */
623 static char *
624 anon_name(char *name, const pstatus_t *Psp,
625     uintptr_t vaddr, size_t size, int mflags, int shmid, int *mtypes)
626 {
627 	if (mflags & MA_ISM) {
628 		if (shmid == -1)
629 			(void) snprintf(name, PATH_MAX, "  [ %s shmid=null ]",
630 			    (mflags & MA_NORESERVE) ? "ism" : "dism");
631 		else
632 			(void) snprintf(name, PATH_MAX, "  [ %s shmid=0x%x ]",
633 			    (mflags & MA_NORESERVE) ? "ism" : "dism", shmid);
634 		*mtypes |= (1 << AT_SHARED);
635 	} else if (mflags & MA_SHM) {
636 		if (shmid == -1)
637 			(void) sprintf(name, "  [ shmid=null ]");
638 		else
639 			(void) sprintf(name, "  [ shmid=0x%x ]", shmid);
640 		*mtypes |= (1 << AT_SHARED);
641 
642 	} else if (vaddr + size > Psp->pr_stkbase &&
643 	    vaddr < Psp->pr_stkbase + Psp->pr_stksize) {
644 		(void) strcpy(name, "  [ stack ]");
645 		*mtypes |= (1 << AT_STACK);
646 
647 	} else if ((mflags & MA_ANON) &&
648 	    vaddr + size > Psp->pr_brkbase &&
649 	    vaddr < Psp->pr_brkbase + Psp->pr_brksize) {
650 		(void) strcpy(name, "  [ heap ]");
651 		*mtypes |= (1 << AT_HEAP);
652 
653 	} else {
654 		lwpstack_t key, *stk;
655 
656 		key.lwps_stack.ss_sp = (void *)vaddr;
657 		key.lwps_stack.ss_size = size;
658 		if (nstacks > 0 &&
659 		    (stk = bsearch(&key, stacks, nstacks, sizeof (stacks[0]),
660 		    cmpstacks)) != NULL) {
661 			(void) snprintf(name, PATH_MAX, "  [ %s tid=%d ]",
662 			    (stk->lwps_stack.ss_flags & SS_ONSTACK) ?
663 			    "altstack" : "stack",
664 			    stk->lwps_lwpid);
665 			*mtypes |= (1 << AT_STACK);
666 		} else {
667 			(void) strcpy(name, "  [ anon ]");
668 			*mtypes |= (1 << AT_PRIVM);
669 		}
670 	}
671 
672 	return (name);
673 }
674 
675 /*
676  * Create linked list of mappings for current process
677  * In addition, add generic advice and raw advice
678  * entries to merged_list.
679  */
680 /* ARGSUSED */
681 static int
682 create_maplist(void *arg, const prmap_t *pmp, const char *object_name)
683 {
684 	const 		pstatus_t *Psp = Pstatus(Pr);
685 	mapnode_t *newmap = malloc(sizeof (mapnode_t));
686 	saddr_t	*newaddr;
687 	saddr_t	*psaddr;
688 	char	*lname = NULL;
689 	int	i;
690 
691 	if (interrupt)
692 		return (0);
693 
694 	newmap->pmp = malloc(sizeof (prmap_t));
695 	newmap->label[0] = '\0';
696 	newmap->mtypes = 0;
697 	newmap->next = NULL;
698 	(void) memcpy(newmap->pmp, pmp, sizeof (prmap_t));
699 
700 	/*
701 	 * If the mapping is not anon or not part of the heap, make a name
702 	 * for it.  We don't want to report the heap as a.out's data.
703 	 */
704 	if (!(pmp->pr_mflags & MA_ANON) ||
705 	    (pmp->pr_vaddr + pmp->pr_size <= Psp->pr_brkbase ||
706 	    pmp->pr_vaddr >= Psp->pr_brkbase + Psp->pr_brksize)) {
707 		lname = make_name(Pr, pmp->pr_vaddr, pmp->pr_mapname,
708 		    newmap->label, sizeof (newmap->label));
709 		if (pmp->pr_mflags & MA_SHARED)
710 			newmap->mtypes |= 1 << AT_SHARED;
711 		else
712 			newmap->mtypes |= 1 << AT_PRIVM;
713 	}
714 
715 	if (lname == NULL && (pmp->pr_mflags & MA_ANON)) {
716 		lname = anon_name(newmap->label, Psp, pmp->pr_vaddr,
717 		    pmp->pr_size, pmp->pr_mflags, pmp->pr_shmid,
718 		    &newmap->mtypes);
719 	}
720 
721 	/*
722 	 * Add raw advice that applies to this mapping to the merged_list
723 	 */
724 	psaddr = rawadv_list;
725 	/*
726 	 * Advance to point in rawadv_list that applies to this mapping
727 	 */
728 	while (psaddr && psaddr->addr < pmp->pr_vaddr)
729 		psaddr = psaddr->next;
730 	/*
731 	 * Copy over to merged_list, check to see if size needs to be filled in
732 	 */
733 	while (psaddr && psaddr->addr < (pmp->pr_vaddr + pmp->pr_size)) {
734 		newaddr = malloc(sizeof (saddr_t));
735 		(void) memcpy(newaddr, psaddr, sizeof (saddr_t));
736 		insert_addr(&merged_list, newaddr, YESDUPS);
737 		/*
738 		 * For raw advice that is given without size, try to default
739 		 * size to size of mapping (only allowed if raw adv addr is
740 		 * equal to beginning of mapping). Don't change the entry
741 		 * in rawadv_list, only in the merged_list as the mappings
742 		 * (and thus the default sizes) will be different for
743 		 * different processes.
744 		 */
745 		if ((pmp->pr_vaddr == psaddr->addr) && (psaddr->length == 0))
746 			newaddr->length = pmp->pr_size;
747 		psaddr = psaddr->next;
748 	}
749 
750 	/*
751 	 * Put mapping into merged list with no advice, then
752 	 * check to see if any generic advice applies.
753 	 */
754 	newaddr = malloc(sizeof (saddr_t));
755 	newaddr->addr = pmp->pr_vaddr;
756 	newaddr->length = pmp->pr_size;
757 	newaddr->adv = NO_ADVICE;
758 	insert_addr(&merged_list, newaddr, YESDUPS);
759 
760 	newmap->mtypes &= at_map;
761 	for (i = AT_STACK; i >= AT_PRIVM; i--) {
762 		if (newmap->mtypes & (1 << i)) {
763 			assert(generic_adv[i] != NO_ADVICE);
764 			newaddr->adv = generic_adv[i];
765 			break;
766 		}
767 	}
768 
769 	/*
770 	 * Add to linked list of mappings
771 	 */
772 	if (maplist_tail == NULL) {
773 		maplist_head = maplist_tail = newmap;
774 	} else {
775 		maplist_tail->next = newmap;
776 		maplist_tail = newmap;
777 	}
778 
779 
780 	return (0);
781 }
782 
783 /*
784  * Traverse advice list and apply all applicable advice to each region
785  */
786 static int
787 apply_advice(saddr_t **advicelist)
788 {
789 	saddr_t	*psaddr = *advicelist;
790 	saddr_t	*next;
791 	int	i;
792 
793 
794 	while (!interrupt && psaddr != NULL) {
795 		/*
796 		 * Save next pointer since element may be removed before
797 		 * we get a chance to advance psaddr.
798 		 */
799 		next = psaddr->next;
800 
801 		/*
802 		 * Since mappings have been added to the merged list
803 		 * even if no generic advice was given for the map,
804 		 * check to make sure advice exists before bothering
805 		 * with the for loop.
806 		 */
807 		if (psaddr->adv != NO_ADVICE) {
808 			for (i = MADV_NORMAL; i <= MADV_ACCESS_MANY; i++) {
809 				if ((psaddr->adv & (1 << i)) &&
810 				    (pr_madvise(Pr, (caddr_t)psaddr->addr,
811 				    psaddr->length, i) < 0)) {
812 					/*
813 					 * madvise(3C) call failed trying to
814 					 * apply advice output error and remove
815 					 * from advice list
816 					 */
817 					(void) fprintf(stderr,
818 					    gettext("Error applying "
819 						"advice (%s) to memory range "
820 						"[%lx, %lx):\n"),
821 					    advicestr[i], (ulong_t)psaddr->addr,
822 					    (ulong_t)psaddr->addr +
823 					    psaddr->length);
824 					perror("madvise");
825 					/*
826 					 * Clear this advice from the advice
827 					 * mask. If no more advice is given
828 					 * for this element, remove element
829 					 * from list.
830 					 */
831 					psaddr->adv &= ~(1 << i);
832 					if (psaddr->adv == 0) {
833 						delete_addr(advicelist, psaddr);
834 						break;
835 					}
836 				}
837 			}
838 		}
839 		psaddr = next;
840 	}
841 	return (0);
842 }
843 
/*
 * Set advice but keep mutual exclusive property of advice groupings
 *
 * combined_adv points at the advice mask to update; new_adv is the advice
 * mask to merge into it.  For every group (GRP1_ADV, GRP2_ADV, GRP3_ADV)
 * that new_adv has a bit in, the previous choice for that group is dropped
 * from *combined_adv before new_adv is ORed in.  Groups that new_adv says
 * nothing about are left alone; in particular merging NO_ADVICE changes
 * nothing except turning the "invalid" marker (-1) into an empty mask.
 */
static void
set_advice(int *combined_adv, int new_adv)
{
	/*
	 * If this is the first advice to be applied, clear invalid value (-1)
	 */
	if (*combined_adv == -1)
		*combined_adv = 0;

	/*
	 * The groups are tested independently of each other: new_adv may
	 * carry advice for more than one group, or for none at all.
	 */
	if (new_adv & GRP1_ADV)
		*combined_adv &= ~GRP1_ADV;
	if (new_adv & GRP2_ADV)
		*combined_adv &= ~GRP2_ADV;
	if (new_adv & GRP3_ADV)
		*combined_adv &= ~GRP3_ADV;

	*combined_adv |= new_adv;
}
869 
870 /*
871  * Create chopped list from merged list for use with verbose output
872  */
873 static void
874 create_choplist(saddr_t **choppedlist, saddr_t *mergedlist)
875 {
876 	saddr_t	*mlptr, *clptr;
877 
878 	for (mlptr = mergedlist; mlptr != NULL; mlptr = mlptr->next) {
879 		clptr = malloc(sizeof (saddr_t));
880 		clptr->addr = mlptr->addr;
881 		clptr->length = 0;
882 		/*
883 		 * Initialize the adv to -1 as an indicator for invalid
884 		 * elements in the chopped list (created from gaps between
885 		 * memory maps).
886 		 */
887 		clptr->adv = -1;
888 		clptr->next = NULL;
889 		insert_addr(choppedlist, clptr, NODUPS);
890 
891 		clptr = malloc(sizeof (saddr_t));
892 		clptr->addr = mlptr->addr + mlptr->length;
893 		clptr->length = 0;
894 		/*
895 		 * Again, initialize to -1 as an indicatorfor invalid elements
896 		 */
897 		clptr->adv = -1;
898 		clptr->next = NULL;
899 		insert_addr(choppedlist, clptr, NODUPS);
900 	}
901 
902 	for (clptr = *choppedlist; clptr != NULL; clptr = clptr->next) {
903 		if (clptr->next)
904 			clptr->length = clptr->next->addr - clptr->addr;
905 		else {
906 			/*
907 			 * must be last element, now that we've calculated
908 			 * all segment lengths, we can remove this node
909 			 */
910 			delete_addr(choppedlist, clptr);
911 		}
912 	}
913 
914 	for (mlptr = mergedlist; mlptr != NULL; mlptr = mlptr->next) {
915 		for (clptr = *choppedlist; clptr != NULL; clptr = clptr->next) {
916 			if (mlptr->addr <= clptr->addr &&
917 			    mlptr->addr + mlptr->length >=
918 			    clptr->addr + clptr->length)
919 				/*
920 				 * set_advice() will take care of conflicting
921 				 * advice by taking only the last advice
922 				 * applied for each of the 3 groups of advice.
923 				 */
924 				set_advice(&clptr->adv, mlptr->adv);
925 			if (mlptr->addr + mlptr->length <
926 			    clptr->addr)
927 				break;
928 		}
929 	}
930 }
931 
932 /*
933  * Print advice in pmap style for verbose output
934  */
935 static void
936 print_advice(saddr_t *advlist, mapnode_t *maplist)
937 {
938 	saddr_t		*psaddr = advlist;
939 	mapnode_t	*pmapnode;
940 	char		*advice;
941 
942 	pmapnode = maplist;
943 
944 	while (psaddr) {
945 		/*
946 		 * Using indicator flag from create_choppedlist, we know
947 		 * which entries in the chopped_list are gaps and should
948 		 * not be printed.
949 		 */
950 		if (psaddr->adv == -1) {
951 			psaddr = psaddr->next;
952 			continue;
953 		}
954 
955 		while (pmapnode && (pmapnode->pmp->pr_vaddr +
956 		    pmapnode->pmp->pr_size <= psaddr->addr))
957 			pmapnode = pmapnode->next;
958 
959 		advice = advtostr(psaddr->adv);
960 
961 		/*
962 		 * Print segment mapping and advice if there is any, or just a
963 		 * segment mapping.
964 		 */
965 		if (strlen(advice) > 0) {
966 			(void) printf("%.*lX %*uK %6s %s\t%s\n",
967 			    addr_width, (ulong_t)psaddr->addr, size_width - 1,
968 			    (int)ROUNDUP_KB(psaddr->length),
969 			    mflags(pmapnode->pmp->pr_mflags), pmapnode->label,
970 			    advice);
971 		} else {
972 			(void) printf("%.*lX %*uK %6s %s\n",
973 			    addr_width, (ulong_t)psaddr->addr, size_width - 1,
974 			    (int)ROUNDUP_KB(psaddr->length),
975 			    mflags(pmapnode->pmp->pr_mflags), pmapnode->label);
976 		}
977 		psaddr = psaddr->next;
978 
979 	}
980 }
981 
982 /*
983  * Call madvise(3c) in the context of the target process
984  */
985 static int
986 pr_madvise(struct ps_prochandle *Pr, caddr_t addr, size_t len, int advice)
987 {
988 	return (pr_memcntl(Pr, addr, len, MC_ADVISE,
989 		    (caddr_t)(uintptr_t)advice, 0, 0));
990 }
991 
992 static char *
993 mflags(uint_t arg)
994 {
995 	static char code_buf[80];
996 
997 	/*
998 	 * rwxsR
999 	 *
1000 	 * r - segment is readable
1001 	 * w - segment is writable
1002 	 * x - segment is executable
1003 	 * s - segment is shared
1004 	 * R - segment is mapped MAP_NORESERVE
1005 	 *
1006 	 */
1007 	(void) snprintf(code_buf, sizeof (code_buf), "%c%c%c%c%c ",
1008 	    arg & MA_READ ? 'r' : '-',
1009 	    arg & MA_WRITE ? 'w' : '-',
1010 	    arg & MA_EXEC ? 'x' : '-',
1011 	    arg & MA_SHARED ? 's' : '-',
1012 	    arg & MA_NORESERVE ? 'R' : '-');
1013 
1014 	return (code_buf);
1015 }
1016 
1017 /*
1018  * Convert advice to a string containing a commented list of applicable advice
1019  */
1020 static char *
1021 advtostr(int adv)
1022 {
1023 	static char buf[50];
1024 	int i;
1025 
1026 	*buf = '\0';
1027 
1028 	if (adv != NO_ADVICE) {
1029 		for (i = MADV_NORMAL; i <= MADV_ACCESS_MANY; i++) {
1030 			if (adv & (1 << i)) {
1031 				/*
1032 				 * check if it's the first advice entry
1033 				 */
1034 				if (*buf == '\0')
1035 					(void) snprintf(buf, sizeof (buf) - 1,
1036 					    "<= %s", advicestr[i]);
1037 				else
1038 					(void) snprintf(buf, sizeof (buf) - 1,
1039 					    "%s,%s", buf, advicestr[i]);
1040 			}
1041 		}
1042 	}
1043 
1044 	return (buf);
1045 }
1046 
1047 /*
1048  * Handler for catching signals from terminal
1049  */
1050 /* ARGSUSED */
1051 static void
1052 intr(int sig)
1053 {
1054 	interrupt++;
1055 }
1056 
1057 int
1058 main(int argc, char **argv)
1059 {
1060 	int Fflag = 0;
1061 	int rc = 0;
1062 	int opt, subopt;
1063 	int tmpadv;
1064 	char	*options, *value;
1065 	saddr_t	*psaddr;
1066 	mapnode_t *pmapnode, *tempmapnode;
1067 
1068 	(void) setlocale(LC_ALL, "");
1069 	(void) textdomain(TEXT_DOMAIN);
1070 
1071 	/*
1072 	 * Get name of program for error messages
1073 	 */
1074 	progname = basename(argv[0]);
1075 
1076 	/*
1077 	 * Not much to do when only name of program given
1078 	 */
1079 	if (argc == 1)
1080 		usage();
1081 
1082 	/*
1083 	 * Catch signals from terminal, so they can be handled asynchronously
1084 	 * when we're ready instead of when we're not (;-)
1085 	 */
1086 	if (sigset(SIGHUP, SIG_IGN) == SIG_DFL)
1087 		(void) sigset(SIGHUP, intr);
1088 	if (sigset(SIGINT, SIG_IGN) == SIG_DFL)
1089 		(void) sigset(SIGINT, intr);
1090 	if (sigset(SIGQUIT, SIG_IGN) == SIG_DFL)
1091 		(void) sigset(SIGQUIT, intr);
1092 	(void) sigset(SIGPIPE, intr);
1093 	(void) sigset(SIGTERM, intr);
1094 
1095 	/*
1096 	 * Parse options, record generic advice if any and create
1097 	 * rawadv_list from specific address advice.
1098 	 */
1099 
1100 	while ((opt = getopt(argc, argv, "Fo:v")) != EOF) {
1101 		switch (opt) {
1102 		case 'o':
1103 			options = optarg;
1104 			while (*options != '\0') {
1105 				subopt = getsubopt(&options, suboptstr,
1106 				    &value);
1107 				switch (subopt) {
1108 				case AT_PRIVM:
1109 				case AT_HEAP:
1110 				case AT_SHARED:
1111 				case AT_STACK:
1112 					at_map |= (1 << subopt);
1113 					tmpadv = get_advice(value);
1114 					set_advice(&generic_adv[subopt],
1115 					    tmpadv);
1116 					break;
1117 				default:
1118 					at_map |= (1 << AT_SEG);
1119 					psaddr = parse_suboptions(value);
1120 					if (psaddr == NULL) {
1121 						usage();
1122 					} else {
1123 						insert_addr(&rawadv_list,
1124 						    psaddr, YESDUPS);
1125 					}
1126 					break;
1127 				}
1128 			}
1129 			break;
1130 		case 'v':
1131 			opt_verbose = 1;
1132 			break;
1133 		case 'F':		/* force grabbing (no O_EXCL) */
1134 			Fflag = PGRAB_FORCE;
1135 			break;
1136 		default:
1137 			usage();
1138 			break;
1139 		}
1140 	}
1141 
1142 	argc -= optind;
1143 	argv += optind;
1144 
1145 	if (argc <= 0) {
1146 		usage();
1147 	}
1148 
1149 	/*
1150 	 * Iterate through all pid arguments, create new merged_list, maplist,
1151 	 * (and chopped_list if using verbose output) based on each process'
1152 	 * memory map.
1153 	 */
1154 
1155 	while (!interrupt && argc-- > 0) {
1156 		char *arg;
1157 		int gcode;
1158 		psinfo_t psinfo;
1159 
1160 		if ((Pr = proc_arg_grab(arg = *argv++, PR_ARG_PIDS,
1161 		    PGRAB_RETAIN | Fflag, &gcode)) == NULL) {
1162 			(void) fprintf(stderr,
1163 			    gettext("%s: cannot examine %s: %s\n"),
1164 			    progname, arg, Pgrab_error(gcode));
1165 			rc++;
1166 			continue;
1167 		}
1168 
1169 
1170 		addr_width =
1171 		    (Pstatus(Pr)->pr_dmodel == PR_MODEL_LP64) ? 16 : 8;
1172 		size_width =
1173 		    (Pstatus(Pr)->pr_dmodel == PR_MODEL_LP64) ? 11 : 8;
1174 		(void) memcpy(&psinfo, Ppsinfo(Pr), sizeof (psinfo_t));
1175 
1176 		if (opt_verbose) {
1177 			proc_unctrl_psinfo(&psinfo);
1178 			(void) printf("%d:\t%.70s\n",
1179 			    (int)psinfo.pr_pid, psinfo.pr_psargs);
1180 		}
1181 
1182 		/*
1183 		 * Get mappings for a process unless it is a system process.
1184 		 */
1185 		if (!(Pstatus(Pr)->pr_flags & PR_ISSYS)) {
1186 			nstacks = psinfo.pr_nlwp * 2;
1187 			stacks = calloc(nstacks, sizeof (stacks[0]));
1188 			if (stacks != NULL) {
1189 				int n = 0;
1190 				(void) Plwp_iter(Pr, getstack, &n);
1191 				qsort(stacks, nstacks, sizeof (stacks[0]),
1192 				    cmpstacks);
1193 			}
1194 
1195 			if (Pgetauxval(Pr, AT_BASE) != -1L &&
1196 			    Prd_agent(Pr) == NULL) {
1197 				(void) fprintf(stderr,
1198 				    gettext("%s: warning: "
1199 					"librtld_db failed to initialize; "
1200 					"shared library information will not "
1201 					"be available\n"),
1202 				    progname);
1203 			}
1204 
1205 			/*
1206 			 * Create linked list of mappings for current process
1207 			 * In addition, add generic advice and raw advice
1208 			 * entries to merged_list.
1209 			 * e.g. if rawadv_list contains:
1210 			 *   [0x38000,0x3a000) = adv1
1211 			 *   [0x3a000,0x3c000) = adv2
1212 			 * and there is generic advice:
1213 			 *   heap = adv3
1214 			 * where heap corresponds to 0x38000, then merged_list
1215 			 * will contain:
1216 			 *   ... (include all other mappings from process)
1217 			 *   [0x38000,0x3c000) = adv3
1218 			 *   [0x38000,0x3a000) = adv1
1219 			 *   [0x3a000,0x3c000) = adv2
1220 			 *   ... (include all other mappings from process)
1221 			 */
1222 			assert(merged_list == NULL);
1223 			maplist_head = maplist_tail = NULL;
1224 			rc += Pmapping_iter(Pr, (proc_map_f *)create_maplist,
1225 			    NULL);
1226 
1227 			/*
1228 			 * Apply advice by iterating through merged list
1229 			 */
1230 			(void) apply_advice(&merged_list);
1231 
1232 			if (opt_verbose) {
1233 				assert(chopped_list == NULL);
1234 				/*
1235 				 * Create chopped_list from merged_list
1236 				 */
1237 				create_choplist(&chopped_list, merged_list);
1238 
1239 				/*
1240 				 * Iterate through maplist and output as
1241 				 * given by chopped_list
1242 				 */
1243 				print_advice(chopped_list, maplist_head);
1244 				delete_list(&chopped_list);
1245 			}
1246 
1247 			delete_list(&merged_list);
1248 
1249 			/*
1250 			 * Clear maplist
1251 			 */
1252 			pmapnode = maplist_head;
1253 			while (pmapnode) {
1254 				tempmapnode = pmapnode;
1255 				pmapnode = pmapnode->next;
1256 				free(tempmapnode);
1257 			}
1258 
1259 			if (stacks != NULL) {
1260 				free(stacks);
1261 				stacks = NULL;
1262 			}
1263 		}
1264 
1265 		Prelease(Pr, 0);
1266 	}
1267 
1268 	return (rc);
1269 }
1270