xref: /illumos-gate/usr/src/cmd/ppgsz/ppgsz.c (revision b210e77709da8e42dfe621e10ccf4be504206058)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2001-2003 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <unistd.h>
30 #include <fcntl.h>
31 #include <ctype.h>
32 #include <string.h>
33 #include <signal.h>
34 #include <errno.h>
35 #include <dirent.h>
36 #include <limits.h>
37 #include <sys/types.h>
38 #include <sys/stat.h>
39 #include <sys/mman.h>
40 #include <sys/wait.h>
41 #include <libproc.h>
42 #include <sys/sysmacros.h>
43 #include <libgen.h>
44 #include <thread.h>
45 
46 #ifndef TRUE
47 #define	TRUE	1
48 #endif
49 #ifndef FALSE
50 #define	FALSE	0
51 #endif
52 
53 static struct	ps_prochandle *Pr;
54 static char	*command;
55 static volatile int interrupt;
56 static int	Fflag;
57 static int	cflag = 1;
58 
59 static void	intr(int);
60 static int	setpgsz(struct ps_prochandle *, int, size_t *);
61 static int	setpgsz_anon(struct ps_prochandle *, size_t, int);
62 static caddr_t	setup_mha(uint_t, size_t, int);
63 static size_t	discover_optimal_pagesize(struct ps_prochandle *,
64 		uint_t, pid_t);
65 static void	usage();
66 
67 #define	INVPGSZ		3
68 
/*
 * Sub-options accepted by -o.  getsubopt() returns the index of the matching
 * entry in suboptstr[], so the enumerators below must stay in step with the
 * order of the strings in that table.
 */
enum	suboptenum {
	E_HEAP = 0,
	E_STACK = 1,
	E_ANON = 2
};

static char	*suboptstr[] = {
	"heap",		/* E_HEAP */
	"stack",	/* E_STACK */
	"anon",		/* E_ANON */
	NULL		/* terminator required by getsubopt() */
};
83 
84 static size_t
85 atosz(char *optarg)
86 {
87 	size_t		sz = 0;
88 	char		*endptr;
89 
90 	if (optarg == NULL || optarg[0] == '\0')
91 		return (INVPGSZ);
92 
93 	sz = strtoll(optarg, &endptr, 0);
94 
95 	switch (*endptr) {
96 	case 'T':
97 	case 't':
98 		sz *= 1024;
99 	/*FALLTHRU*/
100 	case 'G':
101 	case 'g':
102 		sz *= 1024;
103 	/*FALLTHRU*/
104 	case 'M':
105 	case 'm':
106 		sz *= 1024;
107 	/*FALLTHRU*/
108 	case 'K':
109 	case 'k':
110 		sz *= 1024;
111 	/*FALLTHRU*/
112 	case 'B':
113 	case 'b':
114 	default:
115 		break;
116 	}
117 	return (sz);
118 }
119 
120 /* pgsz array sufficient for max page sizes */
121 
122 static size_t	pgsza[8 * sizeof (void *)];
123 static int	nelem;
124 
125 static void
126 getpgsz()
127 {
128 	if ((nelem = getpagesizes(NULL, 0)) == 0) {
129 		(void) fprintf(stderr, "%s: cannot determine system page"
130 		    " sizes\n", command);
131 		exit(125);
132 	}
133 
134 	(void) getpagesizes(pgsza, nelem);
135 }
136 
137 static size_t
138 cnvpgsz(char *optarg)
139 {
140 	size_t		pgsz = atosz(optarg);
141 	int		i;
142 
143 	if (!ISP2(pgsz) || ((pgsz < pgsza[0]) && pgsz != 0)) {
144 		pgsz = INVPGSZ;
145 	} else {
146 		for (i = nelem - 1; i >= 0; i--) {
147 			if (pgsz == pgsza[i])
148 				break;
149 			if (pgsz > pgsza[i]) {
150 				pgsz = INVPGSZ;
151 				break;
152 			}
153 		}
154 	}
155 	if (pgsz == INVPGSZ) {
156 		if (optarg != NULL) {
157 			(void) fprintf(stderr,
158 			    "%s: invalid page size specified (%s)\n",
159 			    command, optarg);
160 		} else {
161 			usage();
162 		}
163 		exit(125);
164 	}
165 	return (pgsz);
166 }
167 
168 static void
169 usage()
170 {
171 	(void) fprintf(stderr,
172 	    "usage:\t%s -o option[,option] [-F] cmd | -p pid ...\n"
173 	    "    (set preferred page size of cmd or each process)\n"
174 	    "    -o option[,option]: options are\n"
175 	    "         stack=sz\n"
176 	    "         heap=sz\n"
177 	    "         anon=sz		(sz: valid page size or 0 (zero))\n"
178 	    "    -F: force grabbing of the target process(es)\n"
179 	    "    cmd: launch command\n"
180 	    "    -p pid ...: process id list\n",
181 	    command);
182 	exit(125);
183 }
184 
185 int
186 main(int argc, char *argv[])
187 {
188 	int		rc, err = 0;
189 	int		opt, subopt;
190 	int		errflg = 0;
191 	char		*options, *value;
192 	size_t		pgsz[] = {INVPGSZ, INVPGSZ, INVPGSZ};
193 	pid_t		pid;
194 	int		status;
195 
196 	if ((command = strrchr(argv[0], '/')) != NULL)
197 		command++;
198 	else
199 		command = argv[0];
200 
201 	getpgsz();
202 
203 	/* options */
204 	while ((opt = getopt(argc, argv, "o:Fp")) != EOF) {
205 		switch (opt) {
206 		case 'o':		/* options */
207 			options = optarg;
208 			while (*options != '\0') {
209 				subopt = getsubopt(&options, suboptstr, &value);
210 				switch (subopt) {
211 				case E_HEAP:
212 				case E_STACK:
213 				case E_ANON:
214 					pgsz[subopt] = cnvpgsz(value);
215 					break;
216 				default:
217 					errflg = 1;
218 					break;
219 				}
220 			}
221 			break;
222 		case 'F':		/* force grabbing (no O_EXCL) */
223 			Fflag = PGRAB_FORCE;
224 			break;
225 		case 'p':
226 			cflag = 0;
227 			break;
228 		default:
229 			errflg = 1;
230 			break;
231 		}
232 	}
233 
234 	argc -= optind;
235 	argv += optind;
236 
237 	if ((pgsz[E_HEAP] == INVPGSZ && pgsz[E_STACK] == INVPGSZ &&
238 	    pgsz[E_ANON] == INVPGSZ) || errflg || argc <= 0) {
239 		usage();
240 	}
241 
242 	/* catch signals from terminal */
243 	if (sigset(SIGHUP, SIG_IGN) == SIG_DFL)
244 		(void) sigset(SIGHUP, intr);
245 	if (sigset(SIGINT, SIG_IGN) == SIG_DFL)
246 		(void) sigset(SIGINT, intr);
247 	if (sigset(SIGQUIT, SIG_IGN) == SIG_DFL)
248 		(void) sigset(SIGQUIT, intr);
249 	(void) sigset(SIGTERM, intr);
250 
251 	if (cflag && !interrupt) {		/* command */
252 		int		err;
253 		char		path[PATH_MAX];
254 
255 		Pr = Pcreate(argv[0], &argv[0], &err, path, sizeof (path));
256 		if (Pr == NULL) {
257 			switch (err) {
258 			case C_PERM:
259 				(void) fprintf(stderr,
260 				    "%s: cannot control set-id or "
261 				    "unreadable object file: %s\n",
262 				    command, path);
263 				break;
264 			case C_LP64:
265 				(void) fprintf(stderr,
266 				    "%s: cannot control _LP64 "
267 				    "program: %s\n", command, path);
268 				break;
269 			case C_NOEXEC:
270 				(void) fprintf(stderr, "%s: cannot execute "
271 				    "program: %s\n", command, argv[0]);
272 				exit(126);
273 				break;
274 			case C_NOENT:
275 				(void) fprintf(stderr, "%s: cannot find "
276 				    "program: %s\n", command, argv[0]);
277 				exit(127);
278 				break;
279 			case C_STRANGE:
280 				break;
281 			default:
282 				(void) fprintf(stderr,
283 				    "%s: %s\n", command, Pcreate_error(err));
284 				break;
285 			}
286 			exit(125);
287 		}
288 
289 		if ((rc = setpgsz(Pr, Pstatus(Pr)->pr_dmodel, pgsz)) != 0) {
290 			(void) fprintf(stderr, "%s: set page size "
291 			    "failed for program: %s\n", command, argv[0]);
292 			(void) pr_exit(Pr, 1);
293 			exit(125);
294 		}
295 
296 		/*
297 		 * release the command to run, wait for it and
298 		 * return it's exit status if we can.
299 		 */
300 		Prelease(Pr, 0);
301 		do {
302 			pid = wait(&status);
303 		} while (pid == -1 && errno == EINTR);
304 
305 		if (pid == -1) {
306 			(void) fprintf(stderr, "%s: wait() error: %s\n",
307 			    command, strerror(errno));
308 			exit(125);
309 		}
310 
311 		/*
312 		 * Pass thru the child's exit value.
313 		 */
314 		if (WIFEXITED(status))
315 			exit(WEXITSTATUS(status));
316 		exit(status | WCOREFLG);
317 	}
318 
319 	/* process pids */
320 
321 	while (--argc >= 0 && !interrupt) {
322 		char *arg;
323 		psinfo_t psinfo;
324 		int gret;
325 
326 		(void) fflush(stdout);	/* line-at-a-time */
327 
328 		/* get the specified pid and the psinfo struct */
329 		arg = *argv++;
330 		pid = proc_arg_psinfo(arg, PR_ARG_PIDS, &psinfo, &gret);
331 
332 		if (pid == -1) {
333 			(void) fprintf(stderr, "%s: cannot examine pid %s:"
334 			    " %s\n", command, arg, Pgrab_error(gret));
335 			if (!isdigit(arg[0]) && strncmp(arg, "/proc/", 6)) {
336 				(void) fprintf(stderr,
337 				    "\tdo not use -p option"
338 				    " to launch a command\n");
339 			}
340 			err++;
341 		} else if ((Pr = Pgrab(pid, Fflag, &gret)) != NULL) {
342 			rc = setpgsz(Pr, Pstatus(Pr)->pr_dmodel, pgsz);
343 			if (rc != 0) {
344 				(void) fprintf(stderr, "%s: set page size "
345 				    "failed for pid: %d\n", command, (int)pid);
346 				err++;
347 			}
348 			Prelease(Pr, 0);
349 			Pr = NULL;
350 		} else {
351 			switch (gret) {
352 			case G_SYS:
353 				proc_unctrl_psinfo(&psinfo);
354 				(void) fprintf(stderr, "%s: cannot set page "
355 				    "size for system process: %d [ %s ]\n",
356 				    command, (int)pid, psinfo.pr_psargs);
357 				err++;
358 				break;
359 			case G_SELF:
360 				/* do it to own self */
361 				rc = setpgsz(NULL, psinfo.pr_dmodel, pgsz);
362 				if (rc != 0) {
363 					(void) fprintf(stderr, "%s: set page"
364 					    "size failed for self: %d\n",
365 					    command, (int)pid);
366 					err++;
367 				}
368 				break;
369 			default:
370 				(void) fprintf(stderr, "%s: %s: %d\n",
371 				    command, Pgrab_error(gret), (int)pid);
372 				err++;
373 				break;
374 			}
375 		}
376 	}
377 
378 	if (interrupt || err)
379 		exit(125);
380 
381 	return (0);
382 }
383 
384 /* ARGSUSED */
385 static void
386 intr(int sig)
387 {
388 	interrupt = 1;
389 }
390 
391 /* ------ begin specific code ------ */
392 
393 /* set process page size */
394 /*ARGSUSED*/
395 static int
396 setpgsz(struct	ps_prochandle *Pr, int dmodel, size_t pgsz[])
397 {
398 	int			rc;
399 	int			err = 0;
400 	caddr_t			mpss;
401 	int			i;
402 	static uint_t	pgszcmd[] =
403 	{MHA_MAPSIZE_BSSBRK, MHA_MAPSIZE_STACK, MHA_MAPSIZE_VA};
404 
405 	for (i = E_HEAP; i <= E_ANON; i++) {
406 		if (pgsz[i] == INVPGSZ)
407 			continue;
408 
409 		if (i == E_ANON)
410 			rc = setpgsz_anon(Pr, pgsz[i], dmodel);
411 		else {
412 			mpss = setup_mha(pgszcmd[i], pgsz[i], dmodel);
413 			rc = pr_memcntl(Pr, NULL, 0, MC_HAT_ADVISE, mpss, 0, 0);
414 		}
415 
416 		if (rc < 0) {
417 			(void) fprintf(stderr, "%s: warning: set %s page size "
418 			    "failed (%s) for pid %d\n", command, suboptstr[i],
419 			    strerror(errno), (int)Pstatus(Pr)->pr_pid);
420 			err++;
421 		}
422 	}
423 	return (err);
424 }
425 
426 
427 /*
428  * Walk through the process' address space segments.  Set all anonymous
429  * segments to the new page size.
430  */
431 static int
432 setpgsz_anon(struct ps_prochandle *Pr, size_t pgsz, int dmodel)
433 {
434 	caddr_t		mpss;
435 	prmap_t		map;
436 	uintptr_t	addr;
437 	size_t		size;
438 	const psinfo_t	*psinfo;
439 	const pstatus_t	*pstatus;
440 	int		fd;
441 	int		rc;
442 	char		path[PATH_MAX];
443 
444 	/*
445 	 * Setting the page size for anonymous segments on a process before it
446 	 * has run will have no effect, since it has not configured anonymous
447 	 * memory and the page size setting is not "sticky" inside the kernel.
448 	 * Any anonymous memory subsequently mapped will have the default page
449 	 * size.
450 	 */
451 	if (cflag)
452 		return (0);
453 
454 	if ((psinfo = Ppsinfo(Pr)) == NULL)
455 		return (-1);
456 	if ((pstatus = Pstatus(Pr)) == NULL)
457 		return (-1);
458 
459 	if (pgsz == 0)
460 		pgsz = discover_optimal_pagesize(Pr, dmodel, psinfo->pr_pid);
461 
462 	mpss = setup_mha(MHA_MAPSIZE_VA, pgsz, dmodel);
463 
464 	(void) snprintf(path, PATH_MAX, "/proc/%d/map", (int)psinfo->pr_pid);
465 	if ((fd = open(path, O_RDONLY)) < 0)
466 		return (-1);
467 
468 	while (read(fd, &map, sizeof (map)) == sizeof (map)) {
469 		if ((map.pr_mflags & MA_ANON) == 0) {
470 			/* Not anon. */
471 			continue;
472 		} else if (map.pr_mflags & MA_SHARED) {
473 			/* Can't change pagesize for shared mappings. */
474 			continue;
475 		} else if (map.pr_vaddr + map.pr_size >
476 		    pstatus->pr_brkbase &&
477 		    map.pr_vaddr <
478 		    pstatus->pr_brkbase + pstatus->pr_brksize) {
479 			/* Heap. */
480 			continue;
481 		} else if (map.pr_vaddr >= pstatus->pr_stkbase &&
482 		    map.pr_vaddr + map.pr_size <=
483 		    pstatus->pr_stkbase + pstatus->pr_stksize) {
484 			/* Stack. */
485 			continue;
486 		} else if (map.pr_size < pgsz) {
487 			/* Too small. */
488 			continue;
489 		}
490 
491 		/*
492 		 * Find the first address in the segment that is page-aligned.
493 		 */
494 		if (pgsz == 0 || ((map.pr_vaddr % pgsz) == 0))
495 			addr = map.pr_vaddr;
496 		else
497 			addr = map.pr_vaddr + (pgsz - (map.pr_vaddr % pgsz));
498 
499 		/*
500 		 * Calculate how many pages will fit in the segment.
501 		 */
502 		if (pgsz == 0)
503 			size = map.pr_size;
504 		else
505 			size = map.pr_size - (addr % map.pr_vaddr) -
506 			    ((map.pr_vaddr + map.pr_size) % pgsz);
507 
508 		/*
509 		 * If no aligned pages fit in the segment, ignore it.
510 		 */
511 		if (size < pgsz) {
512 			continue;
513 		}
514 
515 		rc = pr_memcntl(Pr, (caddr_t)addr, size,
516 		    MC_HAT_ADVISE, mpss, 0, 0);
517 
518 		/*
519 		 * If an error occurs on any segment, report the error here and
520 		 * then go on to try setting the page size for the remaining
521 		 * segments.
522 		 */
523 		if (rc < 0) {
524 			(void) fprintf(stderr, "%s: warning: set page size "
525 			    "failed (%s) for pid %d for anon segment at "
526 			    "address: %p\n", command, strerror(errno),
527 			    (int)psinfo->pr_pid, (void *)map.pr_vaddr);
528 		}
529 	}
530 
531 	(void) close(fd);
532 	return (0);
533 }
534 
535 /*
536  * Discover the optimal page size for the process.
537  * Do this by creating a 4M segment in the target process, set its pagesize
538  * to 0, and read the map file to discover the page size selected by the system.
539  */
540 static size_t
541 discover_optimal_pagesize(struct ps_prochandle *Pr, uint_t dmodel, pid_t pid)
542 {
543 	size_t			size = 0;
544 	size_t			len = pgsza[nelem - 1];
545 	prxmap_t		xmap;
546 	caddr_t			mha;
547 	void			*addr;
548 	int			fd = -1;
549 	char			path[PATH_MAX];
550 
551 	(void) snprintf(path, PATH_MAX, "/proc/%d/xmap", (int)pid);
552 	if ((fd = open(path, O_RDONLY)) < 0)
553 		return (size);
554 
555 	if ((addr = pr_mmap(Pr, (void *)len, len, PROT_READ | PROT_WRITE,
556 	    MAP_PRIVATE | MAP_ANON | MAP_ALIGN, -1, 0)) == MAP_FAILED) {
557 		goto err;
558 	}
559 
560 	mha = setup_mha(MHA_MAPSIZE_VA, 0, dmodel);
561 	if (pr_memcntl(Pr, addr, len, MC_HAT_ADVISE, mha, 0, 0) < 0) {
562 		goto err;
563 	}
564 
565 	/*
566 	 * Touch a page in the segment so the hat mapping gets created.
567 	 */
568 	(void) Pwrite(Pr, &len, sizeof (len), (uintptr_t)addr);
569 
570 	/*
571 	 * Read through the address map looking for our segment.
572 	 */
573 
574 	while (read(fd, &xmap, sizeof (xmap)) == sizeof (xmap)) {
575 		if (xmap.pr_vaddr == (uintptr_t)addr)
576 			break;
577 	}
578 	if (xmap.pr_vaddr != (uintptr_t)addr)
579 		goto err;
580 
581 	size = xmap.pr_hatpagesize;
582 
583 err:
584 	if (addr != MAP_FAILED) {
585 		if (pr_munmap(Pr, addr, len) == -1) {
586 			(void) fprintf(stderr,
587 			    "%s: couldn't delete segment at %p\n",
588 			    command, addr);
589 		}
590 	}
591 	if (fd != -1)
592 		(void) close(fd);
593 
594 	return (size);
595 }
596 
597 static struct memcntl_mha	gmha;
598 #ifdef _LP64
599 static struct memcntl_mha32	gmha32;
600 #endif
601 
602 static caddr_t
603 /* ARGSUSED */
604 setup_mha(uint_t command, size_t pagesize, int dmodel)
605 {
606 #ifdef _LP64
607 	if (dmodel == PR_MODEL_ILP32) {
608 		gmha32.mha_cmd = command;
609 		gmha32.mha_flags = 0;
610 		gmha32.mha_pagesize = pagesize;
611 		return ((caddr_t)&gmha32);
612 	}
613 #endif
614 	gmha.mha_cmd = command;
615 	gmha.mha_flags = 0;
616 	gmha.mha_pagesize = pagesize;
617 	return ((caddr_t)&gmha);
618 }
619