xref: /illumos-gate/usr/src/cmd/ppgsz/ppgsz.c (revision 440a8a36792bdf9ef51639066aab0b7771ffcab8)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2001-2003 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <unistd.h>
32 #include <fcntl.h>
33 #include <ctype.h>
34 #include <string.h>
35 #include <signal.h>
36 #include <errno.h>
37 #include <dirent.h>
38 #include <limits.h>
39 #include <sys/types.h>
40 #include <sys/stat.h>
41 #include <sys/mman.h>
42 #include <sys/wait.h>
43 #include <libproc.h>
44 #include <sys/sysmacros.h>
45 #include <libgen.h>
46 #include <thread.h>
47 
48 #ifndef TRUE
49 #define	TRUE	1
50 #endif
51 #ifndef FALSE
52 #define	FALSE	0
53 #endif
54 
55 static struct	ps_prochandle *Pr;
56 static char	*command;
57 static volatile int interrupt;
58 static int	Fflag;
59 static int	cflag = 1;
60 
61 static void	intr(int);
62 static int	setpgsz(struct ps_prochandle *, int, size_t *);
63 static int	setpgsz_anon(struct ps_prochandle *, size_t, int);
64 static caddr_t	setup_mha(uint_t, size_t, int);
65 static size_t	discover_optimal_pagesize(struct ps_prochandle *,
66 		uint_t, pid_t);
67 static void	usage();
68 
/*
 * Sentinel meaning "no page size given" or "invalid page size".  The value
 * 3 is not a power of two and is not 0; cnvpgsz() rejects anything that
 * fails its ISP2() test, so the sentinel cannot be mistaken for a size the
 * user is allowed to request.
 */
#define	INVPGSZ		3

/*
 * Sub-option names accepted by -o, in the order expected by getsubopt().
 * The index of each name is the matching suboptenum value.
 */

static char	*suboptstr[] = {
	"heap",
	"stack",
	"anon",
	NULL
};

enum	suboptenum {
	E_HEAP = 0,
	E_STACK = 1,
	E_ANON = 2
};

/*
 * Convert a page-size string to a byte count.
 *
 * The number is parsed by strtoll() with base 0, so decimal, octal (leading
 * 0) and hexadecimal (leading 0x) are accepted.  It may be followed by one
 * scale suffix: B/b (bytes), K/k, M/m, G/g or T/t (successive powers of
 * 1024).  Characters following a recognised suffix are ignored.
 *
 * Returns the size in bytes, or INVPGSZ when the argument is NULL or empty,
 * contains no digits, is negative, does not fit in a size_t (before or
 * after scaling), or is followed by an unrecognised suffix.
 */
static size_t
atosz(char *optarg)
{
	long long	val;
	size_t		sz;
	int		scale;		/* number of times to multiply by 1024 */
	char		*endptr;

	if (optarg == NULL || optarg[0] == '\0')
		return (INVPGSZ);

	errno = 0;
	val = strtoll(optarg, &endptr, 0);

	/*
	 * endptr == optarg means no digits were consumed; without this check
	 * a string such as "abc" would be taken as a request for size 0.
	 */
	if (endptr == optarg || errno == ERANGE || val < 0 ||
	    (unsigned long long)val > (size_t)-1)
		return (INVPGSZ);

	sz = (size_t)val;

	switch (*endptr) {
	case 'T':
	case 't':
		scale = 4;
		break;
	case 'G':
	case 'g':
		scale = 3;
		break;
	case 'M':
	case 'm':
		scale = 2;
		break;
	case 'K':
	case 'k':
		scale = 1;
		break;
	case 'B':
	case 'b':
	case '\0':
		scale = 0;
		break;
	default:
		/* unknown suffix, e.g. "8192x" */
		return (INVPGSZ);
	}

	while (scale-- > 0) {
		/* refuse to wrap around instead of yielding a bogus size */
		if (sz > (size_t)-1 / 1024)
			return (INVPGSZ);
		sz *= 1024;
	}
	return (sz);
}
121 
122 /* pgsz array sufficient for max page sizes */
123 
124 static size_t	pgsza[8 * sizeof (void *)];
125 static int	nelem;
126 
127 static void
128 getpgsz()
129 {
130 	if ((nelem = getpagesizes(NULL, 0)) == 0) {
131 		(void) fprintf(stderr, "%s: cannot determine system page"
132 		    " sizes\n", command);
133 		exit(125);
134 	}
135 
136 	(void) getpagesizes(pgsza, nelem);
137 }
138 
139 static size_t
140 cnvpgsz(char *optarg)
141 {
142 	size_t		pgsz = atosz(optarg);
143 	int		i;
144 
145 	if (!ISP2(pgsz) || ((pgsz < pgsza[0]) && pgsz != 0)) {
146 		pgsz = INVPGSZ;
147 	} else {
148 		for (i = nelem - 1; i >= 0; i--) {
149 			if (pgsz == pgsza[i])
150 				break;
151 			if (pgsz > pgsza[i]) {
152 				pgsz = INVPGSZ;
153 				break;
154 			}
155 		}
156 	}
157 	if (pgsz == INVPGSZ) {
158 		if (optarg != NULL) {
159 			(void) fprintf(stderr,
160 			    "%s: invalid page size specified (%s)\n",
161 			    command, optarg);
162 		} else {
163 			usage();
164 		}
165 		exit(125);
166 	}
167 	return (pgsz);
168 }
169 
170 static void
171 usage()
172 {
173 	(void) fprintf(stderr,
174 	    "usage:\t%s -o option[,option] [-F] cmd | -p pid ...\n"
175 	    "    (set preferred page size of cmd or each process)\n"
176 	    "    -o option[,option]: options are\n"
177 	    "         stack=sz\n"
178 	    "         heap=sz\n"
179 	    "         anon=sz		(sz: valid page size or 0 (zero))\n"
180 	    "    -F: force grabbing of the target process(es)\n"
181 	    "    cmd: launch command\n"
182 	    "    -p pid ...: process id list\n",
183 	    command);
184 	exit(125);
185 }
186 
187 int
188 main(int argc, char *argv[])
189 {
190 	int		rc, err = 0;
191 	int		opt, subopt;
192 	int		errflg = 0;
193 	char		*options, *value;
194 	size_t		pgsz[] = {INVPGSZ, INVPGSZ, INVPGSZ};
195 	pid_t		pid;
196 	int		status;
197 
198 	if ((command = strrchr(argv[0], '/')) != NULL)
199 		command++;
200 	else
201 		command = argv[0];
202 
203 	getpgsz();
204 
205 	/* options */
206 	while ((opt = getopt(argc, argv, "o:Fp")) != EOF) {
207 		switch (opt) {
208 		case 'o':		/* options */
209 			options = optarg;
210 			while (*options != '\0') {
211 				subopt = getsubopt(&options, suboptstr, &value);
212 				switch (subopt) {
213 				case E_HEAP:
214 				case E_STACK:
215 				case E_ANON:
216 					pgsz[subopt] = cnvpgsz(value);
217 					break;
218 				default:
219 					errflg = 1;
220 					break;
221 				}
222 			}
223 			break;
224 		case 'F':		/* force grabbing (no O_EXCL) */
225 			Fflag = PGRAB_FORCE;
226 			break;
227 		case 'p':
228 			cflag = 0;
229 			break;
230 		default:
231 			errflg = 1;
232 			break;
233 		}
234 	}
235 
236 	argc -= optind;
237 	argv += optind;
238 
239 	if ((pgsz[E_HEAP] == INVPGSZ && pgsz[E_STACK] == INVPGSZ &&
240 	    pgsz[E_ANON] == INVPGSZ) || errflg || argc <= 0) {
241 		usage();
242 	}
243 
244 	/* catch signals from terminal */
245 	if (sigset(SIGHUP, SIG_IGN) == SIG_DFL)
246 		(void) sigset(SIGHUP, intr);
247 	if (sigset(SIGINT, SIG_IGN) == SIG_DFL)
248 		(void) sigset(SIGINT, intr);
249 	if (sigset(SIGQUIT, SIG_IGN) == SIG_DFL)
250 		(void) sigset(SIGQUIT, intr);
251 	(void) sigset(SIGTERM, intr);
252 
253 	if (cflag && !interrupt) {		/* command */
254 		int		err;
255 		char		path[PATH_MAX];
256 
257 		Pr = Pcreate(argv[0], &argv[0], &err, path, sizeof (path));
258 		if (Pr == NULL) {
259 			switch (err) {
260 			case C_PERM:
261 				(void) fprintf(stderr,
262 				    "%s: cannot control set-id or "
263 				    "unreadable object file: %s\n",
264 				    command, path);
265 				break;
266 			case C_LP64:
267 				(void) fprintf(stderr,
268 				    "%s: cannot control _LP64 "
269 				    "program: %s\n", command, path);
270 				break;
271 			case C_NOEXEC:
272 				(void) fprintf(stderr, "%s: cannot execute "
273 				    "program: %s\n", command, argv[0]);
274 				exit(126);
275 				break;
276 			case C_NOENT:
277 				(void) fprintf(stderr, "%s: cannot find "
278 				    "program: %s\n", command, argv[0]);
279 				exit(127);
280 				break;
281 			case C_STRANGE:
282 				break;
283 			default:
284 				(void) fprintf(stderr,
285 				    "%s: %s\n", command, Pcreate_error(err));
286 				break;
287 			}
288 			exit(125);
289 		}
290 
291 		if ((rc = setpgsz(Pr, Pstatus(Pr)->pr_dmodel, pgsz)) != 0) {
292 			(void) fprintf(stderr, "%s: set page size "
293 			    "failed for program: %s\n", command, argv[0]);
294 			(void) pr_exit(Pr, 1);
295 			exit(125);
296 		}
297 
298 		/*
299 		 * release the command to run, wait for it and
300 		 * return it's exit status if we can.
301 		 */
302 		Prelease(Pr, 0);
303 		do {
304 			pid = wait(&status);
305 		} while (pid == -1 && errno == EINTR);
306 
307 		if (pid == -1) {
308 			(void) fprintf(stderr, "%s: wait() error: %s\n",
309 			    command, strerror(errno));
310 			exit(125);
311 		}
312 
313 		/*
314 		 * Pass thru the child's exit value.
315 		 */
316 		if (WIFEXITED(status))
317 			exit(WEXITSTATUS(status));
318 		exit(status | WCOREFLG);
319 	}
320 
321 	/* process pids */
322 
323 	while (--argc >= 0 && !interrupt) {
324 		char *arg;
325 		psinfo_t psinfo;
326 		int gret;
327 
328 		(void) fflush(stdout);	/* line-at-a-time */
329 
330 		/* get the specified pid and the psinfo struct */
331 		arg = *argv++;
332 		pid = proc_arg_psinfo(arg, PR_ARG_PIDS, &psinfo, &gret);
333 
334 		if (pid == -1) {
335 			(void) fprintf(stderr, "%s: cannot examine pid %s:"
336 			    " %s\n", command, arg, Pgrab_error(gret));
337 			if (!isdigit(arg[0]) && strncmp(arg, "/proc/", 6)) {
338 				(void) fprintf(stderr,
339 				    "\tdo not use -p option"
340 				    " to launch a command\n");
341 			}
342 			err++;
343 		} else if ((Pr = Pgrab(pid, Fflag, &gret)) != NULL) {
344 			rc = setpgsz(Pr, Pstatus(Pr)->pr_dmodel, pgsz);
345 			if (rc != 0) {
346 				(void) fprintf(stderr, "%s: set page size "
347 				    "failed for pid: %d\n", command, (int)pid);
348 				err++;
349 			}
350 			Prelease(Pr, 0);
351 			Pr = NULL;
352 		} else {
353 			switch (gret) {
354 			case G_SYS:
355 				proc_unctrl_psinfo(&psinfo);
356 				(void) fprintf(stderr, "%s: cannot set page "
357 				    "size for system process: %d [ %s ]\n",
358 				    command, (int)pid, psinfo.pr_psargs);
359 				err++;
360 				break;
361 			case G_SELF:
362 				/* do it to own self */
363 				rc = setpgsz(NULL, psinfo.pr_dmodel, pgsz);
364 				if (rc != 0) {
365 					(void) fprintf(stderr, "%s: set page"
366 					    "size failed for self: %d\n",
367 					    command, (int)pid);
368 					err++;
369 				}
370 				break;
371 			default:
372 				(void) fprintf(stderr, "%s: %s: %d\n",
373 				    command, Pgrab_error(gret), (int)pid);
374 				err++;
375 				break;
376 			}
377 		}
378 	}
379 
380 	if (interrupt || err)
381 		exit(125);
382 
383 	return (0);
384 }
385 
386 /* ARGSUSED */
387 static void
388 intr(int sig)
389 {
390 	interrupt = 1;
391 }
392 
393 /* ------ begin specific code ------ */
394 
395 /* set process page size */
396 /*ARGSUSED*/
397 static int
398 setpgsz(struct	ps_prochandle *Pr, int dmodel, size_t pgsz[])
399 {
400 	int			rc;
401 	int			err = 0;
402 	caddr_t			mpss;
403 	int			i;
404 	static uint_t	pgszcmd[] =
405 	{MHA_MAPSIZE_BSSBRK, MHA_MAPSIZE_STACK, MHA_MAPSIZE_VA};
406 
407 	for (i = E_HEAP; i <= E_ANON; i++) {
408 		if (pgsz[i] == INVPGSZ)
409 			continue;
410 
411 		if (i == E_ANON)
412 			rc = setpgsz_anon(Pr, pgsz[i], dmodel);
413 		else {
414 			mpss = setup_mha(pgszcmd[i], pgsz[i], dmodel);
415 			rc = pr_memcntl(Pr, NULL, 0, MC_HAT_ADVISE, mpss, 0, 0);
416 		}
417 
418 		if (rc < 0) {
419 			(void) fprintf(stderr, "%s: warning: set %s page size "
420 			    "failed (%s) for pid %d\n", command, suboptstr[i],
421 			    strerror(errno), (int)Pstatus(Pr)->pr_pid);
422 			err++;
423 		}
424 	}
425 	return (err);
426 }
427 
428 
429 /*
430  * Walk through the process' address space segments.  Set all anonymous
431  * segments to the new page size.
432  */
433 static int
434 setpgsz_anon(struct ps_prochandle *Pr, size_t pgsz, int dmodel)
435 {
436 	caddr_t		mpss;
437 	prmap_t		map;
438 	uintptr_t	addr;
439 	size_t		size;
440 	const psinfo_t	*psinfo;
441 	const pstatus_t	*pstatus;
442 	int		fd;
443 	int		rc;
444 	char		path[PATH_MAX];
445 
446 	/*
447 	 * Setting the page size for anonymous segments on a process before it
448 	 * has run will have no effect, since it has not configured anonymous
449 	 * memory and the page size setting is not "sticky" inside the kernel.
450 	 * Any anonymous memory subsequently mapped will have the default page
451 	 * size.
452 	 */
453 	if (cflag)
454 		return (0);
455 
456 	if ((psinfo = Ppsinfo(Pr)) == NULL)
457 		return (-1);
458 	if ((pstatus = Pstatus(Pr)) == NULL)
459 		return (-1);
460 
461 	if (pgsz == 0)
462 		pgsz = discover_optimal_pagesize(Pr, dmodel, psinfo->pr_pid);
463 
464 	mpss = setup_mha(MHA_MAPSIZE_VA, pgsz, dmodel);
465 
466 	(void) snprintf(path, PATH_MAX, "/proc/%d/map", (int)psinfo->pr_pid);
467 	if ((fd = open(path, O_RDONLY)) < 0)
468 		return (-1);
469 
470 	while (read(fd, &map, sizeof (map)) == sizeof (map)) {
471 		if ((map.pr_mflags & MA_ANON) == 0) {
472 			/* Not anon. */
473 			continue;
474 		} else if (map.pr_mflags & MA_SHARED) {
475 			/* Can't change pagesize for shared mappings. */
476 			continue;
477 		} else if (map.pr_vaddr + map.pr_size >
478 		    pstatus->pr_brkbase &&
479 		    map.pr_vaddr <
480 		    pstatus->pr_brkbase + pstatus->pr_brksize) {
481 			/* Heap. */
482 			continue;
483 		} else if (map.pr_vaddr >= pstatus->pr_stkbase &&
484 		    map.pr_vaddr + map.pr_size <=
485 		    pstatus->pr_stkbase + pstatus->pr_stksize) {
486 			/* Stack. */
487 			continue;
488 		} else if (map.pr_size < pgsz) {
489 			/* Too small. */
490 			continue;
491 		}
492 
493 		/*
494 		 * Find the first address in the segment that is page-aligned.
495 		 */
496 		if (pgsz == 0 || ((map.pr_vaddr % pgsz) == 0))
497 			addr = map.pr_vaddr;
498 		else
499 			addr = map.pr_vaddr + (pgsz - (map.pr_vaddr % pgsz));
500 
501 		/*
502 		 * Calculate how many pages will fit in the segment.
503 		 */
504 		if (pgsz == 0)
505 			size = map.pr_size;
506 		else
507 			size = map.pr_size - (addr % map.pr_vaddr) -
508 			    ((map.pr_vaddr + map.pr_size) % pgsz);
509 
510 		/*
511 		 * If no aligned pages fit in the segment, ignore it.
512 		 */
513 		if (size < pgsz) {
514 			continue;
515 		}
516 
517 		rc = pr_memcntl(Pr, (caddr_t)addr, size,
518 		    MC_HAT_ADVISE, mpss, 0, 0);
519 
520 		/*
521 		 * If an error occurs on any segment, report the error here and
522 		 * then go on to try setting the page size for the remaining
523 		 * segments.
524 		 */
525 		if (rc < 0) {
526 			(void) fprintf(stderr, "%s: warning: set page size "
527 			    "failed (%s) for pid %d for anon segment at "
528 			    "address: %p\n", command, strerror(errno),
529 			    (int)psinfo->pr_pid, (void *)map.pr_vaddr);
530 		}
531 	}
532 
533 	(void) close(fd);
534 	return (0);
535 }
536 
537 /*
538  * Discover the optimal page size for the process.
539  * Do this by creating a 4M segment in the target process, set its pagesize
540  * to 0, and read the map file to discover the page size selected by the system.
541  */
542 static size_t
543 discover_optimal_pagesize(struct ps_prochandle *Pr, uint_t dmodel, pid_t pid)
544 {
545 	size_t			size = 0;
546 	size_t			len = pgsza[nelem - 1];
547 	prxmap_t		xmap;
548 	caddr_t			mha;
549 	void			*addr;
550 	int			fd = -1;
551 	char			path[PATH_MAX];
552 
553 	(void) snprintf(path, PATH_MAX, "/proc/%d/xmap", (int)pid);
554 	if ((fd = open(path, O_RDONLY)) < 0)
555 		return (size);
556 
557 	if ((addr = pr_mmap(Pr, (void *)len, len, PROT_READ | PROT_WRITE,
558 	    MAP_PRIVATE | MAP_ANON | MAP_ALIGN, -1, 0)) == MAP_FAILED) {
559 		goto err;
560 	}
561 
562 	mha = setup_mha(MHA_MAPSIZE_VA, 0, dmodel);
563 	if (pr_memcntl(Pr, addr, len, MC_HAT_ADVISE, mha, 0, 0) < 0) {
564 		goto err;
565 	}
566 
567 	/*
568 	 * Touch a page in the segment so the hat mapping gets created.
569 	 */
570 	(void) Pwrite(Pr, &len, sizeof (len), (uintptr_t)addr);
571 
572 	/*
573 	 * Read through the address map looking for our segment.
574 	 */
575 
576 	while (read(fd, &xmap, sizeof (xmap)) == sizeof (xmap)) {
577 		if (xmap.pr_vaddr == (uintptr_t)addr)
578 			break;
579 	}
580 	if (xmap.pr_vaddr != (uintptr_t)addr)
581 		goto err;
582 
583 	size = xmap.pr_hatpagesize;
584 
585 err:
586 	if (addr != MAP_FAILED) {
587 		if (pr_munmap(Pr, addr, len) == -1) {
588 			(void) fprintf(stderr,
589 			    "%s: couldn't delete segment at %p\n",
590 			    command, addr);
591 		}
592 	}
593 	if (fd != -1)
594 		(void) close(fd);
595 
596 	return (size);
597 }
598 
599 static struct memcntl_mha	gmha;
600 #ifdef _LP64
601 static struct memcntl_mha32	gmha32;
602 #endif
603 
604 static caddr_t
605 /* ARGSUSED */
606 setup_mha(uint_t command, size_t pagesize, int dmodel)
607 {
608 #ifdef _LP64
609 	if (dmodel == PR_MODEL_ILP32) {
610 		gmha32.mha_cmd = command;
611 		gmha32.mha_flags = 0;
612 		gmha32.mha_pagesize = pagesize;
613 		return ((caddr_t)&gmha32);
614 	}
615 #endif
616 	gmha.mha_cmd = command;
617 	gmha.mha_flags = 0;
618 	gmha.mha_pagesize = pagesize;
619 	return ((caddr_t)&gmha);
620 }
621