1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22 /*
23 * Copyright 2001-2003 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 #pragma ident "%Z%%M% %I% %E% SMI"
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <unistd.h>
32 #include <fcntl.h>
33 #include <ctype.h>
34 #include <string.h>
35 #include <signal.h>
36 #include <errno.h>
37 #include <dirent.h>
38 #include <limits.h>
39 #include <sys/types.h>
40 #include <sys/stat.h>
41 #include <sys/mman.h>
42 #include <sys/wait.h>
43 #include <libproc.h>
44 #include <sys/sysmacros.h>
45 #include <libgen.h>
46 #include <thread.h>
47
48 #ifndef TRUE
49 #define TRUE 1
50 #endif
51 #ifndef FALSE
52 #define FALSE 0
53 #endif
54
55 static struct ps_prochandle *Pr;
56 static char *command;
57 static volatile int interrupt;
58 static int Fflag;
59 static int cflag = 1;
60
61 static void intr(int);
62 static int setpgsz(struct ps_prochandle *, int, size_t *);
63 static int setpgsz_anon(struct ps_prochandle *, size_t, int);
64 static caddr_t setup_mha(uint_t, size_t, int);
65 static size_t discover_optimal_pagesize(struct ps_prochandle *,
66 uint_t, pid_t);
67 static void usage();
68
/*
 * Marker for "no usable page size".  cnvpgsz() only ever accepts 0 or a
 * power of two, so the value 3 cannot be mistaken for a real request.
 */
#define	INVPGSZ		3

/*
 * Keywords recognised after -o.  getsubopt() reports the index of the
 * keyword it matched, and main() uses that index directly as a subscript
 * into its page-size array, so the order here must stay in step with
 * enum suboptenum below.
 */
static char *suboptstr[] = {
	"heap",
	"stack",
	"anon",
	NULL
};

/* Index of each suboption in suboptstr[] and in the per-region arrays. */
enum suboptenum {
	E_HEAP = 0,
	E_STACK,
	E_ANON
};
85
86 static size_t
atosz(char * optarg)87 atosz(char *optarg)
88 {
89 size_t sz = 0;
90 char *endptr;
91
92 if (optarg == NULL || optarg[0] == '\0')
93 return (INVPGSZ);
94
95 sz = strtoll(optarg, &endptr, 0);
96
97 switch (*endptr) {
98 case 'T':
99 case 't':
100 sz *= 1024;
101 /*FALLTHRU*/
102 case 'G':
103 case 'g':
104 sz *= 1024;
105 /*FALLTHRU*/
106 case 'M':
107 case 'm':
108 sz *= 1024;
109 /*FALLTHRU*/
110 case 'K':
111 case 'k':
112 sz *= 1024;
113 /*FALLTHRU*/
114 case 'B':
115 case 'b':
116 default:
117 break;
118 }
119 return (sz);
120 }
121
122 /* pgsz array sufficient for max page sizes */
123
124 static size_t pgsza[8 * sizeof (void *)];
125 static int nelem;
126
127 static void
getpgsz()128 getpgsz()
129 {
130 if ((nelem = getpagesizes(NULL, 0)) == 0) {
131 (void) fprintf(stderr, "%s: cannot determine system page"
132 " sizes\n", command);
133 exit(125);
134 }
135
136 (void) getpagesizes(pgsza, nelem);
137 }
138
139 static size_t
cnvpgsz(char * optarg)140 cnvpgsz(char *optarg)
141 {
142 size_t pgsz = atosz(optarg);
143 int i;
144
145 if (!ISP2(pgsz) || ((pgsz < pgsza[0]) && pgsz != 0)) {
146 pgsz = INVPGSZ;
147 } else {
148 for (i = nelem - 1; i >= 0; i--) {
149 if (pgsz == pgsza[i])
150 break;
151 if (pgsz > pgsza[i]) {
152 pgsz = INVPGSZ;
153 break;
154 }
155 }
156 }
157 if (pgsz == INVPGSZ) {
158 if (optarg != NULL) {
159 (void) fprintf(stderr,
160 "%s: invalid page size specified (%s)\n",
161 command, optarg);
162 } else {
163 usage();
164 }
165 exit(125);
166 }
167 return (pgsz);
168 }
169
170 static void
usage()171 usage()
172 {
173 (void) fprintf(stderr,
174 "usage:\t%s -o option[,option] [-F] cmd | -p pid ...\n"
175 " (set preferred page size of cmd or each process)\n"
176 " -o option[,option]: options are\n"
177 " stack=sz\n"
178 " heap=sz\n"
179 " anon=sz (sz: valid page size or 0 (zero))\n"
180 " -F: force grabbing of the target process(es)\n"
181 " cmd: launch command\n"
182 " -p pid ...: process id list\n",
183 command);
184 exit(125);
185 }
186
187 int
main(int argc,char * argv[])188 main(int argc, char *argv[])
189 {
190 int rc, err = 0;
191 int opt, subopt;
192 int errflg = 0;
193 char *options, *value;
194 size_t pgsz[] = {INVPGSZ, INVPGSZ, INVPGSZ};
195 pid_t pid;
196 int status;
197
198 if ((command = strrchr(argv[0], '/')) != NULL)
199 command++;
200 else
201 command = argv[0];
202
203 getpgsz();
204
205 /* options */
206 while ((opt = getopt(argc, argv, "o:Fp")) != EOF) {
207 switch (opt) {
208 case 'o': /* options */
209 options = optarg;
210 while (*options != '\0') {
211 subopt = getsubopt(&options, suboptstr, &value);
212 switch (subopt) {
213 case E_HEAP:
214 case E_STACK:
215 case E_ANON:
216 pgsz[subopt] = cnvpgsz(value);
217 break;
218 default:
219 errflg = 1;
220 break;
221 }
222 }
223 break;
224 case 'F': /* force grabbing (no O_EXCL) */
225 Fflag = PGRAB_FORCE;
226 break;
227 case 'p':
228 cflag = 0;
229 break;
230 default:
231 errflg = 1;
232 break;
233 }
234 }
235
236 argc -= optind;
237 argv += optind;
238
239 if ((pgsz[E_HEAP] == INVPGSZ && pgsz[E_STACK] == INVPGSZ &&
240 pgsz[E_ANON] == INVPGSZ) || errflg || argc <= 0) {
241 usage();
242 }
243
244 /* catch signals from terminal */
245 if (sigset(SIGHUP, SIG_IGN) == SIG_DFL)
246 (void) sigset(SIGHUP, intr);
247 if (sigset(SIGINT, SIG_IGN) == SIG_DFL)
248 (void) sigset(SIGINT, intr);
249 if (sigset(SIGQUIT, SIG_IGN) == SIG_DFL)
250 (void) sigset(SIGQUIT, intr);
251 (void) sigset(SIGTERM, intr);
252
253 if (cflag && !interrupt) { /* command */
254 int err;
255 char path[PATH_MAX];
256
257 Pr = Pcreate(argv[0], &argv[0], &err, path, sizeof (path));
258 if (Pr == NULL) {
259 switch (err) {
260 case C_PERM:
261 (void) fprintf(stderr,
262 "%s: cannot control set-id or "
263 "unreadable object file: %s\n",
264 command, path);
265 break;
266 case C_LP64:
267 (void) fprintf(stderr,
268 "%s: cannot control _LP64 "
269 "program: %s\n", command, path);
270 break;
271 case C_NOEXEC:
272 (void) fprintf(stderr, "%s: cannot execute "
273 "program: %s\n", command, argv[0]);
274 exit(126);
275 break;
276 case C_NOENT:
277 (void) fprintf(stderr, "%s: cannot find "
278 "program: %s\n", command, argv[0]);
279 exit(127);
280 break;
281 case C_STRANGE:
282 break;
283 default:
284 (void) fprintf(stderr,
285 "%s: %s\n", command, Pcreate_error(err));
286 break;
287 }
288 exit(125);
289 }
290
291 if ((rc = setpgsz(Pr, Pstatus(Pr)->pr_dmodel, pgsz)) != 0) {
292 (void) fprintf(stderr, "%s: set page size "
293 "failed for program: %s\n", command, argv[0]);
294 (void) pr_exit(Pr, 1);
295 exit(125);
296 }
297
298 /*
299 * release the command to run, wait for it and
300 * return it's exit status if we can.
301 */
302 Prelease(Pr, 0);
303 do {
304 pid = wait(&status);
305 } while (pid == -1 && errno == EINTR);
306
307 if (pid == -1) {
308 (void) fprintf(stderr, "%s: wait() error: %s\n",
309 command, strerror(errno));
310 exit(125);
311 }
312
313 /*
314 * Pass thru the child's exit value.
315 */
316 if (WIFEXITED(status))
317 exit(WEXITSTATUS(status));
318 exit(status | WCOREFLG);
319 }
320
321 /* process pids */
322
323 while (--argc >= 0 && !interrupt) {
324 char *arg;
325 psinfo_t psinfo;
326 int gret;
327
328 (void) fflush(stdout); /* line-at-a-time */
329
330 /* get the specified pid and the psinfo struct */
331 arg = *argv++;
332 pid = proc_arg_psinfo(arg, PR_ARG_PIDS, &psinfo, &gret);
333
334 if (pid == -1) {
335 (void) fprintf(stderr, "%s: cannot examine pid %s:"
336 " %s\n", command, arg, Pgrab_error(gret));
337 if (!isdigit(arg[0]) && strncmp(arg, "/proc/", 6)) {
338 (void) fprintf(stderr,
339 "\tdo not use -p option"
340 " to launch a command\n");
341 }
342 err++;
343 } else if ((Pr = Pgrab(pid, Fflag, &gret)) != NULL) {
344 rc = setpgsz(Pr, Pstatus(Pr)->pr_dmodel, pgsz);
345 if (rc != 0) {
346 (void) fprintf(stderr, "%s: set page size "
347 "failed for pid: %d\n", command, (int)pid);
348 err++;
349 }
350 Prelease(Pr, 0);
351 Pr = NULL;
352 } else {
353 switch (gret) {
354 case G_SYS:
355 proc_unctrl_psinfo(&psinfo);
356 (void) fprintf(stderr, "%s: cannot set page "
357 "size for system process: %d [ %s ]\n",
358 command, (int)pid, psinfo.pr_psargs);
359 err++;
360 break;
361 case G_SELF:
362 /* do it to own self */
363 rc = setpgsz(NULL, psinfo.pr_dmodel, pgsz);
364 if (rc != 0) {
365 (void) fprintf(stderr, "%s: set page"
366 "size failed for self: %d\n",
367 command, (int)pid);
368 err++;
369 }
370 break;
371 default:
372 (void) fprintf(stderr, "%s: %s: %d\n",
373 command, Pgrab_error(gret), (int)pid);
374 err++;
375 break;
376 }
377 }
378 }
379
380 if (interrupt || err)
381 exit(125);
382
383 return (0);
384 }
385
386 /* ARGSUSED */
387 static void
intr(int sig)388 intr(int sig)
389 {
390 interrupt = 1;
391 }
392
393 /* ------ begin specific code ------ */
394
395 /* set process page size */
396 /*ARGSUSED*/
397 static int
setpgsz(struct ps_prochandle * Pr,int dmodel,size_t pgsz[])398 setpgsz(struct ps_prochandle *Pr, int dmodel, size_t pgsz[])
399 {
400 int rc;
401 int err = 0;
402 caddr_t mpss;
403 int i;
404 static uint_t pgszcmd[] =
405 {MHA_MAPSIZE_BSSBRK, MHA_MAPSIZE_STACK, MHA_MAPSIZE_VA};
406
407 for (i = E_HEAP; i <= E_ANON; i++) {
408 if (pgsz[i] == INVPGSZ)
409 continue;
410
411 if (i == E_ANON)
412 rc = setpgsz_anon(Pr, pgsz[i], dmodel);
413 else {
414 mpss = setup_mha(pgszcmd[i], pgsz[i], dmodel);
415 rc = pr_memcntl(Pr, NULL, 0, MC_HAT_ADVISE, mpss, 0, 0);
416 }
417
418 if (rc < 0) {
419 (void) fprintf(stderr, "%s: warning: set %s page size "
420 "failed (%s) for pid %d\n", command, suboptstr[i],
421 strerror(errno), (int)Pstatus(Pr)->pr_pid);
422 err++;
423 }
424 }
425 return (err);
426 }
427
428
429 /*
430 * Walk through the process' address space segments. Set all anonymous
431 * segments to the new page size.
432 */
433 static int
setpgsz_anon(struct ps_prochandle * Pr,size_t pgsz,int dmodel)434 setpgsz_anon(struct ps_prochandle *Pr, size_t pgsz, int dmodel)
435 {
436 caddr_t mpss;
437 prmap_t map;
438 uintptr_t addr;
439 size_t size;
440 const psinfo_t *psinfo;
441 const pstatus_t *pstatus;
442 int fd;
443 int rc;
444 char path[PATH_MAX];
445
446 /*
447 * Setting the page size for anonymous segments on a process before it
448 * has run will have no effect, since it has not configured anonymous
449 * memory and the page size setting is not "sticky" inside the kernel.
450 * Any anonymous memory subsequently mapped will have the default page
451 * size.
452 */
453 if (cflag)
454 return (0);
455
456 if ((psinfo = Ppsinfo(Pr)) == NULL)
457 return (-1);
458 if ((pstatus = Pstatus(Pr)) == NULL)
459 return (-1);
460
461 if (pgsz == 0)
462 pgsz = discover_optimal_pagesize(Pr, dmodel, psinfo->pr_pid);
463
464 mpss = setup_mha(MHA_MAPSIZE_VA, pgsz, dmodel);
465
466 (void) snprintf(path, PATH_MAX, "/proc/%d/map", (int)psinfo->pr_pid);
467 if ((fd = open(path, O_RDONLY)) < 0)
468 return (-1);
469
470 while (read(fd, &map, sizeof (map)) == sizeof (map)) {
471 if ((map.pr_mflags & MA_ANON) == 0) {
472 /* Not anon. */
473 continue;
474 } else if (map.pr_mflags & MA_SHARED) {
475 /* Can't change pagesize for shared mappings. */
476 continue;
477 } else if (map.pr_vaddr + map.pr_size >
478 pstatus->pr_brkbase &&
479 map.pr_vaddr <
480 pstatus->pr_brkbase + pstatus->pr_brksize) {
481 /* Heap. */
482 continue;
483 } else if (map.pr_vaddr >= pstatus->pr_stkbase &&
484 map.pr_vaddr + map.pr_size <=
485 pstatus->pr_stkbase + pstatus->pr_stksize) {
486 /* Stack. */
487 continue;
488 } else if (map.pr_size < pgsz) {
489 /* Too small. */
490 continue;
491 }
492
493 /*
494 * Find the first address in the segment that is page-aligned.
495 */
496 if (pgsz == 0 || ((map.pr_vaddr % pgsz) == 0))
497 addr = map.pr_vaddr;
498 else
499 addr = map.pr_vaddr + (pgsz - (map.pr_vaddr % pgsz));
500
501 /*
502 * Calculate how many pages will fit in the segment.
503 */
504 if (pgsz == 0)
505 size = map.pr_size;
506 else
507 size = map.pr_size - (addr % map.pr_vaddr) -
508 ((map.pr_vaddr + map.pr_size) % pgsz);
509
510 /*
511 * If no aligned pages fit in the segment, ignore it.
512 */
513 if (size < pgsz) {
514 continue;
515 }
516
517 rc = pr_memcntl(Pr, (caddr_t)addr, size,
518 MC_HAT_ADVISE, mpss, 0, 0);
519
520 /*
521 * If an error occurs on any segment, report the error here and
522 * then go on to try setting the page size for the remaining
523 * segments.
524 */
525 if (rc < 0) {
526 (void) fprintf(stderr, "%s: warning: set page size "
527 "failed (%s) for pid %d for anon segment at "
528 "address: %p\n", command, strerror(errno),
529 (int)psinfo->pr_pid, (void *)map.pr_vaddr);
530 }
531 }
532
533 (void) close(fd);
534 return (0);
535 }
536
537 /*
538 * Discover the optimal page size for the process.
539 * Do this by creating a 4M segment in the target process, set its pagesize
540 * to 0, and read the map file to discover the page size selected by the system.
541 */
542 static size_t
discover_optimal_pagesize(struct ps_prochandle * Pr,uint_t dmodel,pid_t pid)543 discover_optimal_pagesize(struct ps_prochandle *Pr, uint_t dmodel, pid_t pid)
544 {
545 size_t size = 0;
546 size_t len = pgsza[nelem - 1];
547 prxmap_t xmap;
548 caddr_t mha;
549 void *addr;
550 int fd = -1;
551 char path[PATH_MAX];
552
553 (void) snprintf(path, PATH_MAX, "/proc/%d/xmap", (int)pid);
554 if ((fd = open(path, O_RDONLY)) < 0)
555 return (size);
556
557 if ((addr = pr_mmap(Pr, (void *)len, len, PROT_READ | PROT_WRITE,
558 MAP_PRIVATE | MAP_ANON | MAP_ALIGN, -1, 0)) == MAP_FAILED) {
559 goto err;
560 }
561
562 mha = setup_mha(MHA_MAPSIZE_VA, 0, dmodel);
563 if (pr_memcntl(Pr, addr, len, MC_HAT_ADVISE, mha, 0, 0) < 0) {
564 goto err;
565 }
566
567 /*
568 * Touch a page in the segment so the hat mapping gets created.
569 */
570 (void) Pwrite(Pr, &len, sizeof (len), (uintptr_t)addr);
571
572 /*
573 * Read through the address map looking for our segment.
574 */
575
576 while (read(fd, &xmap, sizeof (xmap)) == sizeof (xmap)) {
577 if (xmap.pr_vaddr == (uintptr_t)addr)
578 break;
579 }
580 if (xmap.pr_vaddr != (uintptr_t)addr)
581 goto err;
582
583 size = xmap.pr_hatpagesize;
584
585 err:
586 if (addr != MAP_FAILED) {
587 if (pr_munmap(Pr, addr, len) == -1) {
588 (void) fprintf(stderr,
589 "%s: couldn't delete segment at %p\n",
590 command, addr);
591 }
592 }
593 if (fd != -1)
594 (void) close(fd);
595
596 return (size);
597 }
598
599 static struct memcntl_mha gmha;
600 #ifdef _LP64
601 static struct memcntl_mha32 gmha32;
602 #endif
603
604 static caddr_t
605 /* ARGSUSED */
setup_mha(uint_t command,size_t pagesize,int dmodel)606 setup_mha(uint_t command, size_t pagesize, int dmodel)
607 {
608 #ifdef _LP64
609 if (dmodel == PR_MODEL_ILP32) {
610 gmha32.mha_cmd = command;
611 gmha32.mha_flags = 0;
612 gmha32.mha_pagesize = pagesize;
613 return ((caddr_t)&gmha32);
614 }
615 #endif
616 gmha.mha_cmd = command;
617 gmha.mha_flags = 0;
618 gmha.mha_pagesize = pagesize;
619 return ((caddr_t)&gmha);
620 }
621