1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22 /*
23 * Copyright 2001-2003 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <unistd.h>
30 #include <fcntl.h>
31 #include <ctype.h>
32 #include <string.h>
33 #include <signal.h>
34 #include <errno.h>
35 #include <dirent.h>
36 #include <limits.h>
37 #include <sys/types.h>
38 #include <sys/stat.h>
39 #include <sys/mman.h>
40 #include <sys/wait.h>
41 #include <libproc.h>
42 #include <sys/sysmacros.h>
43 #include <libgen.h>
44 #include <thread.h>
45
/* Boolean constants, supplied only when no earlier header defined them. */
#if !defined(TRUE)
#define	TRUE	1
#endif
#if !defined(FALSE)
#define	FALSE	0
#endif
52
53 static struct ps_prochandle *Pr;
54 static char *command;
55 static volatile int interrupt;
56 static int Fflag;
57 static int cflag = 1;
58
59 static void intr(int);
60 static int setpgsz(struct ps_prochandle *, int, size_t *);
61 static int setpgsz_anon(struct ps_prochandle *, size_t, int);
62 static caddr_t setup_mha(uint_t, size_t, int);
63 static size_t discover_optimal_pagesize(struct ps_prochandle *,
64 uint_t, pid_t);
65 static void usage();
66
67 #define INVPGSZ 3
68
/*
 * Suboptions understood by -o.  The enumerators are the indices that
 * getsubopt() returns for the matching suboptstr[] entry, and they are
 * also used to index the per-kind page size array, so both lists must be
 * kept in the same order.
 */
enum suboptenum { E_HEAP, E_STACK, E_ANON };

static char *suboptstr[] = { "heap", "stack", "anon", NULL };
83
84 static size_t
atosz(char * optarg)85 atosz(char *optarg)
86 {
87 size_t sz = 0;
88 char *endptr;
89
90 if (optarg == NULL || optarg[0] == '\0')
91 return (INVPGSZ);
92
93 sz = strtoll(optarg, &endptr, 0);
94
95 switch (*endptr) {
96 case 'T':
97 case 't':
98 sz *= 1024;
99 /*FALLTHRU*/
100 case 'G':
101 case 'g':
102 sz *= 1024;
103 /*FALLTHRU*/
104 case 'M':
105 case 'm':
106 sz *= 1024;
107 /*FALLTHRU*/
108 case 'K':
109 case 'k':
110 sz *= 1024;
111 /*FALLTHRU*/
112 case 'B':
113 case 'b':
114 default:
115 break;
116 }
117 return (sz);
118 }
119
120 /* pgsz array sufficient for max page sizes */
121
122 static size_t pgsza[8 * sizeof (void *)];
123 static int nelem;
124
125 static void
getpgsz()126 getpgsz()
127 {
128 if ((nelem = getpagesizes(NULL, 0)) == 0) {
129 (void) fprintf(stderr, "%s: cannot determine system page"
130 " sizes\n", command);
131 exit(125);
132 }
133
134 (void) getpagesizes(pgsza, nelem);
135 }
136
137 static size_t
cnvpgsz(char * optarg)138 cnvpgsz(char *optarg)
139 {
140 size_t pgsz = atosz(optarg);
141 int i;
142
143 if (!ISP2(pgsz) || ((pgsz < pgsza[0]) && pgsz != 0)) {
144 pgsz = INVPGSZ;
145 } else {
146 for (i = nelem - 1; i >= 0; i--) {
147 if (pgsz == pgsza[i])
148 break;
149 if (pgsz > pgsza[i]) {
150 pgsz = INVPGSZ;
151 break;
152 }
153 }
154 }
155 if (pgsz == INVPGSZ) {
156 if (optarg != NULL) {
157 (void) fprintf(stderr,
158 "%s: invalid page size specified (%s)\n",
159 command, optarg);
160 } else {
161 usage();
162 }
163 exit(125);
164 }
165 return (pgsz);
166 }
167
168 static void
usage()169 usage()
170 {
171 (void) fprintf(stderr,
172 "usage:\t%s -o option[,option] [-F] cmd | -p pid ...\n"
173 " (set preferred page size of cmd or each process)\n"
174 " -o option[,option]: options are\n"
175 " stack=sz\n"
176 " heap=sz\n"
177 " anon=sz (sz: valid page size or 0 (zero))\n"
178 " -F: force grabbing of the target process(es)\n"
179 " cmd: launch command\n"
180 " -p pid ...: process id list\n",
181 command);
182 exit(125);
183 }
184
185 int
main(int argc,char * argv[])186 main(int argc, char *argv[])
187 {
188 int rc, err = 0;
189 int opt, subopt;
190 int errflg = 0;
191 char *options, *value;
192 size_t pgsz[] = {INVPGSZ, INVPGSZ, INVPGSZ};
193 pid_t pid;
194 int status;
195
196 if ((command = strrchr(argv[0], '/')) != NULL)
197 command++;
198 else
199 command = argv[0];
200
201 getpgsz();
202
203 /* options */
204 while ((opt = getopt(argc, argv, "o:Fp")) != EOF) {
205 switch (opt) {
206 case 'o': /* options */
207 options = optarg;
208 while (*options != '\0') {
209 subopt = getsubopt(&options, suboptstr, &value);
210 switch (subopt) {
211 case E_HEAP:
212 case E_STACK:
213 case E_ANON:
214 pgsz[subopt] = cnvpgsz(value);
215 break;
216 default:
217 errflg = 1;
218 break;
219 }
220 }
221 break;
222 case 'F': /* force grabbing (no O_EXCL) */
223 Fflag = PGRAB_FORCE;
224 break;
225 case 'p':
226 cflag = 0;
227 break;
228 default:
229 errflg = 1;
230 break;
231 }
232 }
233
234 argc -= optind;
235 argv += optind;
236
237 if ((pgsz[E_HEAP] == INVPGSZ && pgsz[E_STACK] == INVPGSZ &&
238 pgsz[E_ANON] == INVPGSZ) || errflg || argc <= 0) {
239 usage();
240 }
241
242 /* catch signals from terminal */
243 if (sigset(SIGHUP, SIG_IGN) == SIG_DFL)
244 (void) sigset(SIGHUP, intr);
245 if (sigset(SIGINT, SIG_IGN) == SIG_DFL)
246 (void) sigset(SIGINT, intr);
247 if (sigset(SIGQUIT, SIG_IGN) == SIG_DFL)
248 (void) sigset(SIGQUIT, intr);
249 (void) sigset(SIGTERM, intr);
250
251 if (cflag && !interrupt) { /* command */
252 int err;
253 char path[PATH_MAX];
254
255 Pr = Pcreate(argv[0], &argv[0], &err, path, sizeof (path));
256 if (Pr == NULL) {
257 switch (err) {
258 case C_PERM:
259 (void) fprintf(stderr,
260 "%s: cannot control set-id or "
261 "unreadable object file: %s\n",
262 command, path);
263 break;
264 case C_LP64:
265 (void) fprintf(stderr,
266 "%s: cannot control _LP64 "
267 "program: %s\n", command, path);
268 break;
269 case C_NOEXEC:
270 (void) fprintf(stderr, "%s: cannot execute "
271 "program: %s\n", command, argv[0]);
272 exit(126);
273 break;
274 case C_NOENT:
275 (void) fprintf(stderr, "%s: cannot find "
276 "program: %s\n", command, argv[0]);
277 exit(127);
278 break;
279 case C_STRANGE:
280 break;
281 default:
282 (void) fprintf(stderr,
283 "%s: %s\n", command, Pcreate_error(err));
284 break;
285 }
286 exit(125);
287 }
288
289 if ((rc = setpgsz(Pr, Pstatus(Pr)->pr_dmodel, pgsz)) != 0) {
290 (void) fprintf(stderr, "%s: set page size "
291 "failed for program: %s\n", command, argv[0]);
292 (void) pr_exit(Pr, 1);
293 exit(125);
294 }
295
296 /*
297 * release the command to run, wait for it and
298 * return it's exit status if we can.
299 */
300 Prelease(Pr, 0);
301 do {
302 pid = wait(&status);
303 } while (pid == -1 && errno == EINTR);
304
305 if (pid == -1) {
306 (void) fprintf(stderr, "%s: wait() error: %s\n",
307 command, strerror(errno));
308 exit(125);
309 }
310
311 /*
312 * Pass thru the child's exit value.
313 */
314 if (WIFEXITED(status))
315 exit(WEXITSTATUS(status));
316 exit(status | WCOREFLG);
317 }
318
319 /* process pids */
320
321 while (--argc >= 0 && !interrupt) {
322 char *arg;
323 psinfo_t psinfo;
324 int gret;
325
326 (void) fflush(stdout); /* line-at-a-time */
327
328 /* get the specified pid and the psinfo struct */
329 arg = *argv++;
330 pid = proc_arg_psinfo(arg, PR_ARG_PIDS, &psinfo, &gret);
331
332 if (pid == -1) {
333 (void) fprintf(stderr, "%s: cannot examine pid %s:"
334 " %s\n", command, arg, Pgrab_error(gret));
335 if (!isdigit(arg[0]) && strncmp(arg, "/proc/", 6)) {
336 (void) fprintf(stderr,
337 "\tdo not use -p option"
338 " to launch a command\n");
339 }
340 err++;
341 } else if ((Pr = Pgrab(pid, Fflag, &gret)) != NULL) {
342 rc = setpgsz(Pr, Pstatus(Pr)->pr_dmodel, pgsz);
343 if (rc != 0) {
344 (void) fprintf(stderr, "%s: set page size "
345 "failed for pid: %d\n", command, (int)pid);
346 err++;
347 }
348 Prelease(Pr, 0);
349 Pr = NULL;
350 } else {
351 switch (gret) {
352 case G_SYS:
353 proc_unctrl_psinfo(&psinfo);
354 (void) fprintf(stderr, "%s: cannot set page "
355 "size for system process: %d [ %s ]\n",
356 command, (int)pid, psinfo.pr_psargs);
357 err++;
358 break;
359 case G_SELF:
360 /* do it to own self */
361 rc = setpgsz(NULL, psinfo.pr_dmodel, pgsz);
362 if (rc != 0) {
363 (void) fprintf(stderr, "%s: set page"
364 "size failed for self: %d\n",
365 command, (int)pid);
366 err++;
367 }
368 break;
369 default:
370 (void) fprintf(stderr, "%s: %s: %d\n",
371 command, Pgrab_error(gret), (int)pid);
372 err++;
373 break;
374 }
375 }
376 }
377
378 if (interrupt || err)
379 exit(125);
380
381 return (0);
382 }
383
384 /* ARGSUSED */
385 static void
intr(int sig)386 intr(int sig)
387 {
388 interrupt = 1;
389 }
390
391 /* ------ begin specific code ------ */
392
393 /* set process page size */
394 /*ARGSUSED*/
395 static int
setpgsz(struct ps_prochandle * Pr,int dmodel,size_t pgsz[])396 setpgsz(struct ps_prochandle *Pr, int dmodel, size_t pgsz[])
397 {
398 int rc;
399 int err = 0;
400 caddr_t mpss;
401 int i;
402 static uint_t pgszcmd[] =
403 {MHA_MAPSIZE_BSSBRK, MHA_MAPSIZE_STACK, MHA_MAPSIZE_VA};
404
405 for (i = E_HEAP; i <= E_ANON; i++) {
406 if (pgsz[i] == INVPGSZ)
407 continue;
408
409 if (i == E_ANON)
410 rc = setpgsz_anon(Pr, pgsz[i], dmodel);
411 else {
412 mpss = setup_mha(pgszcmd[i], pgsz[i], dmodel);
413 rc = pr_memcntl(Pr, NULL, 0, MC_HAT_ADVISE, mpss, 0, 0);
414 }
415
416 if (rc < 0) {
417 (void) fprintf(stderr, "%s: warning: set %s page size "
418 "failed (%s) for pid %d\n", command, suboptstr[i],
419 strerror(errno), (int)Pstatus(Pr)->pr_pid);
420 err++;
421 }
422 }
423 return (err);
424 }
425
426
427 /*
428 * Walk through the process' address space segments. Set all anonymous
429 * segments to the new page size.
430 */
431 static int
setpgsz_anon(struct ps_prochandle * Pr,size_t pgsz,int dmodel)432 setpgsz_anon(struct ps_prochandle *Pr, size_t pgsz, int dmodel)
433 {
434 caddr_t mpss;
435 prmap_t map;
436 uintptr_t addr;
437 size_t size;
438 const psinfo_t *psinfo;
439 const pstatus_t *pstatus;
440 int fd;
441 int rc;
442 char path[PATH_MAX];
443
444 /*
445 * Setting the page size for anonymous segments on a process before it
446 * has run will have no effect, since it has not configured anonymous
447 * memory and the page size setting is not "sticky" inside the kernel.
448 * Any anonymous memory subsequently mapped will have the default page
449 * size.
450 */
451 if (cflag)
452 return (0);
453
454 if ((psinfo = Ppsinfo(Pr)) == NULL)
455 return (-1);
456 if ((pstatus = Pstatus(Pr)) == NULL)
457 return (-1);
458
459 if (pgsz == 0)
460 pgsz = discover_optimal_pagesize(Pr, dmodel, psinfo->pr_pid);
461
462 mpss = setup_mha(MHA_MAPSIZE_VA, pgsz, dmodel);
463
464 (void) snprintf(path, PATH_MAX, "/proc/%d/map", (int)psinfo->pr_pid);
465 if ((fd = open(path, O_RDONLY)) < 0)
466 return (-1);
467
468 while (read(fd, &map, sizeof (map)) == sizeof (map)) {
469 if ((map.pr_mflags & MA_ANON) == 0) {
470 /* Not anon. */
471 continue;
472 } else if (map.pr_mflags & MA_SHARED) {
473 /* Can't change pagesize for shared mappings. */
474 continue;
475 } else if (map.pr_vaddr + map.pr_size >
476 pstatus->pr_brkbase &&
477 map.pr_vaddr <
478 pstatus->pr_brkbase + pstatus->pr_brksize) {
479 /* Heap. */
480 continue;
481 } else if (map.pr_vaddr >= pstatus->pr_stkbase &&
482 map.pr_vaddr + map.pr_size <=
483 pstatus->pr_stkbase + pstatus->pr_stksize) {
484 /* Stack. */
485 continue;
486 } else if (map.pr_size < pgsz) {
487 /* Too small. */
488 continue;
489 }
490
491 /*
492 * Find the first address in the segment that is page-aligned.
493 */
494 if (pgsz == 0 || ((map.pr_vaddr % pgsz) == 0))
495 addr = map.pr_vaddr;
496 else
497 addr = map.pr_vaddr + (pgsz - (map.pr_vaddr % pgsz));
498
499 /*
500 * Calculate how many pages will fit in the segment.
501 */
502 if (pgsz == 0)
503 size = map.pr_size;
504 else
505 size = map.pr_size - (addr % map.pr_vaddr) -
506 ((map.pr_vaddr + map.pr_size) % pgsz);
507
508 /*
509 * If no aligned pages fit in the segment, ignore it.
510 */
511 if (size < pgsz) {
512 continue;
513 }
514
515 rc = pr_memcntl(Pr, (caddr_t)addr, size,
516 MC_HAT_ADVISE, mpss, 0, 0);
517
518 /*
519 * If an error occurs on any segment, report the error here and
520 * then go on to try setting the page size for the remaining
521 * segments.
522 */
523 if (rc < 0) {
524 (void) fprintf(stderr, "%s: warning: set page size "
525 "failed (%s) for pid %d for anon segment at "
526 "address: %p\n", command, strerror(errno),
527 (int)psinfo->pr_pid, (void *)map.pr_vaddr);
528 }
529 }
530
531 (void) close(fd);
532 return (0);
533 }
534
535 /*
536 * Discover the optimal page size for the process.
537 * Do this by creating a 4M segment in the target process, set its pagesize
538 * to 0, and read the map file to discover the page size selected by the system.
539 */
540 static size_t
discover_optimal_pagesize(struct ps_prochandle * Pr,uint_t dmodel,pid_t pid)541 discover_optimal_pagesize(struct ps_prochandle *Pr, uint_t dmodel, pid_t pid)
542 {
543 size_t size = 0;
544 size_t len = pgsza[nelem - 1];
545 prxmap_t xmap;
546 caddr_t mha;
547 void *addr;
548 int fd = -1;
549 char path[PATH_MAX];
550
551 (void) snprintf(path, PATH_MAX, "/proc/%d/xmap", (int)pid);
552 if ((fd = open(path, O_RDONLY)) < 0)
553 return (size);
554
555 if ((addr = pr_mmap(Pr, (void *)len, len, PROT_READ | PROT_WRITE,
556 MAP_PRIVATE | MAP_ANON | MAP_ALIGN, -1, 0)) == MAP_FAILED) {
557 goto err;
558 }
559
560 mha = setup_mha(MHA_MAPSIZE_VA, 0, dmodel);
561 if (pr_memcntl(Pr, addr, len, MC_HAT_ADVISE, mha, 0, 0) < 0) {
562 goto err;
563 }
564
565 /*
566 * Touch a page in the segment so the hat mapping gets created.
567 */
568 (void) Pwrite(Pr, &len, sizeof (len), (uintptr_t)addr);
569
570 /*
571 * Read through the address map looking for our segment.
572 */
573
574 while (read(fd, &xmap, sizeof (xmap)) == sizeof (xmap)) {
575 if (xmap.pr_vaddr == (uintptr_t)addr)
576 break;
577 }
578 if (xmap.pr_vaddr != (uintptr_t)addr)
579 goto err;
580
581 size = xmap.pr_hatpagesize;
582
583 err:
584 if (addr != MAP_FAILED) {
585 if (pr_munmap(Pr, addr, len) == -1) {
586 (void) fprintf(stderr,
587 "%s: couldn't delete segment at %p\n",
588 command, addr);
589 }
590 }
591 if (fd != -1)
592 (void) close(fd);
593
594 return (size);
595 }
596
597 static struct memcntl_mha gmha;
598 #ifdef _LP64
599 static struct memcntl_mha32 gmha32;
600 #endif
601
602 static caddr_t
603 /* ARGSUSED */
setup_mha(uint_t command,size_t pagesize,int dmodel)604 setup_mha(uint_t command, size_t pagesize, int dmodel)
605 {
606 #ifdef _LP64
607 if (dmodel == PR_MODEL_ILP32) {
608 gmha32.mha_cmd = command;
609 gmha32.mha_flags = 0;
610 gmha32.mha_pagesize = pagesize;
611 return ((caddr_t)&gmha32);
612 }
613 #endif
614 gmha.mha_cmd = command;
615 gmha.mha_flags = 0;
616 gmha.mha_pagesize = pagesize;
617 return ((caddr_t)&gmha);
618 }
619