1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2001-2003 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <stdio.h> 30 #include <stdlib.h> 31 #include <unistd.h> 32 #include <fcntl.h> 33 #include <ctype.h> 34 #include <string.h> 35 #include <signal.h> 36 #include <errno.h> 37 #include <dirent.h> 38 #include <limits.h> 39 #include <sys/types.h> 40 #include <sys/stat.h> 41 #include <sys/mman.h> 42 #include <sys/wait.h> 43 #include <libproc.h> 44 #include <sys/sysmacros.h> 45 #include <libgen.h> 46 #include <thread.h> 47 48 #ifndef TRUE 49 #define TRUE 1 50 #endif 51 #ifndef FALSE 52 #define FALSE 0 53 #endif 54 55 static struct ps_prochandle *Pr; 56 static char *command; 57 static volatile int interrupt; 58 static int Fflag; 59 static int cflag = 1; 60 61 static void intr(int); 62 static int setpgsz(struct ps_prochandle *, int, size_t *); 63 static int setpgsz_anon(struct ps_prochandle *, size_t, int); 64 static caddr_t setup_mha(uint_t, size_t, int); 65 static size_t discover_optimal_pagesize(struct ps_prochandle *, 66 uint_t, pid_t); 67 static void usage(); 68 69 #define INVPGSZ 3 70 71 /* subopt */ 72 73 static char *suboptstr[] = { 74 "heap", 75 "stack", 76 "anon", 77 NULL 78 }; 79 80 enum suboptenum { 81 E_HEAP, 82 E_STACK, 83 E_ANON 84 }; 85 86 static size_t 87 atosz(char *optarg) 88 { 89 size_t sz = 0; 90 char *endptr; 91 92 if (optarg == NULL || optarg[0] == '\0') 93 return (INVPGSZ); 94 95 sz = strtoll(optarg, &endptr, 0); 96 97 switch (*endptr) { 98 case 'T': 99 case 't': 100 sz *= 1024; 101 /*FALLTHRU*/ 102 case 'G': 103 case 'g': 104 sz *= 1024; 105 /*FALLTHRU*/ 106 case 'M': 107 case 'm': 108 sz *= 1024; 109 /*FALLTHRU*/ 110 case 'K': 111 case 'k': 112 sz *= 1024; 113 /*FALLTHRU*/ 114 case 'B': 115 case 'b': 116 default: 117 break; 118 } 119 return (sz); 120 } 121 122 /* pgsz array sufficient for max page sizes */ 123 124 static size_t pgsza[8 * sizeof (void *)]; 125 static int nelem; 126 127 static void 128 getpgsz() 129 { 130 if ((nelem = getpagesizes(NULL, 0)) == 0) { 131 (void) fprintf(stderr, "%s: cannot determine system page" 132 " sizes\n", command); 133 exit(125); 134 } 135 136 (void) getpagesizes(pgsza, nelem); 137 } 138 139 static size_t 140 cnvpgsz(char *optarg) 141 { 142 size_t pgsz = atosz(optarg); 143 int i; 144 145 if (!ISP2(pgsz) || ((pgsz < pgsza[0]) && pgsz != 0)) { 146 pgsz = INVPGSZ; 147 } else { 148 for (i = nelem - 1; i >= 0; i--) { 149 if (pgsz == pgsza[i]) 150 break; 151 if (pgsz > pgsza[i]) { 152 pgsz = INVPGSZ; 153 break; 154 } 155 } 156 } 157 if (pgsz == INVPGSZ) { 158 if (optarg != NULL) { 159 (void) fprintf(stderr, 160 "%s: invalid page size specified (%s)\n", 161 command, optarg); 162 } else { 163 usage(); 164 } 165 exit(125); 166 } 167 return (pgsz); 168 } 169 170 static void 171 usage() 172 { 173 (void) fprintf(stderr, 174 "usage:\t%s -o option[,option] [-F] cmd | -p pid ...\n" 175 " (set preferred page size of cmd or each process)\n" 176 " -o option[,option]: options are\n" 177 " stack=sz\n" 178 " heap=sz\n" 179 " anon=sz (sz: valid page size or 0 (zero))\n" 180 " -F: force grabbing of the target process(es)\n" 181 " cmd: launch command\n" 182 " -p pid ...: process id list\n", 183 command); 184 exit(125); 185 } 186 187 int 188 main(int argc, char *argv[]) 189 { 190 int rc, err = 0; 191 int opt, subopt; 192 int errflg = 0; 193 char *options, *value; 194 size_t pgsz[] = {INVPGSZ, INVPGSZ, INVPGSZ}; 195 pid_t pid; 196 int status; 197 198 if ((command = strrchr(argv[0], '/')) != NULL) 199 command++; 200 else 201 command = argv[0]; 202 203 getpgsz(); 204 205 /* options */ 206 while ((opt = getopt(argc, argv, "o:Fp")) != EOF) { 207 switch (opt) { 208 case 'o': /* options */ 209 options = optarg; 210 while (*options != '\0') { 211 subopt = getsubopt(&options, suboptstr, &value); 212 switch (subopt) { 213 case E_HEAP: 214 case E_STACK: 215 case E_ANON: 216 pgsz[subopt] = cnvpgsz(value); 217 break; 218 default: 219 errflg = 1; 220 break; 221 } 222 } 223 break; 224 case 'F': /* force grabbing (no O_EXCL) */ 225 Fflag = PGRAB_FORCE; 226 break; 227 case 'p': 228 cflag = 0; 229 break; 230 default: 231 errflg = 1; 232 break; 233 } 234 } 235 236 argc -= optind; 237 argv += optind; 238 239 if ((pgsz[E_HEAP] == INVPGSZ && pgsz[E_STACK] == INVPGSZ && 240 pgsz[E_ANON] == INVPGSZ) || errflg || argc <= 0) { 241 usage(); 242 } 243 244 /* catch signals from terminal */ 245 if (sigset(SIGHUP, SIG_IGN) == SIG_DFL) 246 (void) sigset(SIGHUP, intr); 247 if (sigset(SIGINT, SIG_IGN) == SIG_DFL) 248 (void) sigset(SIGINT, intr); 249 if (sigset(SIGQUIT, SIG_IGN) == SIG_DFL) 250 (void) sigset(SIGQUIT, intr); 251 (void) sigset(SIGTERM, intr); 252 253 if (cflag && !interrupt) { /* command */ 254 int err; 255 char path[PATH_MAX]; 256 257 Pr = Pcreate(argv[0], &argv[0], &err, path, sizeof (path)); 258 if (Pr == NULL) { 259 switch (err) { 260 case C_PERM: 261 (void) fprintf(stderr, 262 "%s: cannot control set-id or " 263 "unreadable object file: %s\n", 264 command, path); 265 break; 266 case C_LP64: 267 (void) fprintf(stderr, 268 "%s: cannot control _LP64 " 269 "program: %s\n", command, path); 270 break; 271 case C_NOEXEC: 272 (void) fprintf(stderr, "%s: cannot execute " 273 "program: %s\n", command, argv[0]); 274 exit(126); 275 break; 276 case C_NOENT: 277 (void) fprintf(stderr, "%s: cannot find " 278 "program: %s\n", command, argv[0]); 279 exit(127); 280 break; 281 case C_STRANGE: 282 break; 283 default: 284 (void) fprintf(stderr, 285 "%s: %s\n", command, Pcreate_error(err)); 286 break; 287 } 288 exit(125); 289 } 290 291 if ((rc = setpgsz(Pr, Pstatus(Pr)->pr_dmodel, pgsz)) != 0) { 292 (void) fprintf(stderr, "%s: set page size " 293 "failed for program: %s\n", command, argv[0]); 294 (void) pr_exit(Pr, 1); 295 exit(125); 296 } 297 298 /* 299 * release the command to run, wait for it and 300 * return it's exit status if we can. 301 */ 302 Prelease(Pr, 0); 303 do { 304 pid = wait(&status); 305 } while (pid == -1 && errno == EINTR); 306 307 if (pid == -1) { 308 (void) fprintf(stderr, "%s: wait() error: %s\n", 309 command, strerror(errno)); 310 exit(125); 311 } 312 313 /* 314 * Pass thru the child's exit value. 315 */ 316 if (WIFEXITED(status)) 317 exit(WEXITSTATUS(status)); 318 exit(status | WCOREFLG); 319 } 320 321 /* process pids */ 322 323 while (--argc >= 0 && !interrupt) { 324 char *arg; 325 psinfo_t psinfo; 326 int gret; 327 328 (void) fflush(stdout); /* line-at-a-time */ 329 330 /* get the specified pid and the psinfo struct */ 331 arg = *argv++; 332 pid = proc_arg_psinfo(arg, PR_ARG_PIDS, &psinfo, &gret); 333 334 if (pid == -1) { 335 (void) fprintf(stderr, "%s: cannot examine pid %s:" 336 " %s\n", command, arg, Pgrab_error(gret)); 337 if (!isdigit(arg[0]) && strncmp(arg, "/proc/", 6)) { 338 (void) fprintf(stderr, 339 "\tdo not use -p option" 340 " to launch a command\n"); 341 } 342 err++; 343 } else if ((Pr = Pgrab(pid, Fflag, &gret)) != NULL) { 344 rc = setpgsz(Pr, Pstatus(Pr)->pr_dmodel, pgsz); 345 if (rc != 0) { 346 (void) fprintf(stderr, "%s: set page size " 347 "failed for pid: %d\n", command, (int)pid); 348 err++; 349 } 350 Prelease(Pr, 0); 351 Pr = NULL; 352 } else { 353 switch (gret) { 354 case G_SYS: 355 proc_unctrl_psinfo(&psinfo); 356 (void) fprintf(stderr, "%s: cannot set page " 357 "size for system process: %d [ %s ]\n", 358 command, (int)pid, psinfo.pr_psargs); 359 err++; 360 break; 361 case G_SELF: 362 /* do it to own self */ 363 rc = setpgsz(NULL, psinfo.pr_dmodel, pgsz); 364 if (rc != 0) { 365 (void) fprintf(stderr, "%s: set page" 366 "size failed for self: %d\n", 367 command, (int)pid); 368 err++; 369 } 370 break; 371 default: 372 (void) fprintf(stderr, "%s: %s: %d\n", 373 command, Pgrab_error(gret), (int)pid); 374 err++; 375 break; 376 } 377 } 378 } 379 380 if (interrupt || err) 381 exit(125); 382 383 return (0); 384 } 385 386 /* ARGSUSED */ 387 static void 388 intr(int sig) 389 { 390 interrupt = 1; 391 } 392 393 /* ------ begin specific code ------ */ 394 395 /* set process page size */ 396 /*ARGSUSED*/ 397 static int 398 setpgsz(struct ps_prochandle *Pr, int dmodel, size_t pgsz[]) 399 { 400 int rc; 401 int err = 0; 402 caddr_t mpss; 403 int i; 404 static uint_t pgszcmd[] = 405 {MHA_MAPSIZE_BSSBRK, MHA_MAPSIZE_STACK, MHA_MAPSIZE_VA}; 406 407 for (i = E_HEAP; i <= E_ANON; i++) { 408 if (pgsz[i] == INVPGSZ) 409 continue; 410 411 if (i == E_ANON) 412 rc = setpgsz_anon(Pr, pgsz[i], dmodel); 413 else { 414 mpss = setup_mha(pgszcmd[i], pgsz[i], dmodel); 415 rc = pr_memcntl(Pr, NULL, 0, MC_HAT_ADVISE, mpss, 0, 0); 416 } 417 418 if (rc < 0) { 419 (void) fprintf(stderr, "%s: warning: set %s page size " 420 "failed (%s) for pid %d\n", command, suboptstr[i], 421 strerror(errno), (int)Pstatus(Pr)->pr_pid); 422 err++; 423 } 424 } 425 return (err); 426 } 427 428 429 /* 430 * Walk through the process' address space segments. Set all anonymous 431 * segments to the new page size. 432 */ 433 static int 434 setpgsz_anon(struct ps_prochandle *Pr, size_t pgsz, int dmodel) 435 { 436 caddr_t mpss; 437 prmap_t map; 438 uintptr_t addr; 439 size_t size; 440 const psinfo_t *psinfo; 441 const pstatus_t *pstatus; 442 int fd; 443 int rc; 444 char path[PATH_MAX]; 445 446 /* 447 * Setting the page size for anonymous segments on a process before it 448 * has run will have no effect, since it has not configured anonymous 449 * memory and the page size setting is not "sticky" inside the kernel. 450 * Any anonymous memory subsequently mapped will have the default page 451 * size. 452 */ 453 if (cflag) 454 return (0); 455 456 if ((psinfo = Ppsinfo(Pr)) == NULL) 457 return (-1); 458 if ((pstatus = Pstatus(Pr)) == NULL) 459 return (-1); 460 461 if (pgsz == 0) 462 pgsz = discover_optimal_pagesize(Pr, dmodel, psinfo->pr_pid); 463 464 mpss = setup_mha(MHA_MAPSIZE_VA, pgsz, dmodel); 465 466 (void) snprintf(path, PATH_MAX, "/proc/%d/map", (int)psinfo->pr_pid); 467 if ((fd = open(path, O_RDONLY)) < 0) 468 return (-1); 469 470 while (read(fd, &map, sizeof (map)) == sizeof (map)) { 471 if ((map.pr_mflags & MA_ANON) == 0) { 472 /* Not anon. */ 473 continue; 474 } else if (map.pr_mflags & MA_SHARED) { 475 /* Can't change pagesize for shared mappings. */ 476 continue; 477 } else if (map.pr_vaddr + map.pr_size > 478 pstatus->pr_brkbase && 479 map.pr_vaddr < 480 pstatus->pr_brkbase + pstatus->pr_brksize) { 481 /* Heap. */ 482 continue; 483 } else if (map.pr_vaddr >= pstatus->pr_stkbase && 484 map.pr_vaddr + map.pr_size <= 485 pstatus->pr_stkbase + pstatus->pr_stksize) { 486 /* Stack. */ 487 continue; 488 } else if (map.pr_size < pgsz) { 489 /* Too small. */ 490 continue; 491 } 492 493 /* 494 * Find the first address in the segment that is page-aligned. 495 */ 496 if (pgsz == 0 || ((map.pr_vaddr % pgsz) == 0)) 497 addr = map.pr_vaddr; 498 else 499 addr = map.pr_vaddr + (pgsz - (map.pr_vaddr % pgsz)); 500 501 /* 502 * Calculate how many pages will fit in the segment. 503 */ 504 if (pgsz == 0) 505 size = map.pr_size; 506 else 507 size = map.pr_size - (addr % map.pr_vaddr) - 508 ((map.pr_vaddr + map.pr_size) % pgsz); 509 510 /* 511 * If no aligned pages fit in the segment, ignore it. 512 */ 513 if (size < pgsz) { 514 continue; 515 } 516 517 rc = pr_memcntl(Pr, (caddr_t)addr, size, 518 MC_HAT_ADVISE, mpss, 0, 0); 519 520 /* 521 * If an error occurs on any segment, report the error here and 522 * then go on to try setting the page size for the remaining 523 * segments. 524 */ 525 if (rc < 0) { 526 (void) fprintf(stderr, "%s: warning: set page size " 527 "failed (%s) for pid %d for anon segment at " 528 "address: %p\n", command, strerror(errno), 529 (int)psinfo->pr_pid, (void *)map.pr_vaddr); 530 } 531 } 532 533 (void) close(fd); 534 return (0); 535 } 536 537 /* 538 * Discover the optimal page size for the process. 539 * Do this by creating a 4M segment in the target process, set its pagesize 540 * to 0, and read the map file to discover the page size selected by the system. 541 */ 542 static size_t 543 discover_optimal_pagesize(struct ps_prochandle *Pr, uint_t dmodel, pid_t pid) 544 { 545 size_t size = 0; 546 size_t len = pgsza[nelem - 1]; 547 prxmap_t xmap; 548 caddr_t mha; 549 void *addr; 550 int fd = -1; 551 char path[PATH_MAX]; 552 553 (void) snprintf(path, PATH_MAX, "/proc/%d/xmap", (int)pid); 554 if ((fd = open(path, O_RDONLY)) < 0) 555 return (size); 556 557 if ((addr = pr_mmap(Pr, (void *)len, len, PROT_READ | PROT_WRITE, 558 MAP_PRIVATE | MAP_ANON | MAP_ALIGN, -1, 0)) == MAP_FAILED) { 559 goto err; 560 } 561 562 mha = setup_mha(MHA_MAPSIZE_VA, 0, dmodel); 563 if (pr_memcntl(Pr, addr, len, MC_HAT_ADVISE, mha, 0, 0) < 0) { 564 goto err; 565 } 566 567 /* 568 * Touch a page in the segment so the hat mapping gets created. 569 */ 570 (void) Pwrite(Pr, &len, sizeof (len), (uintptr_t)addr); 571 572 /* 573 * Read through the address map looking for our segment. 574 */ 575 576 while (read(fd, &xmap, sizeof (xmap)) == sizeof (xmap)) { 577 if (xmap.pr_vaddr == (uintptr_t)addr) 578 break; 579 } 580 if (xmap.pr_vaddr != (uintptr_t)addr) 581 goto err; 582 583 size = xmap.pr_hatpagesize; 584 585 err: 586 if (addr != MAP_FAILED) { 587 if (pr_munmap(Pr, addr, len) == -1) { 588 (void) fprintf(stderr, 589 "%s: couldn't delete segment at %p\n", 590 command, addr); 591 } 592 } 593 if (fd != -1) 594 (void) close(fd); 595 596 return (size); 597 } 598 599 static struct memcntl_mha gmha; 600 #ifdef _LP64 601 static struct memcntl_mha32 gmha32; 602 #endif 603 604 static caddr_t 605 /* ARGSUSED */ 606 setup_mha(uint_t command, size_t pagesize, int dmodel) 607 { 608 #ifdef _LP64 609 if (dmodel == PR_MODEL_ILP32) { 610 gmha32.mha_cmd = command; 611 gmha32.mha_flags = 0; 612 gmha32.mha_pagesize = pagesize; 613 return ((caddr_t)&gmha32); 614 } 615 #endif 616 gmha.mha_cmd = command; 617 gmha.mha_flags = 0; 618 gmha.mha_pagesize = pagesize; 619 return ((caddr_t)&gmha); 620 } 621