1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2001-2003 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <stdio.h> 28 #include <stdlib.h> 29 #include <unistd.h> 30 #include <fcntl.h> 31 #include <ctype.h> 32 #include <string.h> 33 #include <signal.h> 34 #include <errno.h> 35 #include <dirent.h> 36 #include <limits.h> 37 #include <sys/types.h> 38 #include <sys/stat.h> 39 #include <sys/mman.h> 40 #include <sys/wait.h> 41 #include <libproc.h> 42 #include <sys/sysmacros.h> 43 #include <libgen.h> 44 #include <thread.h> 45 46 #ifndef TRUE 47 #define TRUE 1 48 #endif 49 #ifndef FALSE 50 #define FALSE 0 51 #endif 52 53 static struct ps_prochandle *Pr; 54 static char *command; 55 static volatile int interrupt; 56 static int Fflag; 57 static int cflag = 1; 58 59 static void intr(int); 60 static int setpgsz(struct ps_prochandle *, int, size_t *); 61 static int setpgsz_anon(struct ps_prochandle *, size_t, int); 62 static caddr_t setup_mha(uint_t, size_t, int); 63 static size_t discover_optimal_pagesize(struct ps_prochandle *, 64 uint_t, pid_t); 65 static void usage(); 66 67 #define INVPGSZ 3 68 69 /* subopt */ 70 71 static char *suboptstr[] = { 72 "heap", 73 "stack", 74 "anon", 75 NULL 76 }; 77 78 enum suboptenum { 79 E_HEAP, 80 E_STACK, 81 E_ANON 82 }; 83 84 static size_t 85 atosz(char *optarg) 86 { 87 size_t sz = 0; 88 char *endptr; 89 90 if (optarg == NULL || optarg[0] == '\0') 91 return (INVPGSZ); 92 93 sz = strtoll(optarg, &endptr, 0); 94 95 switch (*endptr) { 96 case 'T': 97 case 't': 98 sz *= 1024; 99 /*FALLTHRU*/ 100 case 'G': 101 case 'g': 102 sz *= 1024; 103 /*FALLTHRU*/ 104 case 'M': 105 case 'm': 106 sz *= 1024; 107 /*FALLTHRU*/ 108 case 'K': 109 case 'k': 110 sz *= 1024; 111 /*FALLTHRU*/ 112 case 'B': 113 case 'b': 114 default: 115 break; 116 } 117 return (sz); 118 } 119 120 /* pgsz array sufficient for max page sizes */ 121 122 static size_t pgsza[8 * sizeof (void *)]; 123 static int nelem; 124 125 static void 126 getpgsz() 127 { 128 if ((nelem = getpagesizes(NULL, 0)) == 0) { 129 (void) fprintf(stderr, "%s: cannot determine system page" 130 " sizes\n", command); 131 exit(125); 132 } 133 134 (void) getpagesizes(pgsza, nelem); 135 } 136 137 static size_t 138 cnvpgsz(char *optarg) 139 { 140 size_t pgsz = atosz(optarg); 141 int i; 142 143 if (!ISP2(pgsz) || ((pgsz < pgsza[0]) && pgsz != 0)) { 144 pgsz = INVPGSZ; 145 } else { 146 for (i = nelem - 1; i >= 0; i--) { 147 if (pgsz == pgsza[i]) 148 break; 149 if (pgsz > pgsza[i]) { 150 pgsz = INVPGSZ; 151 break; 152 } 153 } 154 } 155 if (pgsz == INVPGSZ) { 156 if (optarg != NULL) { 157 (void) fprintf(stderr, 158 "%s: invalid page size specified (%s)\n", 159 command, optarg); 160 } else { 161 usage(); 162 } 163 exit(125); 164 } 165 return (pgsz); 166 } 167 168 static void 169 usage() 170 { 171 (void) fprintf(stderr, 172 "usage:\t%s -o option[,option] [-F] cmd | -p pid ...\n" 173 " (set preferred page size of cmd or each process)\n" 174 " -o option[,option]: options are\n" 175 " stack=sz\n" 176 " heap=sz\n" 177 " anon=sz (sz: valid page size or 0 (zero))\n" 178 " -F: force grabbing of the target process(es)\n" 179 " cmd: launch command\n" 180 " -p pid ...: process id list\n", 181 command); 182 exit(125); 183 } 184 185 int 186 main(int argc, char *argv[]) 187 { 188 int rc, err = 0; 189 int opt, subopt; 190 int errflg = 0; 191 char *options, *value; 192 size_t pgsz[] = {INVPGSZ, INVPGSZ, INVPGSZ}; 193 pid_t pid; 194 int status; 195 196 if ((command = strrchr(argv[0], '/')) != NULL) 197 command++; 198 else 199 command = argv[0]; 200 201 getpgsz(); 202 203 /* options */ 204 while ((opt = getopt(argc, argv, "o:Fp")) != EOF) { 205 switch (opt) { 206 case 'o': /* options */ 207 options = optarg; 208 while (*options != '\0') { 209 subopt = getsubopt(&options, suboptstr, &value); 210 switch (subopt) { 211 case E_HEAP: 212 case E_STACK: 213 case E_ANON: 214 pgsz[subopt] = cnvpgsz(value); 215 break; 216 default: 217 errflg = 1; 218 break; 219 } 220 } 221 break; 222 case 'F': /* force grabbing (no O_EXCL) */ 223 Fflag = PGRAB_FORCE; 224 break; 225 case 'p': 226 cflag = 0; 227 break; 228 default: 229 errflg = 1; 230 break; 231 } 232 } 233 234 argc -= optind; 235 argv += optind; 236 237 if ((pgsz[E_HEAP] == INVPGSZ && pgsz[E_STACK] == INVPGSZ && 238 pgsz[E_ANON] == INVPGSZ) || errflg || argc <= 0) { 239 usage(); 240 } 241 242 /* catch signals from terminal */ 243 if (sigset(SIGHUP, SIG_IGN) == SIG_DFL) 244 (void) sigset(SIGHUP, intr); 245 if (sigset(SIGINT, SIG_IGN) == SIG_DFL) 246 (void) sigset(SIGINT, intr); 247 if (sigset(SIGQUIT, SIG_IGN) == SIG_DFL) 248 (void) sigset(SIGQUIT, intr); 249 (void) sigset(SIGTERM, intr); 250 251 if (cflag && !interrupt) { /* command */ 252 int err; 253 char path[PATH_MAX]; 254 255 Pr = Pcreate(argv[0], &argv[0], &err, path, sizeof (path)); 256 if (Pr == NULL) { 257 switch (err) { 258 case C_PERM: 259 (void) fprintf(stderr, 260 "%s: cannot control set-id or " 261 "unreadable object file: %s\n", 262 command, path); 263 break; 264 case C_LP64: 265 (void) fprintf(stderr, 266 "%s: cannot control _LP64 " 267 "program: %s\n", command, path); 268 break; 269 case C_NOEXEC: 270 (void) fprintf(stderr, "%s: cannot execute " 271 "program: %s\n", command, argv[0]); 272 exit(126); 273 break; 274 case C_NOENT: 275 (void) fprintf(stderr, "%s: cannot find " 276 "program: %s\n", command, argv[0]); 277 exit(127); 278 break; 279 case C_STRANGE: 280 break; 281 default: 282 (void) fprintf(stderr, 283 "%s: %s\n", command, Pcreate_error(err)); 284 break; 285 } 286 exit(125); 287 } 288 289 if ((rc = setpgsz(Pr, Pstatus(Pr)->pr_dmodel, pgsz)) != 0) { 290 (void) fprintf(stderr, "%s: set page size " 291 "failed for program: %s\n", command, argv[0]); 292 (void) pr_exit(Pr, 1); 293 exit(125); 294 } 295 296 /* 297 * release the command to run, wait for it and 298 * return it's exit status if we can. 299 */ 300 Prelease(Pr, 0); 301 do { 302 pid = wait(&status); 303 } while (pid == -1 && errno == EINTR); 304 305 if (pid == -1) { 306 (void) fprintf(stderr, "%s: wait() error: %s\n", 307 command, strerror(errno)); 308 exit(125); 309 } 310 311 /* 312 * Pass thru the child's exit value. 313 */ 314 if (WIFEXITED(status)) 315 exit(WEXITSTATUS(status)); 316 exit(status | WCOREFLG); 317 } 318 319 /* process pids */ 320 321 while (--argc >= 0 && !interrupt) { 322 char *arg; 323 psinfo_t psinfo; 324 int gret; 325 326 (void) fflush(stdout); /* line-at-a-time */ 327 328 /* get the specified pid and the psinfo struct */ 329 arg = *argv++; 330 pid = proc_arg_psinfo(arg, PR_ARG_PIDS, &psinfo, &gret); 331 332 if (pid == -1) { 333 (void) fprintf(stderr, "%s: cannot examine pid %s:" 334 " %s\n", command, arg, Pgrab_error(gret)); 335 if (!isdigit(arg[0]) && strncmp(arg, "/proc/", 6)) { 336 (void) fprintf(stderr, 337 "\tdo not use -p option" 338 " to launch a command\n"); 339 } 340 err++; 341 } else if ((Pr = Pgrab(pid, Fflag, &gret)) != NULL) { 342 rc = setpgsz(Pr, Pstatus(Pr)->pr_dmodel, pgsz); 343 if (rc != 0) { 344 (void) fprintf(stderr, "%s: set page size " 345 "failed for pid: %d\n", command, (int)pid); 346 err++; 347 } 348 Prelease(Pr, 0); 349 Pr = NULL; 350 } else { 351 switch (gret) { 352 case G_SYS: 353 proc_unctrl_psinfo(&psinfo); 354 (void) fprintf(stderr, "%s: cannot set page " 355 "size for system process: %d [ %s ]\n", 356 command, (int)pid, psinfo.pr_psargs); 357 err++; 358 break; 359 case G_SELF: 360 /* do it to own self */ 361 rc = setpgsz(NULL, psinfo.pr_dmodel, pgsz); 362 if (rc != 0) { 363 (void) fprintf(stderr, "%s: set page" 364 "size failed for self: %d\n", 365 command, (int)pid); 366 err++; 367 } 368 break; 369 default: 370 (void) fprintf(stderr, "%s: %s: %d\n", 371 command, Pgrab_error(gret), (int)pid); 372 err++; 373 break; 374 } 375 } 376 } 377 378 if (interrupt || err) 379 exit(125); 380 381 return (0); 382 } 383 384 /* ARGSUSED */ 385 static void 386 intr(int sig) 387 { 388 interrupt = 1; 389 } 390 391 /* ------ begin specific code ------ */ 392 393 /* set process page size */ 394 /*ARGSUSED*/ 395 static int 396 setpgsz(struct ps_prochandle *Pr, int dmodel, size_t pgsz[]) 397 { 398 int rc; 399 int err = 0; 400 caddr_t mpss; 401 int i; 402 static uint_t pgszcmd[] = 403 {MHA_MAPSIZE_BSSBRK, MHA_MAPSIZE_STACK, MHA_MAPSIZE_VA}; 404 405 for (i = E_HEAP; i <= E_ANON; i++) { 406 if (pgsz[i] == INVPGSZ) 407 continue; 408 409 if (i == E_ANON) 410 rc = setpgsz_anon(Pr, pgsz[i], dmodel); 411 else { 412 mpss = setup_mha(pgszcmd[i], pgsz[i], dmodel); 413 rc = pr_memcntl(Pr, NULL, 0, MC_HAT_ADVISE, mpss, 0, 0); 414 } 415 416 if (rc < 0) { 417 (void) fprintf(stderr, "%s: warning: set %s page size " 418 "failed (%s) for pid %d\n", command, suboptstr[i], 419 strerror(errno), (int)Pstatus(Pr)->pr_pid); 420 err++; 421 } 422 } 423 return (err); 424 } 425 426 427 /* 428 * Walk through the process' address space segments. Set all anonymous 429 * segments to the new page size. 430 */ 431 static int 432 setpgsz_anon(struct ps_prochandle *Pr, size_t pgsz, int dmodel) 433 { 434 caddr_t mpss; 435 prmap_t map; 436 uintptr_t addr; 437 size_t size; 438 const psinfo_t *psinfo; 439 const pstatus_t *pstatus; 440 int fd; 441 int rc; 442 char path[PATH_MAX]; 443 444 /* 445 * Setting the page size for anonymous segments on a process before it 446 * has run will have no effect, since it has not configured anonymous 447 * memory and the page size setting is not "sticky" inside the kernel. 448 * Any anonymous memory subsequently mapped will have the default page 449 * size. 450 */ 451 if (cflag) 452 return (0); 453 454 if ((psinfo = Ppsinfo(Pr)) == NULL) 455 return (-1); 456 if ((pstatus = Pstatus(Pr)) == NULL) 457 return (-1); 458 459 if (pgsz == 0) 460 pgsz = discover_optimal_pagesize(Pr, dmodel, psinfo->pr_pid); 461 462 mpss = setup_mha(MHA_MAPSIZE_VA, pgsz, dmodel); 463 464 (void) snprintf(path, PATH_MAX, "/proc/%d/map", (int)psinfo->pr_pid); 465 if ((fd = open(path, O_RDONLY)) < 0) 466 return (-1); 467 468 while (read(fd, &map, sizeof (map)) == sizeof (map)) { 469 if ((map.pr_mflags & MA_ANON) == 0) { 470 /* Not anon. */ 471 continue; 472 } else if (map.pr_mflags & MA_SHARED) { 473 /* Can't change pagesize for shared mappings. */ 474 continue; 475 } else if (map.pr_vaddr + map.pr_size > 476 pstatus->pr_brkbase && 477 map.pr_vaddr < 478 pstatus->pr_brkbase + pstatus->pr_brksize) { 479 /* Heap. */ 480 continue; 481 } else if (map.pr_vaddr >= pstatus->pr_stkbase && 482 map.pr_vaddr + map.pr_size <= 483 pstatus->pr_stkbase + pstatus->pr_stksize) { 484 /* Stack. */ 485 continue; 486 } else if (map.pr_size < pgsz) { 487 /* Too small. */ 488 continue; 489 } 490 491 /* 492 * Find the first address in the segment that is page-aligned. 493 */ 494 if (pgsz == 0 || ((map.pr_vaddr % pgsz) == 0)) 495 addr = map.pr_vaddr; 496 else 497 addr = map.pr_vaddr + (pgsz - (map.pr_vaddr % pgsz)); 498 499 /* 500 * Calculate how many pages will fit in the segment. 501 */ 502 if (pgsz == 0) 503 size = map.pr_size; 504 else 505 size = map.pr_size - (addr % map.pr_vaddr) - 506 ((map.pr_vaddr + map.pr_size) % pgsz); 507 508 /* 509 * If no aligned pages fit in the segment, ignore it. 510 */ 511 if (size < pgsz) { 512 continue; 513 } 514 515 rc = pr_memcntl(Pr, (caddr_t)addr, size, 516 MC_HAT_ADVISE, mpss, 0, 0); 517 518 /* 519 * If an error occurs on any segment, report the error here and 520 * then go on to try setting the page size for the remaining 521 * segments. 522 */ 523 if (rc < 0) { 524 (void) fprintf(stderr, "%s: warning: set page size " 525 "failed (%s) for pid %d for anon segment at " 526 "address: %p\n", command, strerror(errno), 527 (int)psinfo->pr_pid, (void *)map.pr_vaddr); 528 } 529 } 530 531 (void) close(fd); 532 return (0); 533 } 534 535 /* 536 * Discover the optimal page size for the process. 537 * Do this by creating a 4M segment in the target process, set its pagesize 538 * to 0, and read the map file to discover the page size selected by the system. 539 */ 540 static size_t 541 discover_optimal_pagesize(struct ps_prochandle *Pr, uint_t dmodel, pid_t pid) 542 { 543 size_t size = 0; 544 size_t len = pgsza[nelem - 1]; 545 prxmap_t xmap; 546 caddr_t mha; 547 void *addr; 548 int fd = -1; 549 char path[PATH_MAX]; 550 551 (void) snprintf(path, PATH_MAX, "/proc/%d/xmap", (int)pid); 552 if ((fd = open(path, O_RDONLY)) < 0) 553 return (size); 554 555 if ((addr = pr_mmap(Pr, (void *)len, len, PROT_READ | PROT_WRITE, 556 MAP_PRIVATE | MAP_ANON | MAP_ALIGN, -1, 0)) == MAP_FAILED) { 557 goto err; 558 } 559 560 mha = setup_mha(MHA_MAPSIZE_VA, 0, dmodel); 561 if (pr_memcntl(Pr, addr, len, MC_HAT_ADVISE, mha, 0, 0) < 0) { 562 goto err; 563 } 564 565 /* 566 * Touch a page in the segment so the hat mapping gets created. 567 */ 568 (void) Pwrite(Pr, &len, sizeof (len), (uintptr_t)addr); 569 570 /* 571 * Read through the address map looking for our segment. 572 */ 573 574 while (read(fd, &xmap, sizeof (xmap)) == sizeof (xmap)) { 575 if (xmap.pr_vaddr == (uintptr_t)addr) 576 break; 577 } 578 if (xmap.pr_vaddr != (uintptr_t)addr) 579 goto err; 580 581 size = xmap.pr_hatpagesize; 582 583 err: 584 if (addr != MAP_FAILED) { 585 if (pr_munmap(Pr, addr, len) == -1) { 586 (void) fprintf(stderr, 587 "%s: couldn't delete segment at %p\n", 588 command, addr); 589 } 590 } 591 if (fd != -1) 592 (void) close(fd); 593 594 return (size); 595 } 596 597 static struct memcntl_mha gmha; 598 #ifdef _LP64 599 static struct memcntl_mha32 gmha32; 600 #endif 601 602 static caddr_t 603 /* ARGSUSED */ 604 setup_mha(uint_t command, size_t pagesize, int dmodel) 605 { 606 #ifdef _LP64 607 if (dmodel == PR_MODEL_ILP32) { 608 gmha32.mha_cmd = command; 609 gmha32.mha_flags = 0; 610 gmha32.mha_pagesize = pagesize; 611 return ((caddr_t)&gmha32); 612 } 613 #endif 614 gmha.mha_cmd = command; 615 gmha.mha_flags = 0; 616 gmha.mha_pagesize = pagesize; 617 return ((caddr_t)&gmha); 618 } 619