/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License, Version 1.0 only * (the "License"). You may not use this file except in compliance * with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2001-2003 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #pragma ident "%Z%%M% %I% %E% SMI" #include <stdio.h> #include <stdlib.h> #include <unistd.h> #include <fcntl.h> #include <ctype.h> #include <string.h> #include <signal.h> #include <errno.h> #include <dirent.h> #include <limits.h> #include <sys/types.h> #include <sys/stat.h> #include <sys/mman.h> #include <sys/wait.h> #include <libproc.h> #include <sys/sysmacros.h> #include <libgen.h> #include <thread.h> #ifndef TRUE #define TRUE 1 #endif #ifndef FALSE #define FALSE 0 #endif static struct ps_prochandle *Pr; static char *command; static volatile int interrupt; static int Fflag; static int cflag = 1; static void intr(int); static int setpgsz(struct ps_prochandle *, int, size_t *); static int setpgsz_anon(struct ps_prochandle *, size_t, int); static caddr_t setup_mha(uint_t, size_t, int); static size_t discover_optimal_pagesize(struct ps_prochandle *, uint_t, pid_t); static void usage(); #define INVPGSZ 3 /* subopt */ static char *suboptstr[] = { "heap", "stack", "anon", NULL }; enum suboptenum { E_HEAP, E_STACK, E_ANON }; static size_t atosz(char *optarg) { size_t sz = 0; char *endptr; if (optarg == NULL || optarg[0] == '\0') return (INVPGSZ); sz = strtoll(optarg, &endptr, 0); switch (*endptr) { case 'T': case 't': sz *= 1024; /*FALLTHRU*/ case 'G': case 'g': sz *= 1024; /*FALLTHRU*/ case 'M': case 'm': sz *= 1024; /*FALLTHRU*/ case 'K': case 'k': sz *= 1024; /*FALLTHRU*/ case 'B': case 'b': default: break; } return (sz); } /* pgsz array sufficient for max page sizes */ static size_t pgsza[8 * sizeof (void *)]; static int nelem; static void getpgsz() { if ((nelem = getpagesizes(NULL, 0)) == 0) { (void) fprintf(stderr, "%s: cannot determine system page" " sizes\n", command); exit(125); } (void) getpagesizes(pgsza, nelem); } static size_t cnvpgsz(char *optarg) { size_t pgsz = atosz(optarg); int i; if (!ISP2(pgsz) || ((pgsz < pgsza[0]) && pgsz != 0)) { pgsz = INVPGSZ; } else { for (i = nelem - 1; i >= 0; i--) { if (pgsz == pgsza[i]) break; if (pgsz > pgsza[i]) { pgsz = INVPGSZ; break; } } } if (pgsz == INVPGSZ) { if (optarg != NULL) { (void) fprintf(stderr, "%s: invalid page size specified (%s)\n", command, optarg); } else { usage(); } exit(125); } return (pgsz); } static void usage() { (void) fprintf(stderr, "usage:\t%s -o option[,option] [-F] cmd | -p pid ...\n" " (set preferred page size of cmd or each process)\n" " -o option[,option]: options are\n" " stack=sz\n" " heap=sz\n" " anon=sz (sz: valid page size or 0 (zero))\n" " -F: force grabbing of the target process(es)\n" " cmd: launch command\n" " -p pid ...: process id list\n", command); exit(125); } int main(int argc, char *argv[]) { int rc, err = 0; int opt, subopt; int errflg = 0; char *options, *value; size_t pgsz[] = {INVPGSZ, INVPGSZ, INVPGSZ}; pid_t pid; int status; if ((command = strrchr(argv[0], '/')) != NULL) command++; else command = argv[0]; getpgsz(); /* options */ while ((opt = getopt(argc, argv, "o:Fp")) != EOF) { switch (opt) { case 'o': /* options */ options = optarg; while (*options != '\0') { subopt = getsubopt(&options, suboptstr, &value); switch (subopt) { case E_HEAP: case E_STACK: case E_ANON: pgsz[subopt] = cnvpgsz(value); break; default: errflg = 1; break; } } break; case 'F': /* force grabbing (no O_EXCL) */ Fflag = PGRAB_FORCE; break; case 'p': cflag = 0; break; default: errflg = 1; break; } } argc -= optind; argv += optind; if ((pgsz[E_HEAP] == INVPGSZ && pgsz[E_STACK] == INVPGSZ && pgsz[E_ANON] == INVPGSZ) || errflg || argc <= 0) { usage(); } /* catch signals from terminal */ if (sigset(SIGHUP, SIG_IGN) == SIG_DFL) (void) sigset(SIGHUP, intr); if (sigset(SIGINT, SIG_IGN) == SIG_DFL) (void) sigset(SIGINT, intr); if (sigset(SIGQUIT, SIG_IGN) == SIG_DFL) (void) sigset(SIGQUIT, intr); (void) sigset(SIGTERM, intr); if (cflag && !interrupt) { /* command */ int err; char path[PATH_MAX]; Pr = Pcreate(argv[0], &argv[0], &err, path, sizeof (path)); if (Pr == NULL) { switch (err) { case C_PERM: (void) fprintf(stderr, "%s: cannot control set-id or " "unreadable object file: %s\n", command, path); break; case C_LP64: (void) fprintf(stderr, "%s: cannot control _LP64 " "program: %s\n", command, path); break; case C_NOEXEC: (void) fprintf(stderr, "%s: cannot execute " "program: %s\n", command, argv[0]); exit(126); break; case C_NOENT: (void) fprintf(stderr, "%s: cannot find " "program: %s\n", command, argv[0]); exit(127); break; case C_STRANGE: break; default: (void) fprintf(stderr, "%s: %s\n", command, Pcreate_error(err)); break; } exit(125); } if ((rc = setpgsz(Pr, Pstatus(Pr)->pr_dmodel, pgsz)) != 0) { (void) fprintf(stderr, "%s: set page size " "failed for program: %s\n", command, argv[0]); (void) pr_exit(Pr, 1); exit(125); } /* * release the command to run, wait for it and * return it's exit status if we can. */ Prelease(Pr, 0); do { pid = wait(&status); } while (pid == -1 && errno == EINTR); if (pid == -1) { (void) fprintf(stderr, "%s: wait() error: %s\n", command, strerror(errno)); exit(125); } /* * Pass thru the child's exit value. */ if (WIFEXITED(status)) exit(WEXITSTATUS(status)); exit(status | WCOREFLG); } /* process pids */ while (--argc >= 0 && !interrupt) { char *arg; psinfo_t psinfo; int gret; (void) fflush(stdout); /* line-at-a-time */ /* get the specified pid and the psinfo struct */ arg = *argv++; pid = proc_arg_psinfo(arg, PR_ARG_PIDS, &psinfo, &gret); if (pid == -1) { (void) fprintf(stderr, "%s: cannot examine pid %s:" " %s\n", command, arg, Pgrab_error(gret)); if (!isdigit(arg[0]) && strncmp(arg, "/proc/", 6)) { (void) fprintf(stderr, "\tdo not use -p option" " to launch a command\n"); } err++; } else if ((Pr = Pgrab(pid, Fflag, &gret)) != NULL) { rc = setpgsz(Pr, Pstatus(Pr)->pr_dmodel, pgsz); if (rc != 0) { (void) fprintf(stderr, "%s: set page size " "failed for pid: %d\n", command, (int)pid); err++; } Prelease(Pr, 0); Pr = NULL; } else { switch (gret) { case G_SYS: proc_unctrl_psinfo(&psinfo); (void) fprintf(stderr, "%s: cannot set page " "size for system process: %d [ %s ]\n", command, (int)pid, psinfo.pr_psargs); err++; break; case G_SELF: /* do it to own self */ rc = setpgsz(NULL, psinfo.pr_dmodel, pgsz); if (rc != 0) { (void) fprintf(stderr, "%s: set page" "size failed for self: %d\n", command, (int)pid); err++; } break; default: (void) fprintf(stderr, "%s: %s: %d\n", command, Pgrab_error(gret), (int)pid); err++; break; } } } if (interrupt || err) exit(125); return (0); } /* ARGSUSED */ static void intr(int sig) { interrupt = 1; } /* ------ begin specific code ------ */ /* set process page size */ /*ARGSUSED*/ static int setpgsz(struct ps_prochandle *Pr, int dmodel, size_t pgsz[]) { int rc; int err = 0; caddr_t mpss; int i; static uint_t pgszcmd[] = {MHA_MAPSIZE_BSSBRK, MHA_MAPSIZE_STACK, MHA_MAPSIZE_VA}; for (i = E_HEAP; i <= E_ANON; i++) { if (pgsz[i] == INVPGSZ) continue; if (i == E_ANON) rc = setpgsz_anon(Pr, pgsz[i], dmodel); else { mpss = setup_mha(pgszcmd[i], pgsz[i], dmodel); rc = pr_memcntl(Pr, NULL, 0, MC_HAT_ADVISE, mpss, 0, 0); } if (rc < 0) { (void) fprintf(stderr, "%s: warning: set %s page size " "failed (%s) for pid %d\n", command, suboptstr[i], strerror(errno), (int)Pstatus(Pr)->pr_pid); err++; } } return (err); } /* * Walk through the process' address space segments. Set all anonymous * segments to the new page size. */ static int setpgsz_anon(struct ps_prochandle *Pr, size_t pgsz, int dmodel) { caddr_t mpss; prmap_t map; uintptr_t addr; size_t size; const psinfo_t *psinfo; const pstatus_t *pstatus; int fd; int rc; char path[PATH_MAX]; /* * Setting the page size for anonymous segments on a process before it * has run will have no effect, since it has not configured anonymous * memory and the page size setting is not "sticky" inside the kernel. * Any anonymous memory subsequently mapped will have the default page * size. */ if (cflag) return (0); if ((psinfo = Ppsinfo(Pr)) == NULL) return (-1); if ((pstatus = Pstatus(Pr)) == NULL) return (-1); if (pgsz == 0) pgsz = discover_optimal_pagesize(Pr, dmodel, psinfo->pr_pid); mpss = setup_mha(MHA_MAPSIZE_VA, pgsz, dmodel); (void) snprintf(path, PATH_MAX, "/proc/%d/map", (int)psinfo->pr_pid); if ((fd = open(path, O_RDONLY)) < 0) return (-1); while (read(fd, &map, sizeof (map)) == sizeof (map)) { if ((map.pr_mflags & MA_ANON) == 0) { /* Not anon. */ continue; } else if (map.pr_mflags & MA_SHARED) { /* Can't change pagesize for shared mappings. */ continue; } else if (map.pr_vaddr + map.pr_size > pstatus->pr_brkbase && map.pr_vaddr < pstatus->pr_brkbase + pstatus->pr_brksize) { /* Heap. */ continue; } else if (map.pr_vaddr >= pstatus->pr_stkbase && map.pr_vaddr + map.pr_size <= pstatus->pr_stkbase + pstatus->pr_stksize) { /* Stack. */ continue; } else if (map.pr_size < pgsz) { /* Too small. */ continue; } /* * Find the first address in the segment that is page-aligned. */ if (pgsz == 0 || ((map.pr_vaddr % pgsz) == 0)) addr = map.pr_vaddr; else addr = map.pr_vaddr + (pgsz - (map.pr_vaddr % pgsz)); /* * Calculate how many pages will fit in the segment. */ if (pgsz == 0) size = map.pr_size; else size = map.pr_size - (addr % map.pr_vaddr) - ((map.pr_vaddr + map.pr_size) % pgsz); /* * If no aligned pages fit in the segment, ignore it. */ if (size < pgsz) { continue; } rc = pr_memcntl(Pr, (caddr_t)addr, size, MC_HAT_ADVISE, mpss, 0, 0); /* * If an error occurs on any segment, report the error here and * then go on to try setting the page size for the remaining * segments. */ if (rc < 0) { (void) fprintf(stderr, "%s: warning: set page size " "failed (%s) for pid %d for anon segment at " "address: %p\n", command, strerror(errno), (int)psinfo->pr_pid, (void *)map.pr_vaddr); } } (void) close(fd); return (0); } /* * Discover the optimal page size for the process. * Do this by creating a 4M segment in the target process, set its pagesize * to 0, and read the map file to discover the page size selected by the system. */ static size_t discover_optimal_pagesize(struct ps_prochandle *Pr, uint_t dmodel, pid_t pid) { size_t size = 0; size_t len = pgsza[nelem - 1]; prxmap_t xmap; caddr_t mha; void *addr; int fd = -1; char path[PATH_MAX]; (void) snprintf(path, PATH_MAX, "/proc/%d/xmap", (int)pid); if ((fd = open(path, O_RDONLY)) < 0) return (size); if ((addr = pr_mmap(Pr, (void *)len, len, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON | MAP_ALIGN, -1, 0)) == MAP_FAILED) { goto err; } mha = setup_mha(MHA_MAPSIZE_VA, 0, dmodel); if (pr_memcntl(Pr, addr, len, MC_HAT_ADVISE, mha, 0, 0) < 0) { goto err; } /* * Touch a page in the segment so the hat mapping gets created. */ (void) Pwrite(Pr, &len, sizeof (len), (uintptr_t)addr); /* * Read through the address map looking for our segment. */ while (read(fd, &xmap, sizeof (xmap)) == sizeof (xmap)) { if (xmap.pr_vaddr == (uintptr_t)addr) break; } if (xmap.pr_vaddr != (uintptr_t)addr) goto err; size = xmap.pr_hatpagesize; err: if (addr != MAP_FAILED) { if (pr_munmap(Pr, addr, len) == -1) { (void) fprintf(stderr, "%s: couldn't delete segment at %p\n", command, addr); } } if (fd != -1) (void) close(fd); return (size); } static struct memcntl_mha gmha; #ifdef _LP64 static struct memcntl_mha32 gmha32; #endif static caddr_t /* ARGSUSED */ setup_mha(uint_t command, size_t pagesize, int dmodel) { #ifdef _LP64 if (dmodel == PR_MODEL_ILP32) { gmha32.mha_cmd = command; gmha32.mha_flags = 0; gmha32.mha_pagesize = pagesize; return ((caddr_t)&gmha32); } #endif gmha.mha_cmd = command; gmha.mha_flags = 0; gmha.mha_pagesize = pagesize; return ((caddr_t)&gmha); }