17c478bd9Sstevel@tonic-gate /* 27c478bd9Sstevel@tonic-gate * CDDL HEADER START 37c478bd9Sstevel@tonic-gate * 47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 5004388ebScasper * Common Development and Distribution License (the "License"). 6004388ebScasper * You may not use this file except in compliance with the License. 77c478bd9Sstevel@tonic-gate * 87c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 97c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 107c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 117c478bd9Sstevel@tonic-gate * and limitations under the License. 127c478bd9Sstevel@tonic-gate * 137c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 147c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 157c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 167c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 177c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 187c478bd9Sstevel@tonic-gate * 197c478bd9Sstevel@tonic-gate * CDDL HEADER END 207c478bd9Sstevel@tonic-gate */ 217c478bd9Sstevel@tonic-gate 2223a1cceaSRoger A. Faulkner /* 2323a1cceaSRoger A. Faulkner * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. 2423a1cceaSRoger A. Faulkner */ 257c478bd9Sstevel@tonic-gate 267c478bd9Sstevel@tonic-gate /* 277c478bd9Sstevel@tonic-gate * rcapd is a long-running daemon enforcing project-based resource caps (see 287c478bd9Sstevel@tonic-gate * rcapd(1M)). Each instance of a process aggregate (project or, generically, 297c478bd9Sstevel@tonic-gate * "collection") may have a memory cap. A single thread monitors the resource 307c478bd9Sstevel@tonic-gate * utilization of capped collections, enforces caps when they are exceeded (and 317c478bd9Sstevel@tonic-gate * other conditions are met), and incorporates changes in configuration or 327c478bd9Sstevel@tonic-gate * caps. Each of these actions occurs not more frequently than the rate 337c478bd9Sstevel@tonic-gate * specified with rcapadm(1M). 347c478bd9Sstevel@tonic-gate */ 357c478bd9Sstevel@tonic-gate 367c478bd9Sstevel@tonic-gate #include <sys/priocntl.h> 377c478bd9Sstevel@tonic-gate #include <sys/proc.h> 387c478bd9Sstevel@tonic-gate #include <sys/resource.h> 397c478bd9Sstevel@tonic-gate #include <sys/sysinfo.h> 407c478bd9Sstevel@tonic-gate #include <sys/stat.h> 417c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h> 427c478bd9Sstevel@tonic-gate #include <sys/time.h> 437c478bd9Sstevel@tonic-gate #include <sys/types.h> 447c478bd9Sstevel@tonic-gate #include <dirent.h> 457c478bd9Sstevel@tonic-gate #include <errno.h> 467c478bd9Sstevel@tonic-gate #include <fcntl.h> 477c478bd9Sstevel@tonic-gate #include <kstat.h> 487c478bd9Sstevel@tonic-gate #include <libintl.h> 497c478bd9Sstevel@tonic-gate #include <limits.h> 507c478bd9Sstevel@tonic-gate #include <locale.h> 517c478bd9Sstevel@tonic-gate #include <priv.h> 527c478bd9Sstevel@tonic-gate #include <signal.h> 537c478bd9Sstevel@tonic-gate #include <stdarg.h> 547c478bd9Sstevel@tonic-gate #include <stdio.h> 55004388ebScasper #include <stdio_ext.h> 567c478bd9Sstevel@tonic-gate #include <stdlib.h> 57d75e6a5dStn143363 #include <libscf.h> 587c478bd9Sstevel@tonic-gate #include <strings.h> 597c478bd9Sstevel@tonic-gate #include <time.h> 607c478bd9Sstevel@tonic-gate #include <unistd.h> 617c478bd9Sstevel@tonic-gate #include <zone.h> 627c478bd9Sstevel@tonic-gate #include <assert.h> 630209230bSgjelinek #include <sys/vm_usage.h> 647c478bd9Sstevel@tonic-gate #include "rcapd.h" 657c478bd9Sstevel@tonic-gate #include "rcapd_mapping.h" 667c478bd9Sstevel@tonic-gate #include "rcapd_rfd.h" 677c478bd9Sstevel@tonic-gate #include "rcapd_stat.h" 687c478bd9Sstevel@tonic-gate #include "utils.h" 697c478bd9Sstevel@tonic-gate 707c478bd9Sstevel@tonic-gate #define POSITIVE_MIN(x, y) \ 717c478bd9Sstevel@tonic-gate (((x) <= 0) ? (y) : ((y) <= 0) ? (x) : MIN(x, y)) 727c478bd9Sstevel@tonic-gate #define NEXT_EVENT_TIME(base, seconds) \ 737c478bd9Sstevel@tonic-gate (((int)seconds > 0) ? (base + (hrtime_t)seconds * (hrtime_t)NANOSEC) \ 747c478bd9Sstevel@tonic-gate : (hrtime_t)0) 757c478bd9Sstevel@tonic-gate #define NEXT_REPORT_EVENT_TIME(base, seconds) \ 767c478bd9Sstevel@tonic-gate ((rcfg.rcfg_stat_file[0] != 0) ? \ 777c478bd9Sstevel@tonic-gate NEXT_EVENT_TIME(gethrtime(), seconds) : (hrtime_t)0) 787c478bd9Sstevel@tonic-gate #define EVENT_TIME(time, eventtime) \ 797c478bd9Sstevel@tonic-gate (((time) > (eventtime)) && (eventtime) != 0) 807c478bd9Sstevel@tonic-gate #define STAT_TEMPLATE_SUFFIX ".XXXXXX" /* suffix of mkstemp() arg */ 817c478bd9Sstevel@tonic-gate #define DAEMON_UID 1 /* uid to use */ 827c478bd9Sstevel@tonic-gate 830209230bSgjelinek #define CAPPED_PROJECT 0x01 840209230bSgjelinek #define CAPPED_ZONE 0x02 850209230bSgjelinek 867c478bd9Sstevel@tonic-gate typedef struct soft_scan_arg { 877c478bd9Sstevel@tonic-gate uint64_t ssa_sum_excess; 887c478bd9Sstevel@tonic-gate int64_t ssa_scan_goal; 890209230bSgjelinek boolean_t ssa_project_over_cap; 907c478bd9Sstevel@tonic-gate } soft_scan_arg_t; 917c478bd9Sstevel@tonic-gate 920209230bSgjelinek typedef struct sample_col_arg { 930209230bSgjelinek boolean_t sca_any_over_cap; 940209230bSgjelinek boolean_t sca_project_over_cap; 950209230bSgjelinek } sample_col_arg_t; 960209230bSgjelinek 970209230bSgjelinek 987c478bd9Sstevel@tonic-gate static int debug_mode = 0; /* debug mode flag */ 997c478bd9Sstevel@tonic-gate static pid_t rcapd_pid; /* rcapd's pid to ensure it's not */ 1007c478bd9Sstevel@tonic-gate /* scanned */ 1017c478bd9Sstevel@tonic-gate static kstat_ctl_t *kctl; /* kstat chain */ 1027c478bd9Sstevel@tonic-gate static int memory_pressure = 0; /* physical memory utilization (%) */ 1037c478bd9Sstevel@tonic-gate static int memory_pressure_sample = 0; /* count of samples */ 1040209230bSgjelinek static long page_size_kb = 0; /* system page size in KB */ 1050209230bSgjelinek static size_t nvmu_vals = 0; /* # of kernel RSS/swap vals in array */ 1060209230bSgjelinek static size_t vmu_vals_len = 0; /* size of RSS/swap vals array */ 1070209230bSgjelinek static vmusage_t *vmu_vals = NULL; /* snapshot of kernel RSS/swap values */ 1087c478bd9Sstevel@tonic-gate static hrtime_t next_report; /* time of next report */ 1097c478bd9Sstevel@tonic-gate static int termination_signal = 0; /* terminating signal */ 1100209230bSgjelinek static zoneid_t my_zoneid = (zoneid_t)-1; 1110209230bSgjelinek static lcollection_t *gz_col; /* global zone collection */ 1127c478bd9Sstevel@tonic-gate 1137c478bd9Sstevel@tonic-gate rcfg_t rcfg; 1140209230bSgjelinek /* 1150209230bSgjelinek * Updated when we re-read the collection configurations if this rcapd instance 1160209230bSgjelinek * is running in the global zone and the global zone is capped. 1170209230bSgjelinek */ 1180209230bSgjelinek boolean_t gz_capped = B_FALSE; 1197c478bd9Sstevel@tonic-gate 1207c478bd9Sstevel@tonic-gate /* 1217c478bd9Sstevel@tonic-gate * Flags. 1227c478bd9Sstevel@tonic-gate */ 1237c478bd9Sstevel@tonic-gate static int ever_ran; 1247c478bd9Sstevel@tonic-gate int should_run; 1257c478bd9Sstevel@tonic-gate static int should_reconfigure; 1267c478bd9Sstevel@tonic-gate 1277c478bd9Sstevel@tonic-gate static int verify_statistics(void); 1287c478bd9Sstevel@tonic-gate static int update_statistics(void); 1297c478bd9Sstevel@tonic-gate 1307c478bd9Sstevel@tonic-gate /* 1310209230bSgjelinek * Checks if a process is marked 'system'. Returns FALSE only when it is not. 1327c478bd9Sstevel@tonic-gate */ 1330209230bSgjelinek static boolean_t 1347c478bd9Sstevel@tonic-gate proc_issystem(pid_t pid) 1357c478bd9Sstevel@tonic-gate { 1367c478bd9Sstevel@tonic-gate char pc_clname[PC_CLNMSZ]; 1377c478bd9Sstevel@tonic-gate 1387c478bd9Sstevel@tonic-gate if (priocntl(P_PID, pid, PC_GETXPARMS, NULL, PC_KY_CLNAME, pc_clname, 1397c478bd9Sstevel@tonic-gate PC_KY_NULL) != -1) { 1407c478bd9Sstevel@tonic-gate return (strcmp(pc_clname, "SYS") == 0); 1417c478bd9Sstevel@tonic-gate } else { 1427c478bd9Sstevel@tonic-gate debug("cannot get class-specific scheduling parameters; " 1430209230bSgjelinek "assuming system process\n"); 1440209230bSgjelinek return (B_TRUE); 1457c478bd9Sstevel@tonic-gate } 1467c478bd9Sstevel@tonic-gate } 1477c478bd9Sstevel@tonic-gate 1487c478bd9Sstevel@tonic-gate static void 1490209230bSgjelinek lprocess_insert_mark(psinfo_t *psinfop) 1507c478bd9Sstevel@tonic-gate { 1510209230bSgjelinek pid_t pid = psinfop->pr_pid; 1520209230bSgjelinek /* flag indicating whether the process should be scanned. */ 1530209230bSgjelinek int unscannable = psinfop->pr_nlwp == 0; 1540209230bSgjelinek rcid_t colid; 1557c478bd9Sstevel@tonic-gate lcollection_t *lcol; 1567c478bd9Sstevel@tonic-gate lprocess_t *lproc; 1577c478bd9Sstevel@tonic-gate 1580209230bSgjelinek /* 1590209230bSgjelinek * Determine which collection to put this process into. We only have 1600209230bSgjelinek * to worry about tracking both zone and project capped processes if 1610209230bSgjelinek * this rcapd instance is running in the global zone, since we'll only 1620209230bSgjelinek * see processes in our own projects in a non-global zone. In the 1630209230bSgjelinek * global zone, if the process belongs to a non-global zone, we only 1640209230bSgjelinek * need to track it for the capped non-global zone collection. For 1650209230bSgjelinek * global zone processes, we first attempt to put the process into a 1660209230bSgjelinek * capped project collection. On the second pass into this function 1670209230bSgjelinek * the projid will be cleared so we will just track the process for the 1680209230bSgjelinek * global zone collection as a whole. 1690209230bSgjelinek */ 1700209230bSgjelinek if (psinfop->pr_zoneid == my_zoneid && psinfop->pr_projid != -1) { 1710209230bSgjelinek colid.rcid_type = RCIDT_PROJECT; 1720209230bSgjelinek colid.rcid_val = psinfop->pr_projid; 1730209230bSgjelinek } else { 1740209230bSgjelinek /* try to add to zone collection */ 1750209230bSgjelinek colid.rcid_type = RCIDT_ZONE; 1760209230bSgjelinek colid.rcid_val = psinfop->pr_zoneid; 1770209230bSgjelinek } 1780209230bSgjelinek 1790209230bSgjelinek if ((lcol = lcollection_find(&colid)) == NULL) 1807c478bd9Sstevel@tonic-gate return; 1817c478bd9Sstevel@tonic-gate 1827c478bd9Sstevel@tonic-gate /* 1837c478bd9Sstevel@tonic-gate * If the process is already being tracked, update the unscannable flag, 1847c478bd9Sstevel@tonic-gate * as determined by the caller, from the process's psinfo. 1857c478bd9Sstevel@tonic-gate */ 1867c478bd9Sstevel@tonic-gate lproc = lcol->lcol_lprocess; 1877c478bd9Sstevel@tonic-gate while (lproc != NULL) { 1887c478bd9Sstevel@tonic-gate if (lproc->lpc_pid == pid) { 1897c478bd9Sstevel@tonic-gate lproc->lpc_mark = 1; 1907c478bd9Sstevel@tonic-gate if (unscannable != 0 && lproc->lpc_unscannable == 0) { 1917c478bd9Sstevel@tonic-gate debug("process %d: became unscannable\n", 1927c478bd9Sstevel@tonic-gate (int)lproc->lpc_pid); 1937c478bd9Sstevel@tonic-gate lproc->lpc_unscannable = 1; 1947c478bd9Sstevel@tonic-gate } 1957c478bd9Sstevel@tonic-gate return; 1967c478bd9Sstevel@tonic-gate } 1977c478bd9Sstevel@tonic-gate lproc = lproc->lpc_next; 1987c478bd9Sstevel@tonic-gate } 1997c478bd9Sstevel@tonic-gate 2007c478bd9Sstevel@tonic-gate /* 2017c478bd9Sstevel@tonic-gate * We've fallen off the list without finding our current process; 2027c478bd9Sstevel@tonic-gate * insert it at the list head. 2037c478bd9Sstevel@tonic-gate */ 2047c478bd9Sstevel@tonic-gate if ((lproc = malloc(sizeof (*lproc))) == NULL) 2057c478bd9Sstevel@tonic-gate debug("insufficient memory to track new process %d", (int)pid); 2067c478bd9Sstevel@tonic-gate else { 2077c478bd9Sstevel@tonic-gate (void) bzero(lproc, sizeof (*lproc)); 2087c478bd9Sstevel@tonic-gate lproc->lpc_pid = pid; 2097c478bd9Sstevel@tonic-gate lproc->lpc_mark = 1; 2107c478bd9Sstevel@tonic-gate lproc->lpc_collection = lcol; 2117c478bd9Sstevel@tonic-gate lproc->lpc_psinfo_fd = -1; 2127c478bd9Sstevel@tonic-gate lproc->lpc_pgdata_fd = -1; 2137c478bd9Sstevel@tonic-gate lproc->lpc_xmap_fd = -1; 2147c478bd9Sstevel@tonic-gate 2157c478bd9Sstevel@tonic-gate /* 2167c478bd9Sstevel@tonic-gate * If the caller didn't flag this process as unscannable 2177c478bd9Sstevel@tonic-gate * already, do some more checking. 2187c478bd9Sstevel@tonic-gate */ 2197c478bd9Sstevel@tonic-gate lproc->lpc_unscannable = unscannable || proc_issystem(pid); 2207c478bd9Sstevel@tonic-gate 2217c478bd9Sstevel@tonic-gate #ifdef DEBUG 2227c478bd9Sstevel@tonic-gate /* 2237c478bd9Sstevel@tonic-gate * Verify the sanity of lprocess. It should not contain the 2247c478bd9Sstevel@tonic-gate * process we are about to prepend. 2257c478bd9Sstevel@tonic-gate */ 2267c478bd9Sstevel@tonic-gate if (lcollection_member(lcol, lproc)) { 2277c478bd9Sstevel@tonic-gate lprocess_t *cur = lcol->lcol_lprocess; 2287c478bd9Sstevel@tonic-gate debug("The collection %lld already has these members, " 2290209230bSgjelinek "including me, %d!\n", 2300209230bSgjelinek (long long)lcol->lcol_id.rcid_val, 2317c478bd9Sstevel@tonic-gate (int)lproc->lpc_pid); 2327c478bd9Sstevel@tonic-gate while (cur != NULL) { 2337c478bd9Sstevel@tonic-gate debug("\t%d\n", (int)cur->lpc_pid); 2347c478bd9Sstevel@tonic-gate cur = cur->lpc_next; 2357c478bd9Sstevel@tonic-gate } 2367c478bd9Sstevel@tonic-gate info(gettext("process already on lprocess\n")); 2377c478bd9Sstevel@tonic-gate abort(); 2387c478bd9Sstevel@tonic-gate } 2397c478bd9Sstevel@tonic-gate #endif /* DEBUG */ 2407c478bd9Sstevel@tonic-gate lproc->lpc_next = lcol->lcol_lprocess; 2417c478bd9Sstevel@tonic-gate if (lproc->lpc_next != NULL) 2427c478bd9Sstevel@tonic-gate lproc->lpc_next->lpc_prev = lproc; 2437c478bd9Sstevel@tonic-gate lproc->lpc_prev = NULL; 2447c478bd9Sstevel@tonic-gate lcol->lcol_lprocess = lproc; 2457c478bd9Sstevel@tonic-gate 2460209230bSgjelinek debug("tracking %s %ld %d %s%s\n", 2470209230bSgjelinek (colid.rcid_type == RCIDT_PROJECT ? "project" : "zone"), 2480209230bSgjelinek (long)colid.rcid_val, 2490209230bSgjelinek (int)pid, psinfop->pr_psargs, 2507c478bd9Sstevel@tonic-gate (lproc->lpc_unscannable != 0) ? " (not scannable)" : ""); 2517c478bd9Sstevel@tonic-gate lcol->lcol_stat.lcols_proc_in++; 2527c478bd9Sstevel@tonic-gate } 2537c478bd9Sstevel@tonic-gate } 2547c478bd9Sstevel@tonic-gate 2557c478bd9Sstevel@tonic-gate static int 2567c478bd9Sstevel@tonic-gate list_walk_process_cb(lcollection_t *lcol, void *arg) 2577c478bd9Sstevel@tonic-gate { 2587c478bd9Sstevel@tonic-gate int (*cb)(lcollection_t *, lprocess_t *) = 2597c478bd9Sstevel@tonic-gate (int(*)(lcollection_t *, lprocess_t *))arg; 2607c478bd9Sstevel@tonic-gate lprocess_t *member; 2617c478bd9Sstevel@tonic-gate lprocess_t *next; 2627c478bd9Sstevel@tonic-gate 2637c478bd9Sstevel@tonic-gate member = lcol->lcol_lprocess; 2647c478bd9Sstevel@tonic-gate while (member != NULL) { 2657c478bd9Sstevel@tonic-gate pid_t pid = member->lpc_pid; 2667c478bd9Sstevel@tonic-gate next = member->lpc_next; 2677c478bd9Sstevel@tonic-gate 2687c478bd9Sstevel@tonic-gate debug_high("list_walk_all lpc %d\n", (int)pid); 2697c478bd9Sstevel@tonic-gate if (cb(lcol, member) != 0) { 2707c478bd9Sstevel@tonic-gate debug_high("list_walk_all aborted at lpc %d\n", 2717c478bd9Sstevel@tonic-gate (int)pid); 2727c478bd9Sstevel@tonic-gate return (1); 2737c478bd9Sstevel@tonic-gate } 2747c478bd9Sstevel@tonic-gate member = next; 2757c478bd9Sstevel@tonic-gate } 2767c478bd9Sstevel@tonic-gate 2777c478bd9Sstevel@tonic-gate return (0); 2787c478bd9Sstevel@tonic-gate } 2797c478bd9Sstevel@tonic-gate 2807c478bd9Sstevel@tonic-gate /* 2817c478bd9Sstevel@tonic-gate * Invoke the given callback for each process in each collection. Callbacks 2827c478bd9Sstevel@tonic-gate * are allowed to change the linkage of the process on which they act. 2837c478bd9Sstevel@tonic-gate */ 2847c478bd9Sstevel@tonic-gate static void 2857c478bd9Sstevel@tonic-gate list_walk_all(int (*cb)(lcollection_t *, lprocess_t *)) 2867c478bd9Sstevel@tonic-gate { 2877c478bd9Sstevel@tonic-gate list_walk_collection(list_walk_process_cb, (void *)cb); 2887c478bd9Sstevel@tonic-gate } 2897c478bd9Sstevel@tonic-gate 2907c478bd9Sstevel@tonic-gate static void 2917c478bd9Sstevel@tonic-gate revoke_psinfo(rfd_t *rfd) 2927c478bd9Sstevel@tonic-gate { 2937c478bd9Sstevel@tonic-gate lprocess_t *lpc = (lprocess_t *)rfd->rfd_data; 2947c478bd9Sstevel@tonic-gate 2957c478bd9Sstevel@tonic-gate if (lpc != NULL) { 2967c478bd9Sstevel@tonic-gate debug("revoking psinfo fd for process %d\n", (int)lpc->lpc_pid); 2977c478bd9Sstevel@tonic-gate ASSERT(lpc->lpc_psinfo_fd != -1); 2987c478bd9Sstevel@tonic-gate lpc->lpc_psinfo_fd = -1; 2997c478bd9Sstevel@tonic-gate } else 3007c478bd9Sstevel@tonic-gate debug("revoking psinfo fd for unknown process\n"); 3017c478bd9Sstevel@tonic-gate } 3027c478bd9Sstevel@tonic-gate 3037c478bd9Sstevel@tonic-gate /* 3047c478bd9Sstevel@tonic-gate * Retrieve a process's psinfo via an already-opened or new file descriptor. 3057c478bd9Sstevel@tonic-gate * The supplied descriptor will be closed on failure. An optional callback 3067c478bd9Sstevel@tonic-gate * will be invoked with the last descriptor tried, and a supplied callback 3077c478bd9Sstevel@tonic-gate * argument, as its arguments, such that the new descriptor may be cached, or 3087c478bd9Sstevel@tonic-gate * an old one may be invalidated. If the result of the callback is zero, the 3097c478bd9Sstevel@tonic-gate * the caller is to assume responsibility for the file descriptor, to close it 3107c478bd9Sstevel@tonic-gate * with rfd_close(). 3117c478bd9Sstevel@tonic-gate * 3127c478bd9Sstevel@tonic-gate * On failure, a nonzero value is returned. 3137c478bd9Sstevel@tonic-gate */ 3147c478bd9Sstevel@tonic-gate int 3157c478bd9Sstevel@tonic-gate get_psinfo(pid_t pid, psinfo_t *psinfo, int cached_fd, 3167c478bd9Sstevel@tonic-gate int(*fd_update_cb)(void *, int), void *arg, lprocess_t *lpc) 3177c478bd9Sstevel@tonic-gate { 3187c478bd9Sstevel@tonic-gate int fd; 3197c478bd9Sstevel@tonic-gate int can_try_uncached; 3207c478bd9Sstevel@tonic-gate 3217c478bd9Sstevel@tonic-gate ASSERT(!(cached_fd > 0 && fd_update_cb == NULL)); 3227c478bd9Sstevel@tonic-gate 3237c478bd9Sstevel@tonic-gate do { 3247c478bd9Sstevel@tonic-gate if (cached_fd >= 0) { 3257c478bd9Sstevel@tonic-gate fd = cached_fd; 3267c478bd9Sstevel@tonic-gate can_try_uncached = 1; 3277c478bd9Sstevel@tonic-gate debug_high("%d/psinfo, trying cached fd %d\n", 3287c478bd9Sstevel@tonic-gate (int)pid, fd); 3297c478bd9Sstevel@tonic-gate } else { 3307c478bd9Sstevel@tonic-gate char pathbuf[PROC_PATH_MAX]; 3317c478bd9Sstevel@tonic-gate 3327c478bd9Sstevel@tonic-gate can_try_uncached = 0; 3337c478bd9Sstevel@tonic-gate (void) snprintf(pathbuf, sizeof (pathbuf), 3347c478bd9Sstevel@tonic-gate "/proc/%d/psinfo", (int)pid); 3357c478bd9Sstevel@tonic-gate if ((fd = rfd_open(pathbuf, 1, RFD_PSINFO, 3367c478bd9Sstevel@tonic-gate revoke_psinfo, lpc, O_RDONLY, 0000)) < 0) { 3377c478bd9Sstevel@tonic-gate debug("cannot open %s", pathbuf); 3387c478bd9Sstevel@tonic-gate break; 3397c478bd9Sstevel@tonic-gate } else 3407c478bd9Sstevel@tonic-gate debug_high("opened %s, fd %d\n", pathbuf, fd); 3417c478bd9Sstevel@tonic-gate } 3427c478bd9Sstevel@tonic-gate 3437c478bd9Sstevel@tonic-gate if (pread(fd, psinfo, sizeof (*psinfo), 0) == 3447c478bd9Sstevel@tonic-gate sizeof (*psinfo) && psinfo->pr_pid == pid) 3457c478bd9Sstevel@tonic-gate break; 3467c478bd9Sstevel@tonic-gate else { 3477c478bd9Sstevel@tonic-gate debug_high("closed fd %d\n", fd); 3487c478bd9Sstevel@tonic-gate if (rfd_close(fd) != 0) 3497c478bd9Sstevel@tonic-gate debug("could not close fd %d", fd); 3507c478bd9Sstevel@tonic-gate fd = cached_fd = -1; 3517c478bd9Sstevel@tonic-gate } 3527c478bd9Sstevel@tonic-gate } while (can_try_uncached == 1); 3537c478bd9Sstevel@tonic-gate 3547c478bd9Sstevel@tonic-gate if (fd_update_cb == NULL || fd_update_cb(arg, fd) != 0) 3557c478bd9Sstevel@tonic-gate if (fd >= 0) { 3567c478bd9Sstevel@tonic-gate debug_high("closed %s fd %d\n", fd_update_cb == NULL ? 3577c478bd9Sstevel@tonic-gate "uncached" : "cached", fd); 3587c478bd9Sstevel@tonic-gate if (rfd_close(fd) != 0) 3597c478bd9Sstevel@tonic-gate debug("could not close fd %d", fd); 3607c478bd9Sstevel@tonic-gate } 3617c478bd9Sstevel@tonic-gate 3627c478bd9Sstevel@tonic-gate debug_high("get_psinfo ret %d, fd %d, %s\n", ((fd >= 0) ? 0 : -1), fd, 3637c478bd9Sstevel@tonic-gate fd_update_cb != NULL ? "cached" : "uncached"); 3647c478bd9Sstevel@tonic-gate return ((fd >= 0) ? 0 : -1); 3657c478bd9Sstevel@tonic-gate } 3667c478bd9Sstevel@tonic-gate 3677c478bd9Sstevel@tonic-gate /* 3680209230bSgjelinek * Retrieve the collection membership of all processes and update the psinfo of 3690209230bSgjelinek * those non-system, non-zombie ones in collections. For global zone processes, 3700209230bSgjelinek * we first attempt to put the process into a capped project collection. We 3710209230bSgjelinek * also want to track the process for the global zone collection as a whole. 3727c478bd9Sstevel@tonic-gate */ 3737c478bd9Sstevel@tonic-gate static void 3747c478bd9Sstevel@tonic-gate proc_cb(const pid_t pid) 3757c478bd9Sstevel@tonic-gate { 3767c478bd9Sstevel@tonic-gate psinfo_t psinfo; 3777c478bd9Sstevel@tonic-gate 3780209230bSgjelinek if (get_psinfo(pid, &psinfo, -1, NULL, NULL, NULL) == 0) { 3790209230bSgjelinek lprocess_insert_mark(&psinfo); 3800209230bSgjelinek if (gz_capped && psinfo.pr_zoneid == GLOBAL_ZONEID) { 3810209230bSgjelinek /* 3820209230bSgjelinek * We also want to track this process for the global 3830209230bSgjelinek * zone as a whole so add it to the global zone 3840209230bSgjelinek * collection as well. 3850209230bSgjelinek */ 3860209230bSgjelinek psinfo.pr_projid = -1; 3870209230bSgjelinek lprocess_insert_mark(&psinfo); 3880209230bSgjelinek } 3890209230bSgjelinek } 3907c478bd9Sstevel@tonic-gate } 3917c478bd9Sstevel@tonic-gate 3927c478bd9Sstevel@tonic-gate /* 3937c478bd9Sstevel@tonic-gate * Cache the process' psinfo fd, taking responsibility for freeing it. 3947c478bd9Sstevel@tonic-gate */ 3957c478bd9Sstevel@tonic-gate int 3967c478bd9Sstevel@tonic-gate lprocess_update_psinfo_fd_cb(void *arg, int fd) 3977c478bd9Sstevel@tonic-gate { 3987c478bd9Sstevel@tonic-gate lprocess_t *lpc = arg; 3997c478bd9Sstevel@tonic-gate 4007c478bd9Sstevel@tonic-gate lpc->lpc_psinfo_fd = fd; 4017c478bd9Sstevel@tonic-gate return (0); 4027c478bd9Sstevel@tonic-gate } 4037c478bd9Sstevel@tonic-gate 4047c478bd9Sstevel@tonic-gate /* 4050209230bSgjelinek * Get the system pagesize. 4067c478bd9Sstevel@tonic-gate */ 4070209230bSgjelinek static void 4080209230bSgjelinek get_page_size(void) 4097c478bd9Sstevel@tonic-gate { 4100209230bSgjelinek page_size_kb = sysconf(_SC_PAGESIZE) / 1024; 4110209230bSgjelinek debug("physical page size: %luKB\n", page_size_kb); 4127c478bd9Sstevel@tonic-gate } 4137c478bd9Sstevel@tonic-gate 4140209230bSgjelinek static void 4150209230bSgjelinek tm_fmt(char *msg, hrtime_t t1, hrtime_t t2) 4160209230bSgjelinek { 4170209230bSgjelinek hrtime_t diff = t2 - t1; 4180209230bSgjelinek 4190209230bSgjelinek if (diff < MILLISEC) 4200209230bSgjelinek debug("%s: %lld nanoseconds\n", msg, diff); 4210209230bSgjelinek else if (diff < MICROSEC) 4220209230bSgjelinek debug("%s: %.2f microseconds\n", msg, (float)diff / MILLISEC); 4230209230bSgjelinek else if (diff < NANOSEC) 4240209230bSgjelinek debug("%s: %.2f milliseconds\n", msg, (float)diff / MICROSEC); 4250209230bSgjelinek else 4260209230bSgjelinek debug("%s: %.2f seconds\n", msg, (float)diff / NANOSEC); 4270209230bSgjelinek } 4280209230bSgjelinek 4290209230bSgjelinek /* 4300209230bSgjelinek * Get the zone's & project's RSS from the kernel. 4310209230bSgjelinek */ 4320209230bSgjelinek static void 4330209230bSgjelinek rss_sample(boolean_t my_zone_only, uint_t col_types) 4340209230bSgjelinek { 4350209230bSgjelinek size_t nres; 4360209230bSgjelinek size_t i; 4370209230bSgjelinek uint_t flags; 4380209230bSgjelinek hrtime_t t1, t2; 4390209230bSgjelinek 4400209230bSgjelinek if (my_zone_only) { 4410209230bSgjelinek flags = VMUSAGE_ZONE; 4420209230bSgjelinek } else { 4430209230bSgjelinek flags = 0; 4440209230bSgjelinek if (col_types & CAPPED_PROJECT) 4450209230bSgjelinek flags |= VMUSAGE_PROJECTS; 4460209230bSgjelinek if (col_types & CAPPED_ZONE && my_zoneid == GLOBAL_ZONEID) 4470209230bSgjelinek flags |= VMUSAGE_ALL_ZONES; 4480209230bSgjelinek } 4490209230bSgjelinek 4500209230bSgjelinek debug("vmusage sample flags 0x%x\n", flags); 4510209230bSgjelinek if (flags == 0) 4520209230bSgjelinek return; 4530209230bSgjelinek 4540209230bSgjelinek again: 4550209230bSgjelinek /* try the current buffer to see if the list will fit */ 4560209230bSgjelinek nres = vmu_vals_len; 4570209230bSgjelinek t1 = gethrtime(); 4580209230bSgjelinek if (getvmusage(flags, my_zone_only ? 0 : rcfg.rcfg_rss_sample_interval, 4590209230bSgjelinek vmu_vals, &nres) != 0) { 4600209230bSgjelinek if (errno != EOVERFLOW) { 4610209230bSgjelinek warn(gettext("can't read RSS from kernel\n")); 4620209230bSgjelinek return; 4630209230bSgjelinek } 4640209230bSgjelinek } 4650209230bSgjelinek t2 = gethrtime(); 4660209230bSgjelinek tm_fmt("getvmusage time", t1, t2); 4670209230bSgjelinek 4680209230bSgjelinek debug("kernel nres %lu\n", (ulong_t)nres); 4690209230bSgjelinek 4700209230bSgjelinek if (nres > vmu_vals_len) { 4710209230bSgjelinek /* array size is now too small, increase it and try again */ 4720209230bSgjelinek free(vmu_vals); 4730209230bSgjelinek 4740209230bSgjelinek if ((vmu_vals = (vmusage_t *)calloc(nres, 4750209230bSgjelinek sizeof (vmusage_t))) == NULL) { 4760209230bSgjelinek warn(gettext("out of memory: could not read RSS from " 4770209230bSgjelinek "kernel\n")); 4780209230bSgjelinek vmu_vals_len = nvmu_vals = 0; 4790209230bSgjelinek return; 4800209230bSgjelinek } 4810209230bSgjelinek vmu_vals_len = nres; 4820209230bSgjelinek goto again; 4830209230bSgjelinek } 4840209230bSgjelinek 4850209230bSgjelinek nvmu_vals = nres; 4860209230bSgjelinek 4870209230bSgjelinek debug("vmusage_sample\n"); 4880209230bSgjelinek for (i = 0; i < nvmu_vals; i++) { 4890209230bSgjelinek debug("%d: id: %d, type: 0x%x, rss_all: %llu (%lluKB), " 4900209230bSgjelinek "swap: %llu\n", (int)i, (int)vmu_vals[i].vmu_id, 4910209230bSgjelinek vmu_vals[i].vmu_type, 4920209230bSgjelinek (unsigned long long)vmu_vals[i].vmu_rss_all, 4930209230bSgjelinek (unsigned long long)vmu_vals[i].vmu_rss_all / 1024, 4940209230bSgjelinek (unsigned long long)vmu_vals[i].vmu_swap_all); 4950209230bSgjelinek } 4960209230bSgjelinek } 4970209230bSgjelinek 4980209230bSgjelinek static void 4990209230bSgjelinek update_col_rss(lcollection_t *lcol) 5000209230bSgjelinek { 5010209230bSgjelinek int i; 5020209230bSgjelinek 5030209230bSgjelinek lcol->lcol_rss = 0; 5040209230bSgjelinek lcol->lcol_image_size = 0; 5050209230bSgjelinek 5060209230bSgjelinek for (i = 0; i < nvmu_vals; i++) { 5070209230bSgjelinek if (vmu_vals[i].vmu_id != lcol->lcol_id.rcid_val) 5080209230bSgjelinek continue; 5090209230bSgjelinek 5100209230bSgjelinek if (vmu_vals[i].vmu_type == VMUSAGE_ZONE && 5110209230bSgjelinek lcol->lcol_id.rcid_type != RCIDT_ZONE) 5120209230bSgjelinek continue; 5130209230bSgjelinek 5140209230bSgjelinek if (vmu_vals[i].vmu_type == VMUSAGE_PROJECTS && 5150209230bSgjelinek lcol->lcol_id.rcid_type != RCIDT_PROJECT) 5160209230bSgjelinek continue; 5170209230bSgjelinek 5180209230bSgjelinek /* we found the right RSS entry, update the collection vals */ 5190209230bSgjelinek lcol->lcol_rss = vmu_vals[i].vmu_rss_all / 1024; 5200209230bSgjelinek lcol->lcol_image_size = vmu_vals[i].vmu_swap_all / 1024; 5210209230bSgjelinek break; 5220209230bSgjelinek } 5237c478bd9Sstevel@tonic-gate } 5247c478bd9Sstevel@tonic-gate 5257c478bd9Sstevel@tonic-gate /* 5267c478bd9Sstevel@tonic-gate * Sample the collection RSS, updating the collection's statistics with the 5270209230bSgjelinek * results. Also, sum the rss of all capped projects & return true if 5280209230bSgjelinek * the collection is over cap. 5297c478bd9Sstevel@tonic-gate */ 5307c478bd9Sstevel@tonic-gate static int 5317c478bd9Sstevel@tonic-gate rss_sample_col_cb(lcollection_t *lcol, void *arg) 5327c478bd9Sstevel@tonic-gate { 5337c478bd9Sstevel@tonic-gate int64_t excess; 5347c478bd9Sstevel@tonic-gate uint64_t rss; 5350209230bSgjelinek sample_col_arg_t *col_argp = (sample_col_arg_t *)arg; 5367c478bd9Sstevel@tonic-gate 5370209230bSgjelinek update_col_rss(lcol); 5387c478bd9Sstevel@tonic-gate 5397c478bd9Sstevel@tonic-gate lcol->lcol_stat.lcols_rss_sample++; 5407c478bd9Sstevel@tonic-gate rss = lcol->lcol_rss; 5410209230bSgjelinek excess = rss - lcol->lcol_rss_cap; 5420209230bSgjelinek if (excess > 0) { 5437c478bd9Sstevel@tonic-gate lcol->lcol_stat.lcols_rss_act_sum += rss; 5440209230bSgjelinek col_argp->sca_any_over_cap = B_TRUE; 5450209230bSgjelinek if (lcol->lcol_id.rcid_type == RCIDT_PROJECT) 5460209230bSgjelinek col_argp->sca_project_over_cap = B_TRUE; 5470209230bSgjelinek } 5487c478bd9Sstevel@tonic-gate lcol->lcol_stat.lcols_rss_sum += rss; 5497c478bd9Sstevel@tonic-gate 5507c478bd9Sstevel@tonic-gate if (lcol->lcol_stat.lcols_min_rss > rss) 5517c478bd9Sstevel@tonic-gate lcol->lcol_stat.lcols_min_rss = rss; 5527c478bd9Sstevel@tonic-gate if (lcol->lcol_stat.lcols_max_rss < rss) 5537c478bd9Sstevel@tonic-gate lcol->lcol_stat.lcols_max_rss = rss; 5547c478bd9Sstevel@tonic-gate 5557c478bd9Sstevel@tonic-gate return (0); 5567c478bd9Sstevel@tonic-gate } 5577c478bd9Sstevel@tonic-gate 5587c478bd9Sstevel@tonic-gate /* 5590209230bSgjelinek * Determine if we have capped projects, capped zones or both. 5600209230bSgjelinek */ 5610209230bSgjelinek static int 5620209230bSgjelinek col_type_cb(lcollection_t *lcol, void *arg) 5630209230bSgjelinek { 5640209230bSgjelinek uint_t *col_type = (uint_t *)arg; 5650209230bSgjelinek 5660209230bSgjelinek /* skip uncapped collections */ 5670209230bSgjelinek if (lcol->lcol_rss_cap == 0) 5680209230bSgjelinek return (1); 5690209230bSgjelinek 5700209230bSgjelinek if (lcol->lcol_id.rcid_type == RCIDT_PROJECT) 5710209230bSgjelinek *col_type |= CAPPED_PROJECT; 5720209230bSgjelinek else 5730209230bSgjelinek *col_type |= CAPPED_ZONE; 5740209230bSgjelinek 5750209230bSgjelinek /* once we know everything is capped, we can stop looking */ 5760209230bSgjelinek if ((*col_type & CAPPED_ZONE) && (*col_type & CAPPED_PROJECT)) 5770209230bSgjelinek return (1); 5780209230bSgjelinek 5790209230bSgjelinek return (0); 5800209230bSgjelinek } 5810209230bSgjelinek 5820209230bSgjelinek /* 5837c478bd9Sstevel@tonic-gate * Open /proc and walk entries. 5847c478bd9Sstevel@tonic-gate */ 5857c478bd9Sstevel@tonic-gate static void 5867c478bd9Sstevel@tonic-gate proc_walk_all(void (*cb)(const pid_t)) 5877c478bd9Sstevel@tonic-gate { 5887c478bd9Sstevel@tonic-gate DIR *pdir; 5897c478bd9Sstevel@tonic-gate struct dirent *dirent; 5907c478bd9Sstevel@tonic-gate pid_t pid; 5917c478bd9Sstevel@tonic-gate 5927c478bd9Sstevel@tonic-gate (void) rfd_reserve(1); 5937c478bd9Sstevel@tonic-gate if ((pdir = opendir("/proc")) == NULL) 5947c478bd9Sstevel@tonic-gate die(gettext("couldn't open /proc!")); 5957c478bd9Sstevel@tonic-gate 5967c478bd9Sstevel@tonic-gate while ((dirent = readdir(pdir)) != NULL) { 5977c478bd9Sstevel@tonic-gate if (strcmp(".", dirent->d_name) == 0 || 5987c478bd9Sstevel@tonic-gate strcmp("..", dirent->d_name) == 0) 5997c478bd9Sstevel@tonic-gate continue; 6007c478bd9Sstevel@tonic-gate pid = atoi(dirent->d_name); 6017c478bd9Sstevel@tonic-gate ASSERT(pid != 0 || strcmp(dirent->d_name, "0") == 0); 6027c478bd9Sstevel@tonic-gate if (pid == rcapd_pid) 6037c478bd9Sstevel@tonic-gate continue; 6047c478bd9Sstevel@tonic-gate else 6057c478bd9Sstevel@tonic-gate cb(pid); 6067c478bd9Sstevel@tonic-gate } 6077c478bd9Sstevel@tonic-gate (void) closedir(pdir); 6087c478bd9Sstevel@tonic-gate } 6097c478bd9Sstevel@tonic-gate 6107c478bd9Sstevel@tonic-gate /* 6117c478bd9Sstevel@tonic-gate * Clear unmarked callback. 6127c478bd9Sstevel@tonic-gate */ 6137c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 6147c478bd9Sstevel@tonic-gate static int 6157c478bd9Sstevel@tonic-gate sweep_process_cb(lcollection_t *lcol, lprocess_t *lpc) 6167c478bd9Sstevel@tonic-gate { 6177c478bd9Sstevel@tonic-gate if (lpc->lpc_mark) { 6187c478bd9Sstevel@tonic-gate lpc->lpc_mark = 0; 6197c478bd9Sstevel@tonic-gate } else { 6207c478bd9Sstevel@tonic-gate debug("process %d finished\n", (int)lpc->lpc_pid); 6217c478bd9Sstevel@tonic-gate lprocess_free(lpc); 6227c478bd9Sstevel@tonic-gate } 6237c478bd9Sstevel@tonic-gate 6247c478bd9Sstevel@tonic-gate return (0); 6257c478bd9Sstevel@tonic-gate } 6267c478bd9Sstevel@tonic-gate 6277c478bd9Sstevel@tonic-gate /* 6287c478bd9Sstevel@tonic-gate * Print, for debugging purposes, a collection's recently-sampled RSS and 6297c478bd9Sstevel@tonic-gate * excess. 6307c478bd9Sstevel@tonic-gate */ 6317c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 6327c478bd9Sstevel@tonic-gate static int 6337c478bd9Sstevel@tonic-gate excess_print_cb(lcollection_t *lcol, void *arg) 6347c478bd9Sstevel@tonic-gate { 6357c478bd9Sstevel@tonic-gate int64_t excess = lcol->lcol_rss - lcol->lcol_rss_cap; 6367c478bd9Sstevel@tonic-gate 6377c478bd9Sstevel@tonic-gate debug("%s %s rss/cap: %llu/%llu, excess = %lld kB\n", 6380209230bSgjelinek (lcol->lcol_id.rcid_type == RCIDT_PROJECT ? "project" : "zone"), 6390209230bSgjelinek lcol->lcol_name, 6407c478bd9Sstevel@tonic-gate (unsigned long long)lcol->lcol_rss, 6417c478bd9Sstevel@tonic-gate (unsigned long long)lcol->lcol_rss_cap, 6427c478bd9Sstevel@tonic-gate (long long)excess); 6437c478bd9Sstevel@tonic-gate 6447c478bd9Sstevel@tonic-gate return (0); 6457c478bd9Sstevel@tonic-gate } 6467c478bd9Sstevel@tonic-gate 6477c478bd9Sstevel@tonic-gate /* 6487c478bd9Sstevel@tonic-gate * Scan those collections which have exceeded their caps. 6490209230bSgjelinek * 6500209230bSgjelinek * If we're running in the global zone it might have a cap. We don't want to 6510209230bSgjelinek * do any capping for the global zone yet since we might get under the cap by 6520209230bSgjelinek * just capping the projects in the global zone. 6537c478bd9Sstevel@tonic-gate */ 6547c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 6557c478bd9Sstevel@tonic-gate static int 6567c478bd9Sstevel@tonic-gate scan_cb(lcollection_t *lcol, void *arg) 6577c478bd9Sstevel@tonic-gate { 6587c478bd9Sstevel@tonic-gate int64_t excess; 6597c478bd9Sstevel@tonic-gate 6600209230bSgjelinek /* skip over global zone collection for now but keep track for later */ 6610209230bSgjelinek if (lcol->lcol_id.rcid_type == RCIDT_ZONE && 6620209230bSgjelinek lcol->lcol_id.rcid_val == GLOBAL_ZONEID) { 6630209230bSgjelinek gz_col = lcol; 6640209230bSgjelinek return (0); 6650209230bSgjelinek } 6660209230bSgjelinek 6677c478bd9Sstevel@tonic-gate if ((excess = lcol->lcol_rss - lcol->lcol_rss_cap) > 0) { 6687c478bd9Sstevel@tonic-gate scan(lcol, excess); 6697c478bd9Sstevel@tonic-gate lcol->lcol_stat.lcols_scan++; 6707c478bd9Sstevel@tonic-gate } 6717c478bd9Sstevel@tonic-gate 6727c478bd9Sstevel@tonic-gate return (0); 6737c478bd9Sstevel@tonic-gate } 6747c478bd9Sstevel@tonic-gate 6757c478bd9Sstevel@tonic-gate /* 6760209230bSgjelinek * Scan the global zone collection and see if it still exceeds its cap. 6770209230bSgjelinek * We take into account the effects of capping any global zone projects here. 6780209230bSgjelinek */ 6790209230bSgjelinek static void 6800209230bSgjelinek scan_gz(lcollection_t *lcol, boolean_t project_over_cap) 6810209230bSgjelinek { 6820209230bSgjelinek int64_t excess; 6830209230bSgjelinek 6840209230bSgjelinek /* 6850209230bSgjelinek * If we had projects over their cap and the global zone was also over 6860209230bSgjelinek * its cap then we need to get the up-to-date global zone rss to 6870209230bSgjelinek * determine if we are still over the global zone cap. We might have 6880209230bSgjelinek * gone under while we scanned the capped projects. If there were no 6890209230bSgjelinek * projects over cap then we can use the rss value we already have for 6900209230bSgjelinek * the global zone. 6910209230bSgjelinek */ 6920209230bSgjelinek excess = lcol->lcol_rss - lcol->lcol_rss_cap; 6930209230bSgjelinek if (project_over_cap && excess > 0) { 6940209230bSgjelinek rss_sample(B_TRUE, CAPPED_ZONE); 6950209230bSgjelinek update_col_rss(lcol); 6960209230bSgjelinek excess = lcol->lcol_rss - lcol->lcol_rss_cap; 6970209230bSgjelinek } 6980209230bSgjelinek 6990209230bSgjelinek if (excess > 0) { 7000209230bSgjelinek debug("global zone excess %lldKB\n", (long long)excess); 7010209230bSgjelinek scan(lcol, excess); 7020209230bSgjelinek lcol->lcol_stat.lcols_scan++; 7030209230bSgjelinek } 7040209230bSgjelinek } 7050209230bSgjelinek 7060209230bSgjelinek /* 7077c478bd9Sstevel@tonic-gate * Do a soft scan of those collections which have excesses. A soft scan is one 7087c478bd9Sstevel@tonic-gate * in which the cap enforcement pressure is taken into account. The difference 7097c478bd9Sstevel@tonic-gate * between the utilized physical memory and the cap enforcement pressure will 7107c478bd9Sstevel@tonic-gate * be scanned-for, and each collection will be scanned proportionally by their 7117c478bd9Sstevel@tonic-gate * present excesses. 7127c478bd9Sstevel@tonic-gate */ 7137c478bd9Sstevel@tonic-gate static int 7147c478bd9Sstevel@tonic-gate soft_scan_cb(lcollection_t *lcol, void *a) 7157c478bd9Sstevel@tonic-gate { 7167c478bd9Sstevel@tonic-gate int64_t excess; 7177c478bd9Sstevel@tonic-gate soft_scan_arg_t *arg = a; 7187c478bd9Sstevel@tonic-gate 7190209230bSgjelinek /* skip over global zone collection for now but keep track for later */ 7200209230bSgjelinek if (lcol->lcol_id.rcid_type == RCIDT_ZONE && 7210209230bSgjelinek lcol->lcol_id.rcid_val == GLOBAL_ZONEID) { 7220209230bSgjelinek gz_col = lcol; 7230209230bSgjelinek return (0); 7240209230bSgjelinek } 7250209230bSgjelinek 7267c478bd9Sstevel@tonic-gate if ((excess = lcol->lcol_rss - lcol->lcol_rss_cap) > 0) { 7270209230bSgjelinek int64_t adjusted_excess = 7280209230bSgjelinek excess * arg->ssa_scan_goal / arg->ssa_sum_excess; 7290209230bSgjelinek 7300209230bSgjelinek debug("%s %ld excess %lld scan_goal %lld sum_excess %llu, " 7310209230bSgjelinek "scanning %lld\n", 7320209230bSgjelinek (lcol->lcol_id.rcid_type == RCIDT_PROJECT ? 7330209230bSgjelinek "project" : "zone"), 7340209230bSgjelinek (long)lcol->lcol_id.rcid_val, 7357c478bd9Sstevel@tonic-gate (long long)excess, (long long)arg->ssa_scan_goal, 7367c478bd9Sstevel@tonic-gate (unsigned long long)arg->ssa_sum_excess, 7370209230bSgjelinek (long long)adjusted_excess); 7387c478bd9Sstevel@tonic-gate 7390209230bSgjelinek scan(lcol, adjusted_excess); 7407c478bd9Sstevel@tonic-gate lcol->lcol_stat.lcols_scan++; 7417c478bd9Sstevel@tonic-gate } 7427c478bd9Sstevel@tonic-gate 7437c478bd9Sstevel@tonic-gate return (0); 7447c478bd9Sstevel@tonic-gate } 7457c478bd9Sstevel@tonic-gate 7460209230bSgjelinek static void 7470209230bSgjelinek soft_scan_gz(lcollection_t *lcol, void *a) 7480209230bSgjelinek { 7490209230bSgjelinek int64_t excess; 7500209230bSgjelinek soft_scan_arg_t *arg = a; 7510209230bSgjelinek 7520209230bSgjelinek /* 7530209230bSgjelinek * If we had projects over their cap and the global zone was also over 7540209230bSgjelinek * its cap then we need to get the up-to-date global zone rss to 7550209230bSgjelinek * determine if we are still over the global zone cap. We might have 7560209230bSgjelinek * gone under while we scanned the capped projects. If there were no 7570209230bSgjelinek * projects over cap then we can use the rss value we already have for 7580209230bSgjelinek * the global zone. 7590209230bSgjelinek */ 7600209230bSgjelinek excess = lcol->lcol_rss - lcol->lcol_rss_cap; 7610209230bSgjelinek if (arg->ssa_project_over_cap && excess > 0) { 7620209230bSgjelinek rss_sample(B_TRUE, CAPPED_ZONE); 7630209230bSgjelinek update_col_rss(lcol); 7640209230bSgjelinek excess = lcol->lcol_rss - lcol->lcol_rss_cap; 7650209230bSgjelinek } 7660209230bSgjelinek 7670209230bSgjelinek if (excess > 0) { 7680209230bSgjelinek int64_t adjusted_excess = 7690209230bSgjelinek excess * arg->ssa_scan_goal / arg->ssa_sum_excess; 7700209230bSgjelinek 7710209230bSgjelinek debug("%s %ld excess %lld scan_goal %lld sum_excess %llu, " 7720209230bSgjelinek "scanning %lld\n", 7730209230bSgjelinek (lcol->lcol_id.rcid_type == RCIDT_PROJECT ? 7740209230bSgjelinek "project" : "zone"), 7750209230bSgjelinek (long)lcol->lcol_id.rcid_val, 7760209230bSgjelinek (long long)excess, (long long)arg->ssa_scan_goal, 7770209230bSgjelinek (unsigned long long)arg->ssa_sum_excess, 7780209230bSgjelinek (long long)adjusted_excess); 7790209230bSgjelinek 7800209230bSgjelinek scan(lcol, adjusted_excess); 7810209230bSgjelinek lcol->lcol_stat.lcols_scan++; 7820209230bSgjelinek } 7830209230bSgjelinek } 7840209230bSgjelinek 7857c478bd9Sstevel@tonic-gate /* 7867c478bd9Sstevel@tonic-gate * When a scan could happen, but caps aren't enforced tick the 7877c478bd9Sstevel@tonic-gate * lcols_unenforced_cap counter. 7887c478bd9Sstevel@tonic-gate */ 7897c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 7907c478bd9Sstevel@tonic-gate static int 7917c478bd9Sstevel@tonic-gate unenforced_cap_cb(lcollection_t *lcol, void *arg) 7927c478bd9Sstevel@tonic-gate { 7937c478bd9Sstevel@tonic-gate lcol->lcol_stat.lcols_unenforced_cap++; 7947c478bd9Sstevel@tonic-gate 7957c478bd9Sstevel@tonic-gate return (0); 7967c478bd9Sstevel@tonic-gate } 7977c478bd9Sstevel@tonic-gate 7987c478bd9Sstevel@tonic-gate /* 7997c478bd9Sstevel@tonic-gate * Update the count of physically installed memory. 8007c478bd9Sstevel@tonic-gate */ 8017c478bd9Sstevel@tonic-gate static void 8027c478bd9Sstevel@tonic-gate update_phys_total(void) 8037c478bd9Sstevel@tonic-gate { 8047c478bd9Sstevel@tonic-gate uint64_t old_phys_total; 8057c478bd9Sstevel@tonic-gate 8067c478bd9Sstevel@tonic-gate old_phys_total = phys_total; 8070209230bSgjelinek phys_total = (uint64_t)sysconf(_SC_PHYS_PAGES) * page_size_kb; 8087c478bd9Sstevel@tonic-gate if (phys_total != old_phys_total) 8097c478bd9Sstevel@tonic-gate debug("physical memory%s: %lluM\n", (old_phys_total == 0 ? 8107c478bd9Sstevel@tonic-gate "" : " adjusted"), (unsigned long long)(phys_total / 1024)); 8117c478bd9Sstevel@tonic-gate } 8127c478bd9Sstevel@tonic-gate 8137c478bd9Sstevel@tonic-gate /* 8147c478bd9Sstevel@tonic-gate * Unlink a process from its collection, updating relevant statistics, and 8157c478bd9Sstevel@tonic-gate * freeing its associated memory. 8167c478bd9Sstevel@tonic-gate */ 8177c478bd9Sstevel@tonic-gate void 8187c478bd9Sstevel@tonic-gate lprocess_free(lprocess_t *lpc) 8197c478bd9Sstevel@tonic-gate { 8207c478bd9Sstevel@tonic-gate pid_t pid; 8217c478bd9Sstevel@tonic-gate 8227c478bd9Sstevel@tonic-gate lpc->lpc_collection->lcol_stat.lcols_proc_out++; 8237c478bd9Sstevel@tonic-gate 8247c478bd9Sstevel@tonic-gate if (lpc->lpc_prev != NULL) 8257c478bd9Sstevel@tonic-gate lpc->lpc_prev->lpc_next = lpc->lpc_next; 8267c478bd9Sstevel@tonic-gate if (lpc->lpc_next != NULL) 8277c478bd9Sstevel@tonic-gate lpc->lpc_next->lpc_prev = lpc->lpc_prev; 8287c478bd9Sstevel@tonic-gate if (lpc->lpc_collection->lcol_lprocess == lpc) 8297c478bd9Sstevel@tonic-gate lpc->lpc_collection->lcol_lprocess = (lpc->lpc_next != 8307c478bd9Sstevel@tonic-gate lpc ? lpc->lpc_next : NULL); 8317c478bd9Sstevel@tonic-gate lpc->lpc_next = lpc->lpc_prev = NULL; 8327c478bd9Sstevel@tonic-gate 8337c478bd9Sstevel@tonic-gate if (lpc->lpc_prpageheader != NULL) 8347c478bd9Sstevel@tonic-gate free(lpc->lpc_prpageheader); 8357c478bd9Sstevel@tonic-gate if (lpc->lpc_xmap != NULL) 8367c478bd9Sstevel@tonic-gate free(lpc->lpc_xmap); 8377c478bd9Sstevel@tonic-gate if (lpc->lpc_psinfo_fd >= 0) { 8387c478bd9Sstevel@tonic-gate if (rfd_close(lpc->lpc_psinfo_fd) != 0) 8397c478bd9Sstevel@tonic-gate debug("could not close %d lpc_psinfo_fd %d", 8407c478bd9Sstevel@tonic-gate (int)lpc->lpc_pid, lpc->lpc_psinfo_fd); 8417c478bd9Sstevel@tonic-gate lpc->lpc_psinfo_fd = -1; 8427c478bd9Sstevel@tonic-gate } 8437c478bd9Sstevel@tonic-gate if (lpc->lpc_pgdata_fd >= 0) { 8447c478bd9Sstevel@tonic-gate if (rfd_close(lpc->lpc_pgdata_fd) != 0) 8457c478bd9Sstevel@tonic-gate debug("could not close %d lpc_pgdata_fd %d", 8467c478bd9Sstevel@tonic-gate (int)lpc->lpc_pid, lpc->lpc_pgdata_fd); 8477c478bd9Sstevel@tonic-gate lpc->lpc_pgdata_fd = -1; 8487c478bd9Sstevel@tonic-gate } 8497c478bd9Sstevel@tonic-gate if (lpc->lpc_xmap_fd >= 0) { 8507c478bd9Sstevel@tonic-gate if (rfd_close(lpc->lpc_xmap_fd) != 0) 8517c478bd9Sstevel@tonic-gate debug("could not close %d lpc_xmap_fd %d", 8527c478bd9Sstevel@tonic-gate (int)lpc->lpc_pid, lpc->lpc_xmap_fd); 8537c478bd9Sstevel@tonic-gate lpc->lpc_xmap_fd = -1; 8547c478bd9Sstevel@tonic-gate } 8557c478bd9Sstevel@tonic-gate if (lpc->lpc_ignore != NULL) 8567c478bd9Sstevel@tonic-gate lmapping_free(&lpc->lpc_ignore); 8577c478bd9Sstevel@tonic-gate pid = lpc->lpc_pid; 8587c478bd9Sstevel@tonic-gate free(lpc); 8597c478bd9Sstevel@tonic-gate debug_high("process %d freed\n", (int)pid); 8607c478bd9Sstevel@tonic-gate } 8617c478bd9Sstevel@tonic-gate 8627c478bd9Sstevel@tonic-gate /* 8637c478bd9Sstevel@tonic-gate * Collection clear callback. 8647c478bd9Sstevel@tonic-gate */ 8657c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 8667c478bd9Sstevel@tonic-gate static int 8677c478bd9Sstevel@tonic-gate collection_clear_cb(lcollection_t *lcol, void *arg) 8687c478bd9Sstevel@tonic-gate { 8697c478bd9Sstevel@tonic-gate lcol->lcol_mark = 0; 8707c478bd9Sstevel@tonic-gate 8717c478bd9Sstevel@tonic-gate return (0); 8727c478bd9Sstevel@tonic-gate } 8737c478bd9Sstevel@tonic-gate 8747c478bd9Sstevel@tonic-gate /* 8757c478bd9Sstevel@tonic-gate * Respond to a terminating signal by setting a termination flag. 8767c478bd9Sstevel@tonic-gate */ 8777c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 8787c478bd9Sstevel@tonic-gate static void 8797c478bd9Sstevel@tonic-gate terminate_signal(int signal) 8807c478bd9Sstevel@tonic-gate { 8817c478bd9Sstevel@tonic-gate if (termination_signal == 0) 8827c478bd9Sstevel@tonic-gate termination_signal = signal; 8837c478bd9Sstevel@tonic-gate should_run = 0; 8847c478bd9Sstevel@tonic-gate } 8857c478bd9Sstevel@tonic-gate 8867c478bd9Sstevel@tonic-gate /* 8877c478bd9Sstevel@tonic-gate * Handle any synchronous or asynchronous signals that would ordinarily cause a 8887c478bd9Sstevel@tonic-gate * process to abort. 8897c478bd9Sstevel@tonic-gate */ 8907c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 8917c478bd9Sstevel@tonic-gate static void 8927c478bd9Sstevel@tonic-gate abort_signal(int signal) 8937c478bd9Sstevel@tonic-gate { 8947c478bd9Sstevel@tonic-gate /* 8957c478bd9Sstevel@tonic-gate * Allow the scanner to make a last-ditch effort to resume any stopped 8967c478bd9Sstevel@tonic-gate * processes. 8977c478bd9Sstevel@tonic-gate */ 8987c478bd9Sstevel@tonic-gate scan_abort(); 8997c478bd9Sstevel@tonic-gate abort(); 9007c478bd9Sstevel@tonic-gate } 9017c478bd9Sstevel@tonic-gate 9027c478bd9Sstevel@tonic-gate /* 9037c478bd9Sstevel@tonic-gate * Clean up collections which have been removed due to configuration. Unlink 9047c478bd9Sstevel@tonic-gate * the collection from lcollection and free it. 9057c478bd9Sstevel@tonic-gate */ 9067c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 9077c478bd9Sstevel@tonic-gate static int 9087c478bd9Sstevel@tonic-gate collection_sweep_cb(lcollection_t *lcol, void *arg) 9097c478bd9Sstevel@tonic-gate { 9107c478bd9Sstevel@tonic-gate if (lcol->lcol_mark == 0) { 9110209230bSgjelinek debug("freeing %s %s\n", 9120209230bSgjelinek (lcol->lcol_id.rcid_type == RCIDT_PROJECT ? 9130209230bSgjelinek "project" : "zone"), lcol->lcol_name); 9147c478bd9Sstevel@tonic-gate lcollection_free(lcol); 9157c478bd9Sstevel@tonic-gate } 9167c478bd9Sstevel@tonic-gate 9177c478bd9Sstevel@tonic-gate return (0); 9187c478bd9Sstevel@tonic-gate } 9197c478bd9Sstevel@tonic-gate 9207c478bd9Sstevel@tonic-gate /* 9217c478bd9Sstevel@tonic-gate * Set those variables which depend on the global configuration. 9227c478bd9Sstevel@tonic-gate */ 9237c478bd9Sstevel@tonic-gate static void 9247c478bd9Sstevel@tonic-gate finish_configuration(void) 9257c478bd9Sstevel@tonic-gate { 9267c478bd9Sstevel@tonic-gate /* 9277c478bd9Sstevel@tonic-gate * Warn that any lnode (or non-project) mode specification (by an SRM 9287c478bd9Sstevel@tonic-gate * 1.3 configuration file, for example) is ignored. 9297c478bd9Sstevel@tonic-gate */ 9307c478bd9Sstevel@tonic-gate if (strcmp(rcfg.rcfg_mode_name, "project") != 0) { 9317c478bd9Sstevel@tonic-gate warn(gettext("%s mode specification ignored -- using project" 9327c478bd9Sstevel@tonic-gate " mode\n"), rcfg.rcfg_mode_name); 9337c478bd9Sstevel@tonic-gate rcfg.rcfg_mode_name = "project"; 9347c478bd9Sstevel@tonic-gate rcfg.rcfg_mode = rctype_project; 9357c478bd9Sstevel@tonic-gate } 9367c478bd9Sstevel@tonic-gate } 9377c478bd9Sstevel@tonic-gate 9387c478bd9Sstevel@tonic-gate /* 939d75e6a5dStn143363 * Cause the configuration to be reread and applied. 9407c478bd9Sstevel@tonic-gate */ 9417c478bd9Sstevel@tonic-gate static void 942d75e6a5dStn143363 reread_configuration(void) 9437c478bd9Sstevel@tonic-gate { 9447c478bd9Sstevel@tonic-gate rcfg_t rcfg_new; 9457c478bd9Sstevel@tonic-gate 946d75e6a5dStn143363 if (rcfg_read(&rcfg_new, update_statistics) != E_SUCCESS) { 947d75e6a5dStn143363 warn(gettext("can't reread configuration \n")); 948d75e6a5dStn143363 exit(SMF_EXIT_ERR_CONFIG); 949d75e6a5dStn143363 } else { 9507c478bd9Sstevel@tonic-gate /* 951d75e6a5dStn143363 * Done reading configuration. Remove existing 9527c478bd9Sstevel@tonic-gate * collections in case there is a change in collection type. 9537c478bd9Sstevel@tonic-gate */ 9547c478bd9Sstevel@tonic-gate if (rcfg.rcfg_mode != rcfg_new.rcfg_mode) { 9557c478bd9Sstevel@tonic-gate list_walk_collection(collection_clear_cb, NULL); 9567c478bd9Sstevel@tonic-gate list_walk_collection(collection_sweep_cb, NULL); 9577c478bd9Sstevel@tonic-gate } 9587c478bd9Sstevel@tonic-gate 9597c478bd9Sstevel@tonic-gate /* 9607c478bd9Sstevel@tonic-gate * Make the newly-read configuration the global one, and update 9617c478bd9Sstevel@tonic-gate * any variables that depend on it. 9627c478bd9Sstevel@tonic-gate */ 9637c478bd9Sstevel@tonic-gate rcfg = rcfg_new; 9647c478bd9Sstevel@tonic-gate finish_configuration(); 9657c478bd9Sstevel@tonic-gate } 9667c478bd9Sstevel@tonic-gate } 9677c478bd9Sstevel@tonic-gate 9687c478bd9Sstevel@tonic-gate /* 969d75e6a5dStn143363 * First, examine changes, additions, and deletions to cap definitions. 970d75e6a5dStn143363 * Then, set the next event time. 9717c478bd9Sstevel@tonic-gate */ 9727c478bd9Sstevel@tonic-gate static void 9730209230bSgjelinek reconfigure(hrtime_t now, hrtime_t *next_configuration, 9740209230bSgjelinek hrtime_t *next_proc_walk, hrtime_t *next_rss_sample) 9757c478bd9Sstevel@tonic-gate { 9767c478bd9Sstevel@tonic-gate debug("reconfigure...\n"); 9777c478bd9Sstevel@tonic-gate 9787c478bd9Sstevel@tonic-gate /* 9797c478bd9Sstevel@tonic-gate * Walk the lcollection, marking active collections so inactive ones 9807c478bd9Sstevel@tonic-gate * can be freed. 9817c478bd9Sstevel@tonic-gate */ 9827c478bd9Sstevel@tonic-gate list_walk_collection(collection_clear_cb, NULL); 9837c478bd9Sstevel@tonic-gate lcollection_update(LCU_ACTIVE_ONLY); /* mark */ 9847c478bd9Sstevel@tonic-gate list_walk_collection(collection_sweep_cb, NULL); 9850209230bSgjelinek 9860209230bSgjelinek *next_configuration = NEXT_EVENT_TIME(now, 9870209230bSgjelinek rcfg.rcfg_reconfiguration_interval); 9880209230bSgjelinek 9890209230bSgjelinek /* 9900209230bSgjelinek * Reset each event time to the shorter of the previous and new 9910209230bSgjelinek * intervals. 9920209230bSgjelinek */ 9930209230bSgjelinek if (next_report == 0 && rcfg.rcfg_report_interval > 0) 9940209230bSgjelinek next_report = now; 9950209230bSgjelinek else 9960209230bSgjelinek next_report = POSITIVE_MIN(next_report, 9970209230bSgjelinek NEXT_REPORT_EVENT_TIME(now, rcfg.rcfg_report_interval)); 9980209230bSgjelinek 9990209230bSgjelinek if (*next_proc_walk == 0 && rcfg.rcfg_proc_walk_interval > 0) 10000209230bSgjelinek *next_proc_walk = now; 10010209230bSgjelinek else 10020209230bSgjelinek *next_proc_walk = POSITIVE_MIN(*next_proc_walk, 10030209230bSgjelinek NEXT_EVENT_TIME(now, rcfg.rcfg_proc_walk_interval)); 10040209230bSgjelinek 10050209230bSgjelinek if (*next_rss_sample == 0 && rcfg.rcfg_rss_sample_interval > 0) 10060209230bSgjelinek *next_rss_sample = now; 10070209230bSgjelinek else 10080209230bSgjelinek *next_rss_sample = POSITIVE_MIN(*next_rss_sample, 10090209230bSgjelinek NEXT_EVENT_TIME(now, rcfg.rcfg_rss_sample_interval)); 10107c478bd9Sstevel@tonic-gate } 10117c478bd9Sstevel@tonic-gate 10127c478bd9Sstevel@tonic-gate /* 1013d75e6a5dStn143363 * Respond to SIGHUP by triggering the rereading the configuration and cap 10147c478bd9Sstevel@tonic-gate * definitions. 10157c478bd9Sstevel@tonic-gate */ 10167c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 10177c478bd9Sstevel@tonic-gate static void 10187c478bd9Sstevel@tonic-gate sighup(int signal) 10197c478bd9Sstevel@tonic-gate { 10207c478bd9Sstevel@tonic-gate should_reconfigure = 1; 10217c478bd9Sstevel@tonic-gate } 10227c478bd9Sstevel@tonic-gate 10237c478bd9Sstevel@tonic-gate /* 10247c478bd9Sstevel@tonic-gate * Print, for debugging purposes, each collection's interval statistics. 10257c478bd9Sstevel@tonic-gate */ 10267c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 10277c478bd9Sstevel@tonic-gate static int 10287c478bd9Sstevel@tonic-gate simple_report_collection_cb(lcollection_t *lcol, void *arg) 10297c478bd9Sstevel@tonic-gate { 10307c478bd9Sstevel@tonic-gate #define DELTA(field) \ 10310209230bSgjelinek (unsigned long long)( \ 10327c478bd9Sstevel@tonic-gate (lcol->lcol_stat.field - lcol->lcol_stat_old.field)) 10337c478bd9Sstevel@tonic-gate 10347c478bd9Sstevel@tonic-gate debug("%s %s status: succeeded/attempted (k): %llu/%llu, " 10357c478bd9Sstevel@tonic-gate "ineffective/scans/unenforced/samplings: %llu/%llu/%llu/%llu, RSS " 10367c478bd9Sstevel@tonic-gate "min/max (k): %llu/%llu, cap %llu kB, processes/thpt: %llu/%llu, " 10370209230bSgjelinek "%llu scans over %llu ms\n", 10380209230bSgjelinek (lcol->lcol_id.rcid_type == RCIDT_PROJECT ? "project" : "zone"), 10390209230bSgjelinek lcol->lcol_name, 10407c478bd9Sstevel@tonic-gate DELTA(lcols_pg_eff), DELTA(lcols_pg_att), 10417c478bd9Sstevel@tonic-gate DELTA(lcols_scan_ineffective), DELTA(lcols_scan), 10427c478bd9Sstevel@tonic-gate DELTA(lcols_unenforced_cap), DELTA(lcols_rss_sample), 10430209230bSgjelinek (unsigned long long)lcol->lcol_stat.lcols_min_rss, 10440209230bSgjelinek (unsigned long long)lcol->lcol_stat.lcols_max_rss, 10457c478bd9Sstevel@tonic-gate (unsigned long long)lcol->lcol_rss_cap, 10467c478bd9Sstevel@tonic-gate (unsigned long long)(lcol->lcol_stat.lcols_proc_in - 10477c478bd9Sstevel@tonic-gate lcol->lcol_stat.lcols_proc_out), DELTA(lcols_proc_out), 1048*19449258SJosef 'Jeff' Sipek DELTA(lcols_scan_count), 1049*19449258SJosef 'Jeff' Sipek NSEC2MSEC(DELTA(lcols_scan_time_complete))); 10507c478bd9Sstevel@tonic-gate 10517c478bd9Sstevel@tonic-gate #undef DELTA 10527c478bd9Sstevel@tonic-gate 10537c478bd9Sstevel@tonic-gate return (0); 10547c478bd9Sstevel@tonic-gate } 10557c478bd9Sstevel@tonic-gate 10567c478bd9Sstevel@tonic-gate /* 10577c478bd9Sstevel@tonic-gate * Record each collection's interval statistics in the statistics file. 10587c478bd9Sstevel@tonic-gate */ 10597c478bd9Sstevel@tonic-gate static int 10607c478bd9Sstevel@tonic-gate report_collection_cb(lcollection_t *lcol, void *arg) 10617c478bd9Sstevel@tonic-gate { 10627c478bd9Sstevel@tonic-gate lcollection_report_t dc; 10637c478bd9Sstevel@tonic-gate int fd = (intptr_t)arg; 10647c478bd9Sstevel@tonic-gate 10657c478bd9Sstevel@tonic-gate /* 10667c478bd9Sstevel@tonic-gate * Copy the relevant fields to the collection's record. 10677c478bd9Sstevel@tonic-gate */ 10687c478bd9Sstevel@tonic-gate bzero(&dc, sizeof (dc)); 10697c478bd9Sstevel@tonic-gate dc.lcol_id = lcol->lcol_id; 10707c478bd9Sstevel@tonic-gate (void) strcpy(dc.lcol_name, lcol->lcol_name); 10717c478bd9Sstevel@tonic-gate dc.lcol_rss = lcol->lcol_rss; 10727c478bd9Sstevel@tonic-gate dc.lcol_image_size = lcol->lcol_image_size; 10737c478bd9Sstevel@tonic-gate dc.lcol_rss_cap = lcol->lcol_rss_cap; 10747c478bd9Sstevel@tonic-gate dc.lcol_stat = lcol->lcol_stat; 10757c478bd9Sstevel@tonic-gate 10767c478bd9Sstevel@tonic-gate if (write(fd, &dc, sizeof (dc)) == sizeof (dc)) { 10770209230bSgjelinek lcol->lcol_stat_old = lcol->lcol_stat; 10787c478bd9Sstevel@tonic-gate } else { 10790209230bSgjelinek debug("can't write %s %s statistics", 10800209230bSgjelinek (lcol->lcol_id.rcid_type == RCIDT_PROJECT ? 10810209230bSgjelinek "project" : "zone"), 10827c478bd9Sstevel@tonic-gate lcol->lcol_name); 10837c478bd9Sstevel@tonic-gate } 10847c478bd9Sstevel@tonic-gate 10857c478bd9Sstevel@tonic-gate return (0); 10867c478bd9Sstevel@tonic-gate } 10877c478bd9Sstevel@tonic-gate 10887c478bd9Sstevel@tonic-gate /* 10897c478bd9Sstevel@tonic-gate * Determine the count of pages scanned by the global page scanner, obtained 10907c478bd9Sstevel@tonic-gate * from the cpu_stat:*::scan kstats. Return zero on success. 10917c478bd9Sstevel@tonic-gate */ 10927c478bd9Sstevel@tonic-gate static int 10937c478bd9Sstevel@tonic-gate get_globally_scanned_pages(uint64_t *scannedp) 10947c478bd9Sstevel@tonic-gate { 10957c478bd9Sstevel@tonic-gate kstat_t *ksp; 10967c478bd9Sstevel@tonic-gate uint64_t scanned = 0; 10977c478bd9Sstevel@tonic-gate 10987c478bd9Sstevel@tonic-gate if (kstat_chain_update(kctl) == -1) { 10997c478bd9Sstevel@tonic-gate warn(gettext("can't update kstat chain")); 11007c478bd9Sstevel@tonic-gate return (0); 11017c478bd9Sstevel@tonic-gate } 11027c478bd9Sstevel@tonic-gate 11037c478bd9Sstevel@tonic-gate for (ksp = kctl->kc_chain; ksp != NULL; ksp = ksp->ks_next) { 11047c478bd9Sstevel@tonic-gate if (strcmp(ksp->ks_module, "cpu_stat") == 0) { 11057c478bd9Sstevel@tonic-gate if (kstat_read(kctl, ksp, NULL) != -1) { 11067c478bd9Sstevel@tonic-gate scanned += ((cpu_stat_t *) 11077c478bd9Sstevel@tonic-gate ksp->ks_data)->cpu_vminfo.scan; 11080209230bSgjelinek } else { 11097c478bd9Sstevel@tonic-gate return (-1); 11107c478bd9Sstevel@tonic-gate } 11117c478bd9Sstevel@tonic-gate } 11120209230bSgjelinek } 11137c478bd9Sstevel@tonic-gate 11147c478bd9Sstevel@tonic-gate *scannedp = scanned; 11157c478bd9Sstevel@tonic-gate return (0); 11167c478bd9Sstevel@tonic-gate } 11177c478bd9Sstevel@tonic-gate 11187c478bd9Sstevel@tonic-gate /* 11190209230bSgjelinek * Determine if the global page scanner is running, during which no memory 11200209230bSgjelinek * caps should be enforced, to prevent interference with the global page 11210209230bSgjelinek * scanner. 11220209230bSgjelinek */ 11230209230bSgjelinek static boolean_t 11240209230bSgjelinek is_global_scanner_running() 11250209230bSgjelinek { 11260209230bSgjelinek /* measure delta in page scan count */ 11270209230bSgjelinek static uint64_t new_sp = 0; 11280209230bSgjelinek static uint64_t old_sp = 0; 11290209230bSgjelinek boolean_t res = B_FALSE; 11300209230bSgjelinek 11310209230bSgjelinek if (get_globally_scanned_pages(&new_sp) == 0) { 11320209230bSgjelinek if (old_sp != 0 && (new_sp - old_sp) > 0) { 11330209230bSgjelinek debug("global memory pressure detected (%llu " 11340209230bSgjelinek "pages scanned since last interval)\n", 11350209230bSgjelinek (unsigned long long)(new_sp - old_sp)); 11360209230bSgjelinek res = B_TRUE; 11370209230bSgjelinek } 11380209230bSgjelinek old_sp = new_sp; 11390209230bSgjelinek } else { 11400209230bSgjelinek warn(gettext("unable to read cpu statistics")); 11410209230bSgjelinek new_sp = old_sp; 11420209230bSgjelinek } 11430209230bSgjelinek 11440209230bSgjelinek return (res); 11450209230bSgjelinek } 11460209230bSgjelinek 11470209230bSgjelinek /* 11480209230bSgjelinek * If soft caps are in use, determine if global memory pressure exceeds the 11490209230bSgjelinek * configured maximum above which soft caps are enforced. 11500209230bSgjelinek */ 11510209230bSgjelinek static boolean_t 11520209230bSgjelinek must_enforce_soft_caps() 11530209230bSgjelinek { 11540209230bSgjelinek /* 11550209230bSgjelinek * Check for changes to the amount of installed physical memory, to 11560209230bSgjelinek * compute the current memory pressure. 11570209230bSgjelinek */ 11580209230bSgjelinek update_phys_total(); 11590209230bSgjelinek 11600209230bSgjelinek memory_pressure = 100 - (int)((sysconf(_SC_AVPHYS_PAGES) * page_size_kb) 11610209230bSgjelinek * 100.0 / phys_total); 11620209230bSgjelinek memory_pressure_sample++; 11630209230bSgjelinek if (rcfg.rcfg_memory_cap_enforcement_pressure > 0 && 11640209230bSgjelinek memory_pressure > rcfg.rcfg_memory_cap_enforcement_pressure) { 11650209230bSgjelinek return (B_TRUE); 11660209230bSgjelinek } 11670209230bSgjelinek 11680209230bSgjelinek return (B_FALSE); 11690209230bSgjelinek } 11700209230bSgjelinek 11710209230bSgjelinek /* 11727c478bd9Sstevel@tonic-gate * Update the shared statistics file with each collection's current statistics. 11737c478bd9Sstevel@tonic-gate * Return zero on success. 11747c478bd9Sstevel@tonic-gate */ 11757c478bd9Sstevel@tonic-gate static int 11767c478bd9Sstevel@tonic-gate update_statistics(void) 11777c478bd9Sstevel@tonic-gate { 11787c478bd9Sstevel@tonic-gate int fd, res; 11797c478bd9Sstevel@tonic-gate static char template[LINELEN]; 118094a877c4Sgm149974 118194a877c4Sgm149974 /* 1182c4d5c63eSgm149974 * Try to create a directory irrespective of whether it is existing 1183c4d5c63eSgm149974 * or not. If it is not there then it will create. Otherwise any way 1184c4d5c63eSgm149974 * it will fail at mkstemp call below. 1185c4d5c63eSgm149974 */ 1186c4d5c63eSgm149974 (void) mkdir(STAT_FILE_DIR, 0755); 1187c4d5c63eSgm149974 1188c4d5c63eSgm149974 /* 11897c478bd9Sstevel@tonic-gate * Create a temporary file. 11907c478bd9Sstevel@tonic-gate */ 11917c478bd9Sstevel@tonic-gate if (sizeof (template) < (strlen(rcfg.rcfg_stat_file) + 11927c478bd9Sstevel@tonic-gate strlen(STAT_TEMPLATE_SUFFIX) + 1)) { 11937c478bd9Sstevel@tonic-gate debug("temporary file template size too small\n"); 11947c478bd9Sstevel@tonic-gate return (-1); 11957c478bd9Sstevel@tonic-gate } 11967c478bd9Sstevel@tonic-gate (void) strcpy(template, rcfg.rcfg_stat_file); 11977c478bd9Sstevel@tonic-gate (void) strcat(template, STAT_TEMPLATE_SUFFIX); 11987c478bd9Sstevel@tonic-gate (void) rfd_reserve(1); 11997c478bd9Sstevel@tonic-gate fd = mkstemp(template); 12007c478bd9Sstevel@tonic-gate 12017c478bd9Sstevel@tonic-gate /* 12027c478bd9Sstevel@tonic-gate * Write the header and per-collection statistics. 12037c478bd9Sstevel@tonic-gate */ 12047c478bd9Sstevel@tonic-gate if (fd >= 0) { 12057c478bd9Sstevel@tonic-gate rcapd_stat_hdr_t rs; 12067c478bd9Sstevel@tonic-gate 12077c478bd9Sstevel@tonic-gate rs.rs_pid = rcapd_pid; 12087c478bd9Sstevel@tonic-gate rs.rs_time = gethrtime(); 12097c478bd9Sstevel@tonic-gate ASSERT(sizeof (rs.rs_mode) > strlen(rcfg.rcfg_mode_name)); 12107c478bd9Sstevel@tonic-gate (void) strcpy(rs.rs_mode, rcfg.rcfg_mode_name); 12117c478bd9Sstevel@tonic-gate rs.rs_pressure_cur = memory_pressure; 12127c478bd9Sstevel@tonic-gate rs.rs_pressure_cap = rcfg.rcfg_memory_cap_enforcement_pressure; 12137c478bd9Sstevel@tonic-gate rs.rs_pressure_sample = memory_pressure_sample; 12147c478bd9Sstevel@tonic-gate 12157c478bd9Sstevel@tonic-gate if (fchmod(fd, 0644) == 0 && write(fd, &rs, sizeof (rs)) == 12167c478bd9Sstevel@tonic-gate sizeof (rs)) { 12177c478bd9Sstevel@tonic-gate list_walk_collection(report_collection_cb, 12187c478bd9Sstevel@tonic-gate (void *)(intptr_t)fd); 12197c478bd9Sstevel@tonic-gate /* 12207c478bd9Sstevel@tonic-gate * Replace the existing statistics file with this new 12217c478bd9Sstevel@tonic-gate * one. 12227c478bd9Sstevel@tonic-gate */ 12237c478bd9Sstevel@tonic-gate res = rename(template, rcfg.rcfg_stat_file); 12247c478bd9Sstevel@tonic-gate } else 12257c478bd9Sstevel@tonic-gate res = -1; 12267c478bd9Sstevel@tonic-gate (void) close(fd); 12277c478bd9Sstevel@tonic-gate } else 12287c478bd9Sstevel@tonic-gate res = -1; 12297c478bd9Sstevel@tonic-gate 12307c478bd9Sstevel@tonic-gate return (res); 12317c478bd9Sstevel@tonic-gate } 12327c478bd9Sstevel@tonic-gate 12337c478bd9Sstevel@tonic-gate /* 12347c478bd9Sstevel@tonic-gate * Verify the statistics file can be created and written to, and die if an 12357c478bd9Sstevel@tonic-gate * existing file may be in use by another rcapd. 12367c478bd9Sstevel@tonic-gate */ 12377c478bd9Sstevel@tonic-gate static int 12387c478bd9Sstevel@tonic-gate verify_statistics(void) 12397c478bd9Sstevel@tonic-gate { 12407c478bd9Sstevel@tonic-gate pid_t pid; 12417c478bd9Sstevel@tonic-gate 12427c478bd9Sstevel@tonic-gate /* 12437c478bd9Sstevel@tonic-gate * Warn if another instance of rcapd might be active. 12447c478bd9Sstevel@tonic-gate */ 12457c478bd9Sstevel@tonic-gate (void) rfd_reserve(1); 12467c478bd9Sstevel@tonic-gate pid = stat_get_rcapd_pid(rcfg.rcfg_stat_file); 12477c478bd9Sstevel@tonic-gate if (pid != rcapd_pid && pid != -1) 12487c478bd9Sstevel@tonic-gate die(gettext("%s exists; rcapd may already be active\n"), 12497c478bd9Sstevel@tonic-gate rcfg.rcfg_stat_file); 12507c478bd9Sstevel@tonic-gate 12517c478bd9Sstevel@tonic-gate return (update_statistics()); 12527c478bd9Sstevel@tonic-gate } 12537c478bd9Sstevel@tonic-gate 12547c478bd9Sstevel@tonic-gate static int 12557c478bd9Sstevel@tonic-gate sum_excess_cb(lcollection_t *lcol, void *arg) 12567c478bd9Sstevel@tonic-gate { 12577c478bd9Sstevel@tonic-gate uint64_t *sum_excess = arg; 12587c478bd9Sstevel@tonic-gate 12597c478bd9Sstevel@tonic-gate *sum_excess += MAX((int64_t)0, (int64_t)(lcol->lcol_rss - 12607c478bd9Sstevel@tonic-gate lcol->lcol_rss_cap)); 12617c478bd9Sstevel@tonic-gate return (0); 12627c478bd9Sstevel@tonic-gate } 12637c478bd9Sstevel@tonic-gate 12640209230bSgjelinek /* 12650209230bSgjelinek * Compute the quantity of memory (in kilobytes) above the cap enforcement 12660209230bSgjelinek * pressure. Set the scan goal to that quantity (or at most the excess). 12670209230bSgjelinek */ 12680209230bSgjelinek static void 12690209230bSgjelinek compute_soft_scan_goal(soft_scan_arg_t *argp) 12700209230bSgjelinek { 12710209230bSgjelinek /* 12720209230bSgjelinek * Compute the sum of the collections' excesses, which will be the 12730209230bSgjelinek * denominator. 12740209230bSgjelinek */ 12750209230bSgjelinek argp->ssa_sum_excess = 0; 12760209230bSgjelinek list_walk_collection(sum_excess_cb, &(argp->ssa_sum_excess)); 12770209230bSgjelinek 12780209230bSgjelinek argp->ssa_scan_goal = MIN((sysconf(_SC_PHYS_PAGES) * 12790209230bSgjelinek (100 - rcfg.rcfg_memory_cap_enforcement_pressure) / 100 - 12800209230bSgjelinek sysconf(_SC_AVPHYS_PAGES)) * page_size_kb, 12810209230bSgjelinek argp->ssa_sum_excess); 12820209230bSgjelinek } 12830209230bSgjelinek 12847c478bd9Sstevel@tonic-gate static void 12857c478bd9Sstevel@tonic-gate rcapd_usage(void) 12867c478bd9Sstevel@tonic-gate { 12877c478bd9Sstevel@tonic-gate info(gettext("usage: rcapd [-d]\n")); 12887c478bd9Sstevel@tonic-gate } 12897c478bd9Sstevel@tonic-gate 12907c478bd9Sstevel@tonic-gate void 12917c478bd9Sstevel@tonic-gate check_update_statistics(void) 12927c478bd9Sstevel@tonic-gate { 12937c478bd9Sstevel@tonic-gate hrtime_t now = gethrtime(); 12947c478bd9Sstevel@tonic-gate 12957c478bd9Sstevel@tonic-gate if (EVENT_TIME(now, next_report)) { 12967c478bd9Sstevel@tonic-gate debug("updating statistics...\n"); 12977c478bd9Sstevel@tonic-gate list_walk_collection(simple_report_collection_cb, NULL); 12987c478bd9Sstevel@tonic-gate if (update_statistics() != 0) 12997c478bd9Sstevel@tonic-gate debug("couldn't update statistics"); 13007c478bd9Sstevel@tonic-gate next_report = NEXT_REPORT_EVENT_TIME(now, 13017c478bd9Sstevel@tonic-gate rcfg.rcfg_report_interval); 13027c478bd9Sstevel@tonic-gate } 13037c478bd9Sstevel@tonic-gate } 13047c478bd9Sstevel@tonic-gate 13057c478bd9Sstevel@tonic-gate static void 13067c478bd9Sstevel@tonic-gate verify_and_set_privileges(void) 13077c478bd9Sstevel@tonic-gate { 13087c478bd9Sstevel@tonic-gate priv_set_t *required = 13097c478bd9Sstevel@tonic-gate priv_str_to_set("zone,sys_resource,proc_owner", ",", NULL); 13107c478bd9Sstevel@tonic-gate 13117c478bd9Sstevel@tonic-gate /* 13127c478bd9Sstevel@tonic-gate * Ensure the required privileges, suitable for controlling processes, 13137c478bd9Sstevel@tonic-gate * are possessed. 13147c478bd9Sstevel@tonic-gate */ 13157c478bd9Sstevel@tonic-gate if (setppriv(PRIV_SET, PRIV_PERMITTED, required) != 0 || setppriv( 13167c478bd9Sstevel@tonic-gate PRIV_SET, PRIV_EFFECTIVE, required) != 0) 13177c478bd9Sstevel@tonic-gate die(gettext("can't set requisite privileges")); 13187c478bd9Sstevel@tonic-gate 13197c478bd9Sstevel@tonic-gate /* 13207c478bd9Sstevel@tonic-gate * Ensure access to /var/run/daemon. 13217c478bd9Sstevel@tonic-gate */ 13227c478bd9Sstevel@tonic-gate if (setreuid(DAEMON_UID, DAEMON_UID) != 0) 13237c478bd9Sstevel@tonic-gate die(gettext("cannot become user daemon")); 13247c478bd9Sstevel@tonic-gate 13257c478bd9Sstevel@tonic-gate priv_freeset(required); 13267c478bd9Sstevel@tonic-gate } 13277c478bd9Sstevel@tonic-gate 13280209230bSgjelinek /* 13290209230bSgjelinek * This function does the top-level work to determine if we should do any 13300209230bSgjelinek * memory capping, and if so, it invokes the right call-backs to do the work. 13310209230bSgjelinek */ 13320209230bSgjelinek static void 13330209230bSgjelinek do_capping(hrtime_t now, hrtime_t *next_proc_walk) 13340209230bSgjelinek { 13350209230bSgjelinek boolean_t enforce_caps; 13360209230bSgjelinek /* soft cap enforcement flag, depending on memory pressure */ 13370209230bSgjelinek boolean_t enforce_soft_caps; 13380209230bSgjelinek /* avoid interference with kernel's page scanner */ 13390209230bSgjelinek boolean_t global_scanner_running; 13400209230bSgjelinek sample_col_arg_t col_arg; 13410209230bSgjelinek soft_scan_arg_t arg; 13420209230bSgjelinek uint_t col_types = 0; 13430209230bSgjelinek 13440209230bSgjelinek /* check what kind of collections (project/zone) are capped */ 13450209230bSgjelinek list_walk_collection(col_type_cb, &col_types); 13460209230bSgjelinek debug("collection types: 0x%x\n", col_types); 13470209230bSgjelinek 13480209230bSgjelinek /* no capped collections, skip checking rss */ 13490209230bSgjelinek if (col_types == 0) 13500209230bSgjelinek return; 13510209230bSgjelinek 13520209230bSgjelinek /* Determine if soft caps are enforced. */ 13530209230bSgjelinek enforce_soft_caps = must_enforce_soft_caps(); 13540209230bSgjelinek 13550209230bSgjelinek /* Determine if the global page scanner is running. */ 13560209230bSgjelinek global_scanner_running = is_global_scanner_running(); 13570209230bSgjelinek 13580209230bSgjelinek /* 13590209230bSgjelinek * Sample collections' member processes RSSes and recompute 13600209230bSgjelinek * collections' excess. 13610209230bSgjelinek */ 13620209230bSgjelinek rss_sample(B_FALSE, col_types); 13630209230bSgjelinek 13640209230bSgjelinek col_arg.sca_any_over_cap = B_FALSE; 13650209230bSgjelinek col_arg.sca_project_over_cap = B_FALSE; 13660209230bSgjelinek list_walk_collection(rss_sample_col_cb, &col_arg); 13670209230bSgjelinek list_walk_collection(excess_print_cb, NULL); 13680209230bSgjelinek debug("any collection/project over cap = %d, %d\n", 13690209230bSgjelinek col_arg.sca_any_over_cap, col_arg.sca_project_over_cap); 13700209230bSgjelinek 13710209230bSgjelinek if (enforce_soft_caps) 13720209230bSgjelinek debug("memory pressure %d%%\n", memory_pressure); 13730209230bSgjelinek 13740209230bSgjelinek /* 13750209230bSgjelinek * Cap enforcement is determined by the previous conditions. 13760209230bSgjelinek */ 13770209230bSgjelinek enforce_caps = !global_scanner_running && col_arg.sca_any_over_cap && 13780209230bSgjelinek (rcfg.rcfg_memory_cap_enforcement_pressure == 0 || 13790209230bSgjelinek enforce_soft_caps); 13800209230bSgjelinek 13810209230bSgjelinek debug("%senforcing caps\n", enforce_caps ? "" : "not "); 13820209230bSgjelinek 13830209230bSgjelinek /* 13840209230bSgjelinek * If soft caps are in use, determine the size of the portion from each 13850209230bSgjelinek * collection to scan for. 13860209230bSgjelinek */ 13870209230bSgjelinek if (enforce_caps && enforce_soft_caps) 13880209230bSgjelinek compute_soft_scan_goal(&arg); 13890209230bSgjelinek 13900209230bSgjelinek /* 13910209230bSgjelinek * Victimize offending collections. 13920209230bSgjelinek */ 13930209230bSgjelinek if (enforce_caps && (!enforce_soft_caps || 13940209230bSgjelinek (arg.ssa_scan_goal > 0 && arg.ssa_sum_excess > 0))) { 13950209230bSgjelinek 13960209230bSgjelinek /* 13970209230bSgjelinek * Since at least one collection is over its cap & needs 13980209230bSgjelinek * enforcing, check if it is at least time for a process walk 13990209230bSgjelinek * (we could be well past time since we only walk /proc when 14000209230bSgjelinek * we need to) and if so, update each collections process list 14010209230bSgjelinek * in a single pass through /proc. 14020209230bSgjelinek */ 14030209230bSgjelinek if (EVENT_TIME(now, *next_proc_walk)) { 14040209230bSgjelinek debug("scanning process list...\n"); 14050209230bSgjelinek proc_walk_all(proc_cb); /* insert & mark */ 14060209230bSgjelinek list_walk_all(sweep_process_cb); /* free dead procs */ 14070209230bSgjelinek *next_proc_walk = NEXT_EVENT_TIME(now, 14080209230bSgjelinek rcfg.rcfg_proc_walk_interval); 14090209230bSgjelinek } 14100209230bSgjelinek 14110209230bSgjelinek gz_col = NULL; 14120209230bSgjelinek if (enforce_soft_caps) { 14130209230bSgjelinek debug("scan goal is %lldKB\n", 14140209230bSgjelinek (long long)arg.ssa_scan_goal); 14150209230bSgjelinek list_walk_collection(soft_scan_cb, &arg); 14160209230bSgjelinek if (gz_capped && gz_col != NULL) { 14170209230bSgjelinek /* process global zone */ 14180209230bSgjelinek arg.ssa_project_over_cap = 14190209230bSgjelinek col_arg.sca_project_over_cap; 14200209230bSgjelinek soft_scan_gz(gz_col, &arg); 14210209230bSgjelinek } 14220209230bSgjelinek } else { 14230209230bSgjelinek list_walk_collection(scan_cb, NULL); 14240209230bSgjelinek if (gz_capped && gz_col != NULL) { 14250209230bSgjelinek /* process global zone */ 14260209230bSgjelinek scan_gz(gz_col, col_arg.sca_project_over_cap); 14270209230bSgjelinek } 14280209230bSgjelinek } 14290209230bSgjelinek } else if (col_arg.sca_any_over_cap) { 14300209230bSgjelinek list_walk_collection(unenforced_cap_cb, NULL); 14310209230bSgjelinek } 14320209230bSgjelinek } 14330209230bSgjelinek 14347c478bd9Sstevel@tonic-gate int 14357c478bd9Sstevel@tonic-gate main(int argc, char *argv[]) 14367c478bd9Sstevel@tonic-gate { 14377c478bd9Sstevel@tonic-gate int res; 14387c478bd9Sstevel@tonic-gate int should_fork = 1; /* fork flag */ 14397c478bd9Sstevel@tonic-gate hrtime_t now; /* current time */ 14407c478bd9Sstevel@tonic-gate hrtime_t next; /* time of next event */ 14417c478bd9Sstevel@tonic-gate int sig; /* signal iteration */ 14427c478bd9Sstevel@tonic-gate struct rlimit rl; 14437c478bd9Sstevel@tonic-gate hrtime_t next_proc_walk; /* time of next /proc scan */ 14447c478bd9Sstevel@tonic-gate hrtime_t next_configuration; /* time of next configuration */ 14457c478bd9Sstevel@tonic-gate hrtime_t next_rss_sample; /* (latest) time of next RSS sample */ 14467c478bd9Sstevel@tonic-gate 14477c478bd9Sstevel@tonic-gate (void) set_message_priority(RCM_INFO); 144823a1cceaSRoger A. Faulkner (void) setpname("rcapd"); 14497c478bd9Sstevel@tonic-gate rcapd_pid = getpid(); 14507c478bd9Sstevel@tonic-gate (void) chdir("/"); 14517c478bd9Sstevel@tonic-gate should_run = 1; 14527c478bd9Sstevel@tonic-gate ever_ran = 0; 14537c478bd9Sstevel@tonic-gate 14547c478bd9Sstevel@tonic-gate (void) setlocale(LC_ALL, ""); 14557c478bd9Sstevel@tonic-gate (void) textdomain(TEXT_DOMAIN); 14567c478bd9Sstevel@tonic-gate 14577c478bd9Sstevel@tonic-gate /* 14587c478bd9Sstevel@tonic-gate * Parse command-line options. 14597c478bd9Sstevel@tonic-gate */ 14607c478bd9Sstevel@tonic-gate while ((res = getopt(argc, argv, "dF")) > 0) 14617c478bd9Sstevel@tonic-gate switch (res) { 14627c478bd9Sstevel@tonic-gate case 'd': 14637c478bd9Sstevel@tonic-gate should_fork = 0; 14647c478bd9Sstevel@tonic-gate if (debug_mode == 0) { 14657c478bd9Sstevel@tonic-gate debug_mode = 1; 14667c478bd9Sstevel@tonic-gate (void) set_message_priority(RCM_DEBUG); 14677c478bd9Sstevel@tonic-gate } else 14687c478bd9Sstevel@tonic-gate (void) set_message_priority(RCM_DEBUG_HIGH); 14697c478bd9Sstevel@tonic-gate break; 14707c478bd9Sstevel@tonic-gate case 'F': 14717c478bd9Sstevel@tonic-gate should_fork = 0; 14727c478bd9Sstevel@tonic-gate break; 14737c478bd9Sstevel@tonic-gate default: 14747c478bd9Sstevel@tonic-gate rcapd_usage(); 14757c478bd9Sstevel@tonic-gate return (E_USAGE); 14767c478bd9Sstevel@tonic-gate /*NOTREACHED*/ 14777c478bd9Sstevel@tonic-gate } 14787c478bd9Sstevel@tonic-gate 14797c478bd9Sstevel@tonic-gate /* 1480d75e6a5dStn143363 * Read the configuration. 1481d75e6a5dStn143363 */ 1482d75e6a5dStn143363 if (rcfg_read(&rcfg, verify_statistics) != E_SUCCESS) { 1483d75e6a5dStn143363 warn(gettext("resource caps not configured\n")); 1484d75e6a5dStn143363 return (SMF_EXIT_ERR_CONFIG); 1485d75e6a5dStn143363 } 1486d75e6a5dStn143363 1487d75e6a5dStn143363 /* 14887c478bd9Sstevel@tonic-gate * If not debugging, fork and continue operating, changing the 14897c478bd9Sstevel@tonic-gate * destination of messages to syslog(). 14907c478bd9Sstevel@tonic-gate */ 14917c478bd9Sstevel@tonic-gate if (should_fork == 1) { 14927c478bd9Sstevel@tonic-gate pid_t child; 14937c478bd9Sstevel@tonic-gate debug("forking\n"); 14947c478bd9Sstevel@tonic-gate child = fork(); 14957c478bd9Sstevel@tonic-gate if (child == -1) 14967c478bd9Sstevel@tonic-gate die(gettext("cannot fork")); 14977c478bd9Sstevel@tonic-gate if (child > 0) 14987c478bd9Sstevel@tonic-gate return (0); 14997c478bd9Sstevel@tonic-gate else { 15007c478bd9Sstevel@tonic-gate rcapd_pid = getpid(); 15017c478bd9Sstevel@tonic-gate (void) set_message_destination(RCD_SYSLOG); 15027c478bd9Sstevel@tonic-gate (void) fclose(stdin); 15037c478bd9Sstevel@tonic-gate (void) fclose(stdout); 15047c478bd9Sstevel@tonic-gate (void) fclose(stderr); 15057c478bd9Sstevel@tonic-gate } 15067c478bd9Sstevel@tonic-gate /* 15077c478bd9Sstevel@tonic-gate * Start a new session and detatch from the controlling tty. 15087c478bd9Sstevel@tonic-gate */ 15097c478bd9Sstevel@tonic-gate if (setsid() == (pid_t)-1) 15107c478bd9Sstevel@tonic-gate debug(gettext("setsid() failed; cannot detach from " 15117c478bd9Sstevel@tonic-gate "terminal")); 15127c478bd9Sstevel@tonic-gate } 15137c478bd9Sstevel@tonic-gate 15147c478bd9Sstevel@tonic-gate finish_configuration(); 15157c478bd9Sstevel@tonic-gate should_reconfigure = 0; 15167c478bd9Sstevel@tonic-gate 15177c478bd9Sstevel@tonic-gate /* 15187c478bd9Sstevel@tonic-gate * Check that required privileges are possessed. 15197c478bd9Sstevel@tonic-gate */ 15207c478bd9Sstevel@tonic-gate verify_and_set_privileges(); 15217c478bd9Sstevel@tonic-gate 15227c478bd9Sstevel@tonic-gate now = next_report = next_proc_walk = next_rss_sample = gethrtime(); 15237c478bd9Sstevel@tonic-gate next_configuration = NEXT_EVENT_TIME(gethrtime(), 15247c478bd9Sstevel@tonic-gate rcfg.rcfg_reconfiguration_interval); 15257c478bd9Sstevel@tonic-gate 15267c478bd9Sstevel@tonic-gate /* 15277c478bd9Sstevel@tonic-gate * Open the kstat chain. 15287c478bd9Sstevel@tonic-gate */ 15297c478bd9Sstevel@tonic-gate kctl = kstat_open(); 15307c478bd9Sstevel@tonic-gate if (kctl == NULL) 15317c478bd9Sstevel@tonic-gate die(gettext("can't open kstats")); 15327c478bd9Sstevel@tonic-gate 15337c478bd9Sstevel@tonic-gate /* 15347c478bd9Sstevel@tonic-gate * Set RLIMIT_NOFILE as high as practical, so roughly 10K processes can 15357c478bd9Sstevel@tonic-gate * be effectively managed without revoking descriptors (at 3 per 15367c478bd9Sstevel@tonic-gate * process). 15377c478bd9Sstevel@tonic-gate */ 15387c478bd9Sstevel@tonic-gate rl.rlim_cur = 32 * 1024; 15397c478bd9Sstevel@tonic-gate rl.rlim_max = 32 * 1024; 15407c478bd9Sstevel@tonic-gate if (setrlimit(RLIMIT_NOFILE, &rl) != 0 && 15417c478bd9Sstevel@tonic-gate getrlimit(RLIMIT_NOFILE, &rl) == 0) { 15427c478bd9Sstevel@tonic-gate rl.rlim_cur = rl.rlim_max; 15437c478bd9Sstevel@tonic-gate (void) setrlimit(RLIMIT_NOFILE, &rl); 15447c478bd9Sstevel@tonic-gate } 1545004388ebScasper (void) enable_extended_FILE_stdio(-1, -1); 1546004388ebScasper 15477c478bd9Sstevel@tonic-gate if (getrlimit(RLIMIT_NOFILE, &rl) == 0) 15487c478bd9Sstevel@tonic-gate debug("fd limit: %lu\n", rl.rlim_cur); 15497c478bd9Sstevel@tonic-gate else 15507c478bd9Sstevel@tonic-gate debug("fd limit: unknown\n"); 15517c478bd9Sstevel@tonic-gate 15520209230bSgjelinek get_page_size(); 15530209230bSgjelinek my_zoneid = getzoneid(); 15540209230bSgjelinek 15557c478bd9Sstevel@tonic-gate /* 15567c478bd9Sstevel@tonic-gate * Handle those signals whose (default) exit disposition 15577c478bd9Sstevel@tonic-gate * prevents rcapd from finishing scanning before terminating. 15587c478bd9Sstevel@tonic-gate */ 15597c478bd9Sstevel@tonic-gate (void) sigset(SIGINT, terminate_signal); 15607c478bd9Sstevel@tonic-gate (void) sigset(SIGQUIT, abort_signal); 15617c478bd9Sstevel@tonic-gate (void) sigset(SIGILL, abort_signal); 15627c478bd9Sstevel@tonic-gate (void) sigset(SIGEMT, abort_signal); 15637c478bd9Sstevel@tonic-gate (void) sigset(SIGFPE, abort_signal); 15647c478bd9Sstevel@tonic-gate (void) sigset(SIGBUS, abort_signal); 15657c478bd9Sstevel@tonic-gate (void) sigset(SIGSEGV, abort_signal); 15667c478bd9Sstevel@tonic-gate (void) sigset(SIGSYS, abort_signal); 15677c478bd9Sstevel@tonic-gate (void) sigset(SIGPIPE, terminate_signal); 15687c478bd9Sstevel@tonic-gate (void) sigset(SIGALRM, terminate_signal); 15697c478bd9Sstevel@tonic-gate (void) sigset(SIGTERM, terminate_signal); 15707c478bd9Sstevel@tonic-gate (void) sigset(SIGUSR1, terminate_signal); 15717c478bd9Sstevel@tonic-gate (void) sigset(SIGUSR2, terminate_signal); 15727c478bd9Sstevel@tonic-gate (void) sigset(SIGPOLL, terminate_signal); 15737c478bd9Sstevel@tonic-gate (void) sigset(SIGVTALRM, terminate_signal); 15747c478bd9Sstevel@tonic-gate (void) sigset(SIGXCPU, abort_signal); 15757c478bd9Sstevel@tonic-gate (void) sigset(SIGXFSZ, abort_signal); 15767c478bd9Sstevel@tonic-gate for (sig = SIGRTMIN; sig <= SIGRTMAX; sig++) 15777c478bd9Sstevel@tonic-gate (void) sigset(sig, terminate_signal); 15787c478bd9Sstevel@tonic-gate 15797c478bd9Sstevel@tonic-gate /* 15807c478bd9Sstevel@tonic-gate * Install a signal handler for reconfiguration processing. 15817c478bd9Sstevel@tonic-gate */ 15827c478bd9Sstevel@tonic-gate (void) sigset(SIGHUP, sighup); 15837c478bd9Sstevel@tonic-gate 15847c478bd9Sstevel@tonic-gate /* 15857c478bd9Sstevel@tonic-gate * Determine which process collections to cap. 15867c478bd9Sstevel@tonic-gate */ 15877c478bd9Sstevel@tonic-gate lcollection_update(LCU_COMPLETE); 15887c478bd9Sstevel@tonic-gate 15897c478bd9Sstevel@tonic-gate /* 15907c478bd9Sstevel@tonic-gate * Loop forever, monitoring collections' resident set sizes and 15910209230bSgjelinek * enforcing their caps. Look for changes in caps as well as 15920209230bSgjelinek * responding to requests to reread the configuration. Update 15930209230bSgjelinek * per-collection statistics periodically. 15947c478bd9Sstevel@tonic-gate */ 15957c478bd9Sstevel@tonic-gate while (should_run != 0) { 15967c478bd9Sstevel@tonic-gate struct timespec ts; 15977c478bd9Sstevel@tonic-gate 15987c478bd9Sstevel@tonic-gate /* 15997c478bd9Sstevel@tonic-gate * Announce that rcapd is starting. 16007c478bd9Sstevel@tonic-gate */ 16017c478bd9Sstevel@tonic-gate if (ever_ran == 0) { 16027c478bd9Sstevel@tonic-gate info(gettext("starting\n")); 16037c478bd9Sstevel@tonic-gate ever_ran = 1; 16047c478bd9Sstevel@tonic-gate } 16057c478bd9Sstevel@tonic-gate 16067c478bd9Sstevel@tonic-gate /* 16070209230bSgjelinek * Check the configuration at every next_configuration interval. 16080209230bSgjelinek * Update the rss data once every next_rss_sample interval. 16090209230bSgjelinek * The condition of global memory pressure is also checked at 16100209230bSgjelinek * the same frequency, if strict caps are in use. 16117c478bd9Sstevel@tonic-gate */ 16127c478bd9Sstevel@tonic-gate now = gethrtime(); 16137c478bd9Sstevel@tonic-gate 16147c478bd9Sstevel@tonic-gate /* 1615d75e6a5dStn143363 * Detect configuration and cap changes only when SIGHUP 1616d75e6a5dStn143363 * is received. Call reconfigure to apply new configuration 1617d75e6a5dStn143363 * parameters. 16187c478bd9Sstevel@tonic-gate */ 1619d75e6a5dStn143363 if (should_reconfigure == 1) { 1620d75e6a5dStn143363 reread_configuration(); 1621d75e6a5dStn143363 should_reconfigure = 0; 16220209230bSgjelinek reconfigure(now, &next_configuration, &next_proc_walk, 16230209230bSgjelinek &next_rss_sample); 1624d75e6a5dStn143363 } 1625d75e6a5dStn143363 1626d75e6a5dStn143363 if (EVENT_TIME(now, next_configuration)) { 1627d75e6a5dStn143363 reconfigure(now, &next_configuration, &next_proc_walk, 1628d75e6a5dStn143363 &next_rss_sample); 16297c478bd9Sstevel@tonic-gate } 16307c478bd9Sstevel@tonic-gate 16310209230bSgjelinek /* 16320209230bSgjelinek * Do the main work for enforcing caps. 16330209230bSgjelinek */ 16347c478bd9Sstevel@tonic-gate if (EVENT_TIME(now, next_rss_sample)) { 16350209230bSgjelinek do_capping(now, &next_proc_walk); 16367c478bd9Sstevel@tonic-gate 16377c478bd9Sstevel@tonic-gate next_rss_sample = NEXT_EVENT_TIME(now, 16387c478bd9Sstevel@tonic-gate rcfg.rcfg_rss_sample_interval); 16397c478bd9Sstevel@tonic-gate } 16407c478bd9Sstevel@tonic-gate 16417c478bd9Sstevel@tonic-gate /* 16427c478bd9Sstevel@tonic-gate * Update the statistics file, if it's time. 16437c478bd9Sstevel@tonic-gate */ 16447c478bd9Sstevel@tonic-gate check_update_statistics(); 16457c478bd9Sstevel@tonic-gate 16467c478bd9Sstevel@tonic-gate /* 16477c478bd9Sstevel@tonic-gate * Sleep for some time before repeating. 16487c478bd9Sstevel@tonic-gate */ 16497c478bd9Sstevel@tonic-gate now = gethrtime(); 16507c478bd9Sstevel@tonic-gate next = next_configuration; 16517c478bd9Sstevel@tonic-gate next = POSITIVE_MIN(next, next_report); 16527c478bd9Sstevel@tonic-gate next = POSITIVE_MIN(next, next_rss_sample); 16537c478bd9Sstevel@tonic-gate if (next > now && should_run != 0) { 16547c478bd9Sstevel@tonic-gate debug("sleeping %-4.2f seconds\n", (float)(next - 16557c478bd9Sstevel@tonic-gate now) / (float)NANOSEC); 16567c478bd9Sstevel@tonic-gate hrt2ts(next - now, &ts); 16577c478bd9Sstevel@tonic-gate (void) nanosleep(&ts, NULL); 16587c478bd9Sstevel@tonic-gate } 16597c478bd9Sstevel@tonic-gate } 16607c478bd9Sstevel@tonic-gate if (termination_signal != 0) 16617c478bd9Sstevel@tonic-gate debug("exiting due to signal %d\n", termination_signal); 16627c478bd9Sstevel@tonic-gate if (ever_ran != 0) 16637c478bd9Sstevel@tonic-gate info(gettext("exiting\n")); 16647c478bd9Sstevel@tonic-gate 16657c478bd9Sstevel@tonic-gate /* 16667c478bd9Sstevel@tonic-gate * Unlink the statistics file before exiting. 16677c478bd9Sstevel@tonic-gate */ 16687c478bd9Sstevel@tonic-gate if (rcfg.rcfg_stat_file[0] != 0) 16697c478bd9Sstevel@tonic-gate (void) unlink(rcfg.rcfg_stat_file); 16707c478bd9Sstevel@tonic-gate 16717c478bd9Sstevel@tonic-gate return (E_SUCCESS); 16727c478bd9Sstevel@tonic-gate } 1673