xref: /titanic_50/usr/src/cmd/rcap/rcapd/rcapd_main.c (revision 19449258028e6813f0b7a606b554b2fa37a390ec)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5004388ebScasper  * Common Development and Distribution License (the "License").
6004388ebScasper  * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate  *
87c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate  * and limitations under the License.
127c478bd9Sstevel@tonic-gate  *
137c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate  *
197c478bd9Sstevel@tonic-gate  * CDDL HEADER END
207c478bd9Sstevel@tonic-gate  */
217c478bd9Sstevel@tonic-gate 
2223a1cceaSRoger A. Faulkner /*
2323a1cceaSRoger A. Faulkner  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
2423a1cceaSRoger A. Faulkner  */
257c478bd9Sstevel@tonic-gate 
267c478bd9Sstevel@tonic-gate /*
277c478bd9Sstevel@tonic-gate  * rcapd is a long-running daemon enforcing project-based resource caps (see
287c478bd9Sstevel@tonic-gate  * rcapd(1M)).  Each instance of a process aggregate (project or, generically,
297c478bd9Sstevel@tonic-gate  * "collection") may have a memory cap.  A single thread monitors the resource
307c478bd9Sstevel@tonic-gate  * utilization of capped collections, enforces caps when they are exceeded (and
317c478bd9Sstevel@tonic-gate  * other conditions are met), and incorporates changes in configuration or
327c478bd9Sstevel@tonic-gate  * caps.  Each of these actions occurs not more frequently than the rate
337c478bd9Sstevel@tonic-gate  * specified with rcapadm(1M).
347c478bd9Sstevel@tonic-gate  */
357c478bd9Sstevel@tonic-gate 
367c478bd9Sstevel@tonic-gate #include <sys/priocntl.h>
377c478bd9Sstevel@tonic-gate #include <sys/proc.h>
387c478bd9Sstevel@tonic-gate #include <sys/resource.h>
397c478bd9Sstevel@tonic-gate #include <sys/sysinfo.h>
407c478bd9Sstevel@tonic-gate #include <sys/stat.h>
417c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h>
427c478bd9Sstevel@tonic-gate #include <sys/time.h>
437c478bd9Sstevel@tonic-gate #include <sys/types.h>
447c478bd9Sstevel@tonic-gate #include <dirent.h>
457c478bd9Sstevel@tonic-gate #include <errno.h>
467c478bd9Sstevel@tonic-gate #include <fcntl.h>
477c478bd9Sstevel@tonic-gate #include <kstat.h>
487c478bd9Sstevel@tonic-gate #include <libintl.h>
497c478bd9Sstevel@tonic-gate #include <limits.h>
507c478bd9Sstevel@tonic-gate #include <locale.h>
517c478bd9Sstevel@tonic-gate #include <priv.h>
527c478bd9Sstevel@tonic-gate #include <signal.h>
537c478bd9Sstevel@tonic-gate #include <stdarg.h>
547c478bd9Sstevel@tonic-gate #include <stdio.h>
55004388ebScasper #include <stdio_ext.h>
567c478bd9Sstevel@tonic-gate #include <stdlib.h>
57d75e6a5dStn143363 #include <libscf.h>
587c478bd9Sstevel@tonic-gate #include <strings.h>
597c478bd9Sstevel@tonic-gate #include <time.h>
607c478bd9Sstevel@tonic-gate #include <unistd.h>
617c478bd9Sstevel@tonic-gate #include <zone.h>
627c478bd9Sstevel@tonic-gate #include <assert.h>
630209230bSgjelinek #include <sys/vm_usage.h>
647c478bd9Sstevel@tonic-gate #include "rcapd.h"
657c478bd9Sstevel@tonic-gate #include "rcapd_mapping.h"
667c478bd9Sstevel@tonic-gate #include "rcapd_rfd.h"
677c478bd9Sstevel@tonic-gate #include "rcapd_stat.h"
687c478bd9Sstevel@tonic-gate #include "utils.h"
697c478bd9Sstevel@tonic-gate 
/*
 * Smaller of two values, considering only positive ones: when either
 * argument is <= 0 the other one is returned unchanged.  Arguments may be
 * evaluated more than once.
 */
#define	POSITIVE_MIN(x, y) \
	(((x) <= 0) ? (y) : ((y) <= 0) ? (x) : MIN(x, y))

/*
 * Time of the event "seconds" after "base", or 0 when the interval is not
 * positive.  Both arguments are parenthesized so that compound expressions
 * (e.g. "a + b") expand with the intended precedence.
 */
#define	NEXT_EVENT_TIME(base, seconds) \
	(((int)(seconds) > 0) ? \
	((base) + (hrtime_t)(seconds) * (hrtime_t)NANOSEC) : (hrtime_t)0)

/*
 * Like NEXT_EVENT_TIME(), but yields 0 when no statistics file is
 * configured.
 * NOTE(review): "base" is accepted but not used; the event time is always
 * computed relative to gethrtime().  Kept as-is to preserve behavior.
 */
#define	NEXT_REPORT_EVENT_TIME(base, seconds) \
	((rcfg.rcfg_stat_file[0] != 0) ?  \
	    NEXT_EVENT_TIME(gethrtime(), seconds) : (hrtime_t)0)

/*
 * True when "eventtime" is set (nonzero) and "time" is strictly past it.
 */
#define	EVENT_TIME(time, eventtime) \
	(((time) > (eventtime)) && (eventtime) != 0)

#define	STAT_TEMPLATE_SUFFIX	".XXXXXX"	/* suffix of mkstemp() arg */
#define	DAEMON_UID		1		/* uid to use */

/* Bits describing which kinds of capped collections are configured. */
#define	CAPPED_PROJECT	0x01
#define	CAPPED_ZONE	0x02
850209230bSgjelinek 
867c478bd9Sstevel@tonic-gate typedef struct soft_scan_arg {
877c478bd9Sstevel@tonic-gate 	uint64_t ssa_sum_excess;
887c478bd9Sstevel@tonic-gate 	int64_t ssa_scan_goal;
890209230bSgjelinek 	boolean_t ssa_project_over_cap;
907c478bd9Sstevel@tonic-gate } soft_scan_arg_t;
917c478bd9Sstevel@tonic-gate 
920209230bSgjelinek typedef struct sample_col_arg {
930209230bSgjelinek 	boolean_t sca_any_over_cap;
940209230bSgjelinek 	boolean_t sca_project_over_cap;
950209230bSgjelinek } sample_col_arg_t;
960209230bSgjelinek 
970209230bSgjelinek 
987c478bd9Sstevel@tonic-gate static int debug_mode = 0;		/* debug mode flag */
997c478bd9Sstevel@tonic-gate static pid_t rcapd_pid;			/* rcapd's pid to ensure it's not */
1007c478bd9Sstevel@tonic-gate 					/* scanned */
1017c478bd9Sstevel@tonic-gate static kstat_ctl_t *kctl;		/* kstat chain */
1027c478bd9Sstevel@tonic-gate static int memory_pressure = 0;		/* physical memory utilization (%) */
1037c478bd9Sstevel@tonic-gate static int memory_pressure_sample = 0;	/* count of samples */
1040209230bSgjelinek static long page_size_kb = 0;		/* system page size in KB */
1050209230bSgjelinek static size_t nvmu_vals = 0;		/* # of kernel RSS/swap vals in array */
1060209230bSgjelinek static size_t vmu_vals_len = 0;		/* size of RSS/swap vals array */
1070209230bSgjelinek static vmusage_t *vmu_vals = NULL;	/* snapshot of kernel RSS/swap values */
1087c478bd9Sstevel@tonic-gate static hrtime_t next_report;		/* time of next report */
1097c478bd9Sstevel@tonic-gate static int termination_signal = 0;	/* terminating signal */
1100209230bSgjelinek static zoneid_t my_zoneid = (zoneid_t)-1;
1110209230bSgjelinek static lcollection_t *gz_col;		/* global zone collection */
1127c478bd9Sstevel@tonic-gate 
1137c478bd9Sstevel@tonic-gate rcfg_t rcfg;
1140209230bSgjelinek /*
1150209230bSgjelinek  * Updated when we re-read the collection configurations if this rcapd instance
1160209230bSgjelinek  * is running in the global zone and the global zone is capped.
1170209230bSgjelinek  */
1180209230bSgjelinek boolean_t gz_capped = B_FALSE;
1197c478bd9Sstevel@tonic-gate 
1207c478bd9Sstevel@tonic-gate /*
1217c478bd9Sstevel@tonic-gate  * Flags.
1227c478bd9Sstevel@tonic-gate  */
1237c478bd9Sstevel@tonic-gate static int ever_ran;
1247c478bd9Sstevel@tonic-gate int should_run;
1257c478bd9Sstevel@tonic-gate static int should_reconfigure;
1267c478bd9Sstevel@tonic-gate 
1277c478bd9Sstevel@tonic-gate static int verify_statistics(void);
1287c478bd9Sstevel@tonic-gate static int update_statistics(void);
1297c478bd9Sstevel@tonic-gate 
1307c478bd9Sstevel@tonic-gate /*
1310209230bSgjelinek  * Checks if a process is marked 'system'.  Returns FALSE only when it is not.
1327c478bd9Sstevel@tonic-gate  */
1330209230bSgjelinek static boolean_t
proc_issystem(pid_t pid)1347c478bd9Sstevel@tonic-gate proc_issystem(pid_t pid)
1357c478bd9Sstevel@tonic-gate {
1367c478bd9Sstevel@tonic-gate 	char pc_clname[PC_CLNMSZ];
1377c478bd9Sstevel@tonic-gate 
1387c478bd9Sstevel@tonic-gate 	if (priocntl(P_PID, pid, PC_GETXPARMS, NULL, PC_KY_CLNAME, pc_clname,
1397c478bd9Sstevel@tonic-gate 	    PC_KY_NULL) != -1) {
1407c478bd9Sstevel@tonic-gate 		return (strcmp(pc_clname, "SYS") == 0);
1417c478bd9Sstevel@tonic-gate 	} else {
1427c478bd9Sstevel@tonic-gate 		debug("cannot get class-specific scheduling parameters; "
1430209230bSgjelinek 		    "assuming system process\n");
1440209230bSgjelinek 		return (B_TRUE);
1457c478bd9Sstevel@tonic-gate 	}
1467c478bd9Sstevel@tonic-gate }
1477c478bd9Sstevel@tonic-gate 
1487c478bd9Sstevel@tonic-gate static void
lprocess_insert_mark(psinfo_t * psinfop)1490209230bSgjelinek lprocess_insert_mark(psinfo_t *psinfop)
1507c478bd9Sstevel@tonic-gate {
1510209230bSgjelinek 	pid_t pid = psinfop->pr_pid;
1520209230bSgjelinek 	/* flag indicating whether the process should be scanned. */
1530209230bSgjelinek 	int unscannable = psinfop->pr_nlwp == 0;
1540209230bSgjelinek 	rcid_t colid;
1557c478bd9Sstevel@tonic-gate 	lcollection_t *lcol;
1567c478bd9Sstevel@tonic-gate 	lprocess_t *lproc;
1577c478bd9Sstevel@tonic-gate 
1580209230bSgjelinek 	/*
1590209230bSgjelinek 	 * Determine which collection to put this process into.  We only have
1600209230bSgjelinek 	 * to worry about tracking both zone and project capped processes if
1610209230bSgjelinek 	 * this rcapd instance is running in the global zone, since we'll only
1620209230bSgjelinek 	 * see processes in our own projects in a non-global zone.  In the
1630209230bSgjelinek 	 * global zone, if the process belongs to a non-global zone, we only
1640209230bSgjelinek 	 * need to track it for the capped non-global zone collection.  For
1650209230bSgjelinek 	 * global zone processes, we first attempt to put the process into a
1660209230bSgjelinek 	 * capped project collection.  On the second pass into this function
1670209230bSgjelinek 	 * the projid will be cleared so we will just track the process for the
1680209230bSgjelinek 	 * global zone collection as a whole.
1690209230bSgjelinek 	 */
1700209230bSgjelinek 	if (psinfop->pr_zoneid == my_zoneid && psinfop->pr_projid != -1) {
1710209230bSgjelinek 		colid.rcid_type = RCIDT_PROJECT;
1720209230bSgjelinek 		colid.rcid_val = psinfop->pr_projid;
1730209230bSgjelinek 	} else {
1740209230bSgjelinek 		/* try to add to zone collection */
1750209230bSgjelinek 		colid.rcid_type = RCIDT_ZONE;
1760209230bSgjelinek 		colid.rcid_val = psinfop->pr_zoneid;
1770209230bSgjelinek 	}
1780209230bSgjelinek 
1790209230bSgjelinek 	if ((lcol = lcollection_find(&colid)) == NULL)
1807c478bd9Sstevel@tonic-gate 		return;
1817c478bd9Sstevel@tonic-gate 
1827c478bd9Sstevel@tonic-gate 	/*
1837c478bd9Sstevel@tonic-gate 	 * If the process is already being tracked, update the unscannable flag,
1847c478bd9Sstevel@tonic-gate 	 * as determined by the caller, from the process's psinfo.
1857c478bd9Sstevel@tonic-gate 	 */
1867c478bd9Sstevel@tonic-gate 	lproc = lcol->lcol_lprocess;
1877c478bd9Sstevel@tonic-gate 	while (lproc != NULL) {
1887c478bd9Sstevel@tonic-gate 		if (lproc->lpc_pid == pid) {
1897c478bd9Sstevel@tonic-gate 			lproc->lpc_mark = 1;
1907c478bd9Sstevel@tonic-gate 			if (unscannable != 0 && lproc->lpc_unscannable == 0) {
1917c478bd9Sstevel@tonic-gate 				debug("process %d: became unscannable\n",
1927c478bd9Sstevel@tonic-gate 				    (int)lproc->lpc_pid);
1937c478bd9Sstevel@tonic-gate 				lproc->lpc_unscannable = 1;
1947c478bd9Sstevel@tonic-gate 			}
1957c478bd9Sstevel@tonic-gate 			return;
1967c478bd9Sstevel@tonic-gate 		}
1977c478bd9Sstevel@tonic-gate 		lproc = lproc->lpc_next;
1987c478bd9Sstevel@tonic-gate 	}
1997c478bd9Sstevel@tonic-gate 
2007c478bd9Sstevel@tonic-gate 	/*
2017c478bd9Sstevel@tonic-gate 	 * We've fallen off the list without finding our current process;
2027c478bd9Sstevel@tonic-gate 	 * insert it at the list head.
2037c478bd9Sstevel@tonic-gate 	 */
2047c478bd9Sstevel@tonic-gate 	if ((lproc = malloc(sizeof (*lproc))) == NULL)
2057c478bd9Sstevel@tonic-gate 		debug("insufficient memory to track new process %d", (int)pid);
2067c478bd9Sstevel@tonic-gate 	else {
2077c478bd9Sstevel@tonic-gate 		(void) bzero(lproc, sizeof (*lproc));
2087c478bd9Sstevel@tonic-gate 		lproc->lpc_pid = pid;
2097c478bd9Sstevel@tonic-gate 		lproc->lpc_mark = 1;
2107c478bd9Sstevel@tonic-gate 		lproc->lpc_collection = lcol;
2117c478bd9Sstevel@tonic-gate 		lproc->lpc_psinfo_fd = -1;
2127c478bd9Sstevel@tonic-gate 		lproc->lpc_pgdata_fd = -1;
2137c478bd9Sstevel@tonic-gate 		lproc->lpc_xmap_fd = -1;
2147c478bd9Sstevel@tonic-gate 
2157c478bd9Sstevel@tonic-gate 		/*
2167c478bd9Sstevel@tonic-gate 		 * If the caller didn't flag this process as unscannable
2177c478bd9Sstevel@tonic-gate 		 * already, do some more checking.
2187c478bd9Sstevel@tonic-gate 		 */
2197c478bd9Sstevel@tonic-gate 		lproc->lpc_unscannable = unscannable || proc_issystem(pid);
2207c478bd9Sstevel@tonic-gate 
2217c478bd9Sstevel@tonic-gate #ifdef DEBUG
2227c478bd9Sstevel@tonic-gate 		/*
2237c478bd9Sstevel@tonic-gate 		 * Verify the sanity of lprocess.  It should not contain the
2247c478bd9Sstevel@tonic-gate 		 * process we are about to prepend.
2257c478bd9Sstevel@tonic-gate 		 */
2267c478bd9Sstevel@tonic-gate 		if (lcollection_member(lcol, lproc)) {
2277c478bd9Sstevel@tonic-gate 			lprocess_t *cur = lcol->lcol_lprocess;
2287c478bd9Sstevel@tonic-gate 			debug("The collection %lld already has these members, "
2290209230bSgjelinek 			    "including me, %d!\n",
2300209230bSgjelinek 			    (long long)lcol->lcol_id.rcid_val,
2317c478bd9Sstevel@tonic-gate 			    (int)lproc->lpc_pid);
2327c478bd9Sstevel@tonic-gate 			while (cur != NULL) {
2337c478bd9Sstevel@tonic-gate 				debug("\t%d\n", (int)cur->lpc_pid);
2347c478bd9Sstevel@tonic-gate 				cur = cur->lpc_next;
2357c478bd9Sstevel@tonic-gate 			}
2367c478bd9Sstevel@tonic-gate 			info(gettext("process already on lprocess\n"));
2377c478bd9Sstevel@tonic-gate 			abort();
2387c478bd9Sstevel@tonic-gate 		}
2397c478bd9Sstevel@tonic-gate #endif /* DEBUG */
2407c478bd9Sstevel@tonic-gate 		lproc->lpc_next = lcol->lcol_lprocess;
2417c478bd9Sstevel@tonic-gate 		if (lproc->lpc_next != NULL)
2427c478bd9Sstevel@tonic-gate 			lproc->lpc_next->lpc_prev = lproc;
2437c478bd9Sstevel@tonic-gate 		lproc->lpc_prev = NULL;
2447c478bd9Sstevel@tonic-gate 		lcol->lcol_lprocess = lproc;
2457c478bd9Sstevel@tonic-gate 
2460209230bSgjelinek 		debug("tracking %s %ld %d %s%s\n",
2470209230bSgjelinek 		    (colid.rcid_type == RCIDT_PROJECT ? "project" : "zone"),
2480209230bSgjelinek 		    (long)colid.rcid_val,
2490209230bSgjelinek 		    (int)pid, psinfop->pr_psargs,
2507c478bd9Sstevel@tonic-gate 		    (lproc->lpc_unscannable != 0) ? " (not scannable)" : "");
2517c478bd9Sstevel@tonic-gate 		lcol->lcol_stat.lcols_proc_in++;
2527c478bd9Sstevel@tonic-gate 	}
2537c478bd9Sstevel@tonic-gate }
2547c478bd9Sstevel@tonic-gate 
2557c478bd9Sstevel@tonic-gate static int
list_walk_process_cb(lcollection_t * lcol,void * arg)2567c478bd9Sstevel@tonic-gate list_walk_process_cb(lcollection_t *lcol, void *arg)
2577c478bd9Sstevel@tonic-gate {
2587c478bd9Sstevel@tonic-gate 	int (*cb)(lcollection_t *, lprocess_t *) =
2597c478bd9Sstevel@tonic-gate 	    (int(*)(lcollection_t *, lprocess_t *))arg;
2607c478bd9Sstevel@tonic-gate 	lprocess_t *member;
2617c478bd9Sstevel@tonic-gate 	lprocess_t *next;
2627c478bd9Sstevel@tonic-gate 
2637c478bd9Sstevel@tonic-gate 	member = lcol->lcol_lprocess;
2647c478bd9Sstevel@tonic-gate 	while (member != NULL) {
2657c478bd9Sstevel@tonic-gate 		pid_t pid = member->lpc_pid;
2667c478bd9Sstevel@tonic-gate 		next = member->lpc_next;
2677c478bd9Sstevel@tonic-gate 
2687c478bd9Sstevel@tonic-gate 		debug_high("list_walk_all lpc %d\n", (int)pid);
2697c478bd9Sstevel@tonic-gate 		if (cb(lcol, member) != 0) {
2707c478bd9Sstevel@tonic-gate 			debug_high("list_walk_all aborted at lpc %d\n",
2717c478bd9Sstevel@tonic-gate 			    (int)pid);
2727c478bd9Sstevel@tonic-gate 			return (1);
2737c478bd9Sstevel@tonic-gate 		}
2747c478bd9Sstevel@tonic-gate 		member = next;
2757c478bd9Sstevel@tonic-gate 	}
2767c478bd9Sstevel@tonic-gate 
2777c478bd9Sstevel@tonic-gate 	return (0);
2787c478bd9Sstevel@tonic-gate }
2797c478bd9Sstevel@tonic-gate 
2807c478bd9Sstevel@tonic-gate /*
2817c478bd9Sstevel@tonic-gate  * Invoke the given callback for each process in each collection.  Callbacks
2827c478bd9Sstevel@tonic-gate  * are allowed to change the linkage of the process on which they act.
2837c478bd9Sstevel@tonic-gate  */
2847c478bd9Sstevel@tonic-gate static void
list_walk_all(int (* cb)(lcollection_t *,lprocess_t *))2857c478bd9Sstevel@tonic-gate list_walk_all(int (*cb)(lcollection_t *, lprocess_t *))
2867c478bd9Sstevel@tonic-gate {
2877c478bd9Sstevel@tonic-gate 	list_walk_collection(list_walk_process_cb, (void *)cb);
2887c478bd9Sstevel@tonic-gate }
2897c478bd9Sstevel@tonic-gate 
2907c478bd9Sstevel@tonic-gate static void
revoke_psinfo(rfd_t * rfd)2917c478bd9Sstevel@tonic-gate revoke_psinfo(rfd_t *rfd)
2927c478bd9Sstevel@tonic-gate {
2937c478bd9Sstevel@tonic-gate 	lprocess_t *lpc = (lprocess_t *)rfd->rfd_data;
2947c478bd9Sstevel@tonic-gate 
2957c478bd9Sstevel@tonic-gate 	if (lpc != NULL) {
2967c478bd9Sstevel@tonic-gate 		debug("revoking psinfo fd for process %d\n", (int)lpc->lpc_pid);
2977c478bd9Sstevel@tonic-gate 		ASSERT(lpc->lpc_psinfo_fd != -1);
2987c478bd9Sstevel@tonic-gate 		lpc->lpc_psinfo_fd = -1;
2997c478bd9Sstevel@tonic-gate 	} else
3007c478bd9Sstevel@tonic-gate 		debug("revoking psinfo fd for unknown process\n");
3017c478bd9Sstevel@tonic-gate }
3027c478bd9Sstevel@tonic-gate 
3037c478bd9Sstevel@tonic-gate /*
3047c478bd9Sstevel@tonic-gate  * Retrieve a process's psinfo via an already-opened or new file descriptor.
3057c478bd9Sstevel@tonic-gate  * The supplied descriptor will be closed on failure.  An optional callback
3067c478bd9Sstevel@tonic-gate  * will be invoked with the last descriptor tried, and a supplied callback
3077c478bd9Sstevel@tonic-gate  * argument, as its arguments, such that the new descriptor may be cached, or
3087c478bd9Sstevel@tonic-gate  * an old one may be invalidated.  If the result of the callback is zero, the
3097c478bd9Sstevel@tonic-gate  * the caller is to assume responsibility for the file descriptor, to close it
3107c478bd9Sstevel@tonic-gate  * with rfd_close().
3117c478bd9Sstevel@tonic-gate  *
3127c478bd9Sstevel@tonic-gate  * On failure, a nonzero value is returned.
3137c478bd9Sstevel@tonic-gate  */
3147c478bd9Sstevel@tonic-gate int
get_psinfo(pid_t pid,psinfo_t * psinfo,int cached_fd,int (* fd_update_cb)(void *,int),void * arg,lprocess_t * lpc)3157c478bd9Sstevel@tonic-gate get_psinfo(pid_t pid, psinfo_t *psinfo, int cached_fd,
3167c478bd9Sstevel@tonic-gate     int(*fd_update_cb)(void *, int), void *arg, lprocess_t *lpc)
3177c478bd9Sstevel@tonic-gate {
3187c478bd9Sstevel@tonic-gate 	int fd;
3197c478bd9Sstevel@tonic-gate 	int can_try_uncached;
3207c478bd9Sstevel@tonic-gate 
3217c478bd9Sstevel@tonic-gate 	ASSERT(!(cached_fd > 0 && fd_update_cb == NULL));
3227c478bd9Sstevel@tonic-gate 
3237c478bd9Sstevel@tonic-gate 	do {
3247c478bd9Sstevel@tonic-gate 		if (cached_fd >= 0) {
3257c478bd9Sstevel@tonic-gate 			fd = cached_fd;
3267c478bd9Sstevel@tonic-gate 			can_try_uncached = 1;
3277c478bd9Sstevel@tonic-gate 			debug_high("%d/psinfo, trying cached fd %d\n",
3287c478bd9Sstevel@tonic-gate 			    (int)pid, fd);
3297c478bd9Sstevel@tonic-gate 		} else {
3307c478bd9Sstevel@tonic-gate 			char pathbuf[PROC_PATH_MAX];
3317c478bd9Sstevel@tonic-gate 
3327c478bd9Sstevel@tonic-gate 			can_try_uncached = 0;
3337c478bd9Sstevel@tonic-gate 			(void) snprintf(pathbuf, sizeof (pathbuf),
3347c478bd9Sstevel@tonic-gate 			    "/proc/%d/psinfo", (int)pid);
3357c478bd9Sstevel@tonic-gate 			if ((fd = rfd_open(pathbuf, 1, RFD_PSINFO,
3367c478bd9Sstevel@tonic-gate 			    revoke_psinfo, lpc, O_RDONLY, 0000)) < 0) {
3377c478bd9Sstevel@tonic-gate 				debug("cannot open %s", pathbuf);
3387c478bd9Sstevel@tonic-gate 				break;
3397c478bd9Sstevel@tonic-gate 			} else
3407c478bd9Sstevel@tonic-gate 				debug_high("opened %s, fd %d\n", pathbuf, fd);
3417c478bd9Sstevel@tonic-gate 		}
3427c478bd9Sstevel@tonic-gate 
3437c478bd9Sstevel@tonic-gate 		if (pread(fd, psinfo, sizeof (*psinfo), 0) ==
3447c478bd9Sstevel@tonic-gate 		    sizeof (*psinfo) && psinfo->pr_pid == pid)
3457c478bd9Sstevel@tonic-gate 			break;
3467c478bd9Sstevel@tonic-gate 		else {
3477c478bd9Sstevel@tonic-gate 			debug_high("closed fd %d\n", fd);
3487c478bd9Sstevel@tonic-gate 			if (rfd_close(fd) != 0)
3497c478bd9Sstevel@tonic-gate 				debug("could not close fd %d", fd);
3507c478bd9Sstevel@tonic-gate 			fd = cached_fd = -1;
3517c478bd9Sstevel@tonic-gate 		}
3527c478bd9Sstevel@tonic-gate 	} while (can_try_uncached == 1);
3537c478bd9Sstevel@tonic-gate 
3547c478bd9Sstevel@tonic-gate 	if (fd_update_cb == NULL || fd_update_cb(arg, fd) != 0)
3557c478bd9Sstevel@tonic-gate 		if (fd >= 0) {
3567c478bd9Sstevel@tonic-gate 			debug_high("closed %s fd %d\n", fd_update_cb == NULL ?
3577c478bd9Sstevel@tonic-gate 			    "uncached" : "cached", fd);
3587c478bd9Sstevel@tonic-gate 			if (rfd_close(fd) != 0)
3597c478bd9Sstevel@tonic-gate 				debug("could not close fd %d", fd);
3607c478bd9Sstevel@tonic-gate 		}
3617c478bd9Sstevel@tonic-gate 
3627c478bd9Sstevel@tonic-gate 	debug_high("get_psinfo ret %d, fd %d, %s\n", ((fd >= 0) ? 0 : -1), fd,
3637c478bd9Sstevel@tonic-gate 	    fd_update_cb != NULL ? "cached" : "uncached");
3647c478bd9Sstevel@tonic-gate 	return ((fd >= 0) ? 0 : -1);
3657c478bd9Sstevel@tonic-gate }
3667c478bd9Sstevel@tonic-gate 
3677c478bd9Sstevel@tonic-gate /*
3680209230bSgjelinek  * Retrieve the collection membership of all processes and update the psinfo of
3690209230bSgjelinek  * those non-system, non-zombie ones in collections.  For global zone processes,
3700209230bSgjelinek  * we first attempt to put the process into a capped project collection.  We
3710209230bSgjelinek  * also want to track the process for the global zone collection as a whole.
3727c478bd9Sstevel@tonic-gate  */
3737c478bd9Sstevel@tonic-gate static void
proc_cb(const pid_t pid)3747c478bd9Sstevel@tonic-gate proc_cb(const pid_t pid)
3757c478bd9Sstevel@tonic-gate {
3767c478bd9Sstevel@tonic-gate 	psinfo_t psinfo;
3777c478bd9Sstevel@tonic-gate 
3780209230bSgjelinek 	if (get_psinfo(pid, &psinfo, -1, NULL, NULL, NULL) == 0) {
3790209230bSgjelinek 		lprocess_insert_mark(&psinfo);
3800209230bSgjelinek 		if (gz_capped && psinfo.pr_zoneid == GLOBAL_ZONEID) {
3810209230bSgjelinek 			/*
3820209230bSgjelinek 			 * We also want to track this process for the global
3830209230bSgjelinek 			 * zone as a whole so add it to the global zone
3840209230bSgjelinek 			 * collection as well.
3850209230bSgjelinek 			 */
3860209230bSgjelinek 			psinfo.pr_projid = -1;
3870209230bSgjelinek 			lprocess_insert_mark(&psinfo);
3880209230bSgjelinek 		}
3890209230bSgjelinek 	}
3907c478bd9Sstevel@tonic-gate }
3917c478bd9Sstevel@tonic-gate 
3927c478bd9Sstevel@tonic-gate /*
3937c478bd9Sstevel@tonic-gate  * Cache the process' psinfo fd, taking responsibility for freeing it.
3947c478bd9Sstevel@tonic-gate  */
3957c478bd9Sstevel@tonic-gate int
lprocess_update_psinfo_fd_cb(void * arg,int fd)3967c478bd9Sstevel@tonic-gate lprocess_update_psinfo_fd_cb(void *arg, int fd)
3977c478bd9Sstevel@tonic-gate {
3987c478bd9Sstevel@tonic-gate 	lprocess_t *lpc = arg;
3997c478bd9Sstevel@tonic-gate 
4007c478bd9Sstevel@tonic-gate 	lpc->lpc_psinfo_fd = fd;
4017c478bd9Sstevel@tonic-gate 	return (0);
4027c478bd9Sstevel@tonic-gate }
4037c478bd9Sstevel@tonic-gate 
4047c478bd9Sstevel@tonic-gate /*
4050209230bSgjelinek  * Get the system pagesize.
4067c478bd9Sstevel@tonic-gate  */
4070209230bSgjelinek static void
get_page_size(void)4080209230bSgjelinek get_page_size(void)
4097c478bd9Sstevel@tonic-gate {
4100209230bSgjelinek 	page_size_kb = sysconf(_SC_PAGESIZE) / 1024;
4110209230bSgjelinek 	debug("physical page size: %luKB\n", page_size_kb);
4127c478bd9Sstevel@tonic-gate }
4137c478bd9Sstevel@tonic-gate 
4140209230bSgjelinek static void
tm_fmt(char * msg,hrtime_t t1,hrtime_t t2)4150209230bSgjelinek tm_fmt(char *msg, hrtime_t t1, hrtime_t t2)
4160209230bSgjelinek {
4170209230bSgjelinek 	hrtime_t diff = t2 - t1;
4180209230bSgjelinek 
4190209230bSgjelinek 	if (diff < MILLISEC)
4200209230bSgjelinek 		debug("%s: %lld nanoseconds\n", msg, diff);
4210209230bSgjelinek 	else if (diff < MICROSEC)
4220209230bSgjelinek 		debug("%s: %.2f microseconds\n", msg, (float)diff / MILLISEC);
4230209230bSgjelinek 	else if (diff < NANOSEC)
4240209230bSgjelinek 		debug("%s: %.2f milliseconds\n", msg, (float)diff / MICROSEC);
4250209230bSgjelinek 	else
4260209230bSgjelinek 		debug("%s: %.2f seconds\n", msg, (float)diff / NANOSEC);
4270209230bSgjelinek }
4280209230bSgjelinek 
4290209230bSgjelinek /*
4300209230bSgjelinek  * Get the zone's & project's RSS from the kernel.
4310209230bSgjelinek  */
4320209230bSgjelinek static void
rss_sample(boolean_t my_zone_only,uint_t col_types)4330209230bSgjelinek rss_sample(boolean_t my_zone_only, uint_t col_types)
4340209230bSgjelinek {
4350209230bSgjelinek 	size_t nres;
4360209230bSgjelinek 	size_t i;
4370209230bSgjelinek 	uint_t flags;
4380209230bSgjelinek 	hrtime_t t1, t2;
4390209230bSgjelinek 
4400209230bSgjelinek 	if (my_zone_only) {
4410209230bSgjelinek 		flags = VMUSAGE_ZONE;
4420209230bSgjelinek 	} else {
4430209230bSgjelinek 		flags = 0;
4440209230bSgjelinek 		if (col_types & CAPPED_PROJECT)
4450209230bSgjelinek 			flags |= VMUSAGE_PROJECTS;
4460209230bSgjelinek 		if (col_types & CAPPED_ZONE && my_zoneid == GLOBAL_ZONEID)
4470209230bSgjelinek 			flags |= VMUSAGE_ALL_ZONES;
4480209230bSgjelinek 	}
4490209230bSgjelinek 
4500209230bSgjelinek 	debug("vmusage sample flags 0x%x\n", flags);
4510209230bSgjelinek 	if (flags == 0)
4520209230bSgjelinek 		return;
4530209230bSgjelinek 
4540209230bSgjelinek again:
4550209230bSgjelinek 	/* try the current buffer to see if the list will fit */
4560209230bSgjelinek 	nres = vmu_vals_len;
4570209230bSgjelinek 	t1 = gethrtime();
4580209230bSgjelinek 	if (getvmusage(flags, my_zone_only ? 0 : rcfg.rcfg_rss_sample_interval,
4590209230bSgjelinek 	    vmu_vals, &nres) != 0) {
4600209230bSgjelinek 		if (errno != EOVERFLOW) {
4610209230bSgjelinek 			warn(gettext("can't read RSS from kernel\n"));
4620209230bSgjelinek 			return;
4630209230bSgjelinek 		}
4640209230bSgjelinek 	}
4650209230bSgjelinek 	t2 = gethrtime();
4660209230bSgjelinek 	tm_fmt("getvmusage time", t1, t2);
4670209230bSgjelinek 
4680209230bSgjelinek 	debug("kernel nres %lu\n", (ulong_t)nres);
4690209230bSgjelinek 
4700209230bSgjelinek 	if (nres > vmu_vals_len) {
4710209230bSgjelinek 		/* array size is now too small, increase it and try again */
4720209230bSgjelinek 		free(vmu_vals);
4730209230bSgjelinek 
4740209230bSgjelinek 		if ((vmu_vals = (vmusage_t *)calloc(nres,
4750209230bSgjelinek 		    sizeof (vmusage_t))) == NULL) {
4760209230bSgjelinek 			warn(gettext("out of memory: could not read RSS from "
4770209230bSgjelinek 			    "kernel\n"));
4780209230bSgjelinek 			vmu_vals_len = nvmu_vals = 0;
4790209230bSgjelinek 			return;
4800209230bSgjelinek 		}
4810209230bSgjelinek 		vmu_vals_len = nres;
4820209230bSgjelinek 		goto again;
4830209230bSgjelinek 	}
4840209230bSgjelinek 
4850209230bSgjelinek 	nvmu_vals = nres;
4860209230bSgjelinek 
4870209230bSgjelinek 	debug("vmusage_sample\n");
4880209230bSgjelinek 	for (i = 0; i < nvmu_vals; i++) {
4890209230bSgjelinek 		debug("%d: id: %d, type: 0x%x, rss_all: %llu (%lluKB), "
4900209230bSgjelinek 		    "swap: %llu\n", (int)i, (int)vmu_vals[i].vmu_id,
4910209230bSgjelinek 		    vmu_vals[i].vmu_type,
4920209230bSgjelinek 		    (unsigned long long)vmu_vals[i].vmu_rss_all,
4930209230bSgjelinek 		    (unsigned long long)vmu_vals[i].vmu_rss_all / 1024,
4940209230bSgjelinek 		    (unsigned long long)vmu_vals[i].vmu_swap_all);
4950209230bSgjelinek 	}
4960209230bSgjelinek }
4970209230bSgjelinek 
4980209230bSgjelinek static void
update_col_rss(lcollection_t * lcol)4990209230bSgjelinek update_col_rss(lcollection_t *lcol)
5000209230bSgjelinek {
5010209230bSgjelinek 	int i;
5020209230bSgjelinek 
5030209230bSgjelinek 	lcol->lcol_rss = 0;
5040209230bSgjelinek 	lcol->lcol_image_size = 0;
5050209230bSgjelinek 
5060209230bSgjelinek 	for (i = 0; i < nvmu_vals; i++) {
5070209230bSgjelinek 		if (vmu_vals[i].vmu_id != lcol->lcol_id.rcid_val)
5080209230bSgjelinek 			continue;
5090209230bSgjelinek 
5100209230bSgjelinek 		if (vmu_vals[i].vmu_type == VMUSAGE_ZONE &&
5110209230bSgjelinek 		    lcol->lcol_id.rcid_type != RCIDT_ZONE)
5120209230bSgjelinek 			continue;
5130209230bSgjelinek 
5140209230bSgjelinek 		if (vmu_vals[i].vmu_type == VMUSAGE_PROJECTS &&
5150209230bSgjelinek 		    lcol->lcol_id.rcid_type != RCIDT_PROJECT)
5160209230bSgjelinek 			continue;
5170209230bSgjelinek 
5180209230bSgjelinek 		/* we found the right RSS entry, update the collection vals */
5190209230bSgjelinek 		lcol->lcol_rss = vmu_vals[i].vmu_rss_all / 1024;
5200209230bSgjelinek 		lcol->lcol_image_size = vmu_vals[i].vmu_swap_all / 1024;
5210209230bSgjelinek 		break;
5220209230bSgjelinek 	}
5237c478bd9Sstevel@tonic-gate }
5247c478bd9Sstevel@tonic-gate 
5257c478bd9Sstevel@tonic-gate /*
5267c478bd9Sstevel@tonic-gate  * Sample the collection RSS, updating the collection's statistics with the
5270209230bSgjelinek  * results.  Also, sum the rss of all capped projects & return true if
5280209230bSgjelinek  * the collection is over cap.
5297c478bd9Sstevel@tonic-gate  */
5307c478bd9Sstevel@tonic-gate static int
rss_sample_col_cb(lcollection_t * lcol,void * arg)5317c478bd9Sstevel@tonic-gate rss_sample_col_cb(lcollection_t *lcol, void *arg)
5327c478bd9Sstevel@tonic-gate {
5337c478bd9Sstevel@tonic-gate 	int64_t excess;
5347c478bd9Sstevel@tonic-gate 	uint64_t rss;
5350209230bSgjelinek 	sample_col_arg_t *col_argp = (sample_col_arg_t *)arg;
5367c478bd9Sstevel@tonic-gate 
5370209230bSgjelinek 	update_col_rss(lcol);
5387c478bd9Sstevel@tonic-gate 
5397c478bd9Sstevel@tonic-gate 	lcol->lcol_stat.lcols_rss_sample++;
5407c478bd9Sstevel@tonic-gate 	rss = lcol->lcol_rss;
5410209230bSgjelinek 	excess = rss - lcol->lcol_rss_cap;
5420209230bSgjelinek 	if (excess > 0) {
5437c478bd9Sstevel@tonic-gate 		lcol->lcol_stat.lcols_rss_act_sum += rss;
5440209230bSgjelinek 		col_argp->sca_any_over_cap = B_TRUE;
5450209230bSgjelinek 		if (lcol->lcol_id.rcid_type == RCIDT_PROJECT)
5460209230bSgjelinek 			col_argp->sca_project_over_cap = B_TRUE;
5470209230bSgjelinek 	}
5487c478bd9Sstevel@tonic-gate 	lcol->lcol_stat.lcols_rss_sum += rss;
5497c478bd9Sstevel@tonic-gate 
5507c478bd9Sstevel@tonic-gate 	if (lcol->lcol_stat.lcols_min_rss > rss)
5517c478bd9Sstevel@tonic-gate 		lcol->lcol_stat.lcols_min_rss = rss;
5527c478bd9Sstevel@tonic-gate 	if (lcol->lcol_stat.lcols_max_rss < rss)
5537c478bd9Sstevel@tonic-gate 		lcol->lcol_stat.lcols_max_rss = rss;
5547c478bd9Sstevel@tonic-gate 
5557c478bd9Sstevel@tonic-gate 	return (0);
5567c478bd9Sstevel@tonic-gate }
5577c478bd9Sstevel@tonic-gate 
5587c478bd9Sstevel@tonic-gate /*
5590209230bSgjelinek  * Determine if we have capped projects, capped zones or both.
5600209230bSgjelinek  */
5610209230bSgjelinek static int
col_type_cb(lcollection_t * lcol,void * arg)5620209230bSgjelinek col_type_cb(lcollection_t *lcol, void *arg)
5630209230bSgjelinek {
5640209230bSgjelinek 	uint_t *col_type = (uint_t *)arg;
5650209230bSgjelinek 
5660209230bSgjelinek 	/* skip uncapped collections */
5670209230bSgjelinek 	if (lcol->lcol_rss_cap == 0)
5680209230bSgjelinek 		return (1);
5690209230bSgjelinek 
5700209230bSgjelinek 	if (lcol->lcol_id.rcid_type == RCIDT_PROJECT)
5710209230bSgjelinek 		*col_type |= CAPPED_PROJECT;
5720209230bSgjelinek 	else
5730209230bSgjelinek 		*col_type |= CAPPED_ZONE;
5740209230bSgjelinek 
5750209230bSgjelinek 	/* once we know everything is capped, we can stop looking */
5760209230bSgjelinek 	if ((*col_type & CAPPED_ZONE) && (*col_type & CAPPED_PROJECT))
5770209230bSgjelinek 		return (1);
5780209230bSgjelinek 
5790209230bSgjelinek 	return (0);
5800209230bSgjelinek }
5810209230bSgjelinek 
5820209230bSgjelinek /*
5837c478bd9Sstevel@tonic-gate  * Open /proc and walk entries.
5847c478bd9Sstevel@tonic-gate  */
5857c478bd9Sstevel@tonic-gate static void
proc_walk_all(void (* cb)(const pid_t))5867c478bd9Sstevel@tonic-gate proc_walk_all(void (*cb)(const pid_t))
5877c478bd9Sstevel@tonic-gate {
5887c478bd9Sstevel@tonic-gate 	DIR *pdir;
5897c478bd9Sstevel@tonic-gate 	struct dirent *dirent;
5907c478bd9Sstevel@tonic-gate 	pid_t pid;
5917c478bd9Sstevel@tonic-gate 
5927c478bd9Sstevel@tonic-gate 	(void) rfd_reserve(1);
5937c478bd9Sstevel@tonic-gate 	if ((pdir = opendir("/proc")) == NULL)
5947c478bd9Sstevel@tonic-gate 		die(gettext("couldn't open /proc!"));
5957c478bd9Sstevel@tonic-gate 
5967c478bd9Sstevel@tonic-gate 	while ((dirent = readdir(pdir)) != NULL) {
5977c478bd9Sstevel@tonic-gate 		if (strcmp(".", dirent->d_name) == 0 ||
5987c478bd9Sstevel@tonic-gate 		    strcmp("..", dirent->d_name) == 0)
5997c478bd9Sstevel@tonic-gate 			continue;
6007c478bd9Sstevel@tonic-gate 		pid = atoi(dirent->d_name);
6017c478bd9Sstevel@tonic-gate 		ASSERT(pid != 0 || strcmp(dirent->d_name, "0") == 0);
6027c478bd9Sstevel@tonic-gate 		if (pid == rcapd_pid)
6037c478bd9Sstevel@tonic-gate 			continue;
6047c478bd9Sstevel@tonic-gate 		else
6057c478bd9Sstevel@tonic-gate 			cb(pid);
6067c478bd9Sstevel@tonic-gate 	}
6077c478bd9Sstevel@tonic-gate 	(void) closedir(pdir);
6087c478bd9Sstevel@tonic-gate }
6097c478bd9Sstevel@tonic-gate 
6107c478bd9Sstevel@tonic-gate /*
6117c478bd9Sstevel@tonic-gate  * Clear unmarked callback.
6127c478bd9Sstevel@tonic-gate  */
6137c478bd9Sstevel@tonic-gate /*ARGSUSED*/
6147c478bd9Sstevel@tonic-gate static int
sweep_process_cb(lcollection_t * lcol,lprocess_t * lpc)6157c478bd9Sstevel@tonic-gate sweep_process_cb(lcollection_t *lcol, lprocess_t *lpc)
6167c478bd9Sstevel@tonic-gate {
6177c478bd9Sstevel@tonic-gate 	if (lpc->lpc_mark) {
6187c478bd9Sstevel@tonic-gate 		lpc->lpc_mark = 0;
6197c478bd9Sstevel@tonic-gate 	} else {
6207c478bd9Sstevel@tonic-gate 		debug("process %d finished\n", (int)lpc->lpc_pid);
6217c478bd9Sstevel@tonic-gate 		lprocess_free(lpc);
6227c478bd9Sstevel@tonic-gate 	}
6237c478bd9Sstevel@tonic-gate 
6247c478bd9Sstevel@tonic-gate 	return (0);
6257c478bd9Sstevel@tonic-gate }
6267c478bd9Sstevel@tonic-gate 
6277c478bd9Sstevel@tonic-gate /*
6287c478bd9Sstevel@tonic-gate  * Print, for debugging purposes, a collection's recently-sampled RSS and
6297c478bd9Sstevel@tonic-gate  * excess.
6307c478bd9Sstevel@tonic-gate  */
6317c478bd9Sstevel@tonic-gate /*ARGSUSED*/
6327c478bd9Sstevel@tonic-gate static int
excess_print_cb(lcollection_t * lcol,void * arg)6337c478bd9Sstevel@tonic-gate excess_print_cb(lcollection_t *lcol, void *arg)
6347c478bd9Sstevel@tonic-gate {
6357c478bd9Sstevel@tonic-gate 	int64_t excess = lcol->lcol_rss - lcol->lcol_rss_cap;
6367c478bd9Sstevel@tonic-gate 
6377c478bd9Sstevel@tonic-gate 	debug("%s %s rss/cap: %llu/%llu, excess = %lld kB\n",
6380209230bSgjelinek 	    (lcol->lcol_id.rcid_type == RCIDT_PROJECT ? "project" : "zone"),
6390209230bSgjelinek 	    lcol->lcol_name,
6407c478bd9Sstevel@tonic-gate 	    (unsigned long long)lcol->lcol_rss,
6417c478bd9Sstevel@tonic-gate 	    (unsigned long long)lcol->lcol_rss_cap,
6427c478bd9Sstevel@tonic-gate 	    (long long)excess);
6437c478bd9Sstevel@tonic-gate 
6447c478bd9Sstevel@tonic-gate 	return (0);
6457c478bd9Sstevel@tonic-gate }
6467c478bd9Sstevel@tonic-gate 
6477c478bd9Sstevel@tonic-gate /*
6487c478bd9Sstevel@tonic-gate  * Scan those collections which have exceeded their caps.
6490209230bSgjelinek  *
6500209230bSgjelinek  * If we're running in the global zone it might have a cap.  We don't want to
6510209230bSgjelinek  * do any capping for the global zone yet since we might get under the cap by
6520209230bSgjelinek  * just capping the projects in the global zone.
6537c478bd9Sstevel@tonic-gate  */
6547c478bd9Sstevel@tonic-gate /*ARGSUSED*/
6557c478bd9Sstevel@tonic-gate static int
scan_cb(lcollection_t * lcol,void * arg)6567c478bd9Sstevel@tonic-gate scan_cb(lcollection_t *lcol, void *arg)
6577c478bd9Sstevel@tonic-gate {
6587c478bd9Sstevel@tonic-gate 	int64_t excess;
6597c478bd9Sstevel@tonic-gate 
6600209230bSgjelinek 	/* skip over global zone collection for now but keep track for later */
6610209230bSgjelinek 	if (lcol->lcol_id.rcid_type == RCIDT_ZONE &&
6620209230bSgjelinek 	    lcol->lcol_id.rcid_val == GLOBAL_ZONEID) {
6630209230bSgjelinek 		gz_col = lcol;
6640209230bSgjelinek 		return (0);
6650209230bSgjelinek 	}
6660209230bSgjelinek 
6677c478bd9Sstevel@tonic-gate 	if ((excess = lcol->lcol_rss - lcol->lcol_rss_cap) > 0) {
6687c478bd9Sstevel@tonic-gate 		scan(lcol, excess);
6697c478bd9Sstevel@tonic-gate 		lcol->lcol_stat.lcols_scan++;
6707c478bd9Sstevel@tonic-gate 	}
6717c478bd9Sstevel@tonic-gate 
6727c478bd9Sstevel@tonic-gate 	return (0);
6737c478bd9Sstevel@tonic-gate }
6747c478bd9Sstevel@tonic-gate 
6757c478bd9Sstevel@tonic-gate /*
6760209230bSgjelinek  * Scan the global zone collection and see if it still exceeds its cap.
6770209230bSgjelinek  * We take into account the effects of capping any global zone projects here.
6780209230bSgjelinek  */
6790209230bSgjelinek static void
scan_gz(lcollection_t * lcol,boolean_t project_over_cap)6800209230bSgjelinek scan_gz(lcollection_t *lcol, boolean_t project_over_cap)
6810209230bSgjelinek {
6820209230bSgjelinek 	int64_t excess;
6830209230bSgjelinek 
6840209230bSgjelinek 	/*
6850209230bSgjelinek 	 * If we had projects over their cap and the global zone was also over
6860209230bSgjelinek 	 * its cap then we need to get the up-to-date global zone rss to
6870209230bSgjelinek 	 * determine if we are still over the global zone cap.  We might have
6880209230bSgjelinek 	 * gone under while we scanned the capped projects.  If there were no
6890209230bSgjelinek 	 * projects over cap then we can use the rss value we already have for
6900209230bSgjelinek 	 * the global zone.
6910209230bSgjelinek 	 */
6920209230bSgjelinek 	excess = lcol->lcol_rss - lcol->lcol_rss_cap;
6930209230bSgjelinek 	if (project_over_cap && excess > 0) {
6940209230bSgjelinek 		rss_sample(B_TRUE, CAPPED_ZONE);
6950209230bSgjelinek 		update_col_rss(lcol);
6960209230bSgjelinek 		excess = lcol->lcol_rss - lcol->lcol_rss_cap;
6970209230bSgjelinek 	}
6980209230bSgjelinek 
6990209230bSgjelinek 	if (excess > 0) {
7000209230bSgjelinek 		debug("global zone excess %lldKB\n", (long long)excess);
7010209230bSgjelinek 		scan(lcol, excess);
7020209230bSgjelinek 		lcol->lcol_stat.lcols_scan++;
7030209230bSgjelinek 	}
7040209230bSgjelinek }
7050209230bSgjelinek 
7060209230bSgjelinek /*
7077c478bd9Sstevel@tonic-gate  * Do a soft scan of those collections which have excesses.  A soft scan is one
7087c478bd9Sstevel@tonic-gate  * in which the cap enforcement pressure is taken into account.  The difference
7097c478bd9Sstevel@tonic-gate  * between the utilized physical memory and the cap enforcement pressure will
7107c478bd9Sstevel@tonic-gate  * be scanned-for, and each collection will be scanned proportionally by their
7117c478bd9Sstevel@tonic-gate  * present excesses.
7127c478bd9Sstevel@tonic-gate  */
7137c478bd9Sstevel@tonic-gate static int
soft_scan_cb(lcollection_t * lcol,void * a)7147c478bd9Sstevel@tonic-gate soft_scan_cb(lcollection_t *lcol, void *a)
7157c478bd9Sstevel@tonic-gate {
7167c478bd9Sstevel@tonic-gate 	int64_t excess;
7177c478bd9Sstevel@tonic-gate 	soft_scan_arg_t *arg = a;
7187c478bd9Sstevel@tonic-gate 
7190209230bSgjelinek 	/* skip over global zone collection for now but keep track for later */
7200209230bSgjelinek 	if (lcol->lcol_id.rcid_type == RCIDT_ZONE &&
7210209230bSgjelinek 	    lcol->lcol_id.rcid_val == GLOBAL_ZONEID) {
7220209230bSgjelinek 		gz_col = lcol;
7230209230bSgjelinek 		return (0);
7240209230bSgjelinek 	}
7250209230bSgjelinek 
7267c478bd9Sstevel@tonic-gate 	if ((excess = lcol->lcol_rss - lcol->lcol_rss_cap) > 0) {
7270209230bSgjelinek 		int64_t adjusted_excess =
7280209230bSgjelinek 		    excess * arg->ssa_scan_goal / arg->ssa_sum_excess;
7290209230bSgjelinek 
7300209230bSgjelinek 		debug("%s %ld excess %lld scan_goal %lld sum_excess %llu, "
7310209230bSgjelinek 		    "scanning %lld\n",
7320209230bSgjelinek 		    (lcol->lcol_id.rcid_type == RCIDT_PROJECT ?
7330209230bSgjelinek 		    "project" : "zone"),
7340209230bSgjelinek 		    (long)lcol->lcol_id.rcid_val,
7357c478bd9Sstevel@tonic-gate 		    (long long)excess, (long long)arg->ssa_scan_goal,
7367c478bd9Sstevel@tonic-gate 		    (unsigned long long)arg->ssa_sum_excess,
7370209230bSgjelinek 		    (long long)adjusted_excess);
7387c478bd9Sstevel@tonic-gate 
7390209230bSgjelinek 		scan(lcol, adjusted_excess);
7407c478bd9Sstevel@tonic-gate 		lcol->lcol_stat.lcols_scan++;
7417c478bd9Sstevel@tonic-gate 	}
7427c478bd9Sstevel@tonic-gate 
7437c478bd9Sstevel@tonic-gate 	return (0);
7447c478bd9Sstevel@tonic-gate }
7457c478bd9Sstevel@tonic-gate 
7460209230bSgjelinek static void
soft_scan_gz(lcollection_t * lcol,void * a)7470209230bSgjelinek soft_scan_gz(lcollection_t *lcol, void *a)
7480209230bSgjelinek {
7490209230bSgjelinek 	int64_t excess;
7500209230bSgjelinek 	soft_scan_arg_t *arg = a;
7510209230bSgjelinek 
7520209230bSgjelinek 	/*
7530209230bSgjelinek 	 * If we had projects over their cap and the global zone was also over
7540209230bSgjelinek 	 * its cap then we need to get the up-to-date global zone rss to
7550209230bSgjelinek 	 * determine if we are still over the global zone cap.  We might have
7560209230bSgjelinek 	 * gone under while we scanned the capped projects.  If there were no
7570209230bSgjelinek 	 * projects over cap then we can use the rss value we already have for
7580209230bSgjelinek 	 * the global zone.
7590209230bSgjelinek 	 */
7600209230bSgjelinek 	excess = lcol->lcol_rss - lcol->lcol_rss_cap;
7610209230bSgjelinek 	if (arg->ssa_project_over_cap && excess > 0) {
7620209230bSgjelinek 		rss_sample(B_TRUE, CAPPED_ZONE);
7630209230bSgjelinek 		update_col_rss(lcol);
7640209230bSgjelinek 		excess = lcol->lcol_rss - lcol->lcol_rss_cap;
7650209230bSgjelinek 	}
7660209230bSgjelinek 
7670209230bSgjelinek 	if (excess > 0) {
7680209230bSgjelinek 		int64_t adjusted_excess =
7690209230bSgjelinek 		    excess * arg->ssa_scan_goal / arg->ssa_sum_excess;
7700209230bSgjelinek 
7710209230bSgjelinek 		debug("%s %ld excess %lld scan_goal %lld sum_excess %llu, "
7720209230bSgjelinek 		    "scanning %lld\n",
7730209230bSgjelinek 		    (lcol->lcol_id.rcid_type == RCIDT_PROJECT ?
7740209230bSgjelinek 		    "project" : "zone"),
7750209230bSgjelinek 		    (long)lcol->lcol_id.rcid_val,
7760209230bSgjelinek 		    (long long)excess, (long long)arg->ssa_scan_goal,
7770209230bSgjelinek 		    (unsigned long long)arg->ssa_sum_excess,
7780209230bSgjelinek 		    (long long)adjusted_excess);
7790209230bSgjelinek 
7800209230bSgjelinek 		scan(lcol, adjusted_excess);
7810209230bSgjelinek 		lcol->lcol_stat.lcols_scan++;
7820209230bSgjelinek 	}
7830209230bSgjelinek }
7840209230bSgjelinek 
7857c478bd9Sstevel@tonic-gate /*
7867c478bd9Sstevel@tonic-gate  * When a scan could happen, but caps aren't enforced tick the
7877c478bd9Sstevel@tonic-gate  * lcols_unenforced_cap counter.
7887c478bd9Sstevel@tonic-gate  */
7897c478bd9Sstevel@tonic-gate /*ARGSUSED*/
7907c478bd9Sstevel@tonic-gate static int
unenforced_cap_cb(lcollection_t * lcol,void * arg)7917c478bd9Sstevel@tonic-gate unenforced_cap_cb(lcollection_t *lcol, void *arg)
7927c478bd9Sstevel@tonic-gate {
7937c478bd9Sstevel@tonic-gate 	lcol->lcol_stat.lcols_unenforced_cap++;
7947c478bd9Sstevel@tonic-gate 
7957c478bd9Sstevel@tonic-gate 	return (0);
7967c478bd9Sstevel@tonic-gate }
7977c478bd9Sstevel@tonic-gate 
7987c478bd9Sstevel@tonic-gate /*
7997c478bd9Sstevel@tonic-gate  * Update the count of physically installed memory.
8007c478bd9Sstevel@tonic-gate  */
8017c478bd9Sstevel@tonic-gate static void
update_phys_total(void)8027c478bd9Sstevel@tonic-gate update_phys_total(void)
8037c478bd9Sstevel@tonic-gate {
8047c478bd9Sstevel@tonic-gate 	uint64_t old_phys_total;
8057c478bd9Sstevel@tonic-gate 
8067c478bd9Sstevel@tonic-gate 	old_phys_total = phys_total;
8070209230bSgjelinek 	phys_total = (uint64_t)sysconf(_SC_PHYS_PAGES) * page_size_kb;
8087c478bd9Sstevel@tonic-gate 	if (phys_total != old_phys_total)
8097c478bd9Sstevel@tonic-gate 		debug("physical memory%s: %lluM\n", (old_phys_total == 0 ?
8107c478bd9Sstevel@tonic-gate 		    "" : " adjusted"), (unsigned long long)(phys_total / 1024));
8117c478bd9Sstevel@tonic-gate }
8127c478bd9Sstevel@tonic-gate 
8137c478bd9Sstevel@tonic-gate /*
8147c478bd9Sstevel@tonic-gate  * Unlink a process from its collection, updating relevant statistics, and
8157c478bd9Sstevel@tonic-gate  * freeing its associated memory.
8167c478bd9Sstevel@tonic-gate  */
8177c478bd9Sstevel@tonic-gate void
lprocess_free(lprocess_t * lpc)8187c478bd9Sstevel@tonic-gate lprocess_free(lprocess_t *lpc)
8197c478bd9Sstevel@tonic-gate {
8207c478bd9Sstevel@tonic-gate 	pid_t pid;
8217c478bd9Sstevel@tonic-gate 
8227c478bd9Sstevel@tonic-gate 	lpc->lpc_collection->lcol_stat.lcols_proc_out++;
8237c478bd9Sstevel@tonic-gate 
8247c478bd9Sstevel@tonic-gate 	if (lpc->lpc_prev != NULL)
8257c478bd9Sstevel@tonic-gate 		lpc->lpc_prev->lpc_next = lpc->lpc_next;
8267c478bd9Sstevel@tonic-gate 	if (lpc->lpc_next != NULL)
8277c478bd9Sstevel@tonic-gate 		lpc->lpc_next->lpc_prev = lpc->lpc_prev;
8287c478bd9Sstevel@tonic-gate 	if (lpc->lpc_collection->lcol_lprocess == lpc)
8297c478bd9Sstevel@tonic-gate 		lpc->lpc_collection->lcol_lprocess = (lpc->lpc_next !=
8307c478bd9Sstevel@tonic-gate 		    lpc ? lpc->lpc_next : NULL);
8317c478bd9Sstevel@tonic-gate 	lpc->lpc_next = lpc->lpc_prev = NULL;
8327c478bd9Sstevel@tonic-gate 
8337c478bd9Sstevel@tonic-gate 	if (lpc->lpc_prpageheader != NULL)
8347c478bd9Sstevel@tonic-gate 		free(lpc->lpc_prpageheader);
8357c478bd9Sstevel@tonic-gate 	if (lpc->lpc_xmap != NULL)
8367c478bd9Sstevel@tonic-gate 		free(lpc->lpc_xmap);
8377c478bd9Sstevel@tonic-gate 	if (lpc->lpc_psinfo_fd >= 0) {
8387c478bd9Sstevel@tonic-gate 		if (rfd_close(lpc->lpc_psinfo_fd) != 0)
8397c478bd9Sstevel@tonic-gate 			debug("could not close %d lpc_psinfo_fd %d",
8407c478bd9Sstevel@tonic-gate 			    (int)lpc->lpc_pid, lpc->lpc_psinfo_fd);
8417c478bd9Sstevel@tonic-gate 		lpc->lpc_psinfo_fd = -1;
8427c478bd9Sstevel@tonic-gate 	}
8437c478bd9Sstevel@tonic-gate 	if (lpc->lpc_pgdata_fd >= 0) {
8447c478bd9Sstevel@tonic-gate 		if (rfd_close(lpc->lpc_pgdata_fd) != 0)
8457c478bd9Sstevel@tonic-gate 			debug("could not close %d lpc_pgdata_fd %d",
8467c478bd9Sstevel@tonic-gate 			    (int)lpc->lpc_pid, lpc->lpc_pgdata_fd);
8477c478bd9Sstevel@tonic-gate 		lpc->lpc_pgdata_fd = -1;
8487c478bd9Sstevel@tonic-gate 	}
8497c478bd9Sstevel@tonic-gate 	if (lpc->lpc_xmap_fd >= 0) {
8507c478bd9Sstevel@tonic-gate 		if (rfd_close(lpc->lpc_xmap_fd) != 0)
8517c478bd9Sstevel@tonic-gate 			debug("could not close %d lpc_xmap_fd %d",
8527c478bd9Sstevel@tonic-gate 			    (int)lpc->lpc_pid, lpc->lpc_xmap_fd);
8537c478bd9Sstevel@tonic-gate 		lpc->lpc_xmap_fd = -1;
8547c478bd9Sstevel@tonic-gate 	}
8557c478bd9Sstevel@tonic-gate 	if (lpc->lpc_ignore != NULL)
8567c478bd9Sstevel@tonic-gate 		lmapping_free(&lpc->lpc_ignore);
8577c478bd9Sstevel@tonic-gate 	pid = lpc->lpc_pid;
8587c478bd9Sstevel@tonic-gate 	free(lpc);
8597c478bd9Sstevel@tonic-gate 	debug_high("process %d freed\n", (int)pid);
8607c478bd9Sstevel@tonic-gate }
8617c478bd9Sstevel@tonic-gate 
8627c478bd9Sstevel@tonic-gate /*
8637c478bd9Sstevel@tonic-gate  * Collection clear callback.
8647c478bd9Sstevel@tonic-gate  */
8657c478bd9Sstevel@tonic-gate /*ARGSUSED*/
8667c478bd9Sstevel@tonic-gate static int
collection_clear_cb(lcollection_t * lcol,void * arg)8677c478bd9Sstevel@tonic-gate collection_clear_cb(lcollection_t *lcol, void *arg)
8687c478bd9Sstevel@tonic-gate {
8697c478bd9Sstevel@tonic-gate 	lcol->lcol_mark = 0;
8707c478bd9Sstevel@tonic-gate 
8717c478bd9Sstevel@tonic-gate 	return (0);
8727c478bd9Sstevel@tonic-gate }
8737c478bd9Sstevel@tonic-gate 
8747c478bd9Sstevel@tonic-gate /*
8757c478bd9Sstevel@tonic-gate  * Respond to a terminating signal by setting a termination flag.
8767c478bd9Sstevel@tonic-gate  */
8777c478bd9Sstevel@tonic-gate /*ARGSUSED*/
8787c478bd9Sstevel@tonic-gate static void
terminate_signal(int signal)8797c478bd9Sstevel@tonic-gate terminate_signal(int signal)
8807c478bd9Sstevel@tonic-gate {
8817c478bd9Sstevel@tonic-gate 	if (termination_signal == 0)
8827c478bd9Sstevel@tonic-gate 		termination_signal = signal;
8837c478bd9Sstevel@tonic-gate 	should_run = 0;
8847c478bd9Sstevel@tonic-gate }
8857c478bd9Sstevel@tonic-gate 
/*
 * Handler for synchronous or asynchronous signals that would ordinarily
 * make the process abort.  The scanner is first given a last-ditch chance
 * to resume any processes it has stopped, then the process aborts.
 */
/*ARGSUSED*/
static void
abort_signal(int signal)
{
	scan_abort();	/* let the scanner resume stopped processes */
	abort();
}
9017c478bd9Sstevel@tonic-gate 
9027c478bd9Sstevel@tonic-gate /*
9037c478bd9Sstevel@tonic-gate  * Clean up collections which have been removed due to configuration.  Unlink
9047c478bd9Sstevel@tonic-gate  * the collection from lcollection and free it.
9057c478bd9Sstevel@tonic-gate  */
9067c478bd9Sstevel@tonic-gate /*ARGSUSED*/
9077c478bd9Sstevel@tonic-gate static int
collection_sweep_cb(lcollection_t * lcol,void * arg)9087c478bd9Sstevel@tonic-gate collection_sweep_cb(lcollection_t *lcol, void *arg)
9097c478bd9Sstevel@tonic-gate {
9107c478bd9Sstevel@tonic-gate 	if (lcol->lcol_mark == 0) {
9110209230bSgjelinek 		debug("freeing %s %s\n",
9120209230bSgjelinek 		    (lcol->lcol_id.rcid_type == RCIDT_PROJECT ?
9130209230bSgjelinek 		    "project" : "zone"), lcol->lcol_name);
9147c478bd9Sstevel@tonic-gate 		lcollection_free(lcol);
9157c478bd9Sstevel@tonic-gate 	}
9167c478bd9Sstevel@tonic-gate 
9177c478bd9Sstevel@tonic-gate 	return (0);
9187c478bd9Sstevel@tonic-gate }
9197c478bd9Sstevel@tonic-gate 
9207c478bd9Sstevel@tonic-gate /*
9217c478bd9Sstevel@tonic-gate  * Set those variables which depend on the global configuration.
9227c478bd9Sstevel@tonic-gate  */
9237c478bd9Sstevel@tonic-gate static void
finish_configuration(void)9247c478bd9Sstevel@tonic-gate finish_configuration(void)
9257c478bd9Sstevel@tonic-gate {
9267c478bd9Sstevel@tonic-gate 	/*
9277c478bd9Sstevel@tonic-gate 	 * Warn that any lnode (or non-project) mode specification (by an SRM
9287c478bd9Sstevel@tonic-gate 	 * 1.3 configuration file, for example) is ignored.
9297c478bd9Sstevel@tonic-gate 	 */
9307c478bd9Sstevel@tonic-gate 	if (strcmp(rcfg.rcfg_mode_name, "project") != 0) {
9317c478bd9Sstevel@tonic-gate 		warn(gettext("%s mode specification ignored -- using project"
9327c478bd9Sstevel@tonic-gate 		    " mode\n"), rcfg.rcfg_mode_name);
9337c478bd9Sstevel@tonic-gate 		rcfg.rcfg_mode_name = "project";
9347c478bd9Sstevel@tonic-gate 		rcfg.rcfg_mode = rctype_project;
9357c478bd9Sstevel@tonic-gate 	}
9367c478bd9Sstevel@tonic-gate }
9377c478bd9Sstevel@tonic-gate 
9387c478bd9Sstevel@tonic-gate /*
939d75e6a5dStn143363  * Cause the configuration to be reread and applied.
9407c478bd9Sstevel@tonic-gate  */
9417c478bd9Sstevel@tonic-gate static void
reread_configuration(void)942d75e6a5dStn143363 reread_configuration(void)
9437c478bd9Sstevel@tonic-gate {
9447c478bd9Sstevel@tonic-gate 	rcfg_t rcfg_new;
9457c478bd9Sstevel@tonic-gate 
946d75e6a5dStn143363 	if (rcfg_read(&rcfg_new, update_statistics) != E_SUCCESS) {
947d75e6a5dStn143363 		warn(gettext("can't reread configuration \n"));
948d75e6a5dStn143363 		exit(SMF_EXIT_ERR_CONFIG);
949d75e6a5dStn143363 	} else {
9507c478bd9Sstevel@tonic-gate 		/*
951d75e6a5dStn143363 		 * Done reading configuration.  Remove existing
9527c478bd9Sstevel@tonic-gate 		 * collections in case there is a change in collection type.
9537c478bd9Sstevel@tonic-gate 		 */
9547c478bd9Sstevel@tonic-gate 		if (rcfg.rcfg_mode != rcfg_new.rcfg_mode) {
9557c478bd9Sstevel@tonic-gate 			list_walk_collection(collection_clear_cb, NULL);
9567c478bd9Sstevel@tonic-gate 			list_walk_collection(collection_sweep_cb, NULL);
9577c478bd9Sstevel@tonic-gate 		}
9587c478bd9Sstevel@tonic-gate 
9597c478bd9Sstevel@tonic-gate 		/*
9607c478bd9Sstevel@tonic-gate 		 * Make the newly-read configuration the global one, and update
9617c478bd9Sstevel@tonic-gate 		 * any variables that depend on it.
9627c478bd9Sstevel@tonic-gate 		 */
9637c478bd9Sstevel@tonic-gate 		rcfg = rcfg_new;
9647c478bd9Sstevel@tonic-gate 		finish_configuration();
9657c478bd9Sstevel@tonic-gate 	}
9667c478bd9Sstevel@tonic-gate }
9677c478bd9Sstevel@tonic-gate 
9687c478bd9Sstevel@tonic-gate /*
969d75e6a5dStn143363  * First, examine changes, additions, and deletions to cap definitions.
970d75e6a5dStn143363  * Then, set the next event time.
9717c478bd9Sstevel@tonic-gate  */
9727c478bd9Sstevel@tonic-gate static void
reconfigure(hrtime_t now,hrtime_t * next_configuration,hrtime_t * next_proc_walk,hrtime_t * next_rss_sample)9730209230bSgjelinek reconfigure(hrtime_t now, hrtime_t *next_configuration,
9740209230bSgjelinek     hrtime_t *next_proc_walk, hrtime_t *next_rss_sample)
9757c478bd9Sstevel@tonic-gate {
9767c478bd9Sstevel@tonic-gate 	debug("reconfigure...\n");
9777c478bd9Sstevel@tonic-gate 
9787c478bd9Sstevel@tonic-gate 	/*
9797c478bd9Sstevel@tonic-gate 	 * Walk the lcollection, marking active collections so inactive ones
9807c478bd9Sstevel@tonic-gate 	 * can be freed.
9817c478bd9Sstevel@tonic-gate 	 */
9827c478bd9Sstevel@tonic-gate 	list_walk_collection(collection_clear_cb, NULL);
9837c478bd9Sstevel@tonic-gate 	lcollection_update(LCU_ACTIVE_ONLY); /* mark */
9847c478bd9Sstevel@tonic-gate 	list_walk_collection(collection_sweep_cb, NULL);
9850209230bSgjelinek 
9860209230bSgjelinek 	*next_configuration = NEXT_EVENT_TIME(now,
9870209230bSgjelinek 	    rcfg.rcfg_reconfiguration_interval);
9880209230bSgjelinek 
9890209230bSgjelinek 	/*
9900209230bSgjelinek 	 * Reset each event time to the shorter of the previous and new
9910209230bSgjelinek 	 * intervals.
9920209230bSgjelinek 	 */
9930209230bSgjelinek 	if (next_report == 0 && rcfg.rcfg_report_interval > 0)
9940209230bSgjelinek 		next_report = now;
9950209230bSgjelinek 	else
9960209230bSgjelinek 		next_report = POSITIVE_MIN(next_report,
9970209230bSgjelinek 		    NEXT_REPORT_EVENT_TIME(now, rcfg.rcfg_report_interval));
9980209230bSgjelinek 
9990209230bSgjelinek 	if (*next_proc_walk == 0 && rcfg.rcfg_proc_walk_interval > 0)
10000209230bSgjelinek 		*next_proc_walk = now;
10010209230bSgjelinek 	else
10020209230bSgjelinek 		*next_proc_walk = POSITIVE_MIN(*next_proc_walk,
10030209230bSgjelinek 		    NEXT_EVENT_TIME(now, rcfg.rcfg_proc_walk_interval));
10040209230bSgjelinek 
10050209230bSgjelinek 	if (*next_rss_sample == 0 && rcfg.rcfg_rss_sample_interval > 0)
10060209230bSgjelinek 		*next_rss_sample = now;
10070209230bSgjelinek 	else
10080209230bSgjelinek 		*next_rss_sample = POSITIVE_MIN(*next_rss_sample,
10090209230bSgjelinek 		    NEXT_EVENT_TIME(now, rcfg.rcfg_rss_sample_interval));
10107c478bd9Sstevel@tonic-gate }
10117c478bd9Sstevel@tonic-gate 
10127c478bd9Sstevel@tonic-gate /*
1013d75e6a5dStn143363  * Respond to SIGHUP by triggering the rereading the configuration and cap
10147c478bd9Sstevel@tonic-gate  * definitions.
10157c478bd9Sstevel@tonic-gate  */
10167c478bd9Sstevel@tonic-gate /*ARGSUSED*/
10177c478bd9Sstevel@tonic-gate static void
sighup(int signal)10187c478bd9Sstevel@tonic-gate sighup(int signal)
10197c478bd9Sstevel@tonic-gate {
10207c478bd9Sstevel@tonic-gate 	should_reconfigure = 1;
10217c478bd9Sstevel@tonic-gate }
10227c478bd9Sstevel@tonic-gate 
10237c478bd9Sstevel@tonic-gate /*
10247c478bd9Sstevel@tonic-gate  * Print, for debugging purposes, each collection's interval statistics.
10257c478bd9Sstevel@tonic-gate  */
10267c478bd9Sstevel@tonic-gate /*ARGSUSED*/
10277c478bd9Sstevel@tonic-gate static int
simple_report_collection_cb(lcollection_t * lcol,void * arg)10287c478bd9Sstevel@tonic-gate simple_report_collection_cb(lcollection_t *lcol, void *arg)
10297c478bd9Sstevel@tonic-gate {
10307c478bd9Sstevel@tonic-gate #define	DELTA(field) \
10310209230bSgjelinek 	(unsigned long long)( \
10327c478bd9Sstevel@tonic-gate 	    (lcol->lcol_stat.field - lcol->lcol_stat_old.field))
10337c478bd9Sstevel@tonic-gate 
10347c478bd9Sstevel@tonic-gate 	debug("%s %s status: succeeded/attempted (k): %llu/%llu, "
10357c478bd9Sstevel@tonic-gate 	    "ineffective/scans/unenforced/samplings:  %llu/%llu/%llu/%llu, RSS "
10367c478bd9Sstevel@tonic-gate 	    "min/max (k): %llu/%llu, cap %llu kB, processes/thpt: %llu/%llu, "
10370209230bSgjelinek 	    "%llu scans over %llu ms\n",
10380209230bSgjelinek 	    (lcol->lcol_id.rcid_type == RCIDT_PROJECT ? "project" : "zone"),
10390209230bSgjelinek 	    lcol->lcol_name,
10407c478bd9Sstevel@tonic-gate 	    DELTA(lcols_pg_eff), DELTA(lcols_pg_att),
10417c478bd9Sstevel@tonic-gate 	    DELTA(lcols_scan_ineffective), DELTA(lcols_scan),
10427c478bd9Sstevel@tonic-gate 	    DELTA(lcols_unenforced_cap), DELTA(lcols_rss_sample),
10430209230bSgjelinek 	    (unsigned long long)lcol->lcol_stat.lcols_min_rss,
10440209230bSgjelinek 	    (unsigned long long)lcol->lcol_stat.lcols_max_rss,
10457c478bd9Sstevel@tonic-gate 	    (unsigned long long)lcol->lcol_rss_cap,
10467c478bd9Sstevel@tonic-gate 	    (unsigned long long)(lcol->lcol_stat.lcols_proc_in -
10477c478bd9Sstevel@tonic-gate 	    lcol->lcol_stat.lcols_proc_out), DELTA(lcols_proc_out),
1048*19449258SJosef 'Jeff' Sipek 	    DELTA(lcols_scan_count),
1049*19449258SJosef 'Jeff' Sipek 	    NSEC2MSEC(DELTA(lcols_scan_time_complete)));
10507c478bd9Sstevel@tonic-gate 
10517c478bd9Sstevel@tonic-gate #undef DELTA
10527c478bd9Sstevel@tonic-gate 
10537c478bd9Sstevel@tonic-gate 	return (0);
10547c478bd9Sstevel@tonic-gate }
10557c478bd9Sstevel@tonic-gate 
10567c478bd9Sstevel@tonic-gate /*
10577c478bd9Sstevel@tonic-gate  * Record each collection's interval statistics in the statistics file.
10587c478bd9Sstevel@tonic-gate  */
10597c478bd9Sstevel@tonic-gate static int
report_collection_cb(lcollection_t * lcol,void * arg)10607c478bd9Sstevel@tonic-gate report_collection_cb(lcollection_t *lcol, void *arg)
10617c478bd9Sstevel@tonic-gate {
10627c478bd9Sstevel@tonic-gate 	lcollection_report_t dc;
10637c478bd9Sstevel@tonic-gate 	int fd = (intptr_t)arg;
10647c478bd9Sstevel@tonic-gate 
10657c478bd9Sstevel@tonic-gate 	/*
10667c478bd9Sstevel@tonic-gate 	 * Copy the relevant fields to the collection's record.
10677c478bd9Sstevel@tonic-gate 	 */
10687c478bd9Sstevel@tonic-gate 	bzero(&dc, sizeof (dc));
10697c478bd9Sstevel@tonic-gate 	dc.lcol_id = lcol->lcol_id;
10707c478bd9Sstevel@tonic-gate 	(void) strcpy(dc.lcol_name, lcol->lcol_name);
10717c478bd9Sstevel@tonic-gate 	dc.lcol_rss = lcol->lcol_rss;
10727c478bd9Sstevel@tonic-gate 	dc.lcol_image_size = lcol->lcol_image_size;
10737c478bd9Sstevel@tonic-gate 	dc.lcol_rss_cap = lcol->lcol_rss_cap;
10747c478bd9Sstevel@tonic-gate 	dc.lcol_stat = lcol->lcol_stat;
10757c478bd9Sstevel@tonic-gate 
10767c478bd9Sstevel@tonic-gate 	if (write(fd, &dc, sizeof (dc)) == sizeof (dc)) {
10770209230bSgjelinek 		lcol->lcol_stat_old = lcol->lcol_stat;
10787c478bd9Sstevel@tonic-gate 	} else {
10790209230bSgjelinek 		debug("can't write %s %s statistics",
10800209230bSgjelinek 		    (lcol->lcol_id.rcid_type == RCIDT_PROJECT ?
10810209230bSgjelinek 		    "project" : "zone"),
10827c478bd9Sstevel@tonic-gate 		    lcol->lcol_name);
10837c478bd9Sstevel@tonic-gate 	}
10847c478bd9Sstevel@tonic-gate 
10857c478bd9Sstevel@tonic-gate 	return (0);
10867c478bd9Sstevel@tonic-gate }
10877c478bd9Sstevel@tonic-gate 
10887c478bd9Sstevel@tonic-gate /*
10897c478bd9Sstevel@tonic-gate  * Determine the count of pages scanned by the global page scanner, obtained
10907c478bd9Sstevel@tonic-gate  * from the cpu_stat:*::scan kstats.  Return zero on success.
10917c478bd9Sstevel@tonic-gate  */
10927c478bd9Sstevel@tonic-gate static int
get_globally_scanned_pages(uint64_t * scannedp)10937c478bd9Sstevel@tonic-gate get_globally_scanned_pages(uint64_t *scannedp)
10947c478bd9Sstevel@tonic-gate {
10957c478bd9Sstevel@tonic-gate 	kstat_t *ksp;
10967c478bd9Sstevel@tonic-gate 	uint64_t scanned = 0;
10977c478bd9Sstevel@tonic-gate 
10987c478bd9Sstevel@tonic-gate 	if (kstat_chain_update(kctl) == -1) {
10997c478bd9Sstevel@tonic-gate 		warn(gettext("can't update kstat chain"));
11007c478bd9Sstevel@tonic-gate 		return (0);
11017c478bd9Sstevel@tonic-gate 	}
11027c478bd9Sstevel@tonic-gate 
11037c478bd9Sstevel@tonic-gate 	for (ksp = kctl->kc_chain; ksp != NULL; ksp = ksp->ks_next) {
11047c478bd9Sstevel@tonic-gate 		if (strcmp(ksp->ks_module, "cpu_stat") == 0) {
11057c478bd9Sstevel@tonic-gate 			if (kstat_read(kctl, ksp, NULL) != -1) {
11067c478bd9Sstevel@tonic-gate 				scanned += ((cpu_stat_t *)
11077c478bd9Sstevel@tonic-gate 				    ksp->ks_data)->cpu_vminfo.scan;
11080209230bSgjelinek 			} else {
11097c478bd9Sstevel@tonic-gate 				return (-1);
11107c478bd9Sstevel@tonic-gate 			}
11117c478bd9Sstevel@tonic-gate 		}
11120209230bSgjelinek 	}
11137c478bd9Sstevel@tonic-gate 
11147c478bd9Sstevel@tonic-gate 	*scannedp = scanned;
11157c478bd9Sstevel@tonic-gate 	return (0);
11167c478bd9Sstevel@tonic-gate }
11177c478bd9Sstevel@tonic-gate 
11187c478bd9Sstevel@tonic-gate /*
11190209230bSgjelinek  * Determine if the global page scanner is running, during which no memory
11200209230bSgjelinek  * caps should be enforced, to prevent interference with the global page
11210209230bSgjelinek  * scanner.
11220209230bSgjelinek  */
11230209230bSgjelinek static boolean_t
is_global_scanner_running()11240209230bSgjelinek is_global_scanner_running()
11250209230bSgjelinek {
11260209230bSgjelinek 	/* measure delta in page scan count */
11270209230bSgjelinek 	static uint64_t new_sp = 0;
11280209230bSgjelinek 	static uint64_t old_sp = 0;
11290209230bSgjelinek 	boolean_t res = B_FALSE;
11300209230bSgjelinek 
11310209230bSgjelinek 	if (get_globally_scanned_pages(&new_sp) == 0) {
11320209230bSgjelinek 		if (old_sp != 0 && (new_sp - old_sp) > 0) {
11330209230bSgjelinek 			debug("global memory pressure detected (%llu "
11340209230bSgjelinek 			    "pages scanned since last interval)\n",
11350209230bSgjelinek 			    (unsigned long long)(new_sp - old_sp));
11360209230bSgjelinek 			res = B_TRUE;
11370209230bSgjelinek 		}
11380209230bSgjelinek 		old_sp = new_sp;
11390209230bSgjelinek 	} else {
11400209230bSgjelinek 		warn(gettext("unable to read cpu statistics"));
11410209230bSgjelinek 		new_sp = old_sp;
11420209230bSgjelinek 	}
11430209230bSgjelinek 
11440209230bSgjelinek 	return (res);
11450209230bSgjelinek }
11460209230bSgjelinek 
11470209230bSgjelinek /*
11480209230bSgjelinek  * If soft caps are in use, determine if global memory pressure exceeds the
11490209230bSgjelinek  * configured maximum above which soft caps are enforced.
11500209230bSgjelinek  */
11510209230bSgjelinek static boolean_t
must_enforce_soft_caps()11520209230bSgjelinek must_enforce_soft_caps()
11530209230bSgjelinek {
11540209230bSgjelinek 	/*
11550209230bSgjelinek 	 * Check for changes to the amount of installed physical memory, to
11560209230bSgjelinek 	 * compute the current memory pressure.
11570209230bSgjelinek 	 */
11580209230bSgjelinek 	update_phys_total();
11590209230bSgjelinek 
11600209230bSgjelinek 	memory_pressure = 100 - (int)((sysconf(_SC_AVPHYS_PAGES) * page_size_kb)
11610209230bSgjelinek 	    * 100.0 / phys_total);
11620209230bSgjelinek 	memory_pressure_sample++;
11630209230bSgjelinek 	if (rcfg.rcfg_memory_cap_enforcement_pressure > 0 &&
11640209230bSgjelinek 	    memory_pressure > rcfg.rcfg_memory_cap_enforcement_pressure) {
11650209230bSgjelinek 		return (B_TRUE);
11660209230bSgjelinek 	}
11670209230bSgjelinek 
11680209230bSgjelinek 	return (B_FALSE);
11690209230bSgjelinek }
11700209230bSgjelinek 
11710209230bSgjelinek /*
11727c478bd9Sstevel@tonic-gate  * Update the shared statistics file with each collection's current statistics.
11737c478bd9Sstevel@tonic-gate  * Return zero on success.
11747c478bd9Sstevel@tonic-gate  */
11757c478bd9Sstevel@tonic-gate static int
update_statistics(void)11767c478bd9Sstevel@tonic-gate update_statistics(void)
11777c478bd9Sstevel@tonic-gate {
11787c478bd9Sstevel@tonic-gate 	int fd, res;
11797c478bd9Sstevel@tonic-gate 	static char template[LINELEN];
118094a877c4Sgm149974 
118194a877c4Sgm149974 	/*
1182c4d5c63eSgm149974 	 * Try to create a directory irrespective of whether it is existing
1183c4d5c63eSgm149974 	 * or not. If it is not there then it will create. Otherwise any way
1184c4d5c63eSgm149974 	 * it will fail at mkstemp call below.
1185c4d5c63eSgm149974 	 */
1186c4d5c63eSgm149974 	(void) mkdir(STAT_FILE_DIR, 0755);
1187c4d5c63eSgm149974 
1188c4d5c63eSgm149974 	/*
11897c478bd9Sstevel@tonic-gate 	 * Create a temporary file.
11907c478bd9Sstevel@tonic-gate 	 */
11917c478bd9Sstevel@tonic-gate 	if (sizeof (template) < (strlen(rcfg.rcfg_stat_file) +
11927c478bd9Sstevel@tonic-gate 	    strlen(STAT_TEMPLATE_SUFFIX) + 1)) {
11937c478bd9Sstevel@tonic-gate 		debug("temporary file template size too small\n");
11947c478bd9Sstevel@tonic-gate 		return (-1);
11957c478bd9Sstevel@tonic-gate 	}
11967c478bd9Sstevel@tonic-gate 	(void) strcpy(template, rcfg.rcfg_stat_file);
11977c478bd9Sstevel@tonic-gate 	(void) strcat(template, STAT_TEMPLATE_SUFFIX);
11987c478bd9Sstevel@tonic-gate 	(void) rfd_reserve(1);
11997c478bd9Sstevel@tonic-gate 	fd = mkstemp(template);
12007c478bd9Sstevel@tonic-gate 
12017c478bd9Sstevel@tonic-gate 	/*
12027c478bd9Sstevel@tonic-gate 	 * Write the header and per-collection statistics.
12037c478bd9Sstevel@tonic-gate 	 */
12047c478bd9Sstevel@tonic-gate 	if (fd >= 0) {
12057c478bd9Sstevel@tonic-gate 		rcapd_stat_hdr_t rs;
12067c478bd9Sstevel@tonic-gate 
12077c478bd9Sstevel@tonic-gate 		rs.rs_pid = rcapd_pid;
12087c478bd9Sstevel@tonic-gate 		rs.rs_time = gethrtime();
12097c478bd9Sstevel@tonic-gate 		ASSERT(sizeof (rs.rs_mode) > strlen(rcfg.rcfg_mode_name));
12107c478bd9Sstevel@tonic-gate 		(void) strcpy(rs.rs_mode, rcfg.rcfg_mode_name);
12117c478bd9Sstevel@tonic-gate 		rs.rs_pressure_cur = memory_pressure;
12127c478bd9Sstevel@tonic-gate 		rs.rs_pressure_cap = rcfg.rcfg_memory_cap_enforcement_pressure;
12137c478bd9Sstevel@tonic-gate 		rs.rs_pressure_sample = memory_pressure_sample;
12147c478bd9Sstevel@tonic-gate 
12157c478bd9Sstevel@tonic-gate 		if (fchmod(fd, 0644) == 0 && write(fd, &rs, sizeof (rs)) ==
12167c478bd9Sstevel@tonic-gate 		    sizeof (rs)) {
12177c478bd9Sstevel@tonic-gate 			list_walk_collection(report_collection_cb,
12187c478bd9Sstevel@tonic-gate 			    (void *)(intptr_t)fd);
12197c478bd9Sstevel@tonic-gate 			/*
12207c478bd9Sstevel@tonic-gate 			 * Replace the existing statistics file with this new
12217c478bd9Sstevel@tonic-gate 			 * one.
12227c478bd9Sstevel@tonic-gate 			 */
12237c478bd9Sstevel@tonic-gate 			res = rename(template, rcfg.rcfg_stat_file);
12247c478bd9Sstevel@tonic-gate 		} else
12257c478bd9Sstevel@tonic-gate 			res = -1;
12267c478bd9Sstevel@tonic-gate 		(void) close(fd);
12277c478bd9Sstevel@tonic-gate 	} else
12287c478bd9Sstevel@tonic-gate 		res = -1;
12297c478bd9Sstevel@tonic-gate 
12307c478bd9Sstevel@tonic-gate 	return (res);
12317c478bd9Sstevel@tonic-gate }
12327c478bd9Sstevel@tonic-gate 
12337c478bd9Sstevel@tonic-gate /*
12347c478bd9Sstevel@tonic-gate  * Verify the statistics file can be created and written to, and die if an
12357c478bd9Sstevel@tonic-gate  * existing file may be in use by another rcapd.
12367c478bd9Sstevel@tonic-gate  */
12377c478bd9Sstevel@tonic-gate static int
verify_statistics(void)12387c478bd9Sstevel@tonic-gate verify_statistics(void)
12397c478bd9Sstevel@tonic-gate {
12407c478bd9Sstevel@tonic-gate 	pid_t pid;
12417c478bd9Sstevel@tonic-gate 
12427c478bd9Sstevel@tonic-gate 	/*
12437c478bd9Sstevel@tonic-gate 	 * Warn if another instance of rcapd might be active.
12447c478bd9Sstevel@tonic-gate 	 */
12457c478bd9Sstevel@tonic-gate 	(void) rfd_reserve(1);
12467c478bd9Sstevel@tonic-gate 	pid = stat_get_rcapd_pid(rcfg.rcfg_stat_file);
12477c478bd9Sstevel@tonic-gate 	if (pid != rcapd_pid && pid != -1)
12487c478bd9Sstevel@tonic-gate 		die(gettext("%s exists; rcapd may already be active\n"),
12497c478bd9Sstevel@tonic-gate 		    rcfg.rcfg_stat_file);
12507c478bd9Sstevel@tonic-gate 
12517c478bd9Sstevel@tonic-gate 	return (update_statistics());
12527c478bd9Sstevel@tonic-gate }
12537c478bd9Sstevel@tonic-gate 
12547c478bd9Sstevel@tonic-gate static int
sum_excess_cb(lcollection_t * lcol,void * arg)12557c478bd9Sstevel@tonic-gate sum_excess_cb(lcollection_t *lcol, void *arg)
12567c478bd9Sstevel@tonic-gate {
12577c478bd9Sstevel@tonic-gate 	uint64_t *sum_excess = arg;
12587c478bd9Sstevel@tonic-gate 
12597c478bd9Sstevel@tonic-gate 	*sum_excess += MAX((int64_t)0, (int64_t)(lcol->lcol_rss -
12607c478bd9Sstevel@tonic-gate 	    lcol->lcol_rss_cap));
12617c478bd9Sstevel@tonic-gate 	return (0);
12627c478bd9Sstevel@tonic-gate }
12637c478bd9Sstevel@tonic-gate 
12640209230bSgjelinek /*
12650209230bSgjelinek  * Compute the quantity of memory (in kilobytes) above the cap enforcement
12660209230bSgjelinek  * pressure.  Set the scan goal to that quantity (or at most the excess).
12670209230bSgjelinek  */
12680209230bSgjelinek static void
compute_soft_scan_goal(soft_scan_arg_t * argp)12690209230bSgjelinek compute_soft_scan_goal(soft_scan_arg_t *argp)
12700209230bSgjelinek {
12710209230bSgjelinek 	/*
12720209230bSgjelinek 	 * Compute the sum of the collections' excesses, which will be the
12730209230bSgjelinek 	 * denominator.
12740209230bSgjelinek 	 */
12750209230bSgjelinek 	argp->ssa_sum_excess = 0;
12760209230bSgjelinek 	list_walk_collection(sum_excess_cb, &(argp->ssa_sum_excess));
12770209230bSgjelinek 
12780209230bSgjelinek 	argp->ssa_scan_goal = MIN((sysconf(_SC_PHYS_PAGES) *
12790209230bSgjelinek 	    (100 - rcfg.rcfg_memory_cap_enforcement_pressure) / 100 -
12800209230bSgjelinek 	    sysconf(_SC_AVPHYS_PAGES)) * page_size_kb,
12810209230bSgjelinek 	    argp->ssa_sum_excess);
12820209230bSgjelinek }
12830209230bSgjelinek 
/*
 * Emit the (localized) command-line synopsis through info().
 */
static void
rcapd_usage(void)
{
	char *synopsis = gettext("usage: rcapd [-d]\n");

	info(synopsis);
}
12897c478bd9Sstevel@tonic-gate 
12907c478bd9Sstevel@tonic-gate void
check_update_statistics(void)12917c478bd9Sstevel@tonic-gate check_update_statistics(void)
12927c478bd9Sstevel@tonic-gate {
12937c478bd9Sstevel@tonic-gate 	hrtime_t now = gethrtime();
12947c478bd9Sstevel@tonic-gate 
12957c478bd9Sstevel@tonic-gate 	if (EVENT_TIME(now, next_report)) {
12967c478bd9Sstevel@tonic-gate 		debug("updating statistics...\n");
12977c478bd9Sstevel@tonic-gate 		list_walk_collection(simple_report_collection_cb, NULL);
12987c478bd9Sstevel@tonic-gate 		if (update_statistics() != 0)
12997c478bd9Sstevel@tonic-gate 			debug("couldn't update statistics");
13007c478bd9Sstevel@tonic-gate 		next_report = NEXT_REPORT_EVENT_TIME(now,
13017c478bd9Sstevel@tonic-gate 		    rcfg.rcfg_report_interval);
13027c478bd9Sstevel@tonic-gate 	}
13037c478bd9Sstevel@tonic-gate }
13047c478bd9Sstevel@tonic-gate 
13057c478bd9Sstevel@tonic-gate static void
verify_and_set_privileges(void)13067c478bd9Sstevel@tonic-gate verify_and_set_privileges(void)
13077c478bd9Sstevel@tonic-gate {
13087c478bd9Sstevel@tonic-gate 	priv_set_t *required =
13097c478bd9Sstevel@tonic-gate 	    priv_str_to_set("zone,sys_resource,proc_owner", ",", NULL);
13107c478bd9Sstevel@tonic-gate 
13117c478bd9Sstevel@tonic-gate 	/*
13127c478bd9Sstevel@tonic-gate 	 * Ensure the required privileges, suitable for controlling processes,
13137c478bd9Sstevel@tonic-gate 	 * are possessed.
13147c478bd9Sstevel@tonic-gate 	 */
13157c478bd9Sstevel@tonic-gate 	if (setppriv(PRIV_SET, PRIV_PERMITTED, required) != 0 || setppriv(
13167c478bd9Sstevel@tonic-gate 	    PRIV_SET, PRIV_EFFECTIVE, required) != 0)
13177c478bd9Sstevel@tonic-gate 		die(gettext("can't set requisite privileges"));
13187c478bd9Sstevel@tonic-gate 
13197c478bd9Sstevel@tonic-gate 	/*
13207c478bd9Sstevel@tonic-gate 	 * Ensure access to /var/run/daemon.
13217c478bd9Sstevel@tonic-gate 	 */
13227c478bd9Sstevel@tonic-gate 	if (setreuid(DAEMON_UID, DAEMON_UID) != 0)
13237c478bd9Sstevel@tonic-gate 		die(gettext("cannot become user daemon"));
13247c478bd9Sstevel@tonic-gate 
13257c478bd9Sstevel@tonic-gate 	priv_freeset(required);
13267c478bd9Sstevel@tonic-gate }
13277c478bd9Sstevel@tonic-gate 
13280209230bSgjelinek /*
13290209230bSgjelinek  * This function does the top-level work to determine if we should do any
13300209230bSgjelinek  * memory capping, and if so, it invokes the right call-backs to do the work.
13310209230bSgjelinek  */
13320209230bSgjelinek static void
do_capping(hrtime_t now,hrtime_t * next_proc_walk)13330209230bSgjelinek do_capping(hrtime_t now, hrtime_t *next_proc_walk)
13340209230bSgjelinek {
13350209230bSgjelinek 	boolean_t enforce_caps;
13360209230bSgjelinek 	/* soft cap enforcement flag, depending on memory pressure */
13370209230bSgjelinek 	boolean_t enforce_soft_caps;
13380209230bSgjelinek 	/* avoid interference with kernel's page scanner */
13390209230bSgjelinek 	boolean_t global_scanner_running;
13400209230bSgjelinek 	sample_col_arg_t col_arg;
13410209230bSgjelinek 	soft_scan_arg_t arg;
13420209230bSgjelinek 	uint_t col_types = 0;
13430209230bSgjelinek 
13440209230bSgjelinek 	/* check what kind of collections (project/zone) are capped */
13450209230bSgjelinek 	list_walk_collection(col_type_cb, &col_types);
13460209230bSgjelinek 	debug("collection types: 0x%x\n", col_types);
13470209230bSgjelinek 
13480209230bSgjelinek 	/* no capped collections, skip checking rss */
13490209230bSgjelinek 	if (col_types == 0)
13500209230bSgjelinek 		return;
13510209230bSgjelinek 
13520209230bSgjelinek 	/* Determine if soft caps are enforced. */
13530209230bSgjelinek 	enforce_soft_caps = must_enforce_soft_caps();
13540209230bSgjelinek 
13550209230bSgjelinek 	/* Determine if the global page scanner is running. */
13560209230bSgjelinek 	global_scanner_running = is_global_scanner_running();
13570209230bSgjelinek 
13580209230bSgjelinek 	/*
13590209230bSgjelinek 	 * Sample collections' member processes RSSes and recompute
13600209230bSgjelinek 	 * collections' excess.
13610209230bSgjelinek 	 */
13620209230bSgjelinek 	rss_sample(B_FALSE, col_types);
13630209230bSgjelinek 
13640209230bSgjelinek 	col_arg.sca_any_over_cap = B_FALSE;
13650209230bSgjelinek 	col_arg.sca_project_over_cap = B_FALSE;
13660209230bSgjelinek 	list_walk_collection(rss_sample_col_cb, &col_arg);
13670209230bSgjelinek 	list_walk_collection(excess_print_cb, NULL);
13680209230bSgjelinek 	debug("any collection/project over cap = %d, %d\n",
13690209230bSgjelinek 	    col_arg.sca_any_over_cap, col_arg.sca_project_over_cap);
13700209230bSgjelinek 
13710209230bSgjelinek 	if (enforce_soft_caps)
13720209230bSgjelinek 		debug("memory pressure %d%%\n", memory_pressure);
13730209230bSgjelinek 
13740209230bSgjelinek 	/*
13750209230bSgjelinek 	 * Cap enforcement is determined by the previous conditions.
13760209230bSgjelinek 	 */
13770209230bSgjelinek 	enforce_caps = !global_scanner_running && col_arg.sca_any_over_cap &&
13780209230bSgjelinek 	    (rcfg.rcfg_memory_cap_enforcement_pressure == 0 ||
13790209230bSgjelinek 	    enforce_soft_caps);
13800209230bSgjelinek 
13810209230bSgjelinek 	debug("%senforcing caps\n", enforce_caps ? "" : "not ");
13820209230bSgjelinek 
13830209230bSgjelinek 	/*
13840209230bSgjelinek 	 * If soft caps are in use, determine the size of the portion from each
13850209230bSgjelinek 	 * collection to scan for.
13860209230bSgjelinek 	 */
13870209230bSgjelinek 	if (enforce_caps && enforce_soft_caps)
13880209230bSgjelinek 		compute_soft_scan_goal(&arg);
13890209230bSgjelinek 
13900209230bSgjelinek 	/*
13910209230bSgjelinek 	 * Victimize offending collections.
13920209230bSgjelinek 	 */
13930209230bSgjelinek 	if (enforce_caps && (!enforce_soft_caps ||
13940209230bSgjelinek 	    (arg.ssa_scan_goal > 0 && arg.ssa_sum_excess > 0))) {
13950209230bSgjelinek 
13960209230bSgjelinek 		/*
13970209230bSgjelinek 		 * Since at least one collection is over its cap & needs
13980209230bSgjelinek 		 * enforcing, check if it is at least time for a process walk
13990209230bSgjelinek 		 * (we could be well past time since we only walk /proc when
14000209230bSgjelinek 		 * we need to) and if so, update each collections process list
14010209230bSgjelinek 		 * in a single pass through /proc.
14020209230bSgjelinek 		 */
14030209230bSgjelinek 		if (EVENT_TIME(now, *next_proc_walk)) {
14040209230bSgjelinek 			debug("scanning process list...\n");
14050209230bSgjelinek 			proc_walk_all(proc_cb);		 /* insert & mark */
14060209230bSgjelinek 			list_walk_all(sweep_process_cb); /* free dead procs */
14070209230bSgjelinek 			*next_proc_walk = NEXT_EVENT_TIME(now,
14080209230bSgjelinek 			    rcfg.rcfg_proc_walk_interval);
14090209230bSgjelinek 		}
14100209230bSgjelinek 
14110209230bSgjelinek 		gz_col = NULL;
14120209230bSgjelinek 		if (enforce_soft_caps) {
14130209230bSgjelinek 			debug("scan goal is %lldKB\n",
14140209230bSgjelinek 			    (long long)arg.ssa_scan_goal);
14150209230bSgjelinek 			list_walk_collection(soft_scan_cb, &arg);
14160209230bSgjelinek 			if (gz_capped && gz_col != NULL) {
14170209230bSgjelinek 				/* process global zone */
14180209230bSgjelinek 				arg.ssa_project_over_cap =
14190209230bSgjelinek 				    col_arg.sca_project_over_cap;
14200209230bSgjelinek 				soft_scan_gz(gz_col, &arg);
14210209230bSgjelinek 			}
14220209230bSgjelinek 		} else {
14230209230bSgjelinek 			list_walk_collection(scan_cb, NULL);
14240209230bSgjelinek 			if (gz_capped && gz_col != NULL) {
14250209230bSgjelinek 				/* process global zone */
14260209230bSgjelinek 				scan_gz(gz_col, col_arg.sca_project_over_cap);
14270209230bSgjelinek 			}
14280209230bSgjelinek 		}
14290209230bSgjelinek 	} else if (col_arg.sca_any_over_cap) {
14300209230bSgjelinek 		list_walk_collection(unenforced_cap_cb, NULL);
14310209230bSgjelinek 	}
14320209230bSgjelinek }
14330209230bSgjelinek 
14347c478bd9Sstevel@tonic-gate int
main(int argc,char * argv[])14357c478bd9Sstevel@tonic-gate main(int argc, char *argv[])
14367c478bd9Sstevel@tonic-gate {
14377c478bd9Sstevel@tonic-gate 	int res;
14387c478bd9Sstevel@tonic-gate 	int should_fork = 1;	/* fork flag */
14397c478bd9Sstevel@tonic-gate 	hrtime_t now;		/* current time */
14407c478bd9Sstevel@tonic-gate 	hrtime_t next;		/* time of next event */
14417c478bd9Sstevel@tonic-gate 	int sig;		/* signal iteration */
14427c478bd9Sstevel@tonic-gate 	struct rlimit rl;
14437c478bd9Sstevel@tonic-gate 	hrtime_t next_proc_walk;	/* time of next /proc scan */
14447c478bd9Sstevel@tonic-gate 	hrtime_t next_configuration;	/* time of next configuration */
14457c478bd9Sstevel@tonic-gate 	hrtime_t next_rss_sample;	/* (latest) time of next RSS sample */
14467c478bd9Sstevel@tonic-gate 
14477c478bd9Sstevel@tonic-gate 	(void) set_message_priority(RCM_INFO);
144823a1cceaSRoger A. Faulkner 	(void) setpname("rcapd");
14497c478bd9Sstevel@tonic-gate 	rcapd_pid = getpid();
14507c478bd9Sstevel@tonic-gate 	(void) chdir("/");
14517c478bd9Sstevel@tonic-gate 	should_run = 1;
14527c478bd9Sstevel@tonic-gate 	ever_ran = 0;
14537c478bd9Sstevel@tonic-gate 
14547c478bd9Sstevel@tonic-gate 	(void) setlocale(LC_ALL, "");
14557c478bd9Sstevel@tonic-gate 	(void) textdomain(TEXT_DOMAIN);
14567c478bd9Sstevel@tonic-gate 
14577c478bd9Sstevel@tonic-gate 	/*
14587c478bd9Sstevel@tonic-gate 	 * Parse command-line options.
14597c478bd9Sstevel@tonic-gate 	 */
14607c478bd9Sstevel@tonic-gate 	while ((res = getopt(argc, argv, "dF")) > 0)
14617c478bd9Sstevel@tonic-gate 		switch (res) {
14627c478bd9Sstevel@tonic-gate 		case 'd':
14637c478bd9Sstevel@tonic-gate 			should_fork = 0;
14647c478bd9Sstevel@tonic-gate 			if (debug_mode == 0) {
14657c478bd9Sstevel@tonic-gate 				debug_mode = 1;
14667c478bd9Sstevel@tonic-gate 				(void) set_message_priority(RCM_DEBUG);
14677c478bd9Sstevel@tonic-gate 			} else
14687c478bd9Sstevel@tonic-gate 				(void) set_message_priority(RCM_DEBUG_HIGH);
14697c478bd9Sstevel@tonic-gate 			break;
14707c478bd9Sstevel@tonic-gate 		case 'F':
14717c478bd9Sstevel@tonic-gate 			should_fork = 0;
14727c478bd9Sstevel@tonic-gate 			break;
14737c478bd9Sstevel@tonic-gate 		default:
14747c478bd9Sstevel@tonic-gate 			rcapd_usage();
14757c478bd9Sstevel@tonic-gate 			return (E_USAGE);
14767c478bd9Sstevel@tonic-gate 			/*NOTREACHED*/
14777c478bd9Sstevel@tonic-gate 		}
14787c478bd9Sstevel@tonic-gate 
14797c478bd9Sstevel@tonic-gate 	/*
1480d75e6a5dStn143363 	 * Read the configuration.
1481d75e6a5dStn143363 	 */
1482d75e6a5dStn143363 	if (rcfg_read(&rcfg, verify_statistics) != E_SUCCESS) {
1483d75e6a5dStn143363 		warn(gettext("resource caps not configured\n"));
1484d75e6a5dStn143363 		return (SMF_EXIT_ERR_CONFIG);
1485d75e6a5dStn143363 	}
1486d75e6a5dStn143363 
1487d75e6a5dStn143363 	/*
14887c478bd9Sstevel@tonic-gate 	 * If not debugging, fork and continue operating, changing the
14897c478bd9Sstevel@tonic-gate 	 * destination of messages to syslog().
14907c478bd9Sstevel@tonic-gate 	 */
14917c478bd9Sstevel@tonic-gate 	if (should_fork == 1) {
14927c478bd9Sstevel@tonic-gate 		pid_t child;
14937c478bd9Sstevel@tonic-gate 		debug("forking\n");
14947c478bd9Sstevel@tonic-gate 		child = fork();
14957c478bd9Sstevel@tonic-gate 		if (child == -1)
14967c478bd9Sstevel@tonic-gate 			die(gettext("cannot fork"));
14977c478bd9Sstevel@tonic-gate 		if (child > 0)
14987c478bd9Sstevel@tonic-gate 			return (0);
14997c478bd9Sstevel@tonic-gate 		else {
15007c478bd9Sstevel@tonic-gate 			rcapd_pid = getpid();
15017c478bd9Sstevel@tonic-gate 			(void) set_message_destination(RCD_SYSLOG);
15027c478bd9Sstevel@tonic-gate 			(void) fclose(stdin);
15037c478bd9Sstevel@tonic-gate 			(void) fclose(stdout);
15047c478bd9Sstevel@tonic-gate 			(void) fclose(stderr);
15057c478bd9Sstevel@tonic-gate 		}
15067c478bd9Sstevel@tonic-gate 		/*
15077c478bd9Sstevel@tonic-gate 		 * Start a new session and detatch from the controlling tty.
15087c478bd9Sstevel@tonic-gate 		 */
15097c478bd9Sstevel@tonic-gate 		if (setsid() == (pid_t)-1)
15107c478bd9Sstevel@tonic-gate 			debug(gettext("setsid() failed; cannot detach from "
15117c478bd9Sstevel@tonic-gate 			    "terminal"));
15127c478bd9Sstevel@tonic-gate 	}
15137c478bd9Sstevel@tonic-gate 
15147c478bd9Sstevel@tonic-gate 	finish_configuration();
15157c478bd9Sstevel@tonic-gate 	should_reconfigure = 0;
15167c478bd9Sstevel@tonic-gate 
15177c478bd9Sstevel@tonic-gate 	/*
15187c478bd9Sstevel@tonic-gate 	 * Check that required privileges are possessed.
15197c478bd9Sstevel@tonic-gate 	 */
15207c478bd9Sstevel@tonic-gate 	verify_and_set_privileges();
15217c478bd9Sstevel@tonic-gate 
15227c478bd9Sstevel@tonic-gate 	now = next_report = next_proc_walk = next_rss_sample = gethrtime();
15237c478bd9Sstevel@tonic-gate 	next_configuration = NEXT_EVENT_TIME(gethrtime(),
15247c478bd9Sstevel@tonic-gate 	    rcfg.rcfg_reconfiguration_interval);
15257c478bd9Sstevel@tonic-gate 
15267c478bd9Sstevel@tonic-gate 	/*
15277c478bd9Sstevel@tonic-gate 	 * Open the kstat chain.
15287c478bd9Sstevel@tonic-gate 	 */
15297c478bd9Sstevel@tonic-gate 	kctl = kstat_open();
15307c478bd9Sstevel@tonic-gate 	if (kctl == NULL)
15317c478bd9Sstevel@tonic-gate 		die(gettext("can't open kstats"));
15327c478bd9Sstevel@tonic-gate 
15337c478bd9Sstevel@tonic-gate 	/*
15347c478bd9Sstevel@tonic-gate 	 * Set RLIMIT_NOFILE as high as practical, so roughly 10K processes can
15357c478bd9Sstevel@tonic-gate 	 * be effectively managed without revoking descriptors (at 3 per
15367c478bd9Sstevel@tonic-gate 	 * process).
15377c478bd9Sstevel@tonic-gate 	 */
15387c478bd9Sstevel@tonic-gate 	rl.rlim_cur = 32 * 1024;
15397c478bd9Sstevel@tonic-gate 	rl.rlim_max = 32 * 1024;
15407c478bd9Sstevel@tonic-gate 	if (setrlimit(RLIMIT_NOFILE, &rl) != 0 &&
15417c478bd9Sstevel@tonic-gate 	    getrlimit(RLIMIT_NOFILE, &rl) == 0) {
15427c478bd9Sstevel@tonic-gate 		rl.rlim_cur = rl.rlim_max;
15437c478bd9Sstevel@tonic-gate 		(void) setrlimit(RLIMIT_NOFILE, &rl);
15447c478bd9Sstevel@tonic-gate 	}
1545004388ebScasper 	(void) enable_extended_FILE_stdio(-1, -1);
1546004388ebScasper 
15477c478bd9Sstevel@tonic-gate 	if (getrlimit(RLIMIT_NOFILE, &rl) == 0)
15487c478bd9Sstevel@tonic-gate 		debug("fd limit: %lu\n", rl.rlim_cur);
15497c478bd9Sstevel@tonic-gate 	else
15507c478bd9Sstevel@tonic-gate 		debug("fd limit: unknown\n");
15517c478bd9Sstevel@tonic-gate 
15520209230bSgjelinek 	get_page_size();
15530209230bSgjelinek 	my_zoneid = getzoneid();
15540209230bSgjelinek 
15557c478bd9Sstevel@tonic-gate 	/*
15567c478bd9Sstevel@tonic-gate 	 * Handle those signals whose (default) exit disposition
15577c478bd9Sstevel@tonic-gate 	 * prevents rcapd from finishing scanning before terminating.
15587c478bd9Sstevel@tonic-gate 	 */
15597c478bd9Sstevel@tonic-gate 	(void) sigset(SIGINT, terminate_signal);
15607c478bd9Sstevel@tonic-gate 	(void) sigset(SIGQUIT, abort_signal);
15617c478bd9Sstevel@tonic-gate 	(void) sigset(SIGILL, abort_signal);
15627c478bd9Sstevel@tonic-gate 	(void) sigset(SIGEMT, abort_signal);
15637c478bd9Sstevel@tonic-gate 	(void) sigset(SIGFPE, abort_signal);
15647c478bd9Sstevel@tonic-gate 	(void) sigset(SIGBUS, abort_signal);
15657c478bd9Sstevel@tonic-gate 	(void) sigset(SIGSEGV, abort_signal);
15667c478bd9Sstevel@tonic-gate 	(void) sigset(SIGSYS, abort_signal);
15677c478bd9Sstevel@tonic-gate 	(void) sigset(SIGPIPE, terminate_signal);
15687c478bd9Sstevel@tonic-gate 	(void) sigset(SIGALRM, terminate_signal);
15697c478bd9Sstevel@tonic-gate 	(void) sigset(SIGTERM, terminate_signal);
15707c478bd9Sstevel@tonic-gate 	(void) sigset(SIGUSR1, terminate_signal);
15717c478bd9Sstevel@tonic-gate 	(void) sigset(SIGUSR2, terminate_signal);
15727c478bd9Sstevel@tonic-gate 	(void) sigset(SIGPOLL, terminate_signal);
15737c478bd9Sstevel@tonic-gate 	(void) sigset(SIGVTALRM, terminate_signal);
15747c478bd9Sstevel@tonic-gate 	(void) sigset(SIGXCPU, abort_signal);
15757c478bd9Sstevel@tonic-gate 	(void) sigset(SIGXFSZ, abort_signal);
15767c478bd9Sstevel@tonic-gate 	for (sig = SIGRTMIN; sig <= SIGRTMAX; sig++)
15777c478bd9Sstevel@tonic-gate 		(void) sigset(sig, terminate_signal);
15787c478bd9Sstevel@tonic-gate 
15797c478bd9Sstevel@tonic-gate 	/*
15807c478bd9Sstevel@tonic-gate 	 * Install a signal handler for reconfiguration processing.
15817c478bd9Sstevel@tonic-gate 	 */
15827c478bd9Sstevel@tonic-gate 	(void) sigset(SIGHUP, sighup);
15837c478bd9Sstevel@tonic-gate 
15847c478bd9Sstevel@tonic-gate 	/*
15857c478bd9Sstevel@tonic-gate 	 * Determine which process collections to cap.
15867c478bd9Sstevel@tonic-gate 	 */
15877c478bd9Sstevel@tonic-gate 	lcollection_update(LCU_COMPLETE);
15887c478bd9Sstevel@tonic-gate 
15897c478bd9Sstevel@tonic-gate 	/*
15907c478bd9Sstevel@tonic-gate 	 * Loop forever, monitoring collections' resident set sizes and
15910209230bSgjelinek 	 * enforcing their caps.  Look for changes in caps as well as
15920209230bSgjelinek 	 * responding to requests to reread the configuration.  Update
15930209230bSgjelinek 	 * per-collection statistics periodically.
15947c478bd9Sstevel@tonic-gate 	 */
15957c478bd9Sstevel@tonic-gate 	while (should_run != 0) {
15967c478bd9Sstevel@tonic-gate 		struct timespec ts;
15977c478bd9Sstevel@tonic-gate 
15987c478bd9Sstevel@tonic-gate 		/*
15997c478bd9Sstevel@tonic-gate 		 * Announce that rcapd is starting.
16007c478bd9Sstevel@tonic-gate 		 */
16017c478bd9Sstevel@tonic-gate 		if (ever_ran == 0) {
16027c478bd9Sstevel@tonic-gate 			info(gettext("starting\n"));
16037c478bd9Sstevel@tonic-gate 			ever_ran = 1;
16047c478bd9Sstevel@tonic-gate 		}
16057c478bd9Sstevel@tonic-gate 
16067c478bd9Sstevel@tonic-gate 		/*
16070209230bSgjelinek 		 * Check the configuration at every next_configuration interval.
16080209230bSgjelinek 		 * Update the rss data once every next_rss_sample interval.
16090209230bSgjelinek 		 * The condition of global memory pressure is also checked at
16100209230bSgjelinek 		 * the same frequency, if strict caps are in use.
16117c478bd9Sstevel@tonic-gate 		 */
16127c478bd9Sstevel@tonic-gate 		now = gethrtime();
16137c478bd9Sstevel@tonic-gate 
16147c478bd9Sstevel@tonic-gate 		/*
1615d75e6a5dStn143363 		 * Detect configuration and cap changes only when SIGHUP
1616d75e6a5dStn143363 		 * is received. Call reconfigure to apply new configuration
1617d75e6a5dStn143363 		 * parameters.
16187c478bd9Sstevel@tonic-gate 		 */
1619d75e6a5dStn143363 		if (should_reconfigure == 1) {
1620d75e6a5dStn143363 			reread_configuration();
1621d75e6a5dStn143363 			should_reconfigure = 0;
16220209230bSgjelinek 			reconfigure(now, &next_configuration, &next_proc_walk,
16230209230bSgjelinek 			    &next_rss_sample);
1624d75e6a5dStn143363 		}
1625d75e6a5dStn143363 
1626d75e6a5dStn143363 		if (EVENT_TIME(now, next_configuration)) {
1627d75e6a5dStn143363 			reconfigure(now, &next_configuration, &next_proc_walk,
1628d75e6a5dStn143363 			    &next_rss_sample);
16297c478bd9Sstevel@tonic-gate 		}
16307c478bd9Sstevel@tonic-gate 
16310209230bSgjelinek 		/*
16320209230bSgjelinek 		 * Do the main work for enforcing caps.
16330209230bSgjelinek 		 */
16347c478bd9Sstevel@tonic-gate 		if (EVENT_TIME(now, next_rss_sample)) {
16350209230bSgjelinek 			do_capping(now, &next_proc_walk);
16367c478bd9Sstevel@tonic-gate 
16377c478bd9Sstevel@tonic-gate 			next_rss_sample = NEXT_EVENT_TIME(now,
16387c478bd9Sstevel@tonic-gate 			    rcfg.rcfg_rss_sample_interval);
16397c478bd9Sstevel@tonic-gate 		}
16407c478bd9Sstevel@tonic-gate 
16417c478bd9Sstevel@tonic-gate 		/*
16427c478bd9Sstevel@tonic-gate 		 * Update the statistics file, if it's time.
16437c478bd9Sstevel@tonic-gate 		 */
16447c478bd9Sstevel@tonic-gate 		check_update_statistics();
16457c478bd9Sstevel@tonic-gate 
16467c478bd9Sstevel@tonic-gate 		/*
16477c478bd9Sstevel@tonic-gate 		 * Sleep for some time before repeating.
16487c478bd9Sstevel@tonic-gate 		 */
16497c478bd9Sstevel@tonic-gate 		now = gethrtime();
16507c478bd9Sstevel@tonic-gate 		next = next_configuration;
16517c478bd9Sstevel@tonic-gate 		next = POSITIVE_MIN(next, next_report);
16527c478bd9Sstevel@tonic-gate 		next = POSITIVE_MIN(next, next_rss_sample);
16537c478bd9Sstevel@tonic-gate 		if (next > now && should_run != 0) {
16547c478bd9Sstevel@tonic-gate 			debug("sleeping %-4.2f seconds\n", (float)(next -
16557c478bd9Sstevel@tonic-gate 			    now) / (float)NANOSEC);
16567c478bd9Sstevel@tonic-gate 			hrt2ts(next - now, &ts);
16577c478bd9Sstevel@tonic-gate 			(void) nanosleep(&ts, NULL);
16587c478bd9Sstevel@tonic-gate 		}
16597c478bd9Sstevel@tonic-gate 	}
16607c478bd9Sstevel@tonic-gate 	if (termination_signal != 0)
16617c478bd9Sstevel@tonic-gate 		debug("exiting due to signal %d\n", termination_signal);
16627c478bd9Sstevel@tonic-gate 	if (ever_ran != 0)
16637c478bd9Sstevel@tonic-gate 		info(gettext("exiting\n"));
16647c478bd9Sstevel@tonic-gate 
16657c478bd9Sstevel@tonic-gate 	/*
16667c478bd9Sstevel@tonic-gate 	 * Unlink the statistics file before exiting.
16677c478bd9Sstevel@tonic-gate 	 */
16687c478bd9Sstevel@tonic-gate 	if (rcfg.rcfg_stat_file[0] != 0)
16697c478bd9Sstevel@tonic-gate 		(void) unlink(rcfg.rcfg_stat_file);
16707c478bd9Sstevel@tonic-gate 
16717c478bd9Sstevel@tonic-gate 	return (E_SUCCESS);
16727c478bd9Sstevel@tonic-gate }
1673