xref: /titanic_51/usr/src/cmd/zonestat/zonestatd/zonestatd.c (revision efd4c9b63ad77503c101fc6c2ed8ba96c9d52964)
1*efd4c9b6SSteve Lawrence /*
2*efd4c9b6SSteve Lawrence  * CDDL HEADER START
3*efd4c9b6SSteve Lawrence  *
4*efd4c9b6SSteve Lawrence  * The contents of this file are subject to the terms of the
5*efd4c9b6SSteve Lawrence  * Common Development and Distribution License (the "License").
6*efd4c9b6SSteve Lawrence  * You may not use this file except in compliance with the License.
7*efd4c9b6SSteve Lawrence  *
8*efd4c9b6SSteve Lawrence  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*efd4c9b6SSteve Lawrence  * or http://www.opensolaris.org/os/licensing.
10*efd4c9b6SSteve Lawrence  * See the License for the specific language governing permissions
11*efd4c9b6SSteve Lawrence  * and limitations under the License.
12*efd4c9b6SSteve Lawrence  *
13*efd4c9b6SSteve Lawrence  * When distributing Covered Code, include this CDDL HEADER in each
14*efd4c9b6SSteve Lawrence  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*efd4c9b6SSteve Lawrence  * If applicable, add the following below this CDDL HEADER, with the
16*efd4c9b6SSteve Lawrence  * fields enclosed by brackets "[]" replaced with your own identifying
17*efd4c9b6SSteve Lawrence  * information: Portions Copyright [yyyy] [name of copyright owner]
18*efd4c9b6SSteve Lawrence  *
19*efd4c9b6SSteve Lawrence  * CDDL HEADER END
20*efd4c9b6SSteve Lawrence  */
21*efd4c9b6SSteve Lawrence 
22*efd4c9b6SSteve Lawrence /*
23*efd4c9b6SSteve Lawrence  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24*efd4c9b6SSteve Lawrence  */
25*efd4c9b6SSteve Lawrence #include <alloca.h>
26*efd4c9b6SSteve Lawrence #include <assert.h>
27*efd4c9b6SSteve Lawrence #include <dirent.h>
28*efd4c9b6SSteve Lawrence #include <dlfcn.h>
29*efd4c9b6SSteve Lawrence #include <door.h>
30*efd4c9b6SSteve Lawrence #include <errno.h>
31*efd4c9b6SSteve Lawrence #include <exacct.h>
32*efd4c9b6SSteve Lawrence #include <ctype.h>
33*efd4c9b6SSteve Lawrence #include <fcntl.h>
34*efd4c9b6SSteve Lawrence #include <kstat.h>
35*efd4c9b6SSteve Lawrence #include <libcontract.h>
36*efd4c9b6SSteve Lawrence #include <libintl.h>
37*efd4c9b6SSteve Lawrence #include <libscf.h>
38*efd4c9b6SSteve Lawrence #include <zonestat.h>
39*efd4c9b6SSteve Lawrence #include <zonestat_impl.h>
40*efd4c9b6SSteve Lawrence #include <limits.h>
41*efd4c9b6SSteve Lawrence #include <pool.h>
42*efd4c9b6SSteve Lawrence #include <procfs.h>
43*efd4c9b6SSteve Lawrence #include <rctl.h>
44*efd4c9b6SSteve Lawrence #include <thread.h>
45*efd4c9b6SSteve Lawrence #include <signal.h>
46*efd4c9b6SSteve Lawrence #include <stdarg.h>
47*efd4c9b6SSteve Lawrence #include <stddef.h>
48*efd4c9b6SSteve Lawrence #include <stdio.h>
49*efd4c9b6SSteve Lawrence #include <stdlib.h>
50*efd4c9b6SSteve Lawrence #include <strings.h>
51*efd4c9b6SSteve Lawrence #include <synch.h>
52*efd4c9b6SSteve Lawrence #include <sys/acctctl.h>
53*efd4c9b6SSteve Lawrence #include <sys/contract/process.h>
54*efd4c9b6SSteve Lawrence #include <sys/ctfs.h>
55*efd4c9b6SSteve Lawrence #include <sys/fork.h>
56*efd4c9b6SSteve Lawrence #include <sys/param.h>
57*efd4c9b6SSteve Lawrence #include <sys/priocntl.h>
58*efd4c9b6SSteve Lawrence #include <sys/fxpriocntl.h>
59*efd4c9b6SSteve Lawrence #include <sys/processor.h>
60*efd4c9b6SSteve Lawrence #include <sys/pset.h>
61*efd4c9b6SSteve Lawrence #include <sys/socket.h>
62*efd4c9b6SSteve Lawrence #include <sys/stat.h>
63*efd4c9b6SSteve Lawrence #include <sys/statvfs.h>
64*efd4c9b6SSteve Lawrence #include <sys/swap.h>
65*efd4c9b6SSteve Lawrence #include <sys/systeminfo.h>
66*efd4c9b6SSteve Lawrence #include <thread.h>
67*efd4c9b6SSteve Lawrence #include <sys/list.h>
68*efd4c9b6SSteve Lawrence #include <sys/time.h>
69*efd4c9b6SSteve Lawrence #include <sys/types.h>
70*efd4c9b6SSteve Lawrence #include <sys/vm_usage.h>
71*efd4c9b6SSteve Lawrence #include <sys/wait.h>
72*efd4c9b6SSteve Lawrence #include <sys/zone.h>
73*efd4c9b6SSteve Lawrence #include <time.h>
74*efd4c9b6SSteve Lawrence #include <ucred.h>
75*efd4c9b6SSteve Lawrence #include <unistd.h>
76*efd4c9b6SSteve Lawrence #include <vm/anon.h>
77*efd4c9b6SSteve Lawrence #include <zone.h>
78*efd4c9b6SSteve Lawrence #include <zonestat.h>
79*efd4c9b6SSteve Lawrence 
/* Buffer size for a processor set name. */
80*efd4c9b6SSteve Lawrence #define	MAX_PSET_NAME	1024	/* Taken from PV_NAME_MAX_LEN */
/* NOTE(review): presumably a sentinel pset value meaning "no limit" -- confirm at its users. */
81*efd4c9b6SSteve Lawrence #define	ZSD_PSET_UNLIMITED	UINT16_MAX
/* Process accounting file that zsd_enable_cpu_stats() installs when none is configured. */
82*efd4c9b6SSteve Lawrence #define	ZONESTAT_EXACCT_FILE	"/var/adm/exacct/zonestat-process"
83*efd4c9b6SSteve Lawrence 
84*efd4c9b6SSteve Lawrence /*
85*efd4c9b6SSteve Lawrence  * zonestatd implements gathering cpu and memory utilization data for
86*efd4c9b6SSteve Lawrence  * running zones.  It has these components:
87*efd4c9b6SSteve Lawrence  *
88*efd4c9b6SSteve Lawrence  * zsd_server:
89*efd4c9b6SSteve Lawrence  *	Door server to respond to client connections.  Each client
90*efd4c9b6SSteve Lawrence  *	will connect using libzonestat.so, which will open and
91*efd4c9b6SSteve Lawrence  *	call /var/tmp/.zonestat_door.  Each connecting client is given
92*efd4c9b6SSteve Lawrence  *	a file descriptor to the stat server.
93*efd4c9b6SSteve Lawrence  *
94*efd4c9b6SSteve Lawrence  *	The zsd_server also responds to zoneadmd, which reports when a
95*efd4c9b6SSteve Lawrence  *	new zone is booted.  This is used to fattach the zsd_server door
96*efd4c9b6SSteve Lawrence  *	into the new zone.
97*efd4c9b6SSteve Lawrence  *
98*efd4c9b6SSteve Lawrence  * zsd_stat_server:
99*efd4c9b6SSteve Lawrence  *	Receives client requests for the current utilization data.  Each
100*efd4c9b6SSteve Lawrence  *	client request will cause zonestatd to update the current utilization
101*efd4c9b6SSteve Lawrence  *	data by kicking the stat_thread.
102*efd4c9b6SSteve Lawrence  *
103*efd4c9b6SSteve Lawrence  *	If the client is in a non-global zone, the utilization data will
104*efd4c9b6SSteve Lawrence  *	be filtered to only show the given zone.  The usage by all other zones
105*efd4c9b6SSteve Lawrence  *	will be added to the system utilization.
106*efd4c9b6SSteve Lawrence  *
107*efd4c9b6SSteve Lawrence  * stat_thread:
108*efd4c9b6SSteve Lawrence  *	The stat thread implements querying the system to determine the
109*efd4c9b6SSteve Lawrence  *	current utilization data for each running zone.  This includes
110*efd4c9b6SSteve Lawrence  *	inspecting the system's processor set configuration, as well as details
111*efd4c9b6SSteve Lawrence  *	of each zone, such as their configured limits, and which processor
112*efd4c9b6SSteve Lawrence  *	sets they are running in.
113*efd4c9b6SSteve Lawrence  *
114*efd4c9b6SSteve Lawrence  *	The stat_thread will only update memory utilization data as often as
115*efd4c9b6SSteve Lawrence  *	the configured config/sample_interval on the zones-monitoring service.
116*efd4c9b6SSteve Lawrence  */
117*efd4c9b6SSteve Lawrence 
118*efd4c9b6SSteve Lawrence /*
119*efd4c9b6SSteve Lawrence  * The private vmusage structure unfortunately uses size_t types, and assumes
120*efd4c9b6SSteve Lawrence  * the caller's bitness matches the kernel's bitness.  Since the getvmusage()
121*efd4c9b6SSteve Lawrence  * system call is contracted, and zonestatd is 32 bit, the following structures
122*efd4c9b6SSteve Lawrence  * are used to interact with a 32bit or 64 bit kernel.
123*efd4c9b6SSteve Lawrence  */
/*
 * vmusage record layout whose usage counters are 32 bits wide.  The three
 * leading members match zsd_vmusage64; this variant has no alignment pad.
 * NOTE(review): presumably the layout returned by a 32-bit kernel -- confirm
 * at the getvmusage() call site.  Member order must not change.
 */
124*efd4c9b6SSteve Lawrence typedef struct zsd_vmusage32 {
125*efd4c9b6SSteve Lawrence 	id_t vmu_zoneid;
126*efd4c9b6SSteve Lawrence 	uint_t vmu_type;
127*efd4c9b6SSteve Lawrence 	id_t vmu_id;
128*efd4c9b6SSteve Lawrence 
	/* Usage counters, 32 bits each in this layout. */
129*efd4c9b6SSteve Lawrence 	uint32_t vmu_rss_all;
130*efd4c9b6SSteve Lawrence 	uint32_t vmu_rss_private;
131*efd4c9b6SSteve Lawrence 	uint32_t vmu_rss_shared;
132*efd4c9b6SSteve Lawrence 	uint32_t vmu_swap_all;
133*efd4c9b6SSteve Lawrence 	uint32_t vmu_swap_private;
134*efd4c9b6SSteve Lawrence 	uint32_t vmu_swap_shared;
135*efd4c9b6SSteve Lawrence } zsd_vmusage32_t;
136*efd4c9b6SSteve Lawrence 
/*
 * vmusage record layout whose usage counters are 64 bits wide.  An explicit
 * pad member follows the three leading members so the counters start on an
 * 8-byte boundary.  Member order must not change.
 */
137*efd4c9b6SSteve Lawrence typedef struct zsd_vmusage64 {
138*efd4c9b6SSteve Lawrence 	id_t vmu_zoneid;
139*efd4c9b6SSteve Lawrence 	uint_t vmu_type;
140*efd4c9b6SSteve Lawrence 	id_t vmu_id;
141*efd4c9b6SSteve Lawrence 	/*
142*efd4c9b6SSteve Lawrence 	 * An amd64 kernel will align the following uint64_t members, but a
143*efd4c9b6SSteve Lawrence 	 * 32bit i386 process will not without help.
144*efd4c9b6SSteve Lawrence 	 */
145*efd4c9b6SSteve Lawrence 	int vmu_align_next_members_on_8_bytes;
	/* Usage counters, 64 bits each in this layout. */
146*efd4c9b6SSteve Lawrence 	uint64_t vmu_rss_all;
147*efd4c9b6SSteve Lawrence 	uint64_t vmu_rss_private;
148*efd4c9b6SSteve Lawrence 	uint64_t vmu_rss_shared;
149*efd4c9b6SSteve Lawrence 	uint64_t vmu_swap_all;
150*efd4c9b6SSteve Lawrence 	uint64_t vmu_swap_private;
151*efd4c9b6SSteve Lawrence 	uint64_t vmu_swap_shared;
152*efd4c9b6SSteve Lawrence } zsd_vmusage64_t;
153*efd4c9b6SSteve Lawrence 
154*efd4c9b6SSteve Lawrence struct zsd_zone;
155*efd4c9b6SSteve Lawrence 
156*efd4c9b6SSteve Lawrence /* Used to store a zone's usage of a pset */
157*efd4c9b6SSteve Lawrence typedef struct zsd_pset_usage {
158*efd4c9b6SSteve Lawrence 	struct zsd_zone	*zsu_zone;	/* zone this record is for */
159*efd4c9b6SSteve Lawrence 	struct zsd_pset	*zsu_pset;	/* pset this record is for */
160*efd4c9b6SSteve Lawrence 
	/* NOTE(review): presumably links into zsd_pset_t.zsp_usage_list -- confirm */
161*efd4c9b6SSteve Lawrence 	list_node_t	zsu_next;
162*efd4c9b6SSteve Lawrence 
163*efd4c9b6SSteve Lawrence 	zoneid_t	zsu_zoneid;
164*efd4c9b6SSteve Lawrence 	boolean_t	zsu_found;	/* zone bound at end of interval */
165*efd4c9b6SSteve Lawrence 	boolean_t	zsu_active;	/* zone was bound during interval */
166*efd4c9b6SSteve Lawrence 	boolean_t	zsu_new;	/* zone newly bound in this interval */
167*efd4c9b6SSteve Lawrence 	boolean_t	zsu_deleted;	/* zone was unbound in this interval */
168*efd4c9b6SSteve Lawrence 	boolean_t	zsu_empty;	/* no procs in pset in this interval */
169*efd4c9b6SSteve Lawrence 	time_t		zsu_start;	/* time_t when zone was found in pset */
170*efd4c9b6SSteve Lawrence 	hrtime_t	zsu_hrstart;	/* hrtime when zone was found in pset */
171*efd4c9b6SSteve Lawrence 	uint64_t	zsu_cpu_shares;
172*efd4c9b6SSteve Lawrence 	uint_t		zsu_scheds;	/* schedulers found in this pass */
173*efd4c9b6SSteve Lawrence 	timestruc_t	zsu_cpu_usage;	/* cpu time used */
174*efd4c9b6SSteve Lawrence } zsd_pset_usage_t;
175*efd4c9b6SSteve Lawrence 
176*efd4c9b6SSteve Lawrence /* Used to store a pset's utilization */
177*efd4c9b6SSteve Lawrence typedef struct zsd_pset {
178*efd4c9b6SSteve Lawrence 	psetid_t	zsp_id;
	/* NOTE(review): presumably links into zsd_ctl_t.zsctl_psets -- confirm */
179*efd4c9b6SSteve Lawrence 	list_node_t	zsp_next;
180*efd4c9b6SSteve Lawrence 	char		zsp_name[ZS_PSETNAME_MAX];
181*efd4c9b6SSteve Lawrence 
182*efd4c9b6SSteve Lawrence 	uint_t		zsp_cputype;	/* default, dedicated or shared */
183*efd4c9b6SSteve Lawrence 	boolean_t	zsp_found;	/* pset found at end of interval */
184*efd4c9b6SSteve Lawrence 	boolean_t	zsp_new;	/* pset new in this interval */
185*efd4c9b6SSteve Lawrence 	boolean_t	zsp_deleted;	/* pset deleted in this interval */
186*efd4c9b6SSteve Lawrence 	boolean_t	zsp_active;	/* pset existed during interval */
187*efd4c9b6SSteve Lawrence 	boolean_t	zsp_empty;	/* no processes in pset */
	/* Same time_t/hrtime_t pairing as zsu_start/zsu_hrstart. */
188*efd4c9b6SSteve Lawrence 	time_t		zsp_start;
189*efd4c9b6SSteve Lawrence 	hrtime_t	zsp_hrstart;
190*efd4c9b6SSteve Lawrence 
191*efd4c9b6SSteve Lawrence 	uint64_t	zsp_online;	/* online cpus in interval */
192*efd4c9b6SSteve Lawrence 	uint64_t	zsp_size;	/* size in this interval */
193*efd4c9b6SSteve Lawrence 	uint64_t	zsp_min;	/* configured min in this interval */
194*efd4c9b6SSteve Lawrence 	uint64_t	zsp_max;	/* configured max in this interval */
	/* presumably the configured importance (old comment said "max") */
195*efd4c9b6SSteve Lawrence 	int64_t		zsp_importance;
196*efd4c9b6SSteve Lawrence 
197*efd4c9b6SSteve Lawrence 	uint_t		zsp_scheds;	/* scheds of processes found in pset */
198*efd4c9b6SSteve Lawrence 	uint64_t	zsp_cpu_shares;	/* total shares in this interval */
199*efd4c9b6SSteve Lawrence 
200*efd4c9b6SSteve Lawrence 	timestruc_t	zsp_total_time;
201*efd4c9b6SSteve Lawrence 	timestruc_t	zsp_usage_kern;
202*efd4c9b6SSteve Lawrence 	timestruc_t	zsp_usage_zones;
203*efd4c9b6SSteve Lawrence 
204*efd4c9b6SSteve Lawrence 	/* Individual zone usages of pset */
205*efd4c9b6SSteve Lawrence 	list_t		zsp_usage_list;
206*efd4c9b6SSteve Lawrence 	int		zsp_nusage;
207*efd4c9b6SSteve Lawrence 
208*efd4c9b6SSteve Lawrence 	/* Summed kstat values from individual cpus in pset */
209*efd4c9b6SSteve Lawrence 	timestruc_t	zsp_idle;
210*efd4c9b6SSteve Lawrence 	timestruc_t	zsp_intr;
211*efd4c9b6SSteve Lawrence 	timestruc_t	zsp_kern;
212*efd4c9b6SSteve Lawrence 	timestruc_t	zsp_user;
213*efd4c9b6SSteve Lawrence 
214*efd4c9b6SSteve Lawrence } zsd_pset_t;
215*efd4c9b6SSteve Lawrence 
216*efd4c9b6SSteve Lawrence /* Used to track an individual cpu's utilization as reported by kstats */
217*efd4c9b6SSteve Lawrence typedef struct zsd_cpu {
218*efd4c9b6SSteve Lawrence 	processorid_t	zsc_id;
	/* NOTE(review): presumably links into zsd_ctl_t.zsctl_cpus -- confirm */
219*efd4c9b6SSteve Lawrence 	list_node_t	zsc_next;
220*efd4c9b6SSteve Lawrence 	psetid_t	zsc_psetid;
221*efd4c9b6SSteve Lawrence 	psetid_t	zsc_psetid_prev;
222*efd4c9b6SSteve Lawrence 	zsd_pset_t	*zsc_pset;
223*efd4c9b6SSteve Lawrence 
224*efd4c9b6SSteve Lawrence 	boolean_t	zsc_found;	/* cpu online in this interval */
225*efd4c9b6SSteve Lawrence 	boolean_t	zsc_onlined;	/* cpu onlined during this interval */
226*efd4c9b6SSteve Lawrence 	boolean_t	zsc_offlined;	/* cpu offlined during this interval */
227*efd4c9b6SSteve Lawrence 	boolean_t	zsc_active;	/* cpu online during this interval */
228*efd4c9b6SSteve Lawrence 	boolean_t	zsc_allocated;	/* True if cpu has ever been found */
229*efd4c9b6SSteve Lawrence 
230*efd4c9b6SSteve Lawrence 	/* kstats this interval */
231*efd4c9b6SSteve Lawrence 	uint64_t	zsc_nsec_idle;
232*efd4c9b6SSteve Lawrence 	uint64_t	zsc_nsec_intr;
233*efd4c9b6SSteve Lawrence 	uint64_t	zsc_nsec_kern;
234*efd4c9b6SSteve Lawrence 	uint64_t	zsc_nsec_user;
235*efd4c9b6SSteve Lawrence 
236*efd4c9b6SSteve Lawrence 	/*
	 * kstats in most recent interval.
	 * NOTE(review): presumably the previous sample, kept to compute
	 * deltas against the values above -- confirm.
	 */
237*efd4c9b6SSteve Lawrence 	uint64_t	zsc_nsec_idle_prev;
238*efd4c9b6SSteve Lawrence 	uint64_t	zsc_nsec_intr_prev;
239*efd4c9b6SSteve Lawrence 	uint64_t	zsc_nsec_kern_prev;
240*efd4c9b6SSteve Lawrence 	uint64_t	zsc_nsec_user_prev;
241*efd4c9b6SSteve Lawrence 
242*efd4c9b6SSteve Lawrence 	/* Total kstat increases since zonestatd started reading kstats */
243*efd4c9b6SSteve Lawrence 	timestruc_t	zsc_idle;
244*efd4c9b6SSteve Lawrence 	timestruc_t	zsc_intr;
245*efd4c9b6SSteve Lawrence 	timestruc_t	zsc_kern;
246*efd4c9b6SSteve Lawrence 	timestruc_t	zsc_user;
247*efd4c9b6SSteve Lawrence 
248*efd4c9b6SSteve Lawrence } zsd_cpu_t;
249*efd4c9b6SSteve Lawrence 
250*efd4c9b6SSteve Lawrence /* Used to describe an individual zone and its utilization */
251*efd4c9b6SSteve Lawrence typedef struct zsd_zone {
252*efd4c9b6SSteve Lawrence 	zoneid_t	zsz_id;
	/* NOTE(review): presumably links into zsd_ctl_t.zsctl_zones -- confirm */
253*efd4c9b6SSteve Lawrence 	list_node_t	zsz_next;
254*efd4c9b6SSteve Lawrence 	char		zsz_name[ZS_ZONENAME_MAX];
255*efd4c9b6SSteve Lawrence 	uint_t		zsz_cputype;
256*efd4c9b6SSteve Lawrence 	uint_t		zsz_iptype;
257*efd4c9b6SSteve Lawrence 	time_t		zsz_start;
258*efd4c9b6SSteve Lawrence 	hrtime_t	zsz_hrstart;
259*efd4c9b6SSteve Lawrence 
260*efd4c9b6SSteve Lawrence 	char		zsz_pool[ZS_POOLNAME_MAX];
261*efd4c9b6SSteve Lawrence 	char		zsz_pset[ZS_PSETNAME_MAX];
262*efd4c9b6SSteve Lawrence 	int		zsz_default_sched;
263*efd4c9b6SSteve Lawrence 	/* These are deduced by inspecting processes */
264*efd4c9b6SSteve Lawrence 	psetid_t	zsz_psetid;
265*efd4c9b6SSteve Lawrence 	uint_t		zsz_scheds;
266*efd4c9b6SSteve Lawrence 
267*efd4c9b6SSteve Lawrence 	boolean_t	zsz_new;	/* zone booted during this interval */
268*efd4c9b6SSteve Lawrence 	boolean_t	zsz_deleted;	/* halted during this interval */
269*efd4c9b6SSteve Lawrence 	boolean_t	zsz_active;	/* running in this interval */
270*efd4c9b6SSteve Lawrence 	boolean_t	zsz_empty;	/* no processes in this interval */
271*efd4c9b6SSteve Lawrence 	boolean_t	zsz_gone;	/* not installed in this interval */
272*efd4c9b6SSteve Lawrence 	boolean_t	zsz_found;	/* Running at end of this interval */
273*efd4c9b6SSteve Lawrence 
	/* Configured limits; each *_cap below pairs with a usage member. */
274*efd4c9b6SSteve Lawrence 	uint64_t	zsz_cpu_shares;
275*efd4c9b6SSteve Lawrence 	uint64_t	zsz_cpu_cap;
276*efd4c9b6SSteve Lawrence 	uint64_t	zsz_ram_cap;
277*efd4c9b6SSteve Lawrence 	uint64_t	zsz_locked_cap;
278*efd4c9b6SSteve Lawrence 	uint64_t	zsz_vm_cap;
279*efd4c9b6SSteve Lawrence 
280*efd4c9b6SSteve Lawrence 	uint64_t	zsz_cpus_online;
	/* presumably cpu time used by the zone (old comment duplicated zsz_cap_time's) */
281*efd4c9b6SSteve Lawrence 	timestruc_t	zsz_cpu_usage;
282*efd4c9b6SSteve Lawrence 	timestruc_t	zsz_cap_time;	/* cpu time of cpu cap */
283*efd4c9b6SSteve Lawrence 	timestruc_t	zsz_share_time; /* cpu time of share of cpu */
284*efd4c9b6SSteve Lawrence 	timestruc_t	zsz_pset_time;  /* time of all psets zone is bound to */
285*efd4c9b6SSteve Lawrence 
286*efd4c9b6SSteve Lawrence 	uint64_t	zsz_usage_ram;
287*efd4c9b6SSteve Lawrence 	uint64_t	zsz_usage_locked;
288*efd4c9b6SSteve Lawrence 	uint64_t	zsz_usage_vm;
289*efd4c9b6SSteve Lawrence 
	/* Limits: one zsz_<resource>_cap per zsz_<resource> counter below. */
290*efd4c9b6SSteve Lawrence 	uint64_t	zsz_processes_cap;
291*efd4c9b6SSteve Lawrence 	uint64_t	zsz_lwps_cap;
292*efd4c9b6SSteve Lawrence 	uint64_t	zsz_shm_cap;
293*efd4c9b6SSteve Lawrence 	uint64_t	zsz_shmids_cap;
294*efd4c9b6SSteve Lawrence 	uint64_t	zsz_semids_cap;
295*efd4c9b6SSteve Lawrence 	uint64_t	zsz_msgids_cap;
296*efd4c9b6SSteve Lawrence 	uint64_t	zsz_lofi_cap;
297*efd4c9b6SSteve Lawrence 
	/* Counters matching the zsz_<resource>_cap members above. */
298*efd4c9b6SSteve Lawrence 	uint64_t	zsz_processes;
299*efd4c9b6SSteve Lawrence 	uint64_t	zsz_lwps;
300*efd4c9b6SSteve Lawrence 	uint64_t	zsz_shm;
301*efd4c9b6SSteve Lawrence 	uint64_t	zsz_shmids;
302*efd4c9b6SSteve Lawrence 	uint64_t	zsz_semids;
303*efd4c9b6SSteve Lawrence 	uint64_t	zsz_msgids;
304*efd4c9b6SSteve Lawrence 	uint64_t	zsz_lofi;
305*efd4c9b6SSteve Lawrence 
306*efd4c9b6SSteve Lawrence } zsd_zone_t;
307*efd4c9b6SSteve Lawrence 
308*efd4c9b6SSteve Lawrence /*
309*efd4c9b6SSteve Lawrence  * Used to track the cpu usage of an individual process.
310*efd4c9b6SSteve Lawrence  *
311*efd4c9b6SSteve Lawrence  * zonestatd sweeps /proc each interval and charges the cpu usage of processes
312*efd4c9b6SSteve Lawrence  * to their zone.  As processes exit, their extended accounting records are
313*efd4c9b6SSteve Lawrence  * read and the difference of their total and known usage is charged to their
314*efd4c9b6SSteve Lawrence  * zone.
315*efd4c9b6SSteve Lawrence  *
316*efd4c9b6SSteve Lawrence  * If a process is never seen in /proc, the total usage on its extended
317*efd4c9b6SSteve Lawrence  * accounting record will be charged to its zone.
318*efd4c9b6SSteve Lawrence  */
319*efd4c9b6SSteve Lawrence typedef struct zsd_proc {
320*efd4c9b6SSteve Lawrence 	list_node_t	zspr_next;
321*efd4c9b6SSteve Lawrence 	pid_t		zspr_ppid;
322*efd4c9b6SSteve Lawrence 	psetid_t	zspr_psetid;
323*efd4c9b6SSteve Lawrence 	zoneid_t	zspr_zoneid;
324*efd4c9b6SSteve Lawrence 	int		zspr_sched;
	/* NOTE(review): presumably the "known usage" referred to above -- confirm */
325*efd4c9b6SSteve Lawrence 	timestruc_t	zspr_usage;
326*efd4c9b6SSteve Lawrence } zsd_proc_t;
327*efd4c9b6SSteve Lawrence 
328*efd4c9b6SSteve Lawrence /* Used to track the overall resource usage of the system */
329*efd4c9b6SSteve Lawrence typedef struct zsd_system {
330*efd4c9b6SSteve Lawrence 
	/* Memory figures, each split into kernel and zones parts. */
331*efd4c9b6SSteve Lawrence 	uint64_t zss_ram_total;
332*efd4c9b6SSteve Lawrence 	uint64_t zss_ram_kern;
333*efd4c9b6SSteve Lawrence 	uint64_t zss_ram_zones;
334*efd4c9b6SSteve Lawrence 
335*efd4c9b6SSteve Lawrence 	uint64_t zss_locked_kern;
336*efd4c9b6SSteve Lawrence 	uint64_t zss_locked_zones;
337*efd4c9b6SSteve Lawrence 
338*efd4c9b6SSteve Lawrence 	uint64_t zss_vm_total;
339*efd4c9b6SSteve Lawrence 	uint64_t zss_vm_kern;
340*efd4c9b6SSteve Lawrence 	uint64_t zss_vm_zones;
341*efd4c9b6SSteve Lawrence 
342*efd4c9b6SSteve Lawrence 	uint64_t zss_swap_total;
343*efd4c9b6SSteve Lawrence 	uint64_t zss_swap_used;
344*efd4c9b6SSteve Lawrence 
	/* Same idle/intr/kern/user breakdown as zsd_pset_t and zsd_cpu_t. */
345*efd4c9b6SSteve Lawrence 	timestruc_t zss_idle;
346*efd4c9b6SSteve Lawrence 	timestruc_t zss_intr;
347*efd4c9b6SSteve Lawrence 	timestruc_t zss_kern;
348*efd4c9b6SSteve Lawrence 	timestruc_t zss_user;
349*efd4c9b6SSteve Lawrence 
350*efd4c9b6SSteve Lawrence 	timestruc_t zss_cpu_total_time;
351*efd4c9b6SSteve Lawrence 	timestruc_t zss_cpu_usage_kern;
352*efd4c9b6SSteve Lawrence 	timestruc_t zss_cpu_usage_zones;
353*efd4c9b6SSteve Lawrence 
	/* Maximums: one zss_<resource>_max per zss_<resource> counter below. */
354*efd4c9b6SSteve Lawrence 	uint64_t zss_maxpid;
355*efd4c9b6SSteve Lawrence 	uint64_t zss_processes_max;
356*efd4c9b6SSteve Lawrence 	uint64_t zss_lwps_max;
357*efd4c9b6SSteve Lawrence 	uint64_t zss_shm_max;
358*efd4c9b6SSteve Lawrence 	uint64_t zss_shmids_max;
359*efd4c9b6SSteve Lawrence 	uint64_t zss_semids_max;
360*efd4c9b6SSteve Lawrence 	uint64_t zss_msgids_max;
361*efd4c9b6SSteve Lawrence 	uint64_t zss_lofi_max;
362*efd4c9b6SSteve Lawrence 
	/* Counters matching the zss_<resource>_max members above. */
363*efd4c9b6SSteve Lawrence 	uint64_t zss_processes;
364*efd4c9b6SSteve Lawrence 	uint64_t zss_lwps;
365*efd4c9b6SSteve Lawrence 	uint64_t zss_shm;
366*efd4c9b6SSteve Lawrence 	uint64_t zss_shmids;
367*efd4c9b6SSteve Lawrence 	uint64_t zss_semids;
368*efd4c9b6SSteve Lawrence 	uint64_t zss_msgids;
369*efd4c9b6SSteve Lawrence 	uint64_t zss_lofi;
370*efd4c9b6SSteve Lawrence 
371*efd4c9b6SSteve Lawrence 	uint64_t zss_ncpus;
372*efd4c9b6SSteve Lawrence 	uint64_t zss_ncpus_online;
373*efd4c9b6SSteve Lawrence 
374*efd4c9b6SSteve Lawrence } zsd_system_t;
375*efd4c9b6SSteve Lawrence 
376*efd4c9b6SSteve Lawrence /*
377*efd4c9b6SSteve Lawrence  * A dumping ground for various information and structures used to compute
378*efd4c9b6SSteve Lawrence  * utilization.
379*efd4c9b6SSteve Lawrence  *
380*efd4c9b6SSteve Lawrence  * This structure is used to track the system while clients are connected.
381*efd4c9b6SSteve Lawrence  * When the first client connects, a zsd_ctl is allocated and configured by
382*efd4c9b6SSteve Lawrence  * zsd_open().  When all clients disconnect, the zsd_ctl is closed.
383*efd4c9b6SSteve Lawrence  */
384*efd4c9b6SSteve Lawrence typedef struct zsd_ctl {
385*efd4c9b6SSteve Lawrence 	kstat_ctl_t	*zsctl_kstat_ctl;
386*efd4c9b6SSteve Lawrence 
387*efd4c9b6SSteve Lawrence 	/*
	 * To track extended accounting.  Two parallel sets of members: the
	 * log being read now, and the "_next" log to switch to.
	 */
388*efd4c9b6SSteve Lawrence 	int		zsctl_proc_fd;		/* Log currently being used */
389*efd4c9b6SSteve Lawrence 	ea_file_t	zsctl_proc_eaf;
390*efd4c9b6SSteve Lawrence 	struct stat64	zsctl_proc_stat;
391*efd4c9b6SSteve Lawrence 	int		zsctl_proc_open;
392*efd4c9b6SSteve Lawrence 	int		zsctl_proc_fd_next;	/* Log file to use next */
393*efd4c9b6SSteve Lawrence 	ea_file_t	zsctl_proc_eaf_next;
394*efd4c9b6SSteve Lawrence 	struct stat64	zsctl_proc_stat_next;
395*efd4c9b6SSteve Lawrence 	int		zsctl_proc_open_next;
396*efd4c9b6SSteve Lawrence 
397*efd4c9b6SSteve Lawrence 	/* pool configuration handle */
398*efd4c9b6SSteve Lawrence 	pool_conf_t	*zsctl_pool_conf;
399*efd4c9b6SSteve Lawrence 	int		zsctl_pool_status;
400*efd4c9b6SSteve Lawrence 	int		zsctl_pool_changed;
401*efd4c9b6SSteve Lawrence 
402*efd4c9b6SSteve Lawrence 	/* The above usage tracking structures */
403*efd4c9b6SSteve Lawrence 	zsd_system_t	*zsctl_system;
404*efd4c9b6SSteve Lawrence 	list_t		zsctl_zones;
405*efd4c9b6SSteve Lawrence 	list_t		zsctl_psets;
406*efd4c9b6SSteve Lawrence 	list_t		zsctl_cpus;
	/* NOTE(review): presumably indexed by cpu id and by pid -- confirm */
407*efd4c9b6SSteve Lawrence 	zsd_cpu_t	*zsctl_cpu_array;
408*efd4c9b6SSteve Lawrence 	zsd_proc_t	*zsctl_proc_array;
409*efd4c9b6SSteve Lawrence 
410*efd4c9b6SSteve Lawrence 	/* Various system info */
411*efd4c9b6SSteve Lawrence 	uint64_t	zsctl_maxcpuid;
412*efd4c9b6SSteve Lawrence 	uint64_t	zsctl_maxproc;
413*efd4c9b6SSteve Lawrence 	uint64_t	zsctl_kern_bits;
414*efd4c9b6SSteve Lawrence 	uint64_t	zsctl_pagesize;
415*efd4c9b6SSteve Lawrence 
416*efd4c9b6SSteve Lawrence 	/* Used to track time available under a cpu cap. */
417*efd4c9b6SSteve Lawrence 	uint64_t	zsctl_hrtime;
418*efd4c9b6SSteve Lawrence 	uint64_t	zsctl_hrtime_prev;
419*efd4c9b6SSteve Lawrence 	timestruc_t	zsctl_hrtime_total;
420*efd4c9b6SSteve Lawrence 
421*efd4c9b6SSteve Lawrence 	struct timeval	zsctl_timeofday;
422*efd4c9b6SSteve Lawrence 
423*efd4c9b6SSteve Lawrence 	/* Caches for arrays allocated for use by various system calls */
424*efd4c9b6SSteve Lawrence 	psetid_t	*zsctl_pset_cache;
425*efd4c9b6SSteve Lawrence 	uint_t		zsctl_pset_ncache;
426*efd4c9b6SSteve Lawrence 	processorid_t	*zsctl_cpu_cache;
427*efd4c9b6SSteve Lawrence 	uint_t		zsctl_cpu_ncache;
428*efd4c9b6SSteve Lawrence 	zoneid_t	*zsctl_zone_cache;
429*efd4c9b6SSteve Lawrence 	uint_t		zsctl_zone_ncache;
430*efd4c9b6SSteve Lawrence 	struct swaptable *zsctl_swap_cache;
431*efd4c9b6SSteve Lawrence 	uint64_t	zsctl_swap_cache_size;
432*efd4c9b6SSteve Lawrence 	uint64_t	zsctl_swap_cache_num;
	/* Declared with the 64-bit vmusage layout. */
433*efd4c9b6SSteve Lawrence 	zsd_vmusage64_t	*zsctl_vmusage_cache;
434*efd4c9b6SSteve Lawrence 	uint64_t	zsctl_vmusage_cache_num;
435*efd4c9b6SSteve Lawrence 
436*efd4c9b6SSteve Lawrence 	/* Info about procfs for scanning /proc */
437*efd4c9b6SSteve Lawrence 	struct dirent	*zsctl_procfs_dent;
438*efd4c9b6SSteve Lawrence 	long		zsctl_procfs_dent_size;
	/* NOTE(review): a pool_value_t array; presumably belongs with the pool members above */
439*efd4c9b6SSteve Lawrence 	pool_value_t	*zsctl_pool_vals[3];
440*efd4c9b6SSteve Lawrence 
441*efd4c9b6SSteve Lawrence 	/* Counts on tracked entities */
442*efd4c9b6SSteve Lawrence 	uint_t		zsctl_nzones;
443*efd4c9b6SSteve Lawrence 	uint_t		zsctl_npsets;
444*efd4c9b6SSteve Lawrence 	uint_t		zsctl_npset_usages;
445*efd4c9b6SSteve Lawrence } zsd_ctl_t;
446*efd4c9b6SSteve Lawrence 
/*
 * File-scope state of the daemon.  None of these are declared static.
 * NOTE(review): g_ctl is presumably the zsd_ctl set up by zsd_open() -- confirm.
 */
447*efd4c9b6SSteve Lawrence zsd_ctl_t		*g_ctl;
448*efd4c9b6SSteve Lawrence boolean_t		g_open;		/* True if g_ctl is open */
449*efd4c9b6SSteve Lawrence int			g_hasclient;	/* True if any clients are connected */
450*efd4c9b6SSteve Lawrence 
451*efd4c9b6SSteve Lawrence /*
452*efd4c9b6SSteve Lawrence  * The usage cache is updated by the stat_thread, and copied to clients by
453*efd4c9b6SSteve Lawrence  * the zsd_stat_server.  Mutex and cond are to synchronize between the
454*efd4c9b6SSteve Lawrence  * stat_thread and the stat_server.
455*efd4c9b6SSteve Lawrence  */
456*efd4c9b6SSteve Lawrence zs_usage_cache_t	*g_usage_cache;
457*efd4c9b6SSteve Lawrence mutex_t			g_usage_cache_lock;
458*efd4c9b6SSteve Lawrence cond_t			g_usage_cache_kick;
459*efd4c9b6SSteve Lawrence uint_t			g_usage_cache_kickers;
460*efd4c9b6SSteve Lawrence cond_t			g_usage_cache_wait;
461*efd4c9b6SSteve Lawrence char			*g_usage_cache_buf;
462*efd4c9b6SSteve Lawrence uint_t			g_usage_cache_bufsz;
463*efd4c9b6SSteve Lawrence uint64_t		g_gen_next;
464*efd4c9b6SSteve Lawrence 
465*efd4c9b6SSteve Lawrence /* fds of door servers */
466*efd4c9b6SSteve Lawrence int			g_server_door;
467*efd4c9b6SSteve Lawrence int			g_stat_door;
468*efd4c9b6SSteve Lawrence 
469*efd4c9b6SSteve Lawrence /*
470*efd4c9b6SSteve Lawrence  * Starting and current time.  Used to throttle memory calculation, and to
471*efd4c9b6SSteve Lawrence  * mark new zones and psets with their boot and creation time.
472*efd4c9b6SSteve Lawrence  */
473*efd4c9b6SSteve Lawrence time_t			g_now;
474*efd4c9b6SSteve Lawrence time_t			g_start;
475*efd4c9b6SSteve Lawrence hrtime_t		g_hrnow;
476*efd4c9b6SSteve Lawrence hrtime_t		g_hrstart;
/* NOTE(review): presumably the config/sample_interval setting; units not shown here */
477*efd4c9b6SSteve Lawrence uint64_t		g_interval;
478*efd4c9b6SSteve Lawrence 
479*efd4c9b6SSteve Lawrence /*
480*efd4c9b6SSteve Lawrence  * main() thread.
481*efd4c9b6SSteve Lawrence  */
482*efd4c9b6SSteve Lawrence thread_t		g_main;
483*efd4c9b6SSteve Lawrence 
/*
 * Print a warning to stderr and return.
 *
 * Output is the localized "zonestat: Warning: " prefix, then the caller's
 * printf-style message built from fmt and its arguments, then a newline.
 *
 * The prefix is written with fputs() rather than handed to fprintf() as a
 * format string, so a message catalog entry containing '%' cannot be
 * interpreted as a conversion specification.
 */
/* PRINTFLIKE1 */
static void
zsd_warn(const char *fmt, ...)
{
	va_list alist;

	(void) fputs(gettext("zonestat: Warning: "), stderr);

	va_start(alist, fmt);
	(void) vfprintf(stderr, fmt, alist);
	va_end(alist);

	(void) fputc('\n', stderr);
}
497*efd4c9b6SSteve Lawrence 
/*
 * Print an error to stderr and terminate the process; never returns.
 *
 * Output is the localized "zonestat: Error: " prefix, then the caller's
 * printf-style message built from fmt and its arguments, then a newline.
 * The process then exits with status 1.
 *
 * The prefix is written with fputs() rather than handed to fprintf() as a
 * format string, so a message catalog entry containing '%' cannot be
 * interpreted as a conversion specification.
 */
/* PRINTFLIKE1 */
static void
zsd_error(const char *fmt, ...)
{
	va_list alist;

	(void) fputs(gettext("zonestat: Error: "), stderr);

	va_start(alist, fmt);
	(void) vfprintf(stderr, fmt, alist);
	va_end(alist);

	(void) fputc('\n', stderr);
	exit(1);
}
512*efd4c9b6SSteve Lawrence 
513*efd4c9b6SSteve Lawrence /* Turns on extended accounting if not configured externally */
514*efd4c9b6SSteve Lawrence int
515*efd4c9b6SSteve Lawrence zsd_enable_cpu_stats()
516*efd4c9b6SSteve Lawrence {
517*efd4c9b6SSteve Lawrence 	char *path = ZONESTAT_EXACCT_FILE;
518*efd4c9b6SSteve Lawrence 	char oldfile[MAXPATHLEN];
519*efd4c9b6SSteve Lawrence 	int ret, state = AC_ON;
520*efd4c9b6SSteve Lawrence 	ac_res_t res[6];
521*efd4c9b6SSteve Lawrence 
522*efd4c9b6SSteve Lawrence 	/*
523*efd4c9b6SSteve Lawrence 	 * Start a new accounting file  if accounting not configured
524*efd4c9b6SSteve Lawrence 	 * externally.
525*efd4c9b6SSteve Lawrence 	 */
526*efd4c9b6SSteve Lawrence 
527*efd4c9b6SSteve Lawrence 	res[0].ar_id = AC_PROC_PID;
528*efd4c9b6SSteve Lawrence 	res[0].ar_state = AC_ON;
529*efd4c9b6SSteve Lawrence 	res[1].ar_id = AC_PROC_ANCPID;
530*efd4c9b6SSteve Lawrence 	res[1].ar_state = AC_ON;
531*efd4c9b6SSteve Lawrence 	res[2].ar_id = AC_PROC_CPU;
532*efd4c9b6SSteve Lawrence 	res[2].ar_state = AC_ON;
533*efd4c9b6SSteve Lawrence 	res[3].ar_id = AC_PROC_TIME;
534*efd4c9b6SSteve Lawrence 	res[3].ar_state = AC_ON;
535*efd4c9b6SSteve Lawrence 	res[4].ar_id = AC_PROC_ZONENAME;
536*efd4c9b6SSteve Lawrence 	res[4].ar_state = AC_ON;
537*efd4c9b6SSteve Lawrence 	res[5].ar_id = AC_NONE;
538*efd4c9b6SSteve Lawrence 	res[5].ar_state = AC_ON;
539*efd4c9b6SSteve Lawrence 	if (acctctl(AC_PROC | AC_RES_SET, res, sizeof (res)) != 0) {
540*efd4c9b6SSteve Lawrence 		zsd_warn(gettext("Unable to set accounting resources"));
541*efd4c9b6SSteve Lawrence 		return (-1);
542*efd4c9b6SSteve Lawrence 	}
543*efd4c9b6SSteve Lawrence 	/* Only set accounting file if none is configured */
544*efd4c9b6SSteve Lawrence 	ret = acctctl(AC_PROC | AC_FILE_GET, oldfile, sizeof (oldfile));
545*efd4c9b6SSteve Lawrence 	if (ret < 0) {
546*efd4c9b6SSteve Lawrence 
547*efd4c9b6SSteve Lawrence 		(void) unlink(path);
548*efd4c9b6SSteve Lawrence 		if (acctctl(AC_PROC | AC_FILE_SET, path, strlen(path) + 1)
549*efd4c9b6SSteve Lawrence 		    == -1) {
550*efd4c9b6SSteve Lawrence 			zsd_warn(gettext("Unable to set accounting file"));
551*efd4c9b6SSteve Lawrence 			return (-1);
552*efd4c9b6SSteve Lawrence 		}
553*efd4c9b6SSteve Lawrence 	}
554*efd4c9b6SSteve Lawrence 	if (acctctl(AC_PROC | AC_STATE_SET, &state, sizeof (state)) == -1) {
555*efd4c9b6SSteve Lawrence 		zsd_warn(gettext("Unable to enable accounting"));
556*efd4c9b6SSteve Lawrence 		return (-1);
557*efd4c9b6SSteve Lawrence 	}
558*efd4c9b6SSteve Lawrence 	return (0);
559*efd4c9b6SSteve Lawrence }
560*efd4c9b6SSteve Lawrence 
561*efd4c9b6SSteve Lawrence /* Turns off extended accounting if not configured externally */
562*efd4c9b6SSteve Lawrence int
563*efd4c9b6SSteve Lawrence zsd_disable_cpu_stats()
564*efd4c9b6SSteve Lawrence {
565*efd4c9b6SSteve Lawrence 	char *path = ZONESTAT_EXACCT_FILE;
566*efd4c9b6SSteve Lawrence 	int ret, state = AC_OFF;
567*efd4c9b6SSteve Lawrence 	ac_res_t res[6];
568*efd4c9b6SSteve Lawrence 	char oldfile[MAXPATHLEN];
569*efd4c9b6SSteve Lawrence 
570*efd4c9b6SSteve Lawrence 	/* If accounting file is externally configured, leave it alone */
571*efd4c9b6SSteve Lawrence 	ret = acctctl(AC_PROC | AC_FILE_GET, oldfile, sizeof (oldfile));
572*efd4c9b6SSteve Lawrence 	if (ret == 0 && strcmp(oldfile, path) != 0)
573*efd4c9b6SSteve Lawrence 		return (0);
574*efd4c9b6SSteve Lawrence 
575*efd4c9b6SSteve Lawrence 	res[0].ar_id = AC_PROC_PID;
576*efd4c9b6SSteve Lawrence 	res[0].ar_state = AC_OFF;
577*efd4c9b6SSteve Lawrence 	res[1].ar_id = AC_PROC_ANCPID;
578*efd4c9b6SSteve Lawrence 	res[1].ar_state = AC_OFF;
579*efd4c9b6SSteve Lawrence 	res[2].ar_id = AC_PROC_CPU;
580*efd4c9b6SSteve Lawrence 	res[2].ar_state = AC_OFF;
581*efd4c9b6SSteve Lawrence 	res[3].ar_id = AC_PROC_TIME;
582*efd4c9b6SSteve Lawrence 	res[3].ar_state = AC_OFF;
583*efd4c9b6SSteve Lawrence 	res[4].ar_id = AC_PROC_ZONENAME;
584*efd4c9b6SSteve Lawrence 	res[4].ar_state = AC_OFF;
585*efd4c9b6SSteve Lawrence 	res[5].ar_id = AC_NONE;
586*efd4c9b6SSteve Lawrence 	res[5].ar_state = AC_OFF;
587*efd4c9b6SSteve Lawrence 	if (acctctl(AC_PROC | AC_RES_SET, res, sizeof (res)) != 0) {
588*efd4c9b6SSteve Lawrence 		zsd_warn(gettext("Unable to clear accounting resources"));
589*efd4c9b6SSteve Lawrence 		return (-1);
590*efd4c9b6SSteve Lawrence 	}
591*efd4c9b6SSteve Lawrence 	if (acctctl(AC_PROC | AC_FILE_SET, NULL, 0) == -1) {
592*efd4c9b6SSteve Lawrence 		zsd_warn(gettext("Unable to clear accounting file"));
593*efd4c9b6SSteve Lawrence 		return (-1);
594*efd4c9b6SSteve Lawrence 	}
595*efd4c9b6SSteve Lawrence 	if (acctctl(AC_PROC | AC_STATE_SET, &state, sizeof (state)) == -1) {
596*efd4c9b6SSteve Lawrence 		zsd_warn(gettext("Unable to diable accounting"));
597*efd4c9b6SSteve Lawrence 		return (-1);
598*efd4c9b6SSteve Lawrence 	}
599*efd4c9b6SSteve Lawrence 
600*efd4c9b6SSteve Lawrence 	(void) unlink(path);
601*efd4c9b6SSteve Lawrence 	return (0);
602*efd4c9b6SSteve Lawrence }
603*efd4c9b6SSteve Lawrence 
604*efd4c9b6SSteve Lawrence /*
605*efd4c9b6SSteve Lawrence  * If not configured externally, deletes the current extended accounting file
606*efd4c9b6SSteve Lawrence  * and starts a new one.
607*efd4c9b6SSteve Lawrence  *
608*efd4c9b6SSteve Lawrence  * Since the stat_thread holds an open handle to the accounting file, it will
609*efd4c9b6SSteve Lawrence  * read all remaining entries from the old file before switching to
610*efd4c9b6SSteve Lawrence  * read the new one.
611*efd4c9b6SSteve Lawrence  */
612*efd4c9b6SSteve Lawrence int
613*efd4c9b6SSteve Lawrence zsd_roll_exacct(void)
614*efd4c9b6SSteve Lawrence {
615*efd4c9b6SSteve Lawrence 	int ret;
616*efd4c9b6SSteve Lawrence 	char *path = ZONESTAT_EXACCT_FILE;
617*efd4c9b6SSteve Lawrence 	char oldfile[MAXPATHLEN];
618*efd4c9b6SSteve Lawrence 
619*efd4c9b6SSteve Lawrence 	/* If accounting file is externally configured, leave it alone */
620*efd4c9b6SSteve Lawrence 	ret = acctctl(AC_PROC | AC_FILE_GET, oldfile, sizeof (oldfile));
621*efd4c9b6SSteve Lawrence 	if (ret == 0 && strcmp(oldfile, path) != 0)
622*efd4c9b6SSteve Lawrence 		return (0);
623*efd4c9b6SSteve Lawrence 
624*efd4c9b6SSteve Lawrence 	if (unlink(path) != 0)
625*efd4c9b6SSteve Lawrence 		/* Roll it next time */
626*efd4c9b6SSteve Lawrence 		return (0);
627*efd4c9b6SSteve Lawrence 
628*efd4c9b6SSteve Lawrence 	if (acctctl(AC_PROC | AC_FILE_SET, path, strlen(path) + 1) == -1) {
629*efd4c9b6SSteve Lawrence 		zsd_warn(gettext("Unable to set accounting file"));
630*efd4c9b6SSteve Lawrence 		return (-1);
631*efd4c9b6SSteve Lawrence 	}
632*efd4c9b6SSteve Lawrence 	return (0);
633*efd4c9b6SSteve Lawrence }
634*efd4c9b6SSteve Lawrence 
635*efd4c9b6SSteve Lawrence /* Contract stuff for zone_enter() */
636*efd4c9b6SSteve Lawrence int
637*efd4c9b6SSteve Lawrence init_template(void)
638*efd4c9b6SSteve Lawrence {
639*efd4c9b6SSteve Lawrence 	int fd;
640*efd4c9b6SSteve Lawrence 	int err = 0;
641*efd4c9b6SSteve Lawrence 
642*efd4c9b6SSteve Lawrence 	fd = open64(CTFS_ROOT "/process/template", O_RDWR);
643*efd4c9b6SSteve Lawrence 	if (fd == -1)
644*efd4c9b6SSteve Lawrence 		return (-1);
645*efd4c9b6SSteve Lawrence 
646*efd4c9b6SSteve Lawrence 	/*
647*efd4c9b6SSteve Lawrence 	 * For now, zoneadmd doesn't do anything with the contract.
648*efd4c9b6SSteve Lawrence 	 * Deliver no events, don't inherit, and allow it to be orphaned.
649*efd4c9b6SSteve Lawrence 	 */
650*efd4c9b6SSteve Lawrence 	err |= ct_tmpl_set_critical(fd, 0);
651*efd4c9b6SSteve Lawrence 	err |= ct_tmpl_set_informative(fd, 0);
652*efd4c9b6SSteve Lawrence 	err |= ct_pr_tmpl_set_fatal(fd, CT_PR_EV_HWERR);
653*efd4c9b6SSteve Lawrence 	err |= ct_pr_tmpl_set_param(fd, CT_PR_PGRPONLY | CT_PR_REGENT);
654*efd4c9b6SSteve Lawrence 	if (err || ct_tmpl_activate(fd)) {
655*efd4c9b6SSteve Lawrence 		(void) close(fd);
656*efd4c9b6SSteve Lawrence 		return (-1);
657*efd4c9b6SSteve Lawrence 	}
658*efd4c9b6SSteve Lawrence 
659*efd4c9b6SSteve Lawrence 	return (fd);
660*efd4c9b6SSteve Lawrence }
661*efd4c9b6SSteve Lawrence 
662*efd4c9b6SSteve Lawrence /*
663*efd4c9b6SSteve Lawrence  * Contract stuff for zone_enter()
664*efd4c9b6SSteve Lawrence  */
665*efd4c9b6SSteve Lawrence int
666*efd4c9b6SSteve Lawrence contract_latest(ctid_t *id)
667*efd4c9b6SSteve Lawrence {
668*efd4c9b6SSteve Lawrence 	int cfd, r;
669*efd4c9b6SSteve Lawrence 	ct_stathdl_t st;
670*efd4c9b6SSteve Lawrence 	ctid_t result;
671*efd4c9b6SSteve Lawrence 
672*efd4c9b6SSteve Lawrence 	if ((cfd = open64(CTFS_ROOT "/process/latest", O_RDONLY)) == -1)
673*efd4c9b6SSteve Lawrence 		return (errno);
674*efd4c9b6SSteve Lawrence 
675*efd4c9b6SSteve Lawrence 	if ((r = ct_status_read(cfd, CTD_COMMON, &st)) != 0) {
676*efd4c9b6SSteve Lawrence 		(void) close(cfd);
677*efd4c9b6SSteve Lawrence 		return (r);
678*efd4c9b6SSteve Lawrence 	}
679*efd4c9b6SSteve Lawrence 
680*efd4c9b6SSteve Lawrence 	result = ct_status_get_id(st);
681*efd4c9b6SSteve Lawrence 	ct_status_free(st);
682*efd4c9b6SSteve Lawrence 	(void) close(cfd);
683*efd4c9b6SSteve Lawrence 
684*efd4c9b6SSteve Lawrence 	*id = result;
685*efd4c9b6SSteve Lawrence 	return (0);
686*efd4c9b6SSteve Lawrence }
687*efd4c9b6SSteve Lawrence 
/*
 * Adds FD_CLOEXEC to the descriptor flags of fd, preserving the flags that
 * are already set.  Returns 0 on success, or -1 if either fcntl() call fails.
 */
static int
close_on_exec(int fd)
{
	int cur;

	cur = fcntl(fd, F_GETFD, 0);
	if (cur == -1)
		return (-1);

	if (fcntl(fd, F_SETFD, cur | FD_CLOEXEC) == -1)
		return (-1);

	return (0);
}
696*efd4c9b6SSteve Lawrence 
697*efd4c9b6SSteve Lawrence int
698*efd4c9b6SSteve Lawrence contract_open(ctid_t ctid, const char *type, const char *file, int oflag)
699*efd4c9b6SSteve Lawrence {
700*efd4c9b6SSteve Lawrence 	char path[PATH_MAX];
701*efd4c9b6SSteve Lawrence 	int n, fd;
702*efd4c9b6SSteve Lawrence 
703*efd4c9b6SSteve Lawrence 	if (type == NULL)
704*efd4c9b6SSteve Lawrence 		type = "all";
705*efd4c9b6SSteve Lawrence 
706*efd4c9b6SSteve Lawrence 	n = snprintf(path, PATH_MAX, CTFS_ROOT "/%s/%ld/%s", type, ctid, file);
707*efd4c9b6SSteve Lawrence 	if (n >= sizeof (path)) {
708*efd4c9b6SSteve Lawrence 		errno = ENAMETOOLONG;
709*efd4c9b6SSteve Lawrence 		return (-1);
710*efd4c9b6SSteve Lawrence 	}
711*efd4c9b6SSteve Lawrence 
712*efd4c9b6SSteve Lawrence 	fd = open64(path, oflag);
713*efd4c9b6SSteve Lawrence 	if (fd != -1) {
714*efd4c9b6SSteve Lawrence 		if (close_on_exec(fd) == -1) {
715*efd4c9b6SSteve Lawrence 			int err = errno;
716*efd4c9b6SSteve Lawrence 			(void) close(fd);
717*efd4c9b6SSteve Lawrence 			errno = err;
718*efd4c9b6SSteve Lawrence 			return (-1);
719*efd4c9b6SSteve Lawrence 		}
720*efd4c9b6SSteve Lawrence 	}
721*efd4c9b6SSteve Lawrence 	return (fd);
722*efd4c9b6SSteve Lawrence }
723*efd4c9b6SSteve Lawrence 
724*efd4c9b6SSteve Lawrence int
725*efd4c9b6SSteve Lawrence contract_abandon_id(ctid_t ctid)
726*efd4c9b6SSteve Lawrence {
727*efd4c9b6SSteve Lawrence 	int fd, err;
728*efd4c9b6SSteve Lawrence 
729*efd4c9b6SSteve Lawrence 	fd = contract_open(ctid, "all", "ctl", O_WRONLY);
730*efd4c9b6SSteve Lawrence 	if (fd == -1)
731*efd4c9b6SSteve Lawrence 		return (errno);
732*efd4c9b6SSteve Lawrence 
733*efd4c9b6SSteve Lawrence 	err = ct_ctl_abandon(fd);
734*efd4c9b6SSteve Lawrence 	(void) close(fd);
735*efd4c9b6SSteve Lawrence 
736*efd4c9b6SSteve Lawrence 	return (err);
737*efd4c9b6SSteve Lawrence }
738*efd4c9b6SSteve Lawrence /*
739*efd4c9b6SSteve Lawrence  * Attach the zsd_server to a zone.  Called for each zone when zonestatd
740*efd4c9b6SSteve Lawrence  * starts, and for each newly booted zone when zoneadmd contacts the zsd_server
741*efd4c9b6SSteve Lawrence  *
742*efd4c9b6SSteve Lawrence  * Zone_enter is used to avoid reaching into zone to fattach door.
743*efd4c9b6SSteve Lawrence  */
744*efd4c9b6SSteve Lawrence static void
745*efd4c9b6SSteve Lawrence zsd_fattach_zone(zoneid_t zid, int door, boolean_t detach_only)
746*efd4c9b6SSteve Lawrence {
747*efd4c9b6SSteve Lawrence 	char *path = ZS_DOOR_PATH;
748*efd4c9b6SSteve Lawrence 	int fd, pid, stat, tmpl_fd;
749*efd4c9b6SSteve Lawrence 	ctid_t ct;
750*efd4c9b6SSteve Lawrence 
751*efd4c9b6SSteve Lawrence 	if ((tmpl_fd = init_template()) == -1) {
752*efd4c9b6SSteve Lawrence 		zsd_warn("Unable to init template");
753*efd4c9b6SSteve Lawrence 		return;
754*efd4c9b6SSteve Lawrence 	}
755*efd4c9b6SSteve Lawrence 
756*efd4c9b6SSteve Lawrence 	pid = forkx(0);
757*efd4c9b6SSteve Lawrence 	if (pid < 0) {
758*efd4c9b6SSteve Lawrence 		(void) ct_tmpl_clear(tmpl_fd);
759*efd4c9b6SSteve Lawrence 		zsd_warn(gettext(
760*efd4c9b6SSteve Lawrence 		    "Unable to fork to add zonestat to zoneid %d\n"), zid);
761*efd4c9b6SSteve Lawrence 		return;
762*efd4c9b6SSteve Lawrence 	}
763*efd4c9b6SSteve Lawrence 
764*efd4c9b6SSteve Lawrence 	if (pid == 0) {
765*efd4c9b6SSteve Lawrence 		(void) ct_tmpl_clear(tmpl_fd);
766*efd4c9b6SSteve Lawrence 		(void) close(tmpl_fd);
767*efd4c9b6SSteve Lawrence 		if (zid != 0 && zone_enter(zid) != 0) {
768*efd4c9b6SSteve Lawrence 			if (errno == EINVAL) {
769*efd4c9b6SSteve Lawrence 				_exit(0);
770*efd4c9b6SSteve Lawrence 			}
771*efd4c9b6SSteve Lawrence 			_exit(1);
772*efd4c9b6SSteve Lawrence 		}
773*efd4c9b6SSteve Lawrence 		(void) fdetach(path);
774*efd4c9b6SSteve Lawrence 		(void) unlink(path);
775*efd4c9b6SSteve Lawrence 		if (detach_only)
776*efd4c9b6SSteve Lawrence 			_exit(0);
777*efd4c9b6SSteve Lawrence 		fd = open(path, O_CREAT|O_RDWR, 0644);
778*efd4c9b6SSteve Lawrence 		if (fd < 0)
779*efd4c9b6SSteve Lawrence 			_exit(2);
780*efd4c9b6SSteve Lawrence 		if (fattach(door, path) != 0)
781*efd4c9b6SSteve Lawrence 			_exit(3);
782*efd4c9b6SSteve Lawrence 		_exit(0);
783*efd4c9b6SSteve Lawrence 	}
784*efd4c9b6SSteve Lawrence 	if (contract_latest(&ct) == -1)
785*efd4c9b6SSteve Lawrence 		ct = -1;
786*efd4c9b6SSteve Lawrence 	(void) ct_tmpl_clear(tmpl_fd);
787*efd4c9b6SSteve Lawrence 	(void) close(tmpl_fd);
788*efd4c9b6SSteve Lawrence 	(void) contract_abandon_id(ct);
789*efd4c9b6SSteve Lawrence 	while (waitpid(pid, &stat, 0) != pid)
790*efd4c9b6SSteve Lawrence 		;
791*efd4c9b6SSteve Lawrence 	if (WIFEXITED(stat) && WEXITSTATUS(stat) == 0)
792*efd4c9b6SSteve Lawrence 		return;
793*efd4c9b6SSteve Lawrence 
794*efd4c9b6SSteve Lawrence 	zsd_warn(gettext("Unable to attach door to zoneid: %d"), zid);
795*efd4c9b6SSteve Lawrence 
796*efd4c9b6SSteve Lawrence 	if (WEXITSTATUS(stat) == 1)
797*efd4c9b6SSteve Lawrence 		zsd_warn(gettext("Cannot entering zone"));
798*efd4c9b6SSteve Lawrence 	else if (WEXITSTATUS(stat) == 2)
799*efd4c9b6SSteve Lawrence 		zsd_warn(gettext("Unable to create door file: %s"), path);
800*efd4c9b6SSteve Lawrence 	else if (WEXITSTATUS(stat) == 3)
801*efd4c9b6SSteve Lawrence 		zsd_warn(gettext("Unable to fattach file: %s"), path);
802*efd4c9b6SSteve Lawrence 
803*efd4c9b6SSteve Lawrence 	zsd_warn(gettext("Internal error entering zone: %d"), zid);
804*efd4c9b6SSteve Lawrence }
805*efd4c9b6SSteve Lawrence 
806*efd4c9b6SSteve Lawrence /*
807*efd4c9b6SSteve Lawrence  * Zone lookup and allocation functions to manage list of currently running
808*efd4c9b6SSteve Lawrence  * zones.
809*efd4c9b6SSteve Lawrence  */
810*efd4c9b6SSteve Lawrence static zsd_zone_t *
811*efd4c9b6SSteve Lawrence zsd_lookup_zone(zsd_ctl_t *ctl, char *zonename, zoneid_t zoneid)
812*efd4c9b6SSteve Lawrence {
813*efd4c9b6SSteve Lawrence 	zsd_zone_t *zone;
814*efd4c9b6SSteve Lawrence 
815*efd4c9b6SSteve Lawrence 	for (zone = list_head(&ctl->zsctl_zones); zone != NULL;
816*efd4c9b6SSteve Lawrence 	    zone = list_next(&ctl->zsctl_zones, zone)) {
817*efd4c9b6SSteve Lawrence 		if (strcmp(zone->zsz_name, zonename) == 0) {
818*efd4c9b6SSteve Lawrence 			if (zoneid != -1)
819*efd4c9b6SSteve Lawrence 				zone->zsz_id = zoneid;
820*efd4c9b6SSteve Lawrence 			return (zone);
821*efd4c9b6SSteve Lawrence 		}
822*efd4c9b6SSteve Lawrence 	}
823*efd4c9b6SSteve Lawrence 	return (NULL);
824*efd4c9b6SSteve Lawrence }
825*efd4c9b6SSteve Lawrence 
826*efd4c9b6SSteve Lawrence static zsd_zone_t *
827*efd4c9b6SSteve Lawrence zsd_lookup_zone_byid(zsd_ctl_t *ctl, zoneid_t zoneid)
828*efd4c9b6SSteve Lawrence {
829*efd4c9b6SSteve Lawrence 	zsd_zone_t *zone;
830*efd4c9b6SSteve Lawrence 
831*efd4c9b6SSteve Lawrence 	for (zone = list_head(&ctl->zsctl_zones); zone != NULL;
832*efd4c9b6SSteve Lawrence 	    zone = list_next(&ctl->zsctl_zones, zone)) {
833*efd4c9b6SSteve Lawrence 		if (zone->zsz_id == zoneid)
834*efd4c9b6SSteve Lawrence 			return (zone);
835*efd4c9b6SSteve Lawrence 	}
836*efd4c9b6SSteve Lawrence 	return (NULL);
837*efd4c9b6SSteve Lawrence }
838*efd4c9b6SSteve Lawrence 
839*efd4c9b6SSteve Lawrence static zsd_zone_t *
840*efd4c9b6SSteve Lawrence zsd_allocate_zone(zsd_ctl_t *ctl, char *zonename, zoneid_t zoneid)
841*efd4c9b6SSteve Lawrence {
842*efd4c9b6SSteve Lawrence 	zsd_zone_t *zone;
843*efd4c9b6SSteve Lawrence 
844*efd4c9b6SSteve Lawrence 	if ((zone = (zsd_zone_t *)calloc(1, sizeof (zsd_zone_t))) == NULL)
845*efd4c9b6SSteve Lawrence 		return (NULL);
846*efd4c9b6SSteve Lawrence 
847*efd4c9b6SSteve Lawrence 	(void) strlcpy(zone->zsz_name, zonename, sizeof (zone->zsz_name));
848*efd4c9b6SSteve Lawrence 	zone->zsz_id = zoneid;
849*efd4c9b6SSteve Lawrence 	zone->zsz_found = B_FALSE;
850*efd4c9b6SSteve Lawrence 
851*efd4c9b6SSteve Lawrence 	/*
852*efd4c9b6SSteve Lawrence 	 * Allocate as deleted so if not found in first pass, zone is deleted
853*efd4c9b6SSteve Lawrence 	 * from list.  This can happen if zone is returned by zone_list, but
854*efd4c9b6SSteve Lawrence 	 * exits before first attempt to fetch zone details.
855*efd4c9b6SSteve Lawrence 	 */
856*efd4c9b6SSteve Lawrence 	zone->zsz_start = g_now;
857*efd4c9b6SSteve Lawrence 	zone->zsz_hrstart = g_hrnow;
858*efd4c9b6SSteve Lawrence 	zone->zsz_deleted = B_TRUE;
859*efd4c9b6SSteve Lawrence 
860*efd4c9b6SSteve Lawrence 	zone->zsz_cpu_shares = ZS_LIMIT_NONE;
861*efd4c9b6SSteve Lawrence 	zone->zsz_cpu_cap = ZS_LIMIT_NONE;
862*efd4c9b6SSteve Lawrence 	zone->zsz_ram_cap = ZS_LIMIT_NONE;
863*efd4c9b6SSteve Lawrence 	zone->zsz_locked_cap = ZS_LIMIT_NONE;
864*efd4c9b6SSteve Lawrence 	zone->zsz_vm_cap = ZS_LIMIT_NONE;
865*efd4c9b6SSteve Lawrence 
866*efd4c9b6SSteve Lawrence 	zone->zsz_processes_cap = ZS_LIMIT_NONE;
867*efd4c9b6SSteve Lawrence 	zone->zsz_lwps_cap = ZS_LIMIT_NONE;
868*efd4c9b6SSteve Lawrence 	zone->zsz_shm_cap = ZS_LIMIT_NONE;
869*efd4c9b6SSteve Lawrence 	zone->zsz_shmids_cap = ZS_LIMIT_NONE;
870*efd4c9b6SSteve Lawrence 	zone->zsz_semids_cap = ZS_LIMIT_NONE;
871*efd4c9b6SSteve Lawrence 	zone->zsz_msgids_cap = ZS_LIMIT_NONE;
872*efd4c9b6SSteve Lawrence 	zone->zsz_lofi_cap = ZS_LIMIT_NONE;
873*efd4c9b6SSteve Lawrence 
874*efd4c9b6SSteve Lawrence 	ctl->zsctl_nzones++;
875*efd4c9b6SSteve Lawrence 
876*efd4c9b6SSteve Lawrence 	return (zone);
877*efd4c9b6SSteve Lawrence }
878*efd4c9b6SSteve Lawrence 
879*efd4c9b6SSteve Lawrence static zsd_zone_t *
880*efd4c9b6SSteve Lawrence zsd_lookup_insert_zone(zsd_ctl_t *ctl, char *zonename, zoneid_t zoneid)
881*efd4c9b6SSteve Lawrence {
882*efd4c9b6SSteve Lawrence 	zsd_zone_t *zone, *tmp;
883*efd4c9b6SSteve Lawrence 
884*efd4c9b6SSteve Lawrence 	if ((zone = zsd_lookup_zone(ctl, zonename, zoneid)) != NULL)
885*efd4c9b6SSteve Lawrence 		return (zone);
886*efd4c9b6SSteve Lawrence 
887*efd4c9b6SSteve Lawrence 	if ((zone = zsd_allocate_zone(ctl, zonename, zoneid)) == NULL)
888*efd4c9b6SSteve Lawrence 		return (NULL);
889*efd4c9b6SSteve Lawrence 
890*efd4c9b6SSteve Lawrence 	/* Insert sorted by zonename */
891*efd4c9b6SSteve Lawrence 	tmp = list_head(&ctl->zsctl_zones);
892*efd4c9b6SSteve Lawrence 	while (tmp != NULL && strcmp(zonename, tmp->zsz_name) > 0)
893*efd4c9b6SSteve Lawrence 		tmp = list_next(&ctl->zsctl_zones, tmp);
894*efd4c9b6SSteve Lawrence 
895*efd4c9b6SSteve Lawrence 	list_insert_before(&ctl->zsctl_zones, tmp, zone);
896*efd4c9b6SSteve Lawrence 	return (zone);
897*efd4c9b6SSteve Lawrence }
898*efd4c9b6SSteve Lawrence 
899*efd4c9b6SSteve Lawrence /*
900*efd4c9b6SSteve Lawrence  * Mark all zones as not existing.  As zones are found, they will
901*efd4c9b6SSteve Lawrence  * be marked as existing.  If a zone is not found, then it must have
902*efd4c9b6SSteve Lawrence  * halted.
903*efd4c9b6SSteve Lawrence  */
904*efd4c9b6SSteve Lawrence static void
905*efd4c9b6SSteve Lawrence zsd_mark_zones_start(zsd_ctl_t *ctl)
906*efd4c9b6SSteve Lawrence {
907*efd4c9b6SSteve Lawrence 
908*efd4c9b6SSteve Lawrence 	zsd_zone_t *zone;
909*efd4c9b6SSteve Lawrence 
910*efd4c9b6SSteve Lawrence 	for (zone = list_head(&ctl->zsctl_zones); zone != NULL;
911*efd4c9b6SSteve Lawrence 	    zone = list_next(&ctl->zsctl_zones, zone)) {
912*efd4c9b6SSteve Lawrence 		zone->zsz_found = B_FALSE;
913*efd4c9b6SSteve Lawrence 	}
914*efd4c9b6SSteve Lawrence }
915*efd4c9b6SSteve Lawrence 
916*efd4c9b6SSteve Lawrence /*
917*efd4c9b6SSteve Lawrence  * Mark each zone as not using pset.  If processes are found using the
918*efd4c9b6SSteve Lawrence  * pset, the zone will remain bound to the pset.  If none of a zones
919*efd4c9b6SSteve Lawrence  * processes are bound to the pset, the zone's usage of the pset will
920*efd4c9b6SSteve Lawrence  * be deleted.
921*efd4c9b6SSteve Lawrence  *
922*efd4c9b6SSteve Lawrence  */
923*efd4c9b6SSteve Lawrence static void
924*efd4c9b6SSteve Lawrence zsd_mark_pset_usage_start(zsd_pset_t *pset)
925*efd4c9b6SSteve Lawrence {
926*efd4c9b6SSteve Lawrence 	zsd_pset_usage_t *usage;
927*efd4c9b6SSteve Lawrence 
928*efd4c9b6SSteve Lawrence 	for (usage = list_head(&pset->zsp_usage_list);
929*efd4c9b6SSteve Lawrence 	    usage != NULL;
930*efd4c9b6SSteve Lawrence 	    usage = list_next(&pset->zsp_usage_list, usage)) {
931*efd4c9b6SSteve Lawrence 		usage->zsu_found = B_FALSE;
932*efd4c9b6SSteve Lawrence 		usage->zsu_empty = B_TRUE;
933*efd4c9b6SSteve Lawrence 	}
934*efd4c9b6SSteve Lawrence }
935*efd4c9b6SSteve Lawrence 
936*efd4c9b6SSteve Lawrence /*
937*efd4c9b6SSteve Lawrence  * Mark each pset as not existing.  If a pset is found, it will be marked
938*efd4c9b6SSteve Lawrence  * as existing.  If a pset is not found, it wil be deleted.
939*efd4c9b6SSteve Lawrence  */
940*efd4c9b6SSteve Lawrence static void
941*efd4c9b6SSteve Lawrence zsd_mark_psets_start(zsd_ctl_t *ctl)
942*efd4c9b6SSteve Lawrence {
943*efd4c9b6SSteve Lawrence 	zsd_pset_t *pset;
944*efd4c9b6SSteve Lawrence 
945*efd4c9b6SSteve Lawrence 	for (pset = list_head(&ctl->zsctl_psets); pset != NULL;
946*efd4c9b6SSteve Lawrence 	    pset = list_next(&ctl->zsctl_psets, pset)) {
947*efd4c9b6SSteve Lawrence 		pset->zsp_found = B_FALSE;
948*efd4c9b6SSteve Lawrence 		zsd_mark_pset_usage_start(pset);
949*efd4c9b6SSteve Lawrence 	}
950*efd4c9b6SSteve Lawrence }
951*efd4c9b6SSteve Lawrence 
952*efd4c9b6SSteve Lawrence /*
953*efd4c9b6SSteve Lawrence  * A pset was found.  Update its information
954*efd4c9b6SSteve Lawrence  */
955*efd4c9b6SSteve Lawrence static void
956*efd4c9b6SSteve Lawrence zsd_mark_pset_found(zsd_pset_t *pset, uint_t type, uint64_t online,
957*efd4c9b6SSteve Lawrence     uint64_t size, uint64_t min, uint64_t max, int64_t importance)
958*efd4c9b6SSteve Lawrence {
959*efd4c9b6SSteve Lawrence 	pset->zsp_empty = B_TRUE;
960*efd4c9b6SSteve Lawrence 	pset->zsp_deleted = B_FALSE;
961*efd4c9b6SSteve Lawrence 
962*efd4c9b6SSteve Lawrence 	assert(pset->zsp_found == B_FALSE);
963*efd4c9b6SSteve Lawrence 
964*efd4c9b6SSteve Lawrence 	/* update pset flags */
965*efd4c9b6SSteve Lawrence 	if (pset->zsp_active == B_FALSE)
966*efd4c9b6SSteve Lawrence 		/* pset not seen on previous interval.  It is new. */
967*efd4c9b6SSteve Lawrence 		pset->zsp_new = B_TRUE;
968*efd4c9b6SSteve Lawrence 	else
969*efd4c9b6SSteve Lawrence 		pset->zsp_new = B_FALSE;
970*efd4c9b6SSteve Lawrence 
971*efd4c9b6SSteve Lawrence 	pset->zsp_found = B_TRUE;
972*efd4c9b6SSteve Lawrence 	pset->zsp_cputype = type;
973*efd4c9b6SSteve Lawrence 	pset->zsp_online = online;
974*efd4c9b6SSteve Lawrence 	pset->zsp_size = size;
975*efd4c9b6SSteve Lawrence 	pset->zsp_min = min;
976*efd4c9b6SSteve Lawrence 	pset->zsp_max = max;
977*efd4c9b6SSteve Lawrence 	pset->zsp_importance = importance;
978*efd4c9b6SSteve Lawrence 	pset->zsp_cpu_shares = 0;
979*efd4c9b6SSteve Lawrence 	pset->zsp_scheds = 0;
980*efd4c9b6SSteve Lawrence 	pset->zsp_active = B_TRUE;
981*efd4c9b6SSteve Lawrence }
982*efd4c9b6SSteve Lawrence 
983*efd4c9b6SSteve Lawrence /*
984*efd4c9b6SSteve Lawrence  * A zone's process was found using a pset. Charge the process to the pset and
985*efd4c9b6SSteve Lawrence  * the per-zone data for the pset.
986*efd4c9b6SSteve Lawrence  */
987*efd4c9b6SSteve Lawrence static void
988*efd4c9b6SSteve Lawrence zsd_mark_pset_usage_found(zsd_pset_usage_t *usage, uint_t sched)
989*efd4c9b6SSteve Lawrence {
990*efd4c9b6SSteve Lawrence 	zsd_zone_t *zone = usage->zsu_zone;
991*efd4c9b6SSteve Lawrence 	zsd_pset_t *pset = usage->zsu_pset;
992*efd4c9b6SSteve Lawrence 
993*efd4c9b6SSteve Lawrence 	/* Nothing to do if already found */
994*efd4c9b6SSteve Lawrence 	if (usage->zsu_found == B_TRUE)
995*efd4c9b6SSteve Lawrence 		goto add_stats;
996*efd4c9b6SSteve Lawrence 
997*efd4c9b6SSteve Lawrence 	usage->zsu_found = B_TRUE;
998*efd4c9b6SSteve Lawrence 	usage->zsu_empty = B_FALSE;
999*efd4c9b6SSteve Lawrence 
1000*efd4c9b6SSteve Lawrence 	usage->zsu_deleted = B_FALSE;
1001*efd4c9b6SSteve Lawrence 	/* update usage flags */
1002*efd4c9b6SSteve Lawrence 	if (usage->zsu_active == B_FALSE)
1003*efd4c9b6SSteve Lawrence 		usage->zsu_new = B_TRUE;
1004*efd4c9b6SSteve Lawrence 	else
1005*efd4c9b6SSteve Lawrence 		usage->zsu_new = B_FALSE;
1006*efd4c9b6SSteve Lawrence 
1007*efd4c9b6SSteve Lawrence 	usage->zsu_scheds = 0;
1008*efd4c9b6SSteve Lawrence 	usage->zsu_cpu_shares = ZS_LIMIT_NONE;
1009*efd4c9b6SSteve Lawrence 	usage->zsu_active = B_TRUE;
1010*efd4c9b6SSteve Lawrence 	pset->zsp_empty = B_FALSE;
1011*efd4c9b6SSteve Lawrence 	zone->zsz_empty = B_FALSE;
1012*efd4c9b6SSteve Lawrence 
1013*efd4c9b6SSteve Lawrence add_stats:
1014*efd4c9b6SSteve Lawrence 	/* Detect zone's pset id, and if it is bound to multiple psets */
1015*efd4c9b6SSteve Lawrence 	if (zone->zsz_psetid == ZS_PSET_ERROR)
1016*efd4c9b6SSteve Lawrence 		zone->zsz_psetid = pset->zsp_id;
1017*efd4c9b6SSteve Lawrence 	else if (zone->zsz_psetid != pset->zsp_id)
1018*efd4c9b6SSteve Lawrence 		zone->zsz_psetid = ZS_PSET_MULTI;
1019*efd4c9b6SSteve Lawrence 
1020*efd4c9b6SSteve Lawrence 	usage->zsu_scheds |= sched;
1021*efd4c9b6SSteve Lawrence 	pset->zsp_scheds |= sched;
1022*efd4c9b6SSteve Lawrence 	zone->zsz_scheds |= sched;
1023*efd4c9b6SSteve Lawrence 
1024*efd4c9b6SSteve Lawrence 	/* Record if FSS is co-habitating with conflicting scheduler */
1025*efd4c9b6SSteve Lawrence 	if ((pset->zsp_scheds & ZS_SCHED_FSS) &&
1026*efd4c9b6SSteve Lawrence 	    usage->zsu_scheds & (
1027*efd4c9b6SSteve Lawrence 	    ZS_SCHED_TS | ZS_SCHED_IA | ZS_SCHED_FX)) {
1028*efd4c9b6SSteve Lawrence 		usage->zsu_scheds |= ZS_SCHED_CONFLICT;
1029*efd4c9b6SSteve Lawrence 
1030*efd4c9b6SSteve Lawrence 		pset->zsp_scheds |= ZS_SCHED_CONFLICT;
1031*efd4c9b6SSteve Lawrence 	}
1032*efd4c9b6SSteve Lawrence 
1033*efd4c9b6SSteve Lawrence }
1034*efd4c9b6SSteve Lawrence 
1035*efd4c9b6SSteve Lawrence /* Add cpu time for a process to a pset, zone, and system totals */
1036*efd4c9b6SSteve Lawrence static void
1037*efd4c9b6SSteve Lawrence zsd_add_usage(zsd_ctl_t *ctl, zsd_pset_usage_t *usage, timestruc_t *delta)
1038*efd4c9b6SSteve Lawrence {
1039*efd4c9b6SSteve Lawrence 	zsd_system_t *system = ctl->zsctl_system;
1040*efd4c9b6SSteve Lawrence 	zsd_zone_t *zone = usage->zsu_zone;
1041*efd4c9b6SSteve Lawrence 	zsd_pset_t *pset = usage->zsu_pset;
1042*efd4c9b6SSteve Lawrence 
1043*efd4c9b6SSteve Lawrence 	TIMESTRUC_ADD_TIMESTRUC(usage->zsu_cpu_usage, *delta);
1044*efd4c9b6SSteve Lawrence 	TIMESTRUC_ADD_TIMESTRUC(pset->zsp_usage_zones, *delta);
1045*efd4c9b6SSteve Lawrence 	TIMESTRUC_ADD_TIMESTRUC(zone->zsz_cpu_usage, *delta);
1046*efd4c9b6SSteve Lawrence 	TIMESTRUC_ADD_TIMESTRUC(system->zss_cpu_usage_zones, *delta);
1047*efd4c9b6SSteve Lawrence }
1048*efd4c9b6SSteve Lawrence 
1049*efd4c9b6SSteve Lawrence /* Determine which processor sets have been deleted */
1050*efd4c9b6SSteve Lawrence static void
1051*efd4c9b6SSteve Lawrence zsd_mark_psets_end(zsd_ctl_t *ctl)
1052*efd4c9b6SSteve Lawrence {
1053*efd4c9b6SSteve Lawrence 	zsd_pset_t *pset, *tmp;
1054*efd4c9b6SSteve Lawrence 
1055*efd4c9b6SSteve Lawrence 	/*
1056*efd4c9b6SSteve Lawrence 	 * Mark pset as not exists, and deleted if it existed
1057*efd4c9b6SSteve Lawrence 	 * previous interval.
1058*efd4c9b6SSteve Lawrence 	 */
1059*efd4c9b6SSteve Lawrence 	pset = list_head(&ctl->zsctl_psets);
1060*efd4c9b6SSteve Lawrence 	while (pset != NULL) {
1061*efd4c9b6SSteve Lawrence 		if (pset->zsp_found == B_FALSE) {
1062*efd4c9b6SSteve Lawrence 			pset->zsp_empty = B_TRUE;
1063*efd4c9b6SSteve Lawrence 			if (pset->zsp_deleted == B_TRUE) {
1064*efd4c9b6SSteve Lawrence 				tmp = pset;
1065*efd4c9b6SSteve Lawrence 				pset = list_next(&ctl->zsctl_psets, pset);
1066*efd4c9b6SSteve Lawrence 				list_remove(&ctl->zsctl_psets, tmp);
1067*efd4c9b6SSteve Lawrence 				free(tmp);
1068*efd4c9b6SSteve Lawrence 				ctl->zsctl_npsets--;
1069*efd4c9b6SSteve Lawrence 				continue;
1070*efd4c9b6SSteve Lawrence 			} else {
1071*efd4c9b6SSteve Lawrence 				/* Pset vanished during this interval */
1072*efd4c9b6SSteve Lawrence 				pset->zsp_new = B_FALSE;
1073*efd4c9b6SSteve Lawrence 				pset->zsp_deleted = B_TRUE;
1074*efd4c9b6SSteve Lawrence 				pset->zsp_active = B_TRUE;
1075*efd4c9b6SSteve Lawrence 			}
1076*efd4c9b6SSteve Lawrence 		}
1077*efd4c9b6SSteve Lawrence 		pset = list_next(&ctl->zsctl_psets, pset);
1078*efd4c9b6SSteve Lawrence 	}
1079*efd4c9b6SSteve Lawrence }
1080*efd4c9b6SSteve Lawrence 
1081*efd4c9b6SSteve Lawrence /* Determine which zones are no longer bound to processor sets */
1082*efd4c9b6SSteve Lawrence static void
1083*efd4c9b6SSteve Lawrence zsd_mark_pset_usages_end(zsd_ctl_t *ctl)
1084*efd4c9b6SSteve Lawrence {
1085*efd4c9b6SSteve Lawrence 	zsd_pset_t *pset;
1086*efd4c9b6SSteve Lawrence 	zsd_zone_t *zone;
1087*efd4c9b6SSteve Lawrence 	zsd_pset_usage_t *usage, *tmp;
1088*efd4c9b6SSteve Lawrence 
1089*efd4c9b6SSteve Lawrence 	/*
1090*efd4c9b6SSteve Lawrence 	 * Mark pset as not exists, and deleted if it existed previous
1091*efd4c9b6SSteve Lawrence 	 * interval.
1092*efd4c9b6SSteve Lawrence 	 */
1093*efd4c9b6SSteve Lawrence 	for (pset = list_head(&ctl->zsctl_psets); pset != NULL;
1094*efd4c9b6SSteve Lawrence 	    pset = list_next(&ctl->zsctl_psets, pset)) {
1095*efd4c9b6SSteve Lawrence 		usage = list_head(&pset->zsp_usage_list);
1096*efd4c9b6SSteve Lawrence 		while (usage != NULL) {
1097*efd4c9b6SSteve Lawrence 			/*
1098*efd4c9b6SSteve Lawrence 			 * Mark pset as not exists, and deleted if it existed
1099*efd4c9b6SSteve Lawrence 			 * previous interval.
1100*efd4c9b6SSteve Lawrence 			 */
1101*efd4c9b6SSteve Lawrence 			if (usage->zsu_found == B_FALSE ||
1102*efd4c9b6SSteve Lawrence 			    usage->zsu_zone->zsz_deleted == B_TRUE ||
1103*efd4c9b6SSteve Lawrence 			    usage->zsu_pset->zsp_deleted == B_TRUE) {
1104*efd4c9b6SSteve Lawrence 				tmp = usage;
1105*efd4c9b6SSteve Lawrence 				usage = list_next(&pset->zsp_usage_list,
1106*efd4c9b6SSteve Lawrence 				    usage);
1107*efd4c9b6SSteve Lawrence 				list_remove(&pset->zsp_usage_list, tmp);
1108*efd4c9b6SSteve Lawrence 				free(tmp);
1109*efd4c9b6SSteve Lawrence 				pset->zsp_nusage--;
1110*efd4c9b6SSteve Lawrence 				ctl->zsctl_npset_usages--;
1111*efd4c9b6SSteve Lawrence 				continue;
1112*efd4c9b6SSteve Lawrence 			} else {
1113*efd4c9b6SSteve Lawrence 				usage->zsu_new = B_FALSE;
1114*efd4c9b6SSteve Lawrence 				usage->zsu_deleted = B_TRUE;
1115*efd4c9b6SSteve Lawrence 				usage->zsu_active = B_TRUE;
1116*efd4c9b6SSteve Lawrence 			}
1117*efd4c9b6SSteve Lawrence 			/* Add cpu shares for usages that are in FSS */
1118*efd4c9b6SSteve Lawrence 			zone = usage->zsu_zone;
1119*efd4c9b6SSteve Lawrence 			if (usage->zsu_scheds & ZS_SCHED_FSS &&
1120*efd4c9b6SSteve Lawrence 			    zone->zsz_cpu_shares != ZS_SHARES_UNLIMITED &&
1121*efd4c9b6SSteve Lawrence 			    zone->zsz_cpu_shares != 0) {
1122*efd4c9b6SSteve Lawrence 				zone = usage->zsu_zone;
1123*efd4c9b6SSteve Lawrence 				usage->zsu_cpu_shares = zone->zsz_cpu_shares;
1124*efd4c9b6SSteve Lawrence 				pset->zsp_cpu_shares += zone->zsz_cpu_shares;
1125*efd4c9b6SSteve Lawrence 			}
1126*efd4c9b6SSteve Lawrence 			usage = list_next(&pset->zsp_usage_list,
1127*efd4c9b6SSteve Lawrence 			    usage);
1128*efd4c9b6SSteve Lawrence 		}
1129*efd4c9b6SSteve Lawrence 	}
1130*efd4c9b6SSteve Lawrence }
1131*efd4c9b6SSteve Lawrence 
1132*efd4c9b6SSteve Lawrence /* A zone has been found.  Update its information */
1133*efd4c9b6SSteve Lawrence static void
1134*efd4c9b6SSteve Lawrence zsd_mark_zone_found(zsd_ctl_t *ctl, zsd_zone_t *zone, uint64_t cpu_shares,
1135*efd4c9b6SSteve Lawrence     uint64_t cpu_cap, uint64_t ram_cap, uint64_t locked_cap,
1136*efd4c9b6SSteve Lawrence     uint64_t vm_cap, uint64_t processes_cap, uint64_t processes,
1137*efd4c9b6SSteve Lawrence     uint64_t lwps_cap, uint64_t lwps, uint64_t shm_cap, uint64_t shm,
1138*efd4c9b6SSteve Lawrence     uint64_t shmids_cap, uint64_t shmids, uint64_t semids_cap,
1139*efd4c9b6SSteve Lawrence     uint64_t semids, uint64_t msgids_cap, uint64_t msgids, uint64_t lofi_cap,
1140*efd4c9b6SSteve Lawrence     uint64_t lofi, char *poolname, char *psetname, uint_t sched, uint_t cputype,
1141*efd4c9b6SSteve Lawrence     uint_t iptype)
1142*efd4c9b6SSteve Lawrence {
1143*efd4c9b6SSteve Lawrence 	zsd_system_t *sys = ctl->zsctl_system;
1144*efd4c9b6SSteve Lawrence 
1145*efd4c9b6SSteve Lawrence 	assert(zone->zsz_found == B_FALSE);
1146*efd4c9b6SSteve Lawrence 
1147*efd4c9b6SSteve Lawrence 	/*
1148*efd4c9b6SSteve Lawrence 	 * Mark zone as exists, and new if it did not exist in previous
1149*efd4c9b6SSteve Lawrence 	 * interval.
1150*efd4c9b6SSteve Lawrence 	 */
1151*efd4c9b6SSteve Lawrence 	zone->zsz_found = B_TRUE;
1152*efd4c9b6SSteve Lawrence 	zone->zsz_empty = B_TRUE;
1153*efd4c9b6SSteve Lawrence 	zone->zsz_deleted = B_FALSE;
1154*efd4c9b6SSteve Lawrence 
1155*efd4c9b6SSteve Lawrence 	/*
1156*efd4c9b6SSteve Lawrence 	 * Zone is new.  Assume zone's properties are the same over entire
1157*efd4c9b6SSteve Lawrence 	 * interval.
1158*efd4c9b6SSteve Lawrence 	 */
1159*efd4c9b6SSteve Lawrence 	if (zone->zsz_active == B_FALSE)
1160*efd4c9b6SSteve Lawrence 		zone->zsz_new = B_TRUE;
1161*efd4c9b6SSteve Lawrence 	else
1162*efd4c9b6SSteve Lawrence 		zone->zsz_new = B_FALSE;
1163*efd4c9b6SSteve Lawrence 
1164*efd4c9b6SSteve Lawrence 	(void) strlcpy(zone->zsz_pool, poolname, sizeof (zone->zsz_pool));
1165*efd4c9b6SSteve Lawrence 	(void) strlcpy(zone->zsz_pset, psetname, sizeof (zone->zsz_pset));
1166*efd4c9b6SSteve Lawrence 	zone->zsz_default_sched = sched;
1167*efd4c9b6SSteve Lawrence 
1168*efd4c9b6SSteve Lawrence 	/* Schedulers updated later as processes are found */
1169*efd4c9b6SSteve Lawrence 	zone->zsz_scheds = 0;
1170*efd4c9b6SSteve Lawrence 
1171*efd4c9b6SSteve Lawrence 	/* Cpus updated later as psets bound are identified */
1172*efd4c9b6SSteve Lawrence 	zone->zsz_cpus_online = 0;
1173*efd4c9b6SSteve Lawrence 
1174*efd4c9b6SSteve Lawrence 	zone->zsz_cputype = cputype;
1175*efd4c9b6SSteve Lawrence 	zone->zsz_iptype = iptype;
1176*efd4c9b6SSteve Lawrence 	zone->zsz_psetid = ZS_PSET_ERROR;
1177*efd4c9b6SSteve Lawrence 	zone->zsz_cpu_cap = cpu_cap;
1178*efd4c9b6SSteve Lawrence 	zone->zsz_cpu_shares = cpu_shares;
1179*efd4c9b6SSteve Lawrence 	zone->zsz_ram_cap = ram_cap;
1180*efd4c9b6SSteve Lawrence 	zone->zsz_locked_cap = locked_cap;
1181*efd4c9b6SSteve Lawrence 	zone->zsz_vm_cap = vm_cap;
1182*efd4c9b6SSteve Lawrence 	zone->zsz_processes_cap = processes_cap;
1183*efd4c9b6SSteve Lawrence 	zone->zsz_processes = processes;
1184*efd4c9b6SSteve Lawrence 	zone->zsz_lwps_cap = lwps_cap;
1185*efd4c9b6SSteve Lawrence 	zone->zsz_lwps = lwps;
1186*efd4c9b6SSteve Lawrence 	zone->zsz_shm_cap = shm_cap;
1187*efd4c9b6SSteve Lawrence 	zone->zsz_shm = shm;
1188*efd4c9b6SSteve Lawrence 	zone->zsz_shmids_cap = shmids_cap;
1189*efd4c9b6SSteve Lawrence 	zone->zsz_shmids = shmids;
1190*efd4c9b6SSteve Lawrence 	zone->zsz_semids_cap = semids_cap;
1191*efd4c9b6SSteve Lawrence 	zone->zsz_semids = semids;
1192*efd4c9b6SSteve Lawrence 	zone->zsz_msgids_cap = msgids_cap;
1193*efd4c9b6SSteve Lawrence 	zone->zsz_msgids = msgids;
1194*efd4c9b6SSteve Lawrence 	zone->zsz_lofi_cap = lofi_cap;
1195*efd4c9b6SSteve Lawrence 	zone->zsz_lofi = lofi;
1196*efd4c9b6SSteve Lawrence 
1197*efd4c9b6SSteve Lawrence 	sys->zss_processes += processes;
1198*efd4c9b6SSteve Lawrence 	sys->zss_lwps += lwps;
1199*efd4c9b6SSteve Lawrence 	sys->zss_shm += shm;
1200*efd4c9b6SSteve Lawrence 	sys->zss_shmids += shmids;
1201*efd4c9b6SSteve Lawrence 	sys->zss_semids += semids;
1202*efd4c9b6SSteve Lawrence 	sys->zss_msgids += msgids;
1203*efd4c9b6SSteve Lawrence 	sys->zss_lofi += lofi;
1204*efd4c9b6SSteve Lawrence 	zone->zsz_active = B_TRUE;
1205*efd4c9b6SSteve Lawrence }
1206*efd4c9b6SSteve Lawrence 
1207*efd4c9b6SSteve Lawrence 
1208*efd4c9b6SSteve Lawrence /* Determine which zones have halted */
1209*efd4c9b6SSteve Lawrence static void
1210*efd4c9b6SSteve Lawrence zsd_mark_zones_end(zsd_ctl_t *ctl)
1211*efd4c9b6SSteve Lawrence {
1212*efd4c9b6SSteve Lawrence 	zsd_zone_t *zone, *tmp;
1213*efd4c9b6SSteve Lawrence 
1214*efd4c9b6SSteve Lawrence 	/*
1215*efd4c9b6SSteve Lawrence 	 * Mark zone as not existing, or delete if it did not exist in
1216*efd4c9b6SSteve Lawrence 	 * previous interval.
1217*efd4c9b6SSteve Lawrence 	 */
1218*efd4c9b6SSteve Lawrence 	zone = list_head(&ctl->zsctl_zones);
1219*efd4c9b6SSteve Lawrence 	while (zone != NULL) {
1220*efd4c9b6SSteve Lawrence 		if (zone->zsz_found == B_FALSE) {
1221*efd4c9b6SSteve Lawrence 			zone->zsz_empty = B_TRUE;
1222*efd4c9b6SSteve Lawrence 			if (zone->zsz_deleted == B_TRUE) {
1223*efd4c9b6SSteve Lawrence 				/*
1224*efd4c9b6SSteve Lawrence 				 * Zone deleted in prior interval,
1225*efd4c9b6SSteve Lawrence 				 * so it no longer exists.
1226*efd4c9b6SSteve Lawrence 				 */
1227*efd4c9b6SSteve Lawrence 				tmp = zone;
1228*efd4c9b6SSteve Lawrence 				zone = list_next(&ctl->zsctl_zones, zone);
1229*efd4c9b6SSteve Lawrence 				list_remove(&ctl->zsctl_zones, tmp);
1230*efd4c9b6SSteve Lawrence 				free(tmp);
1231*efd4c9b6SSteve Lawrence 				ctl->zsctl_nzones--;
1232*efd4c9b6SSteve Lawrence 				continue;
1233*efd4c9b6SSteve Lawrence 			} else {
1234*efd4c9b6SSteve Lawrence 				zone->zsz_new = B_FALSE;
1235*efd4c9b6SSteve Lawrence 				zone->zsz_deleted = B_TRUE;
1236*efd4c9b6SSteve Lawrence 				zone->zsz_active = B_TRUE;
1237*efd4c9b6SSteve Lawrence 			}
1238*efd4c9b6SSteve Lawrence 		}
1239*efd4c9b6SSteve Lawrence 		zone = list_next(&ctl->zsctl_zones, zone);
1240*efd4c9b6SSteve Lawrence 	}
1241*efd4c9b6SSteve Lawrence }
1242*efd4c9b6SSteve Lawrence 
1243*efd4c9b6SSteve Lawrence /*
1244*efd4c9b6SSteve Lawrence  * Mark cpus as not existing.  If a cpu is found, it will be updated.  If
1245*efd4c9b6SSteve Lawrence  * a cpu is not found, then it must have gone offline, so it will be
1246*efd4c9b6SSteve Lawrence  * deleted.
1247*efd4c9b6SSteve Lawrence  *
1248*efd4c9b6SSteve Lawrence  * The kstat tracking data is rolled so that the usage since the previous
1249*efd4c9b6SSteve Lawrence  * interval can be determined.
1250*efd4c9b6SSteve Lawrence  */
1251*efd4c9b6SSteve Lawrence static void
1252*efd4c9b6SSteve Lawrence zsd_mark_cpus_start(zsd_ctl_t *ctl, boolean_t roll)
1253*efd4c9b6SSteve Lawrence {
1254*efd4c9b6SSteve Lawrence 	zsd_cpu_t *cpu;
1255*efd4c9b6SSteve Lawrence 
1256*efd4c9b6SSteve Lawrence 	/*
1257*efd4c9b6SSteve Lawrence 	 * Mark all cpus as not existing.  As cpus are found, they will
1258*efd4c9b6SSteve Lawrence 	 * be marked as existing.
1259*efd4c9b6SSteve Lawrence 	 */
1260*efd4c9b6SSteve Lawrence 	for (cpu = list_head(&ctl->zsctl_cpus); cpu != NULL;
1261*efd4c9b6SSteve Lawrence 	    cpu = list_next(&ctl->zsctl_cpus, cpu)) {
1262*efd4c9b6SSteve Lawrence 		cpu->zsc_found = B_FALSE;
1263*efd4c9b6SSteve Lawrence 		if (cpu->zsc_active == B_TRUE && roll) {
1264*efd4c9b6SSteve Lawrence 			cpu->zsc_psetid_prev = cpu->zsc_psetid;
1265*efd4c9b6SSteve Lawrence 			cpu->zsc_nsec_idle_prev = cpu->zsc_nsec_idle;
1266*efd4c9b6SSteve Lawrence 			cpu->zsc_nsec_intr_prev = cpu->zsc_nsec_intr;
1267*efd4c9b6SSteve Lawrence 			cpu->zsc_nsec_kern_prev = cpu->zsc_nsec_kern;
1268*efd4c9b6SSteve Lawrence 			cpu->zsc_nsec_user_prev = cpu->zsc_nsec_user;
1269*efd4c9b6SSteve Lawrence 		}
1270*efd4c9b6SSteve Lawrence 	}
1271*efd4c9b6SSteve Lawrence }
1272*efd4c9b6SSteve Lawrence 
1273*efd4c9b6SSteve Lawrence /*
1274*efd4c9b6SSteve Lawrence  * An array the size of the maximum number of cpus is kept.  Within this array
1275*efd4c9b6SSteve Lawrence  * a list of the online cpus is maintained.
1276*efd4c9b6SSteve Lawrence  */
1277*efd4c9b6SSteve Lawrence zsd_cpu_t *
1278*efd4c9b6SSteve Lawrence zsd_lookup_insert_cpu(zsd_ctl_t *ctl, processorid_t cpuid)
1279*efd4c9b6SSteve Lawrence {
1280*efd4c9b6SSteve Lawrence 	zsd_cpu_t *cpu;
1281*efd4c9b6SSteve Lawrence 
1282*efd4c9b6SSteve Lawrence 	assert(cpuid < ctl->zsctl_maxcpuid);
1283*efd4c9b6SSteve Lawrence 	cpu = &(ctl->zsctl_cpu_array[cpuid]);
1284*efd4c9b6SSteve Lawrence 	assert(cpuid == cpu->zsc_id);
1285*efd4c9b6SSteve Lawrence 
1286*efd4c9b6SSteve Lawrence 	if (cpu->zsc_allocated == B_FALSE) {
1287*efd4c9b6SSteve Lawrence 		cpu->zsc_allocated = B_TRUE;
1288*efd4c9b6SSteve Lawrence 		list_insert_tail(&ctl->zsctl_cpus, cpu);
1289*efd4c9b6SSteve Lawrence 	}
1290*efd4c9b6SSteve Lawrence 	return (cpu);
1291*efd4c9b6SSteve Lawrence }
1292*efd4c9b6SSteve Lawrence 
1293*efd4c9b6SSteve Lawrence /* A cpu has been found.  Update its information */
1294*efd4c9b6SSteve Lawrence static void
1295*efd4c9b6SSteve Lawrence zsd_mark_cpu_found(zsd_cpu_t *cpu, zsd_pset_t *pset, psetid_t psetid)
1296*efd4c9b6SSteve Lawrence {
1297*efd4c9b6SSteve Lawrence 	/*
1298*efd4c9b6SSteve Lawrence 	 * legacy processor sets, the cpu may move while zonestatd is
1299*efd4c9b6SSteve Lawrence 	 * inspecting, causing it to be found twice.  In this case, just
1300*efd4c9b6SSteve Lawrence 	 * leave cpu in the first processor set in which it was found.
1301*efd4c9b6SSteve Lawrence 	 */
1302*efd4c9b6SSteve Lawrence 	if (cpu->zsc_found == B_TRUE)
1303*efd4c9b6SSteve Lawrence 		return;
1304*efd4c9b6SSteve Lawrence 
1305*efd4c9b6SSteve Lawrence 	/* Mark cpu as online */
1306*efd4c9b6SSteve Lawrence 	cpu->zsc_found = B_TRUE;
1307*efd4c9b6SSteve Lawrence 	cpu->zsc_offlined = B_FALSE;
1308*efd4c9b6SSteve Lawrence 	cpu->zsc_pset = pset;
1309*efd4c9b6SSteve Lawrence 	/*
1310*efd4c9b6SSteve Lawrence 	 * cpu is newly online.
1311*efd4c9b6SSteve Lawrence 	 */
1312*efd4c9b6SSteve Lawrence 	if (cpu->zsc_active == B_FALSE) {
1313*efd4c9b6SSteve Lawrence 		/*
1314*efd4c9b6SSteve Lawrence 		 * Cpu is newly online.
1315*efd4c9b6SSteve Lawrence 		 */
1316*efd4c9b6SSteve Lawrence 		cpu->zsc_onlined = B_TRUE;
1317*efd4c9b6SSteve Lawrence 		cpu->zsc_psetid = psetid;
1318*efd4c9b6SSteve Lawrence 		cpu->zsc_psetid_prev = psetid;
1319*efd4c9b6SSteve Lawrence 	} else {
1320*efd4c9b6SSteve Lawrence 		/*
1321*efd4c9b6SSteve Lawrence 		 * cpu online during previous interval.  Save properties at
1322*efd4c9b6SSteve Lawrence 		 * start of interval
1323*efd4c9b6SSteve Lawrence 		 */
1324*efd4c9b6SSteve Lawrence 		cpu->zsc_onlined = B_FALSE;
1325*efd4c9b6SSteve Lawrence 		cpu->zsc_psetid = psetid;
1326*efd4c9b6SSteve Lawrence 
1327*efd4c9b6SSteve Lawrence 	}
1328*efd4c9b6SSteve Lawrence 	cpu->zsc_active = B_TRUE;
1329*efd4c9b6SSteve Lawrence }
1330*efd4c9b6SSteve Lawrence 
1331*efd4c9b6SSteve Lawrence /* Remove all offlined cpus from the list of tracked cpus */
1332*efd4c9b6SSteve Lawrence static void
1333*efd4c9b6SSteve Lawrence zsd_mark_cpus_end(zsd_ctl_t *ctl)
1334*efd4c9b6SSteve Lawrence {
1335*efd4c9b6SSteve Lawrence 	zsd_cpu_t *cpu, *tmp;
1336*efd4c9b6SSteve Lawrence 	int id;
1337*efd4c9b6SSteve Lawrence 
1338*efd4c9b6SSteve Lawrence 	/* Mark cpu as online or offline */
1339*efd4c9b6SSteve Lawrence 	cpu = list_head(&ctl->zsctl_cpus);
1340*efd4c9b6SSteve Lawrence 	while (cpu != NULL) {
1341*efd4c9b6SSteve Lawrence 		if (cpu->zsc_found == B_FALSE) {
1342*efd4c9b6SSteve Lawrence 			if (cpu->zsc_offlined == B_TRUE) {
1343*efd4c9b6SSteve Lawrence 				/*
1344*efd4c9b6SSteve Lawrence 				 * cpu offlined in prior interval. It is gone.
1345*efd4c9b6SSteve Lawrence 				 */
1346*efd4c9b6SSteve Lawrence 				tmp = cpu;
1347*efd4c9b6SSteve Lawrence 				cpu = list_next(&ctl->zsctl_cpus, cpu);
1348*efd4c9b6SSteve Lawrence 				list_remove(&ctl->zsctl_cpus, tmp);
1349*efd4c9b6SSteve Lawrence 				/* Clear structure for future use */
1350*efd4c9b6SSteve Lawrence 				id = tmp->zsc_id;
1351*efd4c9b6SSteve Lawrence 				bzero(tmp, sizeof (zsd_cpu_t));
1352*efd4c9b6SSteve Lawrence 				tmp->zsc_id = id;
1353*efd4c9b6SSteve Lawrence 				tmp->zsc_allocated = B_FALSE;
1354*efd4c9b6SSteve Lawrence 				tmp->zsc_psetid = ZS_PSET_ERROR;
1355*efd4c9b6SSteve Lawrence 				tmp->zsc_psetid_prev = ZS_PSET_ERROR;
1356*efd4c9b6SSteve Lawrence 
1357*efd4c9b6SSteve Lawrence 			} else {
1358*efd4c9b6SSteve Lawrence 				/*
1359*efd4c9b6SSteve Lawrence 				 * cpu online at start of interval.  Treat
1360*efd4c9b6SSteve Lawrence 				 * as still online, since it was online for
1361*efd4c9b6SSteve Lawrence 				 * some portion of the interval.
1362*efd4c9b6SSteve Lawrence 				 */
1363*efd4c9b6SSteve Lawrence 				cpu->zsc_offlined = B_TRUE;
1364*efd4c9b6SSteve Lawrence 				cpu->zsc_onlined = B_FALSE;
1365*efd4c9b6SSteve Lawrence 				cpu->zsc_active = B_TRUE;
1366*efd4c9b6SSteve Lawrence 				cpu->zsc_psetid = cpu->zsc_psetid_prev;
1367*efd4c9b6SSteve Lawrence 				cpu->zsc_pset = NULL;
1368*efd4c9b6SSteve Lawrence 			}
1369*efd4c9b6SSteve Lawrence 		}
1370*efd4c9b6SSteve Lawrence 		cpu = list_next(&ctl->zsctl_cpus, cpu);
1371*efd4c9b6SSteve Lawrence 	}
1372*efd4c9b6SSteve Lawrence }
1373*efd4c9b6SSteve Lawrence 
1374*efd4c9b6SSteve Lawrence /* Some utility functions for managing the list of processor sets */
1375*efd4c9b6SSteve Lawrence static zsd_pset_t *
1376*efd4c9b6SSteve Lawrence zsd_lookup_pset_byid(zsd_ctl_t *ctl, psetid_t psetid)
1377*efd4c9b6SSteve Lawrence {
1378*efd4c9b6SSteve Lawrence 	zsd_pset_t *pset;
1379*efd4c9b6SSteve Lawrence 
1380*efd4c9b6SSteve Lawrence 	for (pset = list_head(&ctl->zsctl_psets); pset != NULL;
1381*efd4c9b6SSteve Lawrence 	    pset = list_next(&ctl->zsctl_psets, pset)) {
1382*efd4c9b6SSteve Lawrence 		if (pset->zsp_id == psetid)
1383*efd4c9b6SSteve Lawrence 			return (pset);
1384*efd4c9b6SSteve Lawrence 	}
1385*efd4c9b6SSteve Lawrence 	return (NULL);
1386*efd4c9b6SSteve Lawrence }
1387*efd4c9b6SSteve Lawrence 
1388*efd4c9b6SSteve Lawrence static zsd_pset_t *
1389*efd4c9b6SSteve Lawrence zsd_lookup_pset(zsd_ctl_t *ctl, char *psetname, psetid_t psetid)
1390*efd4c9b6SSteve Lawrence {
1391*efd4c9b6SSteve Lawrence 	zsd_pset_t *pset;
1392*efd4c9b6SSteve Lawrence 
1393*efd4c9b6SSteve Lawrence 	for (pset = list_head(&ctl->zsctl_psets); pset != NULL;
1394*efd4c9b6SSteve Lawrence 	    pset = list_next(&ctl->zsctl_psets, pset)) {
1395*efd4c9b6SSteve Lawrence 		if (strcmp(pset->zsp_name, psetname) == 0) {
1396*efd4c9b6SSteve Lawrence 			if (psetid != -1)
1397*efd4c9b6SSteve Lawrence 				pset->zsp_id = psetid;
1398*efd4c9b6SSteve Lawrence 			return (pset);
1399*efd4c9b6SSteve Lawrence 		}
1400*efd4c9b6SSteve Lawrence 	}
1401*efd4c9b6SSteve Lawrence 	return (NULL);
1402*efd4c9b6SSteve Lawrence }
1403*efd4c9b6SSteve Lawrence 
1404*efd4c9b6SSteve Lawrence static zsd_pset_t *
1405*efd4c9b6SSteve Lawrence zsd_allocate_pset(zsd_ctl_t *ctl, char *psetname, psetid_t psetid)
1406*efd4c9b6SSteve Lawrence {
1407*efd4c9b6SSteve Lawrence 	zsd_pset_t *pset;
1408*efd4c9b6SSteve Lawrence 
1409*efd4c9b6SSteve Lawrence 	if ((pset = (zsd_pset_t *)calloc(1, sizeof (zsd_pset_t))) == NULL)
1410*efd4c9b6SSteve Lawrence 		return (NULL);
1411*efd4c9b6SSteve Lawrence 
1412*efd4c9b6SSteve Lawrence 	(void) strlcpy(pset->zsp_name, psetname, sizeof (pset->zsp_name));
1413*efd4c9b6SSteve Lawrence 	pset->zsp_id = psetid;
1414*efd4c9b6SSteve Lawrence 	pset->zsp_found = B_FALSE;
1415*efd4c9b6SSteve Lawrence 	/*
1416*efd4c9b6SSteve Lawrence 	 * Allocate as deleted so if not found in first pass, pset is deleted
1417*efd4c9b6SSteve Lawrence 	 * from list.  This can happen if pset is returned by pset_list, but
1418*efd4c9b6SSteve Lawrence 	 * is destroyed before first attempt to fetch pset details.
1419*efd4c9b6SSteve Lawrence 	 */
1420*efd4c9b6SSteve Lawrence 	list_create(&pset->zsp_usage_list, sizeof (zsd_pset_usage_t),
1421*efd4c9b6SSteve Lawrence 	    offsetof(zsd_pset_usage_t, zsu_next));
1422*efd4c9b6SSteve Lawrence 
1423*efd4c9b6SSteve Lawrence 	pset->zsp_hrstart = g_hrnow;
1424*efd4c9b6SSteve Lawrence 	pset->zsp_deleted = B_TRUE;
1425*efd4c9b6SSteve Lawrence 	pset->zsp_empty = B_TRUE;
1426*efd4c9b6SSteve Lawrence 	ctl->zsctl_npsets++;
1427*efd4c9b6SSteve Lawrence 
1428*efd4c9b6SSteve Lawrence 	return (pset);
1429*efd4c9b6SSteve Lawrence }
1430*efd4c9b6SSteve Lawrence 
1431*efd4c9b6SSteve Lawrence static zsd_pset_t *
1432*efd4c9b6SSteve Lawrence zsd_lookup_insert_pset(zsd_ctl_t *ctl, char *psetname, psetid_t psetid)
1433*efd4c9b6SSteve Lawrence {
1434*efd4c9b6SSteve Lawrence 	zsd_pset_t *pset, *tmp;
1435*efd4c9b6SSteve Lawrence 
1436*efd4c9b6SSteve Lawrence 	if ((pset = zsd_lookup_pset(ctl, psetname, psetid)) != NULL)
1437*efd4c9b6SSteve Lawrence 		return (pset);
1438*efd4c9b6SSteve Lawrence 
1439*efd4c9b6SSteve Lawrence 	if ((pset = zsd_allocate_pset(ctl, psetname, psetid)) == NULL)
1440*efd4c9b6SSteve Lawrence 		return (NULL);
1441*efd4c9b6SSteve Lawrence 
1442*efd4c9b6SSteve Lawrence 	/* Insert sorted by psetname */
1443*efd4c9b6SSteve Lawrence 	tmp = list_head(&ctl->zsctl_psets);
1444*efd4c9b6SSteve Lawrence 	while (tmp != NULL && strcmp(psetname, tmp->zsp_name) > 0)
1445*efd4c9b6SSteve Lawrence 		tmp = list_next(&ctl->zsctl_psets, tmp);
1446*efd4c9b6SSteve Lawrence 
1447*efd4c9b6SSteve Lawrence 	list_insert_before(&ctl->zsctl_psets, tmp, pset);
1448*efd4c9b6SSteve Lawrence 	return (pset);
1449*efd4c9b6SSteve Lawrence }
1450*efd4c9b6SSteve Lawrence 
1451*efd4c9b6SSteve Lawrence /* Some utility functions for managing the list of zones using each pset */
1452*efd4c9b6SSteve Lawrence static zsd_pset_usage_t *
1453*efd4c9b6SSteve Lawrence zsd_lookup_usage(zsd_pset_t *pset, zsd_zone_t *zone)
1454*efd4c9b6SSteve Lawrence {
1455*efd4c9b6SSteve Lawrence 	zsd_pset_usage_t *usage;
1456*efd4c9b6SSteve Lawrence 
1457*efd4c9b6SSteve Lawrence 	for (usage = list_head(&pset->zsp_usage_list); usage != NULL;
1458*efd4c9b6SSteve Lawrence 	    usage = list_next(&pset->zsp_usage_list, usage))
1459*efd4c9b6SSteve Lawrence 		if (usage->zsu_zone == zone)
1460*efd4c9b6SSteve Lawrence 			return (usage);
1461*efd4c9b6SSteve Lawrence 
1462*efd4c9b6SSteve Lawrence 	return (NULL);
1463*efd4c9b6SSteve Lawrence }
1464*efd4c9b6SSteve Lawrence 
1465*efd4c9b6SSteve Lawrence static zsd_pset_usage_t *
1466*efd4c9b6SSteve Lawrence zsd_allocate_pset_usage(zsd_ctl_t *ctl, zsd_pset_t *pset, zsd_zone_t *zone)
1467*efd4c9b6SSteve Lawrence {
1468*efd4c9b6SSteve Lawrence 	zsd_pset_usage_t *usage;
1469*efd4c9b6SSteve Lawrence 
1470*efd4c9b6SSteve Lawrence 	if ((usage = (zsd_pset_usage_t *)calloc(1, sizeof (zsd_pset_usage_t)))
1471*efd4c9b6SSteve Lawrence 	    == NULL)
1472*efd4c9b6SSteve Lawrence 		return (NULL);
1473*efd4c9b6SSteve Lawrence 
1474*efd4c9b6SSteve Lawrence 	list_link_init(&usage->zsu_next);
1475*efd4c9b6SSteve Lawrence 	usage->zsu_zone = zone;
1476*efd4c9b6SSteve Lawrence 	usage->zsu_zoneid = zone->zsz_id;
1477*efd4c9b6SSteve Lawrence 	usage->zsu_pset = pset;
1478*efd4c9b6SSteve Lawrence 	usage->zsu_found = B_FALSE;
1479*efd4c9b6SSteve Lawrence 	usage->zsu_active = B_FALSE;
1480*efd4c9b6SSteve Lawrence 	usage->zsu_new = B_FALSE;
1481*efd4c9b6SSteve Lawrence 	/*
1482*efd4c9b6SSteve Lawrence 	 * Allocate as not deleted.  If a process is found in a pset for
1483*efd4c9b6SSteve Lawrence 	 * a zone, the usage will not be deleted until at least the next
1484*efd4c9b6SSteve Lawrence 	 * interval.
1485*efd4c9b6SSteve Lawrence 	 */
1486*efd4c9b6SSteve Lawrence 	usage->zsu_start = g_now;
1487*efd4c9b6SSteve Lawrence 	usage->zsu_hrstart = g_hrnow;
1488*efd4c9b6SSteve Lawrence 	usage->zsu_deleted = B_FALSE;
1489*efd4c9b6SSteve Lawrence 	usage->zsu_empty = B_TRUE;
1490*efd4c9b6SSteve Lawrence 	usage->zsu_scheds = 0;
1491*efd4c9b6SSteve Lawrence 	usage->zsu_cpu_shares = ZS_LIMIT_NONE;
1492*efd4c9b6SSteve Lawrence 
1493*efd4c9b6SSteve Lawrence 	ctl->zsctl_npset_usages++;
1494*efd4c9b6SSteve Lawrence 	pset->zsp_nusage++;
1495*efd4c9b6SSteve Lawrence 
1496*efd4c9b6SSteve Lawrence 	return (usage);
1497*efd4c9b6SSteve Lawrence }
1498*efd4c9b6SSteve Lawrence 
1499*efd4c9b6SSteve Lawrence static zsd_pset_usage_t *
1500*efd4c9b6SSteve Lawrence zsd_lookup_insert_usage(zsd_ctl_t *ctl, zsd_pset_t *pset, zsd_zone_t *zone)
1501*efd4c9b6SSteve Lawrence {
1502*efd4c9b6SSteve Lawrence 	zsd_pset_usage_t *usage, *tmp;
1503*efd4c9b6SSteve Lawrence 
1504*efd4c9b6SSteve Lawrence 	if ((usage = zsd_lookup_usage(pset, zone))
1505*efd4c9b6SSteve Lawrence 	    != NULL)
1506*efd4c9b6SSteve Lawrence 		return (usage);
1507*efd4c9b6SSteve Lawrence 
1508*efd4c9b6SSteve Lawrence 	if ((usage = zsd_allocate_pset_usage(ctl, pset, zone)) == NULL)
1509*efd4c9b6SSteve Lawrence 		return (NULL);
1510*efd4c9b6SSteve Lawrence 
1511*efd4c9b6SSteve Lawrence 	tmp = list_head(&pset->zsp_usage_list);
1512*efd4c9b6SSteve Lawrence 	while (tmp != NULL && strcmp(zone->zsz_name, tmp->zsu_zone->zsz_name)
1513*efd4c9b6SSteve Lawrence 	    > 0)
1514*efd4c9b6SSteve Lawrence 		tmp = list_next(&pset->zsp_usage_list, tmp);
1515*efd4c9b6SSteve Lawrence 
1516*efd4c9b6SSteve Lawrence 	list_insert_before(&pset->zsp_usage_list, tmp, usage);
1517*efd4c9b6SSteve Lawrence 	return (usage);
1518*efd4c9b6SSteve Lawrence }
1519*efd4c9b6SSteve Lawrence 
1520*efd4c9b6SSteve Lawrence static void
1521*efd4c9b6SSteve Lawrence zsd_refresh_system(zsd_ctl_t *ctl)
1522*efd4c9b6SSteve Lawrence {
1523*efd4c9b6SSteve Lawrence 	zsd_system_t *system = ctl->zsctl_system;
1524*efd4c9b6SSteve Lawrence 
1525*efd4c9b6SSteve Lawrence 	/* Re-count these values each interval */
1526*efd4c9b6SSteve Lawrence 	system->zss_processes = 0;
1527*efd4c9b6SSteve Lawrence 	system->zss_lwps = 0;
1528*efd4c9b6SSteve Lawrence 	system->zss_shm = 0;
1529*efd4c9b6SSteve Lawrence 	system->zss_shmids = 0;
1530*efd4c9b6SSteve Lawrence 	system->zss_semids = 0;
1531*efd4c9b6SSteve Lawrence 	system->zss_msgids = 0;
1532*efd4c9b6SSteve Lawrence 	system->zss_lofi = 0;
1533*efd4c9b6SSteve Lawrence }
1534*efd4c9b6SSteve Lawrence 
1535*efd4c9b6SSteve Lawrence 
1536*efd4c9b6SSteve Lawrence /* Reads each cpu's kstats, and adds the usage to the cpu's pset */
1537*efd4c9b6SSteve Lawrence static void
1538*efd4c9b6SSteve Lawrence zsd_update_cpu_stats(zsd_ctl_t *ctl, zsd_cpu_t *cpu)
1539*efd4c9b6SSteve Lawrence {
1540*efd4c9b6SSteve Lawrence 	zsd_system_t *sys;
1541*efd4c9b6SSteve Lawrence 	processorid_t cpuid;
1542*efd4c9b6SSteve Lawrence 	zsd_pset_t *pset_prev;
1543*efd4c9b6SSteve Lawrence 	zsd_pset_t *pset;
1544*efd4c9b6SSteve Lawrence 	kstat_t *kstat;
1545*efd4c9b6SSteve Lawrence 	kstat_named_t *knp;
1546*efd4c9b6SSteve Lawrence 	kid_t kid;
1547*efd4c9b6SSteve Lawrence 	uint64_t idle, intr, kern, user;
1548*efd4c9b6SSteve Lawrence 
1549*efd4c9b6SSteve Lawrence 	sys = ctl->zsctl_system;
1550*efd4c9b6SSteve Lawrence 	pset = cpu->zsc_pset;
1551*efd4c9b6SSteve Lawrence 	knp = NULL;
1552*efd4c9b6SSteve Lawrence 	kid = -1;
1553*efd4c9b6SSteve Lawrence 	cpuid = cpu->zsc_id;
1554*efd4c9b6SSteve Lawrence 
1555*efd4c9b6SSteve Lawrence 	/* Get the cpu time totals for this cpu */
1556*efd4c9b6SSteve Lawrence 	kstat = kstat_lookup(ctl->zsctl_kstat_ctl, "cpu", cpuid, "sys");
1557*efd4c9b6SSteve Lawrence 	if (kstat == NULL)
1558*efd4c9b6SSteve Lawrence 		return;
1559*efd4c9b6SSteve Lawrence 
1560*efd4c9b6SSteve Lawrence 	kid = kstat_read(ctl->zsctl_kstat_ctl, kstat, NULL);
1561*efd4c9b6SSteve Lawrence 	if (kid == -1)
1562*efd4c9b6SSteve Lawrence 		return;
1563*efd4c9b6SSteve Lawrence 
1564*efd4c9b6SSteve Lawrence 	knp = kstat_data_lookup(kstat, "cpu_nsec_idle");
1565*efd4c9b6SSteve Lawrence 	if (knp == NULL || knp->data_type != KSTAT_DATA_UINT64)
1566*efd4c9b6SSteve Lawrence 		return;
1567*efd4c9b6SSteve Lawrence 
1568*efd4c9b6SSteve Lawrence 	idle = knp->value.ui64;
1569*efd4c9b6SSteve Lawrence 
1570*efd4c9b6SSteve Lawrence 	knp = kstat_data_lookup(kstat, "cpu_nsec_kernel");
1571*efd4c9b6SSteve Lawrence 	if (knp == NULL || knp->data_type != KSTAT_DATA_UINT64)
1572*efd4c9b6SSteve Lawrence 		return;
1573*efd4c9b6SSteve Lawrence 
1574*efd4c9b6SSteve Lawrence 	kern = knp->value.ui64;
1575*efd4c9b6SSteve Lawrence 
1576*efd4c9b6SSteve Lawrence 	knp = kstat_data_lookup(kstat, "cpu_nsec_user");
1577*efd4c9b6SSteve Lawrence 	if (knp == NULL || knp->data_type != KSTAT_DATA_UINT64)
1578*efd4c9b6SSteve Lawrence 		return;
1579*efd4c9b6SSteve Lawrence 
1580*efd4c9b6SSteve Lawrence 	user = knp->value.ui64;
1581*efd4c9b6SSteve Lawrence 
1582*efd4c9b6SSteve Lawrence 	/*
1583*efd4c9b6SSteve Lawrence 	 * Tracking intr time per cpu just exists for future enhancements.
1584*efd4c9b6SSteve Lawrence 	 * The value is presently always zero.
1585*efd4c9b6SSteve Lawrence 	 */
1586*efd4c9b6SSteve Lawrence 	intr = 0;
1587*efd4c9b6SSteve Lawrence 	cpu->zsc_nsec_idle = idle;
1588*efd4c9b6SSteve Lawrence 	cpu->zsc_nsec_intr = intr;
1589*efd4c9b6SSteve Lawrence 	cpu->zsc_nsec_kern = kern;
1590*efd4c9b6SSteve Lawrence 	cpu->zsc_nsec_user = user;
1591*efd4c9b6SSteve Lawrence 
1592*efd4c9b6SSteve Lawrence 	if (cpu->zsc_onlined == B_TRUE) {
1593*efd4c9b6SSteve Lawrence 		/*
1594*efd4c9b6SSteve Lawrence 		 * cpu is newly online.  There is no reference value,
1595*efd4c9b6SSteve Lawrence 		 * so just record its current stats for comparison
1596*efd4c9b6SSteve Lawrence 		 * on next stat read.
1597*efd4c9b6SSteve Lawrence 		 */
1598*efd4c9b6SSteve Lawrence 		cpu->zsc_nsec_idle_prev = cpu->zsc_nsec_idle;
1599*efd4c9b6SSteve Lawrence 		cpu->zsc_nsec_intr_prev = cpu->zsc_nsec_intr;
1600*efd4c9b6SSteve Lawrence 		cpu->zsc_nsec_kern_prev = cpu->zsc_nsec_kern;
1601*efd4c9b6SSteve Lawrence 		cpu->zsc_nsec_user_prev = cpu->zsc_nsec_user;
1602*efd4c9b6SSteve Lawrence 		return;
1603*efd4c9b6SSteve Lawrence 	}
1604*efd4c9b6SSteve Lawrence 
1605*efd4c9b6SSteve Lawrence 	/*
1606*efd4c9b6SSteve Lawrence 	 * Calculate relative time since previous refresh.
1607*efd4c9b6SSteve Lawrence 	 * Paranoia.  Don't let time  go backwards.
1608*efd4c9b6SSteve Lawrence 	 */
1609*efd4c9b6SSteve Lawrence 	idle = intr = kern = user = 0;
1610*efd4c9b6SSteve Lawrence 	if (cpu->zsc_nsec_idle > cpu->zsc_nsec_idle_prev)
1611*efd4c9b6SSteve Lawrence 		idle = cpu->zsc_nsec_idle - cpu->zsc_nsec_idle_prev;
1612*efd4c9b6SSteve Lawrence 
1613*efd4c9b6SSteve Lawrence 	if (cpu->zsc_nsec_intr > cpu->zsc_nsec_intr_prev)
1614*efd4c9b6SSteve Lawrence 		intr = cpu->zsc_nsec_intr - cpu->zsc_nsec_intr_prev;
1615*efd4c9b6SSteve Lawrence 
1616*efd4c9b6SSteve Lawrence 	if (cpu->zsc_nsec_kern > cpu->zsc_nsec_kern_prev)
1617*efd4c9b6SSteve Lawrence 		kern = cpu->zsc_nsec_kern - cpu->zsc_nsec_kern_prev;
1618*efd4c9b6SSteve Lawrence 
1619*efd4c9b6SSteve Lawrence 	if (cpu->zsc_nsec_user > cpu->zsc_nsec_user_prev)
1620*efd4c9b6SSteve Lawrence 		user = cpu->zsc_nsec_user - cpu->zsc_nsec_user_prev;
1621*efd4c9b6SSteve Lawrence 
1622*efd4c9b6SSteve Lawrence 	/* Update totals for cpu usage */
1623*efd4c9b6SSteve Lawrence 	TIMESTRUC_ADD_NANOSEC(cpu->zsc_idle, idle);
1624*efd4c9b6SSteve Lawrence 	TIMESTRUC_ADD_NANOSEC(cpu->zsc_intr, intr);
1625*efd4c9b6SSteve Lawrence 	TIMESTRUC_ADD_NANOSEC(cpu->zsc_kern, kern);
1626*efd4c9b6SSteve Lawrence 	TIMESTRUC_ADD_NANOSEC(cpu->zsc_user, user);
1627*efd4c9b6SSteve Lawrence 
1628*efd4c9b6SSteve Lawrence 	/*
1629*efd4c9b6SSteve Lawrence 	 * Add cpu's stats to its pset if it is known to be in
1630*efd4c9b6SSteve Lawrence 	 * the pset since previous read.
1631*efd4c9b6SSteve Lawrence 	 */
1632*efd4c9b6SSteve Lawrence 	if (cpu->zsc_psetid == cpu->zsc_psetid_prev ||
1633*efd4c9b6SSteve Lawrence 	    cpu->zsc_psetid_prev == ZS_PSET_ERROR ||
1634*efd4c9b6SSteve Lawrence 	    (pset_prev = zsd_lookup_pset_byid(ctl,
1635*efd4c9b6SSteve Lawrence 	    cpu->zsc_psetid_prev)) == NULL) {
1636*efd4c9b6SSteve Lawrence 		TIMESTRUC_ADD_NANOSEC(pset->zsp_idle, idle);
1637*efd4c9b6SSteve Lawrence 		TIMESTRUC_ADD_NANOSEC(pset->zsp_intr, intr);
1638*efd4c9b6SSteve Lawrence 		TIMESTRUC_ADD_NANOSEC(pset->zsp_kern, kern);
1639*efd4c9b6SSteve Lawrence 		TIMESTRUC_ADD_NANOSEC(pset->zsp_user, user);
1640*efd4c9b6SSteve Lawrence 	} else {
1641*efd4c9b6SSteve Lawrence 		/*
1642*efd4c9b6SSteve Lawrence 		 * Last pset was different than current pset.
1643*efd4c9b6SSteve Lawrence 		 * Best guess is to split usage between the two.
1644*efd4c9b6SSteve Lawrence 		 */
1645*efd4c9b6SSteve Lawrence 		TIMESTRUC_ADD_NANOSEC(pset_prev->zsp_idle, idle / 2);
1646*efd4c9b6SSteve Lawrence 		TIMESTRUC_ADD_NANOSEC(pset_prev->zsp_intr, intr / 2);
1647*efd4c9b6SSteve Lawrence 		TIMESTRUC_ADD_NANOSEC(pset_prev->zsp_kern, kern / 2);
1648*efd4c9b6SSteve Lawrence 		TIMESTRUC_ADD_NANOSEC(pset_prev->zsp_user, user / 2);
1649*efd4c9b6SSteve Lawrence 
1650*efd4c9b6SSteve Lawrence 		TIMESTRUC_ADD_NANOSEC(pset->zsp_idle,
1651*efd4c9b6SSteve Lawrence 		    (idle / 2) + (idle % 2));
1652*efd4c9b6SSteve Lawrence 		TIMESTRUC_ADD_NANOSEC(pset->zsp_intr,
1653*efd4c9b6SSteve Lawrence 		    (intr / 2) + (intr % 2));
1654*efd4c9b6SSteve Lawrence 		TIMESTRUC_ADD_NANOSEC(pset->zsp_kern,
1655*efd4c9b6SSteve Lawrence 		    (kern / 2) + (kern % 2));
1656*efd4c9b6SSteve Lawrence 		TIMESTRUC_ADD_NANOSEC(pset->zsp_user,
1657*efd4c9b6SSteve Lawrence 		    (user / 2) + (user % 2));
1658*efd4c9b6SSteve Lawrence 	}
1659*efd4c9b6SSteve Lawrence 	TIMESTRUC_ADD_NANOSEC(sys->zss_idle, idle);
1660*efd4c9b6SSteve Lawrence 	TIMESTRUC_ADD_NANOSEC(sys->zss_intr, intr);
1661*efd4c9b6SSteve Lawrence 	TIMESTRUC_ADD_NANOSEC(sys->zss_kern, kern);
1662*efd4c9b6SSteve Lawrence 	TIMESTRUC_ADD_NANOSEC(sys->zss_user, user);
1663*efd4c9b6SSteve Lawrence }
1664*efd4c9b6SSteve Lawrence 
1665*efd4c9b6SSteve Lawrence /* Determine the details of a processor set by pset_id */
1666*efd4c9b6SSteve Lawrence static int
1667*efd4c9b6SSteve Lawrence zsd_get_pool_pset(zsd_ctl_t *ctl, psetid_t psetid, char *psetname,
1668*efd4c9b6SSteve Lawrence     size_t namelen, uint_t *cputype, uint64_t *online, uint64_t *size,
1669*efd4c9b6SSteve Lawrence     uint64_t *min, uint64_t *max, int64_t *importance)
1670*efd4c9b6SSteve Lawrence {
1671*efd4c9b6SSteve Lawrence 	uint_t old, num;
1672*efd4c9b6SSteve Lawrence 
1673*efd4c9b6SSteve Lawrence 	pool_conf_t *conf = ctl->zsctl_pool_conf;
1674*efd4c9b6SSteve Lawrence 	pool_value_t **vals = ctl->zsctl_pool_vals;
1675*efd4c9b6SSteve Lawrence 	pool_resource_t **res_list = NULL;
1676*efd4c9b6SSteve Lawrence 	pool_resource_t *pset;
1677*efd4c9b6SSteve Lawrence 	pool_component_t **cpus = NULL;
1678*efd4c9b6SSteve Lawrence 	processorid_t *cache;
1679*efd4c9b6SSteve Lawrence 	const char *string;
1680*efd4c9b6SSteve Lawrence 	uint64_t uint64;
1681*efd4c9b6SSteve Lawrence 	int64_t int64;
1682*efd4c9b6SSteve Lawrence 	int i, ret, type;
1683*efd4c9b6SSteve Lawrence 
1684*efd4c9b6SSteve Lawrence 	if (ctl->zsctl_pool_status == POOL_DISABLED) {
1685*efd4c9b6SSteve Lawrence 
1686*efd4c9b6SSteve Lawrence 		/*
1687*efd4c9b6SSteve Lawrence 		 * Inspect legacy psets
1688*efd4c9b6SSteve Lawrence 		 */
1689*efd4c9b6SSteve Lawrence 		for (;;) {
1690*efd4c9b6SSteve Lawrence 			old = num = ctl->zsctl_cpu_ncache;
1691*efd4c9b6SSteve Lawrence 			ret = pset_info(psetid, &type, &num,
1692*efd4c9b6SSteve Lawrence 			    ctl->zsctl_cpu_cache);
1693*efd4c9b6SSteve Lawrence 			if (ret < 0) {
1694*efd4c9b6SSteve Lawrence 				/* pset is gone.  Tell caller to retry */
1695*efd4c9b6SSteve Lawrence 				errno = EINTR;
1696*efd4c9b6SSteve Lawrence 				return (-1);
1697*efd4c9b6SSteve Lawrence 			}
1698*efd4c9b6SSteve Lawrence 			if (num <= old) {
1699*efd4c9b6SSteve Lawrence 			/* Success */
1700*efd4c9b6SSteve Lawrence 				break;
1701*efd4c9b6SSteve Lawrence 			}
1702*efd4c9b6SSteve Lawrence 			if ((cache = (processorid_t *)realloc(
1703*efd4c9b6SSteve Lawrence 			    ctl->zsctl_cpu_cache, num *
1704*efd4c9b6SSteve Lawrence 			    sizeof (processorid_t))) != NULL) {
1705*efd4c9b6SSteve Lawrence 				ctl->zsctl_cpu_ncache = num;
1706*efd4c9b6SSteve Lawrence 				ctl->zsctl_cpu_cache = cache;
1707*efd4c9b6SSteve Lawrence 			} else {
1708*efd4c9b6SSteve Lawrence 				/*
1709*efd4c9b6SSteve Lawrence 				 * Could not allocate to get new cpu list.
1710*efd4c9b6SSteve Lawrence 				 */
1711*efd4c9b6SSteve Lawrence 				zsd_warn(gettext(
1712*efd4c9b6SSteve Lawrence 				    "Could not allocate for cpu list"));
1713*efd4c9b6SSteve Lawrence 				errno = ENOMEM;
1714*efd4c9b6SSteve Lawrence 				return (-1);
1715*efd4c9b6SSteve Lawrence 			}
1716*efd4c9b6SSteve Lawrence 		}
1717*efd4c9b6SSteve Lawrence 		/*
1718*efd4c9b6SSteve Lawrence 		 * Old school pset.  Just make min and max equal
1719*efd4c9b6SSteve Lawrence 		 * to its size
1720*efd4c9b6SSteve Lawrence 		 */
1721*efd4c9b6SSteve Lawrence 		if (psetid == ZS_PSET_DEFAULT) {
1722*efd4c9b6SSteve Lawrence 			*cputype = ZS_CPUTYPE_DEFAULT_PSET;
1723*efd4c9b6SSteve Lawrence 			(void) strlcpy(psetname, "pset_default", namelen);
1724*efd4c9b6SSteve Lawrence 		} else {
1725*efd4c9b6SSteve Lawrence 			*cputype = ZS_CPUTYPE_PSRSET_PSET;
1726*efd4c9b6SSteve Lawrence 			(void) snprintf(psetname, namelen,
1727*efd4c9b6SSteve Lawrence 			    "SUNWlegacy_pset_%d", psetid);
1728*efd4c9b6SSteve Lawrence 		}
1729*efd4c9b6SSteve Lawrence 
1730*efd4c9b6SSteve Lawrence 		/*
1731*efd4c9b6SSteve Lawrence 		 * Just treat legacy pset as a simple pool pset
1732*efd4c9b6SSteve Lawrence 		 */
1733*efd4c9b6SSteve Lawrence 		*online = num;
1734*efd4c9b6SSteve Lawrence 		*size = num;
1735*efd4c9b6SSteve Lawrence 		*min = num;
1736*efd4c9b6SSteve Lawrence 		*max = num;
1737*efd4c9b6SSteve Lawrence 		*importance = 1;
1738*efd4c9b6SSteve Lawrence 
1739*efd4c9b6SSteve Lawrence 		return (0);
1740*efd4c9b6SSteve Lawrence 	}
1741*efd4c9b6SSteve Lawrence 
1742*efd4c9b6SSteve Lawrence 	/* Look up the pool pset using the pset id */
1743*efd4c9b6SSteve Lawrence 	res_list = NULL;
1744*efd4c9b6SSteve Lawrence 	pool_value_set_int64(vals[1], psetid);
1745*efd4c9b6SSteve Lawrence 	if (pool_value_set_name(vals[1], "pset.sys_id")
1746*efd4c9b6SSteve Lawrence 	    != PO_SUCCESS)
1747*efd4c9b6SSteve Lawrence 		goto err;
1748*efd4c9b6SSteve Lawrence 
1749*efd4c9b6SSteve Lawrence 	if (pool_value_set_name(vals[0], "type") != PO_SUCCESS)
1750*efd4c9b6SSteve Lawrence 		goto err;
1751*efd4c9b6SSteve Lawrence 	if (pool_value_set_string(vals[0], "pset") != PO_SUCCESS)
1752*efd4c9b6SSteve Lawrence 		goto err;
1753*efd4c9b6SSteve Lawrence 	if ((res_list = pool_query_resources(conf, &num, vals)) == NULL)
1754*efd4c9b6SSteve Lawrence 		goto err;
1755*efd4c9b6SSteve Lawrence 	if (num != 1)
1756*efd4c9b6SSteve Lawrence 		goto err;
1757*efd4c9b6SSteve Lawrence 	pset = res_list[0];
1758*efd4c9b6SSteve Lawrence 	free(res_list);
1759*efd4c9b6SSteve Lawrence 	res_list = NULL;
1760*efd4c9b6SSteve Lawrence 	if (pool_get_property(conf, pool_resource_to_elem(conf, pset),
1761*efd4c9b6SSteve Lawrence 	    "pset.name", vals[0]) != POC_STRING ||
1762*efd4c9b6SSteve Lawrence 	    pool_value_get_string(vals[0], &string) != PO_SUCCESS)
1763*efd4c9b6SSteve Lawrence 		goto err;
1764*efd4c9b6SSteve Lawrence 
1765*efd4c9b6SSteve Lawrence 	(void) strlcpy(psetname, string, namelen);
1766*efd4c9b6SSteve Lawrence 	if (strncmp(psetname, "SUNWtmp", strlen("SUNWtmp")) == 0)
1767*efd4c9b6SSteve Lawrence 		*cputype = ZS_CPUTYPE_DEDICATED;
1768*efd4c9b6SSteve Lawrence 	else if (psetid == ZS_PSET_DEFAULT)
1769*efd4c9b6SSteve Lawrence 		*cputype = ZS_CPUTYPE_DEFAULT_PSET;
1770*efd4c9b6SSteve Lawrence 	else
1771*efd4c9b6SSteve Lawrence 		*cputype = ZS_CPUTYPE_POOL_PSET;
1772*efd4c9b6SSteve Lawrence 
1773*efd4c9b6SSteve Lawrence 	/* Get size, min, max, and importance */
1774*efd4c9b6SSteve Lawrence 	if (pool_get_property(conf, pool_resource_to_elem(conf,
1775*efd4c9b6SSteve Lawrence 	    pset), "pset.size", vals[0]) == POC_UINT &&
1776*efd4c9b6SSteve Lawrence 	    pool_value_get_uint64(vals[0], &uint64) == PO_SUCCESS)
1777*efd4c9b6SSteve Lawrence 		*size = uint64;
1778*efd4c9b6SSteve Lawrence 	else
1779*efd4c9b6SSteve Lawrence 		*size = 0;
1780*efd4c9b6SSteve Lawrence 
1781*efd4c9b6SSteve Lawrence 		/* Get size, min, max, and importance */
1782*efd4c9b6SSteve Lawrence 	if (pool_get_property(conf, pool_resource_to_elem(conf,
1783*efd4c9b6SSteve Lawrence 	    pset), "pset.min", vals[0]) == POC_UINT &&
1784*efd4c9b6SSteve Lawrence 	    pool_value_get_uint64(vals[0], &uint64) == PO_SUCCESS)
1785*efd4c9b6SSteve Lawrence 		*min = uint64;
1786*efd4c9b6SSteve Lawrence 	else
1787*efd4c9b6SSteve Lawrence 		*min = 0;
1788*efd4c9b6SSteve Lawrence 	if (*min >= ZSD_PSET_UNLIMITED)
1789*efd4c9b6SSteve Lawrence 		*min = ZS_LIMIT_NONE;
1790*efd4c9b6SSteve Lawrence 
1791*efd4c9b6SSteve Lawrence 	if (pool_get_property(conf, pool_resource_to_elem(conf,
1792*efd4c9b6SSteve Lawrence 	    pset), "pset.max", vals[0]) == POC_UINT &&
1793*efd4c9b6SSteve Lawrence 	    pool_value_get_uint64(vals[0], &uint64) == PO_SUCCESS)
1794*efd4c9b6SSteve Lawrence 		*max = uint64;
1795*efd4c9b6SSteve Lawrence 	else
1796*efd4c9b6SSteve Lawrence 		*max = ZS_LIMIT_NONE;
1797*efd4c9b6SSteve Lawrence 
1798*efd4c9b6SSteve Lawrence 	if (*max >= ZSD_PSET_UNLIMITED)
1799*efd4c9b6SSteve Lawrence 		*max = ZS_LIMIT_NONE;
1800*efd4c9b6SSteve Lawrence 
1801*efd4c9b6SSteve Lawrence 	if (pool_get_property(conf, pool_resource_to_elem(conf,
1802*efd4c9b6SSteve Lawrence 	    pset), "pset.importance", vals[0]) == POC_INT &&
1803*efd4c9b6SSteve Lawrence 	    pool_value_get_int64(vals[0], &int64) == PO_SUCCESS)
1804*efd4c9b6SSteve Lawrence 		*importance = int64;
1805*efd4c9b6SSteve Lawrence 	else
1806*efd4c9b6SSteve Lawrence 		*importance = (uint64_t)1;
1807*efd4c9b6SSteve Lawrence 
1808*efd4c9b6SSteve Lawrence 	*online = 0;
1809*efd4c9b6SSteve Lawrence 	if (*size == 0)
1810*efd4c9b6SSteve Lawrence 		return (0);
1811*efd4c9b6SSteve Lawrence 
1812*efd4c9b6SSteve Lawrence 	/* get cpus */
1813*efd4c9b6SSteve Lawrence 	cpus = pool_query_resource_components(conf, pset, &num, NULL);
1814*efd4c9b6SSteve Lawrence 	if (cpus == NULL)
1815*efd4c9b6SSteve Lawrence 		goto err;
1816*efd4c9b6SSteve Lawrence 
1817*efd4c9b6SSteve Lawrence 	/* Make sure there is space for cpu id list */
1818*efd4c9b6SSteve Lawrence 	if (num > ctl->zsctl_cpu_ncache) {
1819*efd4c9b6SSteve Lawrence 		if ((cache = (processorid_t *)realloc(
1820*efd4c9b6SSteve Lawrence 		    ctl->zsctl_cpu_cache, num *
1821*efd4c9b6SSteve Lawrence 		    sizeof (processorid_t))) != NULL) {
1822*efd4c9b6SSteve Lawrence 			ctl->zsctl_cpu_ncache = num;
1823*efd4c9b6SSteve Lawrence 			ctl->zsctl_cpu_cache = cache;
1824*efd4c9b6SSteve Lawrence 		} else {
1825*efd4c9b6SSteve Lawrence 			/*
1826*efd4c9b6SSteve Lawrence 			 * Could not allocate to get new cpu list.
1827*efd4c9b6SSteve Lawrence 			 */
1828*efd4c9b6SSteve Lawrence 			zsd_warn(gettext(
1829*efd4c9b6SSteve Lawrence 			    "Could not allocate for cpu list"));
1830*efd4c9b6SSteve Lawrence 			goto err;
1831*efd4c9b6SSteve Lawrence 		}
1832*efd4c9b6SSteve Lawrence 	}
1833*efd4c9b6SSteve Lawrence 
1834*efd4c9b6SSteve Lawrence 	/* count the online cpus */
1835*efd4c9b6SSteve Lawrence 	for (i = 0; i < num; i++) {
1836*efd4c9b6SSteve Lawrence 		if (pool_get_property(conf, pool_component_to_elem(
1837*efd4c9b6SSteve Lawrence 		    conf, cpus[i]), "cpu.status", vals[0]) != POC_STRING ||
1838*efd4c9b6SSteve Lawrence 		    pool_value_get_string(vals[0], &string) != PO_SUCCESS)
1839*efd4c9b6SSteve Lawrence 			goto err;
1840*efd4c9b6SSteve Lawrence 
1841*efd4c9b6SSteve Lawrence 		if (strcmp(string, "on-line") != 0 &&
1842*efd4c9b6SSteve Lawrence 		    strcmp(string, "no-intr") != 0)
1843*efd4c9b6SSteve Lawrence 			continue;
1844*efd4c9b6SSteve Lawrence 
1845*efd4c9b6SSteve Lawrence 		if (pool_get_property(conf, pool_component_to_elem(
1846*efd4c9b6SSteve Lawrence 		    conf, cpus[i]), "cpu.sys_id", vals[0]) != POC_INT ||
1847*efd4c9b6SSteve Lawrence 		    pool_value_get_int64(vals[0], &int64) != PO_SUCCESS)
1848*efd4c9b6SSteve Lawrence 			goto err;
1849*efd4c9b6SSteve Lawrence 
1850*efd4c9b6SSteve Lawrence 		(*online)++;
1851*efd4c9b6SSteve Lawrence 		ctl->zsctl_cpu_cache[i] = (psetid_t)int64;
1852*efd4c9b6SSteve Lawrence 	}
1853*efd4c9b6SSteve Lawrence 	free(cpus);
1854*efd4c9b6SSteve Lawrence 	return (0);
1855*efd4c9b6SSteve Lawrence err:
1856*efd4c9b6SSteve Lawrence 	if (res_list != NULL)
1857*efd4c9b6SSteve Lawrence 		free(res_list);
1858*efd4c9b6SSteve Lawrence 	if (cpus != NULL)
1859*efd4c9b6SSteve Lawrence 		free(cpus);
1860*efd4c9b6SSteve Lawrence 
1861*efd4c9b6SSteve Lawrence 	/*
1862*efd4c9b6SSteve Lawrence 	 * The pools operations should succeed since the conf is a consistent
1863*efd4c9b6SSteve Lawrence 	 * snapshot.  Tell caller there is no need to retry.
1864*efd4c9b6SSteve Lawrence 	 */
1865*efd4c9b6SSteve Lawrence 	errno = EINVAL;
1866*efd4c9b6SSteve Lawrence 	return (-1);
1867*efd4c9b6SSteve Lawrence }
1868*efd4c9b6SSteve Lawrence 
1869*efd4c9b6SSteve Lawrence /*
1870*efd4c9b6SSteve Lawrence  * Update the current list of processor sets.
1871*efd4c9b6SSteve Lawrence  * This also updates the list of online cpus, and each cpu's pset membership.
1872*efd4c9b6SSteve Lawrence  */
1873*efd4c9b6SSteve Lawrence static void
1874*efd4c9b6SSteve Lawrence zsd_refresh_psets(zsd_ctl_t *ctl)
1875*efd4c9b6SSteve Lawrence {
1876*efd4c9b6SSteve Lawrence 	int i, j, ret, state;
1877*efd4c9b6SSteve Lawrence 	uint_t old, num;
1878*efd4c9b6SSteve Lawrence 	uint_t cputype;
1879*efd4c9b6SSteve Lawrence 	int64_t sys_id, importance;
1880*efd4c9b6SSteve Lawrence 	uint64_t online, size, min, max;
1881*efd4c9b6SSteve Lawrence 	zsd_system_t *system;
1882*efd4c9b6SSteve Lawrence 	zsd_pset_t *pset;
1883*efd4c9b6SSteve Lawrence 	zsd_cpu_t *cpu;
1884*efd4c9b6SSteve Lawrence 	psetid_t *cache;
1885*efd4c9b6SSteve Lawrence 	char psetname[ZS_PSETNAME_MAX];
1886*efd4c9b6SSteve Lawrence 	processorid_t cpuid;
1887*efd4c9b6SSteve Lawrence 	pool_value_t *pv_save = NULL;
1888*efd4c9b6SSteve Lawrence 	pool_resource_t **res_list = NULL;
1889*efd4c9b6SSteve Lawrence 	pool_resource_t *res;
1890*efd4c9b6SSteve Lawrence 	pool_value_t **vals;
1891*efd4c9b6SSteve Lawrence 	pool_conf_t *conf;
1892*efd4c9b6SSteve Lawrence 	boolean_t roll_cpus = B_TRUE;
1893*efd4c9b6SSteve Lawrence 
1894*efd4c9b6SSteve Lawrence 	/* Zero cpu counters to recount them */
1895*efd4c9b6SSteve Lawrence 	system = ctl->zsctl_system;
1896*efd4c9b6SSteve Lawrence 	system->zss_ncpus = 0;
1897*efd4c9b6SSteve Lawrence 	system->zss_ncpus_online = 0;
1898*efd4c9b6SSteve Lawrence retry:
1899*efd4c9b6SSteve Lawrence 	ret = pool_get_status(&state);
1900*efd4c9b6SSteve Lawrence 	if (ret == 0 && state == POOL_ENABLED) {
1901*efd4c9b6SSteve Lawrence 
1902*efd4c9b6SSteve Lawrence 		conf = ctl->zsctl_pool_conf;
1903*efd4c9b6SSteve Lawrence 		vals = ctl->zsctl_pool_vals;
1904*efd4c9b6SSteve Lawrence 		pv_save = vals[1];
1905*efd4c9b6SSteve Lawrence 		vals[1] = NULL;
1906*efd4c9b6SSteve Lawrence 
1907*efd4c9b6SSteve Lawrence 		if (ctl->zsctl_pool_status == POOL_DISABLED) {
1908*efd4c9b6SSteve Lawrence 			if (pool_conf_open(ctl->zsctl_pool_conf,
1909*efd4c9b6SSteve Lawrence 			    pool_dynamic_location(), PO_RDONLY) == 0) {
1910*efd4c9b6SSteve Lawrence 				ctl->zsctl_pool_status = POOL_ENABLED;
1911*efd4c9b6SSteve Lawrence 				ctl->zsctl_pool_changed = POU_PSET;
1912*efd4c9b6SSteve Lawrence 			}
1913*efd4c9b6SSteve Lawrence 		} else {
1914*efd4c9b6SSteve Lawrence 			ctl->zsctl_pool_changed = 0;
1915*efd4c9b6SSteve Lawrence 			ret = pool_conf_update(ctl->zsctl_pool_conf,
1916*efd4c9b6SSteve Lawrence 			    &(ctl->zsctl_pool_changed));
1917*efd4c9b6SSteve Lawrence 			if (ret < 0) {
1918*efd4c9b6SSteve Lawrence 				/* Pools must have become disabled */
1919*efd4c9b6SSteve Lawrence 				(void) pool_conf_close(ctl->zsctl_pool_conf);
1920*efd4c9b6SSteve Lawrence 				ctl->zsctl_pool_status = POOL_DISABLED;
1921*efd4c9b6SSteve Lawrence 				if (pool_error() == POE_SYSTEM && errno ==
1922*efd4c9b6SSteve Lawrence 				    ENOTACTIVE)
1923*efd4c9b6SSteve Lawrence 					goto retry;
1924*efd4c9b6SSteve Lawrence 
1925*efd4c9b6SSteve Lawrence 				zsd_warn(gettext(
1926*efd4c9b6SSteve Lawrence 				    "Unable to update pool configuration"));
1927*efd4c9b6SSteve Lawrence 				/* Not able to get pool info.  Don't update. */
1928*efd4c9b6SSteve Lawrence 				goto err;
1929*efd4c9b6SSteve Lawrence 			}
1930*efd4c9b6SSteve Lawrence 		}
1931*efd4c9b6SSteve Lawrence 		/* Get the list of psets using libpool */
1932*efd4c9b6SSteve Lawrence 		if (pool_value_set_name(vals[0], "type") != PO_SUCCESS)
1933*efd4c9b6SSteve Lawrence 			goto err;
1934*efd4c9b6SSteve Lawrence 
1935*efd4c9b6SSteve Lawrence 		if (pool_value_set_string(vals[0], "pset") != PO_SUCCESS)
1936*efd4c9b6SSteve Lawrence 			goto err;
1937*efd4c9b6SSteve Lawrence 		if ((res_list = pool_query_resources(conf, &num, vals))
1938*efd4c9b6SSteve Lawrence 		    == NULL)
1939*efd4c9b6SSteve Lawrence 			goto err;
1940*efd4c9b6SSteve Lawrence 
1941*efd4c9b6SSteve Lawrence 		if (num > ctl->zsctl_pset_ncache)  {
1942*efd4c9b6SSteve Lawrence 			if ((cache = (psetid_t *)realloc(ctl->zsctl_pset_cache,
1943*efd4c9b6SSteve Lawrence 			    (num) * sizeof (psetid_t))) == NULL) {
1944*efd4c9b6SSteve Lawrence 				goto err;
1945*efd4c9b6SSteve Lawrence 			}
1946*efd4c9b6SSteve Lawrence 			ctl->zsctl_pset_ncache = num;
1947*efd4c9b6SSteve Lawrence 			ctl->zsctl_pset_cache = cache;
1948*efd4c9b6SSteve Lawrence 		}
1949*efd4c9b6SSteve Lawrence 		/* Save the pset id of each pset */
1950*efd4c9b6SSteve Lawrence 		for (i = 0; i < num; i++) {
1951*efd4c9b6SSteve Lawrence 			res = res_list[i];
1952*efd4c9b6SSteve Lawrence 			if (pool_get_property(conf, pool_resource_to_elem(conf,
1953*efd4c9b6SSteve Lawrence 			    res), "pset.sys_id", vals[0]) != POC_INT ||
1954*efd4c9b6SSteve Lawrence 			    pool_value_get_int64(vals[0], &sys_id)
1955*efd4c9b6SSteve Lawrence 			    != PO_SUCCESS)
1956*efd4c9b6SSteve Lawrence 				goto err;
1957*efd4c9b6SSteve Lawrence 			ctl->zsctl_pset_cache[i] = (int)sys_id;
1958*efd4c9b6SSteve Lawrence 		}
1959*efd4c9b6SSteve Lawrence 		vals[1] = pv_save;
1960*efd4c9b6SSteve Lawrence 		pv_save = NULL;
1961*efd4c9b6SSteve Lawrence 	} else {
1962*efd4c9b6SSteve Lawrence 		if (ctl->zsctl_pool_status == POOL_ENABLED) {
1963*efd4c9b6SSteve Lawrence 			(void) pool_conf_close(ctl->zsctl_pool_conf);
1964*efd4c9b6SSteve Lawrence 			ctl->zsctl_pool_status = POOL_DISABLED;
1965*efd4c9b6SSteve Lawrence 		}
1966*efd4c9b6SSteve Lawrence 		/* Get the pset list using legacy psets */
1967*efd4c9b6SSteve Lawrence 		for (;;) {
1968*efd4c9b6SSteve Lawrence 			old = num = ctl->zsctl_pset_ncache;
1969*efd4c9b6SSteve Lawrence 			(void) pset_list(ctl->zsctl_pset_cache, &num);
1970*efd4c9b6SSteve Lawrence 			if ((num + 1) <= old) {
1971*efd4c9b6SSteve Lawrence 				break;
1972*efd4c9b6SSteve Lawrence 			}
1973*efd4c9b6SSteve Lawrence 			if ((cache = (psetid_t *)realloc(ctl->zsctl_pset_cache,
1974*efd4c9b6SSteve Lawrence 			    (num + 1) * sizeof (psetid_t))) != NULL) {
1975*efd4c9b6SSteve Lawrence 				ctl->zsctl_pset_ncache = num + 1;
1976*efd4c9b6SSteve Lawrence 				ctl->zsctl_pset_cache = cache;
1977*efd4c9b6SSteve Lawrence 			} else {
1978*efd4c9b6SSteve Lawrence 				/*
1979*efd4c9b6SSteve Lawrence 				 * Could not allocate to get new pset list.
1980*efd4c9b6SSteve Lawrence 				 * Give up
1981*efd4c9b6SSteve Lawrence 				 */
1982*efd4c9b6SSteve Lawrence 				return;
1983*efd4c9b6SSteve Lawrence 			}
1984*efd4c9b6SSteve Lawrence 		}
1985*efd4c9b6SSteve Lawrence 		/* Add the default pset to list */
1986*efd4c9b6SSteve Lawrence 		ctl->zsctl_pset_cache[num] = ctl->zsctl_pset_cache[0];
1987*efd4c9b6SSteve Lawrence 		ctl->zsctl_pset_cache[0] = ZS_PSET_DEFAULT;
1988*efd4c9b6SSteve Lawrence 		num++;
1989*efd4c9b6SSteve Lawrence 	}
1990*efd4c9b6SSteve Lawrence psets_changed:
1991*efd4c9b6SSteve Lawrence 	zsd_mark_cpus_start(ctl, roll_cpus);
1992*efd4c9b6SSteve Lawrence 	zsd_mark_psets_start(ctl);
1993*efd4c9b6SSteve Lawrence 	roll_cpus = B_FALSE;
1994*efd4c9b6SSteve Lawrence 
1995*efd4c9b6SSteve Lawrence 	/* Refresh cpu membership of all psets */
1996*efd4c9b6SSteve Lawrence 	for (i = 0; i < num; i++) {
1997*efd4c9b6SSteve Lawrence 
1998*efd4c9b6SSteve Lawrence 		/* Get pool pset information */
1999*efd4c9b6SSteve Lawrence 		sys_id = ctl->zsctl_pset_cache[i];
2000*efd4c9b6SSteve Lawrence 		if (zsd_get_pool_pset(ctl, sys_id, psetname, sizeof (psetname),
2001*efd4c9b6SSteve Lawrence 		    &cputype, &online, &size, &min, &max, &importance)
2002*efd4c9b6SSteve Lawrence 		    != 0) {
2003*efd4c9b6SSteve Lawrence 			if (errno == EINTR)
2004*efd4c9b6SSteve Lawrence 				goto psets_changed;
2005*efd4c9b6SSteve Lawrence 			zsd_warn(gettext("Failed to get info for pset %d"),
2006*efd4c9b6SSteve Lawrence 			    sys_id);
2007*efd4c9b6SSteve Lawrence 			continue;
2008*efd4c9b6SSteve Lawrence 		}
2009*efd4c9b6SSteve Lawrence 
2010*efd4c9b6SSteve Lawrence 		system->zss_ncpus += size;
2011*efd4c9b6SSteve Lawrence 		system->zss_ncpus_online += online;
2012*efd4c9b6SSteve Lawrence 
2013*efd4c9b6SSteve Lawrence 		pset = zsd_lookup_insert_pset(ctl, psetname,
2014*efd4c9b6SSteve Lawrence 		    ctl->zsctl_pset_cache[i]);
2015*efd4c9b6SSteve Lawrence 
2016*efd4c9b6SSteve Lawrence 		/* update pset info */
2017*efd4c9b6SSteve Lawrence 		zsd_mark_pset_found(pset, cputype, online, size, min,
2018*efd4c9b6SSteve Lawrence 		    max, importance);
2019*efd4c9b6SSteve Lawrence 
2020*efd4c9b6SSteve Lawrence 		/* update each cpu in pset */
2021*efd4c9b6SSteve Lawrence 		for (j = 0; j < pset->zsp_online; j++) {
2022*efd4c9b6SSteve Lawrence 			cpuid = ctl->zsctl_cpu_cache[j];
2023*efd4c9b6SSteve Lawrence 			cpu = zsd_lookup_insert_cpu(ctl, cpuid);
2024*efd4c9b6SSteve Lawrence 			zsd_mark_cpu_found(cpu, pset, sys_id);
2025*efd4c9b6SSteve Lawrence 		}
2026*efd4c9b6SSteve Lawrence 	}
2027*efd4c9b6SSteve Lawrence err:
2028*efd4c9b6SSteve Lawrence 	if (res_list != NULL)
2029*efd4c9b6SSteve Lawrence 		free(res_list);
2030*efd4c9b6SSteve Lawrence 	if (pv_save != NULL)
2031*efd4c9b6SSteve Lawrence 		vals[1] = pv_save;
2032*efd4c9b6SSteve Lawrence }
2033*efd4c9b6SSteve Lawrence 
2034*efd4c9b6SSteve Lawrence 
2035*efd4c9b6SSteve Lawrence 
2036*efd4c9b6SSteve Lawrence /*
2037*efd4c9b6SSteve Lawrence  * Fetch the current pool and pset name for the given zone.
2038*efd4c9b6SSteve Lawrence  */
2039*efd4c9b6SSteve Lawrence static void
2040*efd4c9b6SSteve Lawrence zsd_get_zone_pool_pset(zsd_ctl_t *ctl, zsd_zone_t *zone,
2041*efd4c9b6SSteve Lawrence     char *pool, int poollen, char *pset, int psetlen, uint_t *cputype)
2042*efd4c9b6SSteve Lawrence {
2043*efd4c9b6SSteve Lawrence 	poolid_t poolid;
2044*efd4c9b6SSteve Lawrence 	pool_t **pools = NULL;
2045*efd4c9b6SSteve Lawrence 	pool_resource_t **res_list = NULL;
2046*efd4c9b6SSteve Lawrence 	char poolname[ZS_POOLNAME_MAX];
2047*efd4c9b6SSteve Lawrence 	char psetname[ZS_PSETNAME_MAX];
2048*efd4c9b6SSteve Lawrence 	pool_conf_t *conf = ctl->zsctl_pool_conf;
2049*efd4c9b6SSteve Lawrence 	pool_value_t *pv_save = NULL;
2050*efd4c9b6SSteve Lawrence 	pool_value_t **vals = ctl->zsctl_pool_vals;
2051*efd4c9b6SSteve Lawrence 	const char *string;
2052*efd4c9b6SSteve Lawrence 	int ret;
2053*efd4c9b6SSteve Lawrence 	int64_t int64;
2054*efd4c9b6SSteve Lawrence 	uint_t num;
2055*efd4c9b6SSteve Lawrence 
2056*efd4c9b6SSteve Lawrence 	ret = zone_getattr(zone->zsz_id, ZONE_ATTR_POOLID,
2057*efd4c9b6SSteve Lawrence 	    &poolid, sizeof (poolid));
2058*efd4c9b6SSteve Lawrence 	if (ret < 0)
2059*efd4c9b6SSteve Lawrence 		goto lookup_done;
2060*efd4c9b6SSteve Lawrence 
2061*efd4c9b6SSteve Lawrence 	pv_save = vals[1];
2062*efd4c9b6SSteve Lawrence 	vals[1] = NULL;
2063*efd4c9b6SSteve Lawrence 	pools = NULL;
2064*efd4c9b6SSteve Lawrence 	res_list = NULL;
2065*efd4c9b6SSteve Lawrence 
2066*efd4c9b6SSteve Lawrence 	/* Default values if lookup fails */
2067*efd4c9b6SSteve Lawrence 	(void) strlcpy(poolname, "pool_default", sizeof (poolname));
2068*efd4c9b6SSteve Lawrence 	(void) strlcpy(psetname, "pset_default", sizeof (poolname));
2069*efd4c9b6SSteve Lawrence 	*cputype = ZS_CPUTYPE_DEFAULT_PSET;
2070*efd4c9b6SSteve Lawrence 
2071*efd4c9b6SSteve Lawrence 	/* no dedicated cpu if pools are disabled */
2072*efd4c9b6SSteve Lawrence 	if (ctl->zsctl_pool_status == POOL_DISABLED)
2073*efd4c9b6SSteve Lawrence 		goto lookup_done;
2074*efd4c9b6SSteve Lawrence 
2075*efd4c9b6SSteve Lawrence 	/* Get the pool name using the id */
2076*efd4c9b6SSteve Lawrence 	pool_value_set_int64(vals[0], poolid);
2077*efd4c9b6SSteve Lawrence 	if (pool_value_set_name(vals[0], "pool.sys_id") != PO_SUCCESS)
2078*efd4c9b6SSteve Lawrence 		goto lookup_done;
2079*efd4c9b6SSteve Lawrence 
2080*efd4c9b6SSteve Lawrence 	if ((pools = pool_query_pools(conf, &num, vals)) == NULL)
2081*efd4c9b6SSteve Lawrence 		goto lookup_done;
2082*efd4c9b6SSteve Lawrence 
2083*efd4c9b6SSteve Lawrence 	if (num != 1)
2084*efd4c9b6SSteve Lawrence 		goto lookup_done;
2085*efd4c9b6SSteve Lawrence 
2086*efd4c9b6SSteve Lawrence 	if (pool_get_property(conf, pool_to_elem(conf, pools[0]),
2087*efd4c9b6SSteve Lawrence 	    "pool.name", vals[0]) != POC_STRING ||
2088*efd4c9b6SSteve Lawrence 	    pool_value_get_string(vals[0], &string) != PO_SUCCESS)
2089*efd4c9b6SSteve Lawrence 		goto lookup_done;
2090*efd4c9b6SSteve Lawrence 	(void) strlcpy(poolname, (char *)string, sizeof (poolname));
2091*efd4c9b6SSteve Lawrence 
2092*efd4c9b6SSteve Lawrence 	/* Get the name of the pset for the pool */
2093*efd4c9b6SSteve Lawrence 	if (pool_value_set_name(vals[0], "type") != PO_SUCCESS)
2094*efd4c9b6SSteve Lawrence 		goto lookup_done;
2095*efd4c9b6SSteve Lawrence 
2096*efd4c9b6SSteve Lawrence 	if (pool_value_set_string(vals[0], "pset") != PO_SUCCESS)
2097*efd4c9b6SSteve Lawrence 		goto lookup_done;
2098*efd4c9b6SSteve Lawrence 
2099*efd4c9b6SSteve Lawrence 	if ((res_list = pool_query_pool_resources(conf, pools[0], &num, vals))
2100*efd4c9b6SSteve Lawrence 	    == NULL)
2101*efd4c9b6SSteve Lawrence 		goto lookup_done;
2102*efd4c9b6SSteve Lawrence 
2103*efd4c9b6SSteve Lawrence 	if (num != 1)
2104*efd4c9b6SSteve Lawrence 		goto lookup_done;
2105*efd4c9b6SSteve Lawrence 
2106*efd4c9b6SSteve Lawrence 	if (pool_get_property(conf, pool_resource_to_elem(conf,
2107*efd4c9b6SSteve Lawrence 	    res_list[0]), "pset.sys_id", vals[0]) != POC_INT ||
2108*efd4c9b6SSteve Lawrence 	    pool_value_get_int64(vals[0], &int64) != PO_SUCCESS)
2109*efd4c9b6SSteve Lawrence 		goto lookup_done;
2110*efd4c9b6SSteve Lawrence 
2111*efd4c9b6SSteve Lawrence 	if (int64 == ZS_PSET_DEFAULT)
2112*efd4c9b6SSteve Lawrence 		*cputype = ZS_CPUTYPE_DEFAULT_PSET;
2113*efd4c9b6SSteve Lawrence 
2114*efd4c9b6SSteve Lawrence 	if (pool_get_property(conf, pool_resource_to_elem(conf,
2115*efd4c9b6SSteve Lawrence 	    res_list[0]), "pset.name", vals[0]) != POC_STRING ||
2116*efd4c9b6SSteve Lawrence 	    pool_value_get_string(vals[0], &string) != PO_SUCCESS)
2117*efd4c9b6SSteve Lawrence 		goto lookup_done;
2118*efd4c9b6SSteve Lawrence 
2119*efd4c9b6SSteve Lawrence 	(void) strlcpy(psetname, (char *)string, sizeof (psetname));
2120*efd4c9b6SSteve Lawrence 
2121*efd4c9b6SSteve Lawrence 	if (strncmp(psetname, "SUNWtmp_", strlen("SUNWtmp_")) == 0)
2122*efd4c9b6SSteve Lawrence 		*cputype = ZS_CPUTYPE_DEDICATED;
2123*efd4c9b6SSteve Lawrence 	if (strncmp(psetname, "SUNW_legacy_", strlen("SUNW_legacy_")) == 0)
2124*efd4c9b6SSteve Lawrence 		*cputype = ZS_CPUTYPE_PSRSET_PSET;
2125*efd4c9b6SSteve Lawrence 	else
2126*efd4c9b6SSteve Lawrence 		*cputype = ZS_CPUTYPE_POOL_PSET;
2127*efd4c9b6SSteve Lawrence 
2128*efd4c9b6SSteve Lawrence lookup_done:
2129*efd4c9b6SSteve Lawrence 
2130*efd4c9b6SSteve Lawrence 	if (pv_save != NULL)
2131*efd4c9b6SSteve Lawrence 		vals[1] = pv_save;
2132*efd4c9b6SSteve Lawrence 
2133*efd4c9b6SSteve Lawrence 	if (res_list)
2134*efd4c9b6SSteve Lawrence 		free(res_list);
2135*efd4c9b6SSteve Lawrence 	if (pools)
2136*efd4c9b6SSteve Lawrence 		free(pools);
2137*efd4c9b6SSteve Lawrence 
2138*efd4c9b6SSteve Lawrence 	(void) strlcpy(pool, poolname, poollen);
2139*efd4c9b6SSteve Lawrence 	(void) strlcpy(pset, psetname, psetlen);
2140*efd4c9b6SSteve Lawrence }
2141*efd4c9b6SSteve Lawrence 
2142*efd4c9b6SSteve Lawrence /* Convert scheduler names to ZS_* scheduler flags */
2143*efd4c9b6SSteve Lawrence static uint_t
2144*efd4c9b6SSteve Lawrence zsd_schedname2int(char *clname, int pri)
2145*efd4c9b6SSteve Lawrence {
2146*efd4c9b6SSteve Lawrence 	uint_t sched = 0;
2147*efd4c9b6SSteve Lawrence 
2148*efd4c9b6SSteve Lawrence 	if (strcmp(clname, "TS") == 0) {
2149*efd4c9b6SSteve Lawrence 		sched = ZS_SCHED_TS;
2150*efd4c9b6SSteve Lawrence 	} else if (strcmp(clname, "IA") == 0) {
2151*efd4c9b6SSteve Lawrence 		sched = ZS_SCHED_IA;
2152*efd4c9b6SSteve Lawrence 	} else if (strcmp(clname, "FX") == 0) {
2153*efd4c9b6SSteve Lawrence 		if (pri > 59) {
2154*efd4c9b6SSteve Lawrence 			sched = ZS_SCHED_FX_60;
2155*efd4c9b6SSteve Lawrence 		} else {
2156*efd4c9b6SSteve Lawrence 			sched = ZS_SCHED_FX;
2157*efd4c9b6SSteve Lawrence 		}
2158*efd4c9b6SSteve Lawrence 	} else if (strcmp(clname, "RT") == 0) {
2159*efd4c9b6SSteve Lawrence 		sched = ZS_SCHED_RT;
2160*efd4c9b6SSteve Lawrence 
2161*efd4c9b6SSteve Lawrence 	} else if (strcmp(clname, "FSS") == 0) {
2162*efd4c9b6SSteve Lawrence 		sched = ZS_SCHED_FSS;
2163*efd4c9b6SSteve Lawrence 	}
2164*efd4c9b6SSteve Lawrence 	return (sched);
2165*efd4c9b6SSteve Lawrence }
2166*efd4c9b6SSteve Lawrence 
/*
 * Look up the named resource control with getrctl(RCTL_FIRST) and return
 * the value stored in the resulting block.  Returns ZS_LIMIT_NONE if
 * getrctl() fails.  The rctl block is stack-allocated with alloca().
 */
static uint64_t
zsd_get_zone_rctl_limit(char *name)
{
	rctlblk_t *blk = (rctlblk_t *)alloca(rctlblk_size());

	if (getrctl(name, NULL, blk, RCTL_FIRST) == 0)
		return (rctlblk_get_value(blk));

	return (ZS_LIMIT_NONE);
}
2179*efd4c9b6SSteve Lawrence 
/*
 * Look up the named resource control with getrctl(RCTL_USAGE) and return
 * the value stored in the resulting block.  Returns 0 if getrctl() fails.
 * The rctl block is stack-allocated with alloca().
 */
static uint64_t
zsd_get_zone_rctl_usage(char *name)
{
	rctlblk_t *blk = (rctlblk_t *)alloca(rctlblk_size());

	if (getrctl(name, NULL, blk, RCTL_USAGE) == 0)
		return (rctlblk_get_value(blk));

	return (0);
}
2192*efd4c9b6SSteve Lawrence 
2193*efd4c9b6SSteve Lawrence #define	ZSD_NUM_RCTL_VALS 19
2194*efd4c9b6SSteve Lawrence 
2195*efd4c9b6SSteve Lawrence /*
2196*efd4c9b6SSteve Lawrence  * Fetch the limit information for a zone.  This uses zone_enter() as the
2197*efd4c9b6SSteve Lawrence  * getrctl(2) system call only returns rctl information for the zone of
2198*efd4c9b6SSteve Lawrence  * the caller.
2199*efd4c9b6SSteve Lawrence  */
2200*efd4c9b6SSteve Lawrence static int
2201*efd4c9b6SSteve Lawrence zsd_get_zone_caps(zsd_ctl_t *ctl, zsd_zone_t *zone, uint64_t *cpu_shares,
2202*efd4c9b6SSteve Lawrence     uint64_t *cpu_cap, uint64_t *ram_cap, uint64_t *locked_cap,
2203*efd4c9b6SSteve Lawrence     uint64_t *vm_cap, uint64_t *processes_cap, uint64_t *processes,
2204*efd4c9b6SSteve Lawrence     uint64_t *lwps_cap, uint64_t *lwps, uint64_t *shm_cap, uint64_t *shm,
2205*efd4c9b6SSteve Lawrence     uint64_t *shmids_cap, uint64_t *shmids, uint64_t *semids_cap,
2206*efd4c9b6SSteve Lawrence     uint64_t *semids, uint64_t *msgids_cap, uint64_t *msgids,
2207*efd4c9b6SSteve Lawrence     uint64_t *lofi_cap, uint64_t *lofi, uint_t *sched)
2208*efd4c9b6SSteve Lawrence {
2209*efd4c9b6SSteve Lawrence 	int p[2], pid, tmpl_fd, ret;
2210*efd4c9b6SSteve Lawrence 	ctid_t ct;
2211*efd4c9b6SSteve Lawrence 	char class[PC_CLNMSZ];
2212*efd4c9b6SSteve Lawrence 	uint64_t vals[ZSD_NUM_RCTL_VALS];
2213*efd4c9b6SSteve Lawrence 	zsd_system_t *sys = ctl->zsctl_system;
2214*efd4c9b6SSteve Lawrence 	int i = 0;
2215*efd4c9b6SSteve Lawrence 	int res = 0;
2216*efd4c9b6SSteve Lawrence 
2217*efd4c9b6SSteve Lawrence 	/* Treat all caps as no cap on error */
2218*efd4c9b6SSteve Lawrence 	*cpu_shares = ZS_LIMIT_NONE;
2219*efd4c9b6SSteve Lawrence 	*cpu_cap = ZS_LIMIT_NONE;
2220*efd4c9b6SSteve Lawrence 	*ram_cap = ZS_LIMIT_NONE;
2221*efd4c9b6SSteve Lawrence 	*locked_cap = ZS_LIMIT_NONE;
2222*efd4c9b6SSteve Lawrence 	*vm_cap = ZS_LIMIT_NONE;
2223*efd4c9b6SSteve Lawrence 
2224*efd4c9b6SSteve Lawrence 	*processes_cap = ZS_LIMIT_NONE;
2225*efd4c9b6SSteve Lawrence 	*lwps_cap = ZS_LIMIT_NONE;
2226*efd4c9b6SSteve Lawrence 	*shm_cap = ZS_LIMIT_NONE;
2227*efd4c9b6SSteve Lawrence 	*shmids_cap = ZS_LIMIT_NONE;
2228*efd4c9b6SSteve Lawrence 	*semids_cap = ZS_LIMIT_NONE;
2229*efd4c9b6SSteve Lawrence 	*msgids_cap = ZS_LIMIT_NONE;
2230*efd4c9b6SSteve Lawrence 	*lofi_cap = ZS_LIMIT_NONE;
2231*efd4c9b6SSteve Lawrence 
2232*efd4c9b6SSteve Lawrence 	*processes = 0;
2233*efd4c9b6SSteve Lawrence 	*lwps = 0;
2234*efd4c9b6SSteve Lawrence 	*shm = 0;
2235*efd4c9b6SSteve Lawrence 	*shmids = 0;
2236*efd4c9b6SSteve Lawrence 	*semids = 0;
2237*efd4c9b6SSteve Lawrence 	*msgids = 0;
2238*efd4c9b6SSteve Lawrence 	*lofi = 0;
2239*efd4c9b6SSteve Lawrence 
2240*efd4c9b6SSteve Lawrence 	/* Get the ram cap first since it is a zone attr */
2241*efd4c9b6SSteve Lawrence 	ret = zone_getattr(zone->zsz_id, ZONE_ATTR_PHYS_MCAP,
2242*efd4c9b6SSteve Lawrence 	    ram_cap, sizeof (*ram_cap));
2243*efd4c9b6SSteve Lawrence 	if (ret < 0 || *ram_cap == 0)
2244*efd4c9b6SSteve Lawrence 		*ram_cap = ZS_LIMIT_NONE;
2245*efd4c9b6SSteve Lawrence 
2246*efd4c9b6SSteve Lawrence 	/* Get the zone's default scheduling class */
2247*efd4c9b6SSteve Lawrence 	ret = zone_getattr(zone->zsz_id, ZONE_ATTR_SCHED_CLASS,
2248*efd4c9b6SSteve Lawrence 	    class, sizeof (class));
2249*efd4c9b6SSteve Lawrence 	if (ret < 0)
2250*efd4c9b6SSteve Lawrence 		return (-1);
2251*efd4c9b6SSteve Lawrence 
2252*efd4c9b6SSteve Lawrence 	*sched = zsd_schedname2int(class, 0);
2253*efd4c9b6SSteve Lawrence 
2254*efd4c9b6SSteve Lawrence 	/* rctl caps must be fetched from within the zone */
2255*efd4c9b6SSteve Lawrence 	if (pipe(p) != 0)
2256*efd4c9b6SSteve Lawrence 		return (-1);
2257*efd4c9b6SSteve Lawrence 
2258*efd4c9b6SSteve Lawrence 	if ((tmpl_fd = init_template()) == -1) {
2259*efd4c9b6SSteve Lawrence 		(void) close(p[0]);
2260*efd4c9b6SSteve Lawrence 		(void) close(p[1]);
2261*efd4c9b6SSteve Lawrence 		return (-1);
2262*efd4c9b6SSteve Lawrence 	}
2263*efd4c9b6SSteve Lawrence 	pid = forkx(0);
2264*efd4c9b6SSteve Lawrence 	if (pid < 0) {
2265*efd4c9b6SSteve Lawrence 		(void) ct_tmpl_clear(tmpl_fd);
2266*efd4c9b6SSteve Lawrence 		(void) close(p[0]);
2267*efd4c9b6SSteve Lawrence 		(void) close(p[1]);
2268*efd4c9b6SSteve Lawrence 		return (-1);
2269*efd4c9b6SSteve Lawrence 	}
2270*efd4c9b6SSteve Lawrence 	if (pid == 0) {
2271*efd4c9b6SSteve Lawrence 
2272*efd4c9b6SSteve Lawrence 		(void) ct_tmpl_clear(tmpl_fd);
2273*efd4c9b6SSteve Lawrence 		(void) close(tmpl_fd);
2274*efd4c9b6SSteve Lawrence 		(void) close(p[0]);
2275*efd4c9b6SSteve Lawrence 		if (zone->zsz_id != getzoneid()) {
2276*efd4c9b6SSteve Lawrence 			if (zone_enter(zone->zsz_id) < 0) {
2277*efd4c9b6SSteve Lawrence 				(void) close(p[1]);
2278*efd4c9b6SSteve Lawrence 				_exit(0);
2279*efd4c9b6SSteve Lawrence 			}
2280*efd4c9b6SSteve Lawrence 		}
2281*efd4c9b6SSteve Lawrence 
2282*efd4c9b6SSteve Lawrence 		/* Get caps for zone, and write them to zonestatd parent. */
2283*efd4c9b6SSteve Lawrence 		vals[i++] = zsd_get_zone_rctl_limit("zone.cpu-shares");
2284*efd4c9b6SSteve Lawrence 		vals[i++] = zsd_get_zone_rctl_limit("zone.cpu-cap");
2285*efd4c9b6SSteve Lawrence 		vals[i++] = zsd_get_zone_rctl_limit("zone.max-locked-memory");
2286*efd4c9b6SSteve Lawrence 		vals[i++] = zsd_get_zone_rctl_limit("zone.max-swap");
2287*efd4c9b6SSteve Lawrence 		vals[i++] = zsd_get_zone_rctl_limit("zone.max-processes");
2288*efd4c9b6SSteve Lawrence 		vals[i++] = zsd_get_zone_rctl_usage("zone.max-processes");
2289*efd4c9b6SSteve Lawrence 		vals[i++] = zsd_get_zone_rctl_limit("zone.max-lwps");
2290*efd4c9b6SSteve Lawrence 		vals[i++] = zsd_get_zone_rctl_usage("zone.max-lwps");
2291*efd4c9b6SSteve Lawrence 		vals[i++] = zsd_get_zone_rctl_limit("zone.max-shm-memory");
2292*efd4c9b6SSteve Lawrence 		vals[i++] = zsd_get_zone_rctl_usage("zone.max-shm-memory");
2293*efd4c9b6SSteve Lawrence 		vals[i++] = zsd_get_zone_rctl_limit("zone.max-shm-ids");
2294*efd4c9b6SSteve Lawrence 		vals[i++] = zsd_get_zone_rctl_usage("zone.max-shm-ids");
2295*efd4c9b6SSteve Lawrence 		vals[i++] = zsd_get_zone_rctl_limit("zone.max-sem-ids");
2296*efd4c9b6SSteve Lawrence 		vals[i++] = zsd_get_zone_rctl_usage("zone.max-sem-ids");
2297*efd4c9b6SSteve Lawrence 		vals[i++] = zsd_get_zone_rctl_limit("zone.max-msg-ids");
2298*efd4c9b6SSteve Lawrence 		vals[i++] = zsd_get_zone_rctl_usage("zone.max-msg-ids");
2299*efd4c9b6SSteve Lawrence 		vals[i++] = zsd_get_zone_rctl_limit("zone.max-lofi");
2300*efd4c9b6SSteve Lawrence 		vals[i++] = zsd_get_zone_rctl_usage("zone.max-lofi");
2301*efd4c9b6SSteve Lawrence 
2302*efd4c9b6SSteve Lawrence 		if (write(p[1], vals, ZSD_NUM_RCTL_VALS * sizeof (uint64_t)) !=
2303*efd4c9b6SSteve Lawrence 		    ZSD_NUM_RCTL_VALS * sizeof (uint64_t)) {
2304*efd4c9b6SSteve Lawrence 			(void) close(p[1]);
2305*efd4c9b6SSteve Lawrence 			_exit(1);
2306*efd4c9b6SSteve Lawrence 		}
2307*efd4c9b6SSteve Lawrence 
2308*efd4c9b6SSteve Lawrence 		(void) close(p[1]);
2309*efd4c9b6SSteve Lawrence 		_exit(0);
2310*efd4c9b6SSteve Lawrence 	}
2311*efd4c9b6SSteve Lawrence 	if (contract_latest(&ct) == -1)
2312*efd4c9b6SSteve Lawrence 		ct = -1;
2313*efd4c9b6SSteve Lawrence 
2314*efd4c9b6SSteve Lawrence 	(void) ct_tmpl_clear(tmpl_fd);
2315*efd4c9b6SSteve Lawrence 	(void) close(tmpl_fd);
2316*efd4c9b6SSteve Lawrence 	(void) close(p[1]);
2317*efd4c9b6SSteve Lawrence 	while (waitpid(pid, NULL, 0) != pid)
2318*efd4c9b6SSteve Lawrence 		;
2319*efd4c9b6SSteve Lawrence 
2320*efd4c9b6SSteve Lawrence 	/* Read cap from child in zone */
2321*efd4c9b6SSteve Lawrence 	if (read(p[0], vals, ZSD_NUM_RCTL_VALS * sizeof (uint64_t)) !=
2322*efd4c9b6SSteve Lawrence 	    ZSD_NUM_RCTL_VALS * sizeof (uint64_t)) {
2323*efd4c9b6SSteve Lawrence 		res = -1;
2324*efd4c9b6SSteve Lawrence 		goto cleanup;
2325*efd4c9b6SSteve Lawrence 	}
2326*efd4c9b6SSteve Lawrence 	i = 0;
2327*efd4c9b6SSteve Lawrence 	*cpu_shares = vals[i++];
2328*efd4c9b6SSteve Lawrence 	*cpu_cap = vals[i++];
2329*efd4c9b6SSteve Lawrence 	*locked_cap = vals[i++];
2330*efd4c9b6SSteve Lawrence 	*vm_cap = vals[i++];
2331*efd4c9b6SSteve Lawrence 	*processes_cap = vals[i++];
2332*efd4c9b6SSteve Lawrence 	*processes = vals[i++];
2333*efd4c9b6SSteve Lawrence 	*lwps_cap = vals[i++];
2334*efd4c9b6SSteve Lawrence 	*lwps = vals[i++];
2335*efd4c9b6SSteve Lawrence 	*shm_cap = vals[i++];
2336*efd4c9b6SSteve Lawrence 	*shm = vals[i++];
2337*efd4c9b6SSteve Lawrence 	*shmids_cap = vals[i++];
2338*efd4c9b6SSteve Lawrence 	*shmids = vals[i++];
2339*efd4c9b6SSteve Lawrence 	*semids_cap = vals[i++];
2340*efd4c9b6SSteve Lawrence 	*semids = vals[i++];
2341*efd4c9b6SSteve Lawrence 	*msgids_cap = vals[i++];
2342*efd4c9b6SSteve Lawrence 	*msgids = vals[i++];
2343*efd4c9b6SSteve Lawrence 	*lofi_cap = vals[i++];
2344*efd4c9b6SSteve Lawrence 	*lofi = vals[i++];
2345*efd4c9b6SSteve Lawrence 
2346*efd4c9b6SSteve Lawrence 	/* Interpret maximum values as no cap */
2347*efd4c9b6SSteve Lawrence 	if (*cpu_cap == UINT32_MAX || *cpu_cap == 0)
2348*efd4c9b6SSteve Lawrence 		*cpu_cap = ZS_LIMIT_NONE;
2349*efd4c9b6SSteve Lawrence 	if (*processes_cap == sys->zss_processes_max)
2350*efd4c9b6SSteve Lawrence 		*processes_cap = ZS_LIMIT_NONE;
2351*efd4c9b6SSteve Lawrence 	if (*lwps_cap == sys->zss_lwps_max)
2352*efd4c9b6SSteve Lawrence 		*lwps_cap = ZS_LIMIT_NONE;
2353*efd4c9b6SSteve Lawrence 	if (*shm_cap == sys->zss_shm_max)
2354*efd4c9b6SSteve Lawrence 		*shm_cap = ZS_LIMIT_NONE;
2355*efd4c9b6SSteve Lawrence 	if (*shmids_cap == sys->zss_shmids_max)
2356*efd4c9b6SSteve Lawrence 		*shmids_cap = ZS_LIMIT_NONE;
2357*efd4c9b6SSteve Lawrence 	if (*semids_cap == sys->zss_semids_max)
2358*efd4c9b6SSteve Lawrence 		*semids_cap = ZS_LIMIT_NONE;
2359*efd4c9b6SSteve Lawrence 	if (*msgids_cap == sys->zss_msgids_max)
2360*efd4c9b6SSteve Lawrence 		*msgids_cap = ZS_LIMIT_NONE;
2361*efd4c9b6SSteve Lawrence 	if (*lofi_cap == sys->zss_lofi_max)
2362*efd4c9b6SSteve Lawrence 		*lofi_cap = ZS_LIMIT_NONE;
2363*efd4c9b6SSteve Lawrence 
2364*efd4c9b6SSteve Lawrence 
2365*efd4c9b6SSteve Lawrence cleanup:
2366*efd4c9b6SSteve Lawrence 	(void) close(p[0]);
2367*efd4c9b6SSteve Lawrence 	(void) ct_tmpl_clear(tmpl_fd);
2368*efd4c9b6SSteve Lawrence 	(void) close(tmpl_fd);
2369*efd4c9b6SSteve Lawrence 	(void) contract_abandon_id(ct);
2370*efd4c9b6SSteve Lawrence 
2371*efd4c9b6SSteve Lawrence 	return (res);
2372*efd4c9b6SSteve Lawrence }
2373*efd4c9b6SSteve Lawrence 
2374*efd4c9b6SSteve Lawrence /* Update the current list of running zones */
2375*efd4c9b6SSteve Lawrence static void
2376*efd4c9b6SSteve Lawrence zsd_refresh_zones(zsd_ctl_t *ctl)
2377*efd4c9b6SSteve Lawrence {
2378*efd4c9b6SSteve Lawrence 	zsd_zone_t *zone;
2379*efd4c9b6SSteve Lawrence 	uint_t old, num;
2380*efd4c9b6SSteve Lawrence 	ushort_t flags;
2381*efd4c9b6SSteve Lawrence 	int i, ret;
2382*efd4c9b6SSteve Lawrence 	zoneid_t *cache;
2383*efd4c9b6SSteve Lawrence 	uint64_t cpu_shares;
2384*efd4c9b6SSteve Lawrence 	uint64_t cpu_cap;
2385*efd4c9b6SSteve Lawrence 	uint64_t ram_cap;
2386*efd4c9b6SSteve Lawrence 	uint64_t locked_cap;
2387*efd4c9b6SSteve Lawrence 	uint64_t vm_cap;
2388*efd4c9b6SSteve Lawrence 	uint64_t processes_cap;
2389*efd4c9b6SSteve Lawrence 	uint64_t processes;
2390*efd4c9b6SSteve Lawrence 	uint64_t lwps_cap;
2391*efd4c9b6SSteve Lawrence 	uint64_t lwps;
2392*efd4c9b6SSteve Lawrence 	uint64_t shm_cap;
2393*efd4c9b6SSteve Lawrence 	uint64_t shm;
2394*efd4c9b6SSteve Lawrence 	uint64_t shmids_cap;
2395*efd4c9b6SSteve Lawrence 	uint64_t shmids;
2396*efd4c9b6SSteve Lawrence 	uint64_t semids_cap;
2397*efd4c9b6SSteve Lawrence 	uint64_t semids;
2398*efd4c9b6SSteve Lawrence 	uint64_t msgids_cap;
2399*efd4c9b6SSteve Lawrence 	uint64_t msgids;
2400*efd4c9b6SSteve Lawrence 	uint64_t lofi_cap;
2401*efd4c9b6SSteve Lawrence 	uint64_t lofi;
2402*efd4c9b6SSteve Lawrence 
2403*efd4c9b6SSteve Lawrence 	char zonename[ZS_ZONENAME_MAX];
2404*efd4c9b6SSteve Lawrence 	char poolname[ZS_POOLNAME_MAX];
2405*efd4c9b6SSteve Lawrence 	char psetname[ZS_PSETNAME_MAX];
2406*efd4c9b6SSteve Lawrence 	uint_t sched;
2407*efd4c9b6SSteve Lawrence 	uint_t cputype;
2408*efd4c9b6SSteve Lawrence 	uint_t iptype;
2409*efd4c9b6SSteve Lawrence 
2410*efd4c9b6SSteve Lawrence 	/* Get the current list of running zones */
2411*efd4c9b6SSteve Lawrence 	for (;;) {
2412*efd4c9b6SSteve Lawrence 		old = num = ctl->zsctl_zone_ncache;
2413*efd4c9b6SSteve Lawrence 		(void) zone_list(ctl->zsctl_zone_cache, &num);
2414*efd4c9b6SSteve Lawrence 		if (num <= old)
2415*efd4c9b6SSteve Lawrence 			break;
2416*efd4c9b6SSteve Lawrence 		if ((cache = (zoneid_t *)realloc(ctl->zsctl_zone_cache,
2417*efd4c9b6SSteve Lawrence 		    (num) * sizeof (zoneid_t))) != NULL) {
2418*efd4c9b6SSteve Lawrence 			ctl->zsctl_zone_ncache = num;
2419*efd4c9b6SSteve Lawrence 			ctl->zsctl_zone_cache = cache;
2420*efd4c9b6SSteve Lawrence 		} else {
2421*efd4c9b6SSteve Lawrence 			/* Could not allocate to get new zone list.  Give up */
2422*efd4c9b6SSteve Lawrence 			return;
2423*efd4c9b6SSteve Lawrence 		}
2424*efd4c9b6SSteve Lawrence 	}
2425*efd4c9b6SSteve Lawrence 
2426*efd4c9b6SSteve Lawrence 	zsd_mark_zones_start(ctl);
2427*efd4c9b6SSteve Lawrence 
2428*efd4c9b6SSteve Lawrence 	for (i = 0; i < num; i++) {
2429*efd4c9b6SSteve Lawrence 
2430*efd4c9b6SSteve Lawrence 		ret = getzonenamebyid(ctl->zsctl_zone_cache[i],
2431*efd4c9b6SSteve Lawrence 		    zonename, sizeof (zonename));
2432*efd4c9b6SSteve Lawrence 		if (ret < 0)
2433*efd4c9b6SSteve Lawrence 			continue;
2434*efd4c9b6SSteve Lawrence 
2435*efd4c9b6SSteve Lawrence 		zone = zsd_lookup_insert_zone(ctl, zonename,
2436*efd4c9b6SSteve Lawrence 		    ctl->zsctl_zone_cache[i]);
2437*efd4c9b6SSteve Lawrence 
2438*efd4c9b6SSteve Lawrence 		ret = zone_getattr(ctl->zsctl_zone_cache[i], ZONE_ATTR_FLAGS,
2439*efd4c9b6SSteve Lawrence 		    &flags, sizeof (flags));
2440*efd4c9b6SSteve Lawrence 		if (ret < 0)
2441*efd4c9b6SSteve Lawrence 			continue;
2442*efd4c9b6SSteve Lawrence 
2443*efd4c9b6SSteve Lawrence 		if (flags & ZF_NET_EXCL)
2444*efd4c9b6SSteve Lawrence 			iptype = ZS_IPTYPE_EXCLUSIVE;
2445*efd4c9b6SSteve Lawrence 		else
2446*efd4c9b6SSteve Lawrence 			iptype = ZS_IPTYPE_SHARED;
2447*efd4c9b6SSteve Lawrence 
2448*efd4c9b6SSteve Lawrence 		zsd_get_zone_pool_pset(ctl, zone, poolname, sizeof (poolname),
2449*efd4c9b6SSteve Lawrence 		    psetname, sizeof (psetname), &cputype);
2450*efd4c9b6SSteve Lawrence 
2451*efd4c9b6SSteve Lawrence 		if (zsd_get_zone_caps(ctl, zone, &cpu_shares, &cpu_cap,
2452*efd4c9b6SSteve Lawrence 		    &ram_cap, &locked_cap, &vm_cap, &processes_cap, &processes,
2453*efd4c9b6SSteve Lawrence 		    &lwps_cap, &lwps, &shm_cap, &shm, &shmids_cap, &shmids,
2454*efd4c9b6SSteve Lawrence 		    &semids_cap, &semids, &msgids_cap, &msgids, &lofi_cap,
2455*efd4c9b6SSteve Lawrence 		    &lofi, &sched) != 0)
2456*efd4c9b6SSteve Lawrence 			continue;
2457*efd4c9b6SSteve Lawrence 
2458*efd4c9b6SSteve Lawrence 		zsd_mark_zone_found(ctl, zone, cpu_shares, cpu_cap, ram_cap,
2459*efd4c9b6SSteve Lawrence 		    locked_cap, vm_cap, processes_cap, processes, lwps_cap,
2460*efd4c9b6SSteve Lawrence 		    lwps, shm_cap, shm, shmids_cap, shmids, semids_cap,
2461*efd4c9b6SSteve Lawrence 		    semids, msgids_cap, msgids, lofi_cap, lofi, poolname,
2462*efd4c9b6SSteve Lawrence 		    psetname, sched, cputype, iptype);
2463*efd4c9b6SSteve Lawrence 	}
2464*efd4c9b6SSteve Lawrence }
2465*efd4c9b6SSteve Lawrence 
2466*efd4c9b6SSteve Lawrence /* Fetch the details of a process from its psinfo_t */
2467*efd4c9b6SSteve Lawrence static void
2468*efd4c9b6SSteve Lawrence zsd_get_proc_info(zsd_ctl_t *ctl, psinfo_t *psinfo, psetid_t *psetid,
2469*efd4c9b6SSteve Lawrence     psetid_t *prev_psetid, zoneid_t *zoneid, zoneid_t *prev_zoneid,
2470*efd4c9b6SSteve Lawrence     timestruc_t *delta, uint_t *sched)
2471*efd4c9b6SSteve Lawrence {
2472*efd4c9b6SSteve Lawrence 	timestruc_t d;
2473*efd4c9b6SSteve Lawrence 	zsd_proc_t *proc;
2474*efd4c9b6SSteve Lawrence 
2475*efd4c9b6SSteve Lawrence 	/* Get cached data for proc */
2476*efd4c9b6SSteve Lawrence 	proc = &(ctl->zsctl_proc_array[psinfo->pr_pid]);
2477*efd4c9b6SSteve Lawrence 	*psetid = psinfo->pr_lwp.pr_bindpset;
2478*efd4c9b6SSteve Lawrence 
2479*efd4c9b6SSteve Lawrence 	if (proc->zspr_psetid == ZS_PSET_ERROR)
2480*efd4c9b6SSteve Lawrence 		*prev_psetid = *psetid;
2481*efd4c9b6SSteve Lawrence 	else
2482*efd4c9b6SSteve Lawrence 		*prev_psetid = proc->zspr_psetid;
2483*efd4c9b6SSteve Lawrence 
2484*efd4c9b6SSteve Lawrence 	*zoneid = psinfo->pr_zoneid;
2485*efd4c9b6SSteve Lawrence 	if (proc->zspr_zoneid == -1)
2486*efd4c9b6SSteve Lawrence 		*prev_zoneid = *zoneid;
2487*efd4c9b6SSteve Lawrence 	else
2488*efd4c9b6SSteve Lawrence 		*prev_zoneid = proc->zspr_zoneid;
2489*efd4c9b6SSteve Lawrence 
2490*efd4c9b6SSteve Lawrence 	TIMESTRUC_DELTA(d, psinfo->pr_time, proc->zspr_usage);
2491*efd4c9b6SSteve Lawrence 	*delta = d;
2492*efd4c9b6SSteve Lawrence 
2493*efd4c9b6SSteve Lawrence 	*sched = zsd_schedname2int(psinfo->pr_lwp.pr_clname,
2494*efd4c9b6SSteve Lawrence 	    psinfo->pr_lwp.pr_pri);
2495*efd4c9b6SSteve Lawrence 
2496*efd4c9b6SSteve Lawrence 	/* Update cached data for proc */
2497*efd4c9b6SSteve Lawrence 	proc->zspr_psetid = psinfo->pr_lwp.pr_bindpset;
2498*efd4c9b6SSteve Lawrence 	proc->zspr_zoneid = psinfo->pr_zoneid;
2499*efd4c9b6SSteve Lawrence 	proc->zspr_sched = *sched;
2500*efd4c9b6SSteve Lawrence 	proc->zspr_usage.tv_sec = psinfo->pr_time.tv_sec;
2501*efd4c9b6SSteve Lawrence 	proc->zspr_usage.tv_nsec = psinfo->pr_time.tv_nsec;
2502*efd4c9b6SSteve Lawrence 	proc->zspr_ppid = psinfo->pr_ppid;
2503*efd4c9b6SSteve Lawrence }
2504*efd4c9b6SSteve Lawrence 
2505*efd4c9b6SSteve Lawrence /*
2506*efd4c9b6SSteve Lawrence  * Reset the known cpu usage of a process. This is done after a process
2507*efd4c9b6SSteve Lawrence  * exits so that if the pid is recycled, data from its previous life is
2508*efd4c9b6SSteve Lawrence  * not reused
2509*efd4c9b6SSteve Lawrence  */
2510*efd4c9b6SSteve Lawrence static void
2511*efd4c9b6SSteve Lawrence zsd_flush_proc_info(zsd_proc_t *proc)
2512*efd4c9b6SSteve Lawrence {
2513*efd4c9b6SSteve Lawrence 	proc->zspr_usage.tv_sec = 0;
2514*efd4c9b6SSteve Lawrence 	proc->zspr_usage.tv_nsec = 0;
2515*efd4c9b6SSteve Lawrence }
2516*efd4c9b6SSteve Lawrence 
2517*efd4c9b6SSteve Lawrence /*
2518*efd4c9b6SSteve Lawrence  * Open the current extended accounting file.  On initialization, open the
2519*efd4c9b6SSteve Lawrence  * file as the current file to be used.  Otherwise, open the file as the
2520*efd4c9b6SSteve Lawrence  * next file to use of the current file reaches EOF.
2521*efd4c9b6SSteve Lawrence  */
2522*efd4c9b6SSteve Lawrence static int
2523*efd4c9b6SSteve Lawrence zsd_open_exacct(zsd_ctl_t *ctl, boolean_t init)
2524*efd4c9b6SSteve Lawrence {
2525*efd4c9b6SSteve Lawrence 	int ret, oret, state, trys = 0, flags;
2526*efd4c9b6SSteve Lawrence 	int *fd, *open;
2527*efd4c9b6SSteve Lawrence 	ea_file_t *eaf;
2528*efd4c9b6SSteve Lawrence 	struct stat64 *stat;
2529*efd4c9b6SSteve Lawrence 	char path[MAXPATHLEN];
2530*efd4c9b6SSteve Lawrence 
2531*efd4c9b6SSteve Lawrence 	/*
2532*efd4c9b6SSteve Lawrence 	 * The accounting file is first opened at the tail.  Following
2533*efd4c9b6SSteve Lawrence 	 * opens to new accounting files are opened at the head.
2534*efd4c9b6SSteve Lawrence 	 */
2535*efd4c9b6SSteve Lawrence 	if (init == B_TRUE) {
2536*efd4c9b6SSteve Lawrence 		flags = EO_NO_VALID_HDR | EO_TAIL;
2537*efd4c9b6SSteve Lawrence 		fd = &ctl->zsctl_proc_fd;
2538*efd4c9b6SSteve Lawrence 		eaf = &ctl->zsctl_proc_eaf;
2539*efd4c9b6SSteve Lawrence 		stat = &ctl->zsctl_proc_stat;
2540*efd4c9b6SSteve Lawrence 		open = &ctl->zsctl_proc_open;
2541*efd4c9b6SSteve Lawrence 	} else {
2542*efd4c9b6SSteve Lawrence 		flags = EO_NO_VALID_HDR | EO_HEAD;
2543*efd4c9b6SSteve Lawrence 		fd = &ctl->zsctl_proc_fd_next;
2544*efd4c9b6SSteve Lawrence 		eaf = &ctl->zsctl_proc_eaf_next;
2545*efd4c9b6SSteve Lawrence 		stat = &ctl->zsctl_proc_stat_next;
2546*efd4c9b6SSteve Lawrence 		open = &ctl->zsctl_proc_open_next;
2547*efd4c9b6SSteve Lawrence 	}
2548*efd4c9b6SSteve Lawrence 
2549*efd4c9b6SSteve Lawrence 	*fd = -1;
2550*efd4c9b6SSteve Lawrence 	*open = 0;
2551*efd4c9b6SSteve Lawrence retry:
2552*efd4c9b6SSteve Lawrence 	/* open accounting files for cpu consumption */
2553*efd4c9b6SSteve Lawrence 	ret = acctctl(AC_STATE_GET | AC_PROC, &state, sizeof (state));
2554*efd4c9b6SSteve Lawrence 	if (ret != 0) {
2555*efd4c9b6SSteve Lawrence 		zsd_warn(gettext("Unable to get process accounting state"));
2556*efd4c9b6SSteve Lawrence 		goto err;
2557*efd4c9b6SSteve Lawrence 	}
2558*efd4c9b6SSteve Lawrence 	if (state != AC_ON) {
2559*efd4c9b6SSteve Lawrence 		if (trys > 0) {
2560*efd4c9b6SSteve Lawrence 			zsd_warn(gettext(
2561*efd4c9b6SSteve Lawrence 			    "Unable to enable process accounting"));
2562*efd4c9b6SSteve Lawrence 			goto err;
2563*efd4c9b6SSteve Lawrence 		}
2564*efd4c9b6SSteve Lawrence 		(void) zsd_enable_cpu_stats();
2565*efd4c9b6SSteve Lawrence 		trys++;
2566*efd4c9b6SSteve Lawrence 		goto retry;
2567*efd4c9b6SSteve Lawrence 	}
2568*efd4c9b6SSteve Lawrence 
2569*efd4c9b6SSteve Lawrence 	ret = acctctl(AC_FILE_GET | AC_PROC, path, sizeof (path));
2570*efd4c9b6SSteve Lawrence 	if (ret != 0) {
2571*efd4c9b6SSteve Lawrence 		zsd_warn(gettext("Unable to get process accounting file"));
2572*efd4c9b6SSteve Lawrence 		goto err;
2573*efd4c9b6SSteve Lawrence 	}
2574*efd4c9b6SSteve Lawrence 
2575*efd4c9b6SSteve Lawrence 	if ((*fd = open64(path, O_RDONLY, 0)) >= 0 &&
2576*efd4c9b6SSteve Lawrence 	    (oret = ea_fdopen(eaf, *fd, NULL, flags, O_RDONLY)) == 0)
2577*efd4c9b6SSteve Lawrence 		ret = fstat64(*fd, stat);
2578*efd4c9b6SSteve Lawrence 
2579*efd4c9b6SSteve Lawrence 	if (*fd < 0 || oret < 0 || ret < 0) {
2580*efd4c9b6SSteve Lawrence 		struct timespec ts;
2581*efd4c9b6SSteve Lawrence 
2582*efd4c9b6SSteve Lawrence 		/*
2583*efd4c9b6SSteve Lawrence 		 * It is possible the accounting file is momentarily unavailable
2584*efd4c9b6SSteve Lawrence 		 * because it is being rolled.  Try for up to half a second.
2585*efd4c9b6SSteve Lawrence 		 *
2586*efd4c9b6SSteve Lawrence 		 * If failure to open accounting file persists, give up.
2587*efd4c9b6SSteve Lawrence 		 */
2588*efd4c9b6SSteve Lawrence 		if (oret == 0)
2589*efd4c9b6SSteve Lawrence 			(void) ea_close(eaf);
2590*efd4c9b6SSteve Lawrence 		else if (*fd >= 0)
2591*efd4c9b6SSteve Lawrence 			(void) close(*fd);
2592*efd4c9b6SSteve Lawrence 		if (trys > 500) {
2593*efd4c9b6SSteve Lawrence 			zsd_warn(gettext(
2594*efd4c9b6SSteve Lawrence 			    "Unable to open process accounting file"));
2595*efd4c9b6SSteve Lawrence 			goto err;
2596*efd4c9b6SSteve Lawrence 		}
2597*efd4c9b6SSteve Lawrence 		/* wait one millisecond */
2598*efd4c9b6SSteve Lawrence 		ts.tv_sec = 0;
2599*efd4c9b6SSteve Lawrence 		ts.tv_nsec = NANOSEC / 1000;
2600*efd4c9b6SSteve Lawrence 		(void) nanosleep(&ts, NULL);
2601*efd4c9b6SSteve Lawrence 		goto retry;
2602*efd4c9b6SSteve Lawrence 	}
2603*efd4c9b6SSteve Lawrence 	*open = 1;
2604*efd4c9b6SSteve Lawrence 	return (0);
2605*efd4c9b6SSteve Lawrence err:
2606*efd4c9b6SSteve Lawrence 	if (*fd >= 0)
2607*efd4c9b6SSteve Lawrence 		(void) close(*fd);
2608*efd4c9b6SSteve Lawrence 	*open = 0;
2609*efd4c9b6SSteve Lawrence 	*fd = -1;
2610*efd4c9b6SSteve Lawrence 	return (-1);
2611*efd4c9b6SSteve Lawrence }
2612*efd4c9b6SSteve Lawrence 
2613*efd4c9b6SSteve Lawrence /*
2614*efd4c9b6SSteve Lawrence  * Walk /proc and charge each process to its zone and processor set.
2615*efd4c9b6SSteve Lawrence  * Then read exacct data for exited processes, and charge them as well.
2616*efd4c9b6SSteve Lawrence  */
2617*efd4c9b6SSteve Lawrence static void
2618*efd4c9b6SSteve Lawrence zsd_refresh_procs(zsd_ctl_t *ctl, boolean_t init)
2619*efd4c9b6SSteve Lawrence {
2620*efd4c9b6SSteve Lawrence 	DIR *dir;
2621*efd4c9b6SSteve Lawrence 	struct dirent *dent;
2622*efd4c9b6SSteve Lawrence 	psinfo_t psinfo;
2623*efd4c9b6SSteve Lawrence 	int fd, ret;
2624*efd4c9b6SSteve Lawrence 	zsd_proc_t *proc, *pproc, *tmp, *next;
2625*efd4c9b6SSteve Lawrence 	list_t pplist, plist;
2626*efd4c9b6SSteve Lawrence 	zsd_zone_t *zone, *prev_zone;
2627*efd4c9b6SSteve Lawrence 	zsd_pset_t *pset, *prev_pset;
2628*efd4c9b6SSteve Lawrence 	psetid_t psetid, prev_psetid;
2629*efd4c9b6SSteve Lawrence 	zoneid_t zoneid, prev_zoneid;
2630*efd4c9b6SSteve Lawrence 	zsd_pset_usage_t *usage, *prev_usage;
2631*efd4c9b6SSteve Lawrence 	char path[MAXPATHLEN];
2632*efd4c9b6SSteve Lawrence 
2633*efd4c9b6SSteve Lawrence 	ea_object_t object;
2634*efd4c9b6SSteve Lawrence 	ea_object_t pobject;
2635*efd4c9b6SSteve Lawrence 	boolean_t hrtime_expired = B_FALSE;
2636*efd4c9b6SSteve Lawrence 	struct timeval interval_end;
2637*efd4c9b6SSteve Lawrence 
2638*efd4c9b6SSteve Lawrence 	timestruc_t delta, d1, d2;
2639*efd4c9b6SSteve Lawrence 	uint_t sched = 0;
2640*efd4c9b6SSteve Lawrence 
2641*efd4c9b6SSteve Lawrence 	/*
2642*efd4c9b6SSteve Lawrence 	 * Get the current accounting file.  The current accounting file
2643*efd4c9b6SSteve Lawrence 	 * may be different than the file in use, as the accounting file
2644*efd4c9b6SSteve Lawrence 	 * may have been rolled, or manually changed by an admin.
2645*efd4c9b6SSteve Lawrence 	 */
2646*efd4c9b6SSteve Lawrence 	ret = zsd_open_exacct(ctl, init);
2647*efd4c9b6SSteve Lawrence 	if (ret != 0) {
2648*efd4c9b6SSteve Lawrence 		zsd_warn(gettext("Unable to track process accounting"));
2649*efd4c9b6SSteve Lawrence 		return;
2650*efd4c9b6SSteve Lawrence 	}
2651*efd4c9b6SSteve Lawrence 
2652*efd4c9b6SSteve Lawrence 	/*
2653*efd4c9b6SSteve Lawrence 	 * Mark the current time as the interval end time.  Don't track
2654*efd4c9b6SSteve Lawrence 	 * processes that exit after this time.
2655*efd4c9b6SSteve Lawrence 	 */
2656*efd4c9b6SSteve Lawrence 	(void) gettimeofday(&interval_end, NULL);
2657*efd4c9b6SSteve Lawrence 
2658*efd4c9b6SSteve Lawrence 	dir = opendir("/proc");
2659*efd4c9b6SSteve Lawrence 	if (dir == NULL) {
2660*efd4c9b6SSteve Lawrence 		zsd_warn(gettext("Unable to open /proc"));
2661*efd4c9b6SSteve Lawrence 		return;
2662*efd4c9b6SSteve Lawrence 	}
2663*efd4c9b6SSteve Lawrence 
2664*efd4c9b6SSteve Lawrence 	dent = ctl->zsctl_procfs_dent;
2665*efd4c9b6SSteve Lawrence 
2666*efd4c9b6SSteve Lawrence 	(void) memset(dent, 0, ctl->zsctl_procfs_dent_size);
2667*efd4c9b6SSteve Lawrence 
2668*efd4c9b6SSteve Lawrence 	/* Walk all processes and compute each zone's usage on each pset. */
2669*efd4c9b6SSteve Lawrence 	while (readdir_r(dir, dent) != 0) {
2670*efd4c9b6SSteve Lawrence 
2671*efd4c9b6SSteve Lawrence 		if (strcmp(dent->d_name, ".") == 0 ||
2672*efd4c9b6SSteve Lawrence 		    strcmp(dent->d_name, "..") == 0)
2673*efd4c9b6SSteve Lawrence 			continue;
2674*efd4c9b6SSteve Lawrence 
2675*efd4c9b6SSteve Lawrence 		(void) snprintf(path, sizeof (path), "/proc/%s/psinfo",
2676*efd4c9b6SSteve Lawrence 		    dent->d_name);
2677*efd4c9b6SSteve Lawrence 
2678*efd4c9b6SSteve Lawrence 		fd = open(path, O_RDONLY);
2679*efd4c9b6SSteve Lawrence 		if (fd < 0)
2680*efd4c9b6SSteve Lawrence 			continue;
2681*efd4c9b6SSteve Lawrence 
2682*efd4c9b6SSteve Lawrence 		if (read(fd, &psinfo, sizeof (psinfo)) != sizeof (psinfo)) {
2683*efd4c9b6SSteve Lawrence 			(void) close(fd);
2684*efd4c9b6SSteve Lawrence 			continue;
2685*efd4c9b6SSteve Lawrence 		}
2686*efd4c9b6SSteve Lawrence 		(void) close(fd);
2687*efd4c9b6SSteve Lawrence 
2688*efd4c9b6SSteve Lawrence 		zsd_get_proc_info(ctl, &psinfo, &psetid, &prev_psetid,
2689*efd4c9b6SSteve Lawrence 		    &zoneid, &prev_zoneid, &delta, &sched);
2690*efd4c9b6SSteve Lawrence 
2691*efd4c9b6SSteve Lawrence 		d1.tv_sec = delta.tv_sec / 2;
2692*efd4c9b6SSteve Lawrence 		d1.tv_nsec = delta.tv_nsec / 2;
2693*efd4c9b6SSteve Lawrence 		d2.tv_sec = (delta.tv_sec / 2) + (delta.tv_sec % 2);
2694*efd4c9b6SSteve Lawrence 		d2.tv_nsec = (delta.tv_nsec / 2) + (delta.tv_nsec % 2);
2695*efd4c9b6SSteve Lawrence 
2696*efd4c9b6SSteve Lawrence 		/* Get the zone and pset this process is running in */
2697*efd4c9b6SSteve Lawrence 		zone = zsd_lookup_zone_byid(ctl, zoneid);
2698*efd4c9b6SSteve Lawrence 		if (zone == NULL)
2699*efd4c9b6SSteve Lawrence 			continue;
2700*efd4c9b6SSteve Lawrence 		pset = zsd_lookup_pset_byid(ctl, psetid);
2701*efd4c9b6SSteve Lawrence 		if (pset == NULL)
2702*efd4c9b6SSteve Lawrence 			continue;
2703*efd4c9b6SSteve Lawrence 		usage = zsd_lookup_insert_usage(ctl, pset, zone);
2704*efd4c9b6SSteve Lawrence 		if (usage == NULL)
2705*efd4c9b6SSteve Lawrence 			continue;
2706*efd4c9b6SSteve Lawrence 
2707*efd4c9b6SSteve Lawrence 		/*
2708*efd4c9b6SSteve Lawrence 		 * Get the usage of the previous zone and pset if they were
2709*efd4c9b6SSteve Lawrence 		 * different.
2710*efd4c9b6SSteve Lawrence 		 */
2711*efd4c9b6SSteve Lawrence 		if (zoneid != prev_zoneid)
2712*efd4c9b6SSteve Lawrence 			prev_zone = zsd_lookup_zone_byid(ctl, prev_zoneid);
2713*efd4c9b6SSteve Lawrence 		else
2714*efd4c9b6SSteve Lawrence 			prev_zone = NULL;
2715*efd4c9b6SSteve Lawrence 
2716*efd4c9b6SSteve Lawrence 		if (psetid != prev_psetid)
2717*efd4c9b6SSteve Lawrence 			prev_pset = zsd_lookup_pset_byid(ctl, prev_psetid);
2718*efd4c9b6SSteve Lawrence 		else
2719*efd4c9b6SSteve Lawrence 			prev_pset = NULL;
2720*efd4c9b6SSteve Lawrence 
2721*efd4c9b6SSteve Lawrence 		prev_usage = NULL;
2722*efd4c9b6SSteve Lawrence 		if (prev_zone != NULL || prev_pset != NULL) {
2723*efd4c9b6SSteve Lawrence 			if (prev_zone == NULL)
2724*efd4c9b6SSteve Lawrence 				prev_zone = zone;
2725*efd4c9b6SSteve Lawrence 			if (prev_pset == NULL)
2726*efd4c9b6SSteve Lawrence 				prev_pset = pset;
2727*efd4c9b6SSteve Lawrence 
2728*efd4c9b6SSteve Lawrence 			prev_usage = zsd_lookup_insert_usage(ctl, prev_pset,
2729*efd4c9b6SSteve Lawrence 			    prev_zone);
2730*efd4c9b6SSteve Lawrence 		}
2731*efd4c9b6SSteve Lawrence 
2732*efd4c9b6SSteve Lawrence 		/* Update the usage with the processes info */
2733*efd4c9b6SSteve Lawrence 		if (prev_usage == NULL) {
2734*efd4c9b6SSteve Lawrence 			zsd_mark_pset_usage_found(usage, sched);
2735*efd4c9b6SSteve Lawrence 		} else {
2736*efd4c9b6SSteve Lawrence 			zsd_mark_pset_usage_found(usage, sched);
2737*efd4c9b6SSteve Lawrence 			zsd_mark_pset_usage_found(prev_usage, sched);
2738*efd4c9b6SSteve Lawrence 		}
2739*efd4c9b6SSteve Lawrence 
2740*efd4c9b6SSteve Lawrence 		/*
2741*efd4c9b6SSteve Lawrence 		 * First time around is just to get a starting point.  All
2742*efd4c9b6SSteve Lawrence 		 * usages will be zero.
2743*efd4c9b6SSteve Lawrence 		 */
2744*efd4c9b6SSteve Lawrence 		if (init == B_TRUE)
2745*efd4c9b6SSteve Lawrence 			continue;
2746*efd4c9b6SSteve Lawrence 
2747*efd4c9b6SSteve Lawrence 		if (prev_usage == NULL) {
2748*efd4c9b6SSteve Lawrence 			zsd_add_usage(ctl, usage, &delta);
2749*efd4c9b6SSteve Lawrence 		} else {
2750*efd4c9b6SSteve Lawrence 			zsd_add_usage(ctl, usage, &d1);
2751*efd4c9b6SSteve Lawrence 			zsd_add_usage(ctl, prev_usage, &d2);
2752*efd4c9b6SSteve Lawrence 		}
2753*efd4c9b6SSteve Lawrence 	}
2754*efd4c9b6SSteve Lawrence 	(void) closedir(dir);
2755*efd4c9b6SSteve Lawrence 
2756*efd4c9b6SSteve Lawrence 	/*
2757*efd4c9b6SSteve Lawrence 	 * No need to collect exited proc data on initialization.  Just
2758*efd4c9b6SSteve Lawrence 	 * caching the usage of the known processes to get a zero starting
2759*efd4c9b6SSteve Lawrence 	 * point.
2760*efd4c9b6SSteve Lawrence 	 */
2761*efd4c9b6SSteve Lawrence 	if (init == B_TRUE)
2762*efd4c9b6SSteve Lawrence 		return;
2763*efd4c9b6SSteve Lawrence 
2764*efd4c9b6SSteve Lawrence 	/*
2765*efd4c9b6SSteve Lawrence 	 * Add accounting records to account for processes which have
2766*efd4c9b6SSteve Lawrence 	 * exited.
2767*efd4c9b6SSteve Lawrence 	 */
2768*efd4c9b6SSteve Lawrence 	list_create(&plist, sizeof (zsd_proc_t),
2769*efd4c9b6SSteve Lawrence 	    offsetof(zsd_proc_t, zspr_next));
2770*efd4c9b6SSteve Lawrence 	list_create(&pplist, sizeof (zsd_proc_t),
2771*efd4c9b6SSteve Lawrence 	    offsetof(zsd_proc_t, zspr_next));
2772*efd4c9b6SSteve Lawrence 
2773*efd4c9b6SSteve Lawrence 	for (;;) {
2774*efd4c9b6SSteve Lawrence 		pid_t pid;
2775*efd4c9b6SSteve Lawrence 		pid_t ppid;
2776*efd4c9b6SSteve Lawrence 		timestruc_t user, sys, proc_usage;
2777*efd4c9b6SSteve Lawrence 		timestruc_t finish;
2778*efd4c9b6SSteve Lawrence 		int numfound = 0;
2779*efd4c9b6SSteve Lawrence 
2780*efd4c9b6SSteve Lawrence 		bzero(&object, sizeof (object));
2781*efd4c9b6SSteve Lawrence 		proc = NULL;
2782*efd4c9b6SSteve Lawrence 		zone = NULL;
2783*efd4c9b6SSteve Lawrence 		pset = NULL;
2784*efd4c9b6SSteve Lawrence 		usage = NULL;
2785*efd4c9b6SSteve Lawrence 		ret = ea_get_object(&ctl->zsctl_proc_eaf, &object);
2786*efd4c9b6SSteve Lawrence 		if (ret == EO_ERROR) {
2787*efd4c9b6SSteve Lawrence 			if (ea_error() == EXR_EOF) {
2788*efd4c9b6SSteve Lawrence 
2789*efd4c9b6SSteve Lawrence 				struct stat64 *stat;
2790*efd4c9b6SSteve Lawrence 				struct stat64 *stat_next;
2791*efd4c9b6SSteve Lawrence 
2792*efd4c9b6SSteve Lawrence 				/*
2793*efd4c9b6SSteve Lawrence 				 * See if the next accounting file is the
2794*efd4c9b6SSteve Lawrence 				 * same as the current accounting file.
2795*efd4c9b6SSteve Lawrence 				 */
2796*efd4c9b6SSteve Lawrence 				stat = &(ctl->zsctl_proc_stat);
2797*efd4c9b6SSteve Lawrence 				stat_next = &(ctl->zsctl_proc_stat_next);
2798*efd4c9b6SSteve Lawrence 				if (stat->st_ino == stat_next->st_ino &&
2799*efd4c9b6SSteve Lawrence 				    stat->st_dev == stat_next->st_dev) {
2800*efd4c9b6SSteve Lawrence 					/*
2801*efd4c9b6SSteve Lawrence 					 * End of current accounting file is
2802*efd4c9b6SSteve Lawrence 					 * reached, so finished.  Clear EOF
2803*efd4c9b6SSteve Lawrence 					 * bit for next time around.
2804*efd4c9b6SSteve Lawrence 					 */
2805*efd4c9b6SSteve Lawrence 					ea_clear(&ctl->zsctl_proc_eaf);
2806*efd4c9b6SSteve Lawrence 					break;
2807*efd4c9b6SSteve Lawrence 				} else {
2808*efd4c9b6SSteve Lawrence 					/*
2809*efd4c9b6SSteve Lawrence 					 * Accounting file has changed.  Move
2810*efd4c9b6SSteve Lawrence 					 * to current accounting file.
2811*efd4c9b6SSteve Lawrence 					 */
2812*efd4c9b6SSteve Lawrence 					(void) ea_close(&ctl->zsctl_proc_eaf);
2813*efd4c9b6SSteve Lawrence 
2814*efd4c9b6SSteve Lawrence 					ctl->zsctl_proc_fd =
2815*efd4c9b6SSteve Lawrence 					    ctl->zsctl_proc_fd_next;
2816*efd4c9b6SSteve Lawrence 					ctl->zsctl_proc_eaf =
2817*efd4c9b6SSteve Lawrence 					    ctl->zsctl_proc_eaf_next;
2818*efd4c9b6SSteve Lawrence 					ctl->zsctl_proc_stat =
2819*efd4c9b6SSteve Lawrence 					    ctl->zsctl_proc_stat_next;
2820*efd4c9b6SSteve Lawrence 
2821*efd4c9b6SSteve Lawrence 					ctl->zsctl_proc_fd_next = -1;
2822*efd4c9b6SSteve Lawrence 					ctl->zsctl_proc_open_next = 0;
2823*efd4c9b6SSteve Lawrence 					continue;
2824*efd4c9b6SSteve Lawrence 				}
2825*efd4c9b6SSteve Lawrence 			} else {
2826*efd4c9b6SSteve Lawrence 				/*
2827*efd4c9b6SSteve Lawrence 				 * Other accounting error.  Give up on
2828*efd4c9b6SSteve Lawrence 				 * accounting.
2829*efd4c9b6SSteve Lawrence 				 */
2830*efd4c9b6SSteve Lawrence 				goto ea_err;
2831*efd4c9b6SSteve Lawrence 			}
2832*efd4c9b6SSteve Lawrence 		}
2833*efd4c9b6SSteve Lawrence 		/* Skip if not a process group */
2834*efd4c9b6SSteve Lawrence 		if ((object.eo_catalog & EXT_TYPE_MASK) != EXT_GROUP ||
2835*efd4c9b6SSteve Lawrence 		    (object.eo_catalog & EXD_DATA_MASK) != EXD_GROUP_PROC) {
2836*efd4c9b6SSteve Lawrence 			(void) ea_free_item(&object, EUP_ALLOC);
2837*efd4c9b6SSteve Lawrence 			continue;
2838*efd4c9b6SSteve Lawrence 		}
2839*efd4c9b6SSteve Lawrence 
2840*efd4c9b6SSteve Lawrence 		/* The process group entry should be complete */
2841*efd4c9b6SSteve Lawrence 		while (numfound < 9) {
2842*efd4c9b6SSteve Lawrence 			bzero(&pobject, sizeof (pobject));
2843*efd4c9b6SSteve Lawrence 			ret = ea_get_object(&ctl->zsctl_proc_eaf,
2844*efd4c9b6SSteve Lawrence 			    &pobject);
2845*efd4c9b6SSteve Lawrence 			if (ret < 0) {
2846*efd4c9b6SSteve Lawrence 				(void) ea_free_item(&object, EUP_ALLOC);
2847*efd4c9b6SSteve Lawrence 				zsd_warn(
2848*efd4c9b6SSteve Lawrence 				    "unable to get process accounting data");
2849*efd4c9b6SSteve Lawrence 				goto ea_err;
2850*efd4c9b6SSteve Lawrence 			}
2851*efd4c9b6SSteve Lawrence 			/* Next entries should be process data */
2852*efd4c9b6SSteve Lawrence 			if ((pobject.eo_catalog & EXT_TYPE_MASK) ==
2853*efd4c9b6SSteve Lawrence 			    EXT_GROUP) {
2854*efd4c9b6SSteve Lawrence 				(void) ea_free_item(&object, EUP_ALLOC);
2855*efd4c9b6SSteve Lawrence 				(void) ea_free_item(&pobject, EUP_ALLOC);
2856*efd4c9b6SSteve Lawrence 				zsd_warn(
2857*efd4c9b6SSteve Lawrence 				    "process data of wrong type");
2858*efd4c9b6SSteve Lawrence 				goto ea_err;
2859*efd4c9b6SSteve Lawrence 			}
2860*efd4c9b6SSteve Lawrence 			switch (pobject.eo_catalog & EXD_DATA_MASK) {
2861*efd4c9b6SSteve Lawrence 			case EXD_PROC_PID:
2862*efd4c9b6SSteve Lawrence 				pid = pobject.eo_item.ei_uint32;
2863*efd4c9b6SSteve Lawrence 				proc = &(ctl->zsctl_proc_array[pid]);
2864*efd4c9b6SSteve Lawrence 				/*
2865*efd4c9b6SSteve Lawrence 				 * This process should not be currently in
2866*efd4c9b6SSteve Lawrence 				 * the list of processes to process.
2867*efd4c9b6SSteve Lawrence 				 */
2868*efd4c9b6SSteve Lawrence 				assert(!list_link_active(&proc->zspr_next));
2869*efd4c9b6SSteve Lawrence 				numfound++;
2870*efd4c9b6SSteve Lawrence 				break;
2871*efd4c9b6SSteve Lawrence 			case EXD_PROC_ANCPID:
2872*efd4c9b6SSteve Lawrence 				ppid = pobject.eo_item.ei_uint32;
2873*efd4c9b6SSteve Lawrence 				pproc = &(ctl->zsctl_proc_array[ppid]);
2874*efd4c9b6SSteve Lawrence 				numfound++;
2875*efd4c9b6SSteve Lawrence 				break;
2876*efd4c9b6SSteve Lawrence 			case EXD_PROC_ZONENAME:
2877*efd4c9b6SSteve Lawrence 				zone = zsd_lookup_zone(ctl,
2878*efd4c9b6SSteve Lawrence 				    pobject.eo_item.ei_string, -1);
2879*efd4c9b6SSteve Lawrence 				numfound++;
2880*efd4c9b6SSteve Lawrence 				break;
2881*efd4c9b6SSteve Lawrence 			case EXD_PROC_CPU_USER_SEC:
2882*efd4c9b6SSteve Lawrence 				user.tv_sec =
2883*efd4c9b6SSteve Lawrence 				    pobject.eo_item.ei_uint64;
2884*efd4c9b6SSteve Lawrence 				numfound++;
2885*efd4c9b6SSteve Lawrence 				break;
2886*efd4c9b6SSteve Lawrence 			case EXD_PROC_CPU_USER_NSEC:
2887*efd4c9b6SSteve Lawrence 				user.tv_nsec =
2888*efd4c9b6SSteve Lawrence 				    pobject.eo_item.ei_uint64;
2889*efd4c9b6SSteve Lawrence 				numfound++;
2890*efd4c9b6SSteve Lawrence 				break;
2891*efd4c9b6SSteve Lawrence 			case EXD_PROC_CPU_SYS_SEC:
2892*efd4c9b6SSteve Lawrence 				sys.tv_sec =
2893*efd4c9b6SSteve Lawrence 				    pobject.eo_item.ei_uint64;
2894*efd4c9b6SSteve Lawrence 				numfound++;
2895*efd4c9b6SSteve Lawrence 				break;
2896*efd4c9b6SSteve Lawrence 			case EXD_PROC_CPU_SYS_NSEC:
2897*efd4c9b6SSteve Lawrence 				sys.tv_nsec =
2898*efd4c9b6SSteve Lawrence 				    pobject.eo_item.ei_uint64;
2899*efd4c9b6SSteve Lawrence 				numfound++;
2900*efd4c9b6SSteve Lawrence 				break;
2901*efd4c9b6SSteve Lawrence 			case EXD_PROC_FINISH_SEC:
2902*efd4c9b6SSteve Lawrence 				finish.tv_sec =
2903*efd4c9b6SSteve Lawrence 				    pobject.eo_item.ei_uint64;
2904*efd4c9b6SSteve Lawrence 				numfound++;
2905*efd4c9b6SSteve Lawrence 				break;
2906*efd4c9b6SSteve Lawrence 			case EXD_PROC_FINISH_NSEC:
2907*efd4c9b6SSteve Lawrence 				finish.tv_nsec =
2908*efd4c9b6SSteve Lawrence 				    pobject.eo_item.ei_uint64;
2909*efd4c9b6SSteve Lawrence 				numfound++;
2910*efd4c9b6SSteve Lawrence 				break;
2911*efd4c9b6SSteve Lawrence 			}
2912*efd4c9b6SSteve Lawrence 			(void) ea_free_item(&pobject, EUP_ALLOC);
2913*efd4c9b6SSteve Lawrence 		}
2914*efd4c9b6SSteve Lawrence 		(void) ea_free_item(&object, EUP_ALLOC);
2915*efd4c9b6SSteve Lawrence 		if (numfound != 9) {
2916*efd4c9b6SSteve Lawrence 			zsd_warn(gettext(
2917*efd4c9b6SSteve Lawrence 			    "Malformed process accounting entry found"));
2918*efd4c9b6SSteve Lawrence 			goto proc_done;
2919*efd4c9b6SSteve Lawrence 		}
2920*efd4c9b6SSteve Lawrence 
2921*efd4c9b6SSteve Lawrence 		if (finish.tv_sec > interval_end.tv_sec ||
2922*efd4c9b6SSteve Lawrence 		    (finish.tv_sec == interval_end.tv_sec &&
2923*efd4c9b6SSteve Lawrence 		    finish.tv_nsec > (interval_end.tv_usec * 1000)))
2924*efd4c9b6SSteve Lawrence 			hrtime_expired = B_TRUE;
2925*efd4c9b6SSteve Lawrence 
2926*efd4c9b6SSteve Lawrence 		/*
2927*efd4c9b6SSteve Lawrence 		 * Try to identify the zone and pset to which this
2928*efd4c9b6SSteve Lawrence 		 * exited process belongs.
2929*efd4c9b6SSteve Lawrence 		 */
2930*efd4c9b6SSteve Lawrence 		if (zone == NULL)
2931*efd4c9b6SSteve Lawrence 			goto proc_done;
2932*efd4c9b6SSteve Lawrence 
2933*efd4c9b6SSteve Lawrence 		/* Save proc info */
2934*efd4c9b6SSteve Lawrence 		proc->zspr_ppid = ppid;
2935*efd4c9b6SSteve Lawrence 		proc->zspr_zoneid = zone->zsz_id;
2936*efd4c9b6SSteve Lawrence 
2937*efd4c9b6SSteve Lawrence 		prev_psetid = ZS_PSET_ERROR;
2938*efd4c9b6SSteve Lawrence 		sched = 0;
2939*efd4c9b6SSteve Lawrence 
2940*efd4c9b6SSteve Lawrence 		/*
2941*efd4c9b6SSteve Lawrence 		 * The following tries to deduce the processes pset.
2942*efd4c9b6SSteve Lawrence 		 *
2943*efd4c9b6SSteve Lawrence 		 * First choose pset and sched using cached value from the
2944*efd4c9b6SSteve Lawrence 		 * most recent time the process has been seen.
2945*efd4c9b6SSteve Lawrence 		 *
2946*efd4c9b6SSteve Lawrence 		 * pset and sched can change across zone_enter, so make sure
2947*efd4c9b6SSteve Lawrence 		 * most recent sighting of this process was in the same
2948*efd4c9b6SSteve Lawrence 		 * zone before using most recent known value.
2949*efd4c9b6SSteve Lawrence 		 *
2950*efd4c9b6SSteve Lawrence 		 * If there is no known value, use value of processes
2951*efd4c9b6SSteve Lawrence 		 * parent.  If parent is unknown, walk parents until a known
2952*efd4c9b6SSteve Lawrence 		 * parent is found.
2953*efd4c9b6SSteve Lawrence 		 *
2954*efd4c9b6SSteve Lawrence 		 * If no parent in the zone is found, use the zone's default
2955*efd4c9b6SSteve Lawrence 		 * pset and scheduling class.
2956*efd4c9b6SSteve Lawrence 		 */
2957*efd4c9b6SSteve Lawrence 		if (proc->zspr_psetid != ZS_PSET_ERROR) {
2958*efd4c9b6SSteve Lawrence 			prev_psetid = proc->zspr_psetid;
2959*efd4c9b6SSteve Lawrence 			pset = zsd_lookup_pset_byid(ctl, prev_psetid);
2960*efd4c9b6SSteve Lawrence 			sched = proc->zspr_sched;
2961*efd4c9b6SSteve Lawrence 		} else if (pproc->zspr_zoneid == zone->zsz_id &&
2962*efd4c9b6SSteve Lawrence 		    pproc->zspr_psetid != ZS_PSET_ERROR) {
2963*efd4c9b6SSteve Lawrence 			prev_psetid = pproc->zspr_psetid;
2964*efd4c9b6SSteve Lawrence 			pset = zsd_lookup_pset_byid(ctl, prev_psetid);
2965*efd4c9b6SSteve Lawrence 			sched = pproc->zspr_sched;
2966*efd4c9b6SSteve Lawrence 		}
2967*efd4c9b6SSteve Lawrence 
2968*efd4c9b6SSteve Lawrence 		if (pset == NULL) {
2969*efd4c9b6SSteve Lawrence 			/*
2970*efd4c9b6SSteve Lawrence 			 * Process or processes parent has never been seen.
2971*efd4c9b6SSteve Lawrence 			 * Save to deduce a known parent later.
2972*efd4c9b6SSteve Lawrence 			 */
2973*efd4c9b6SSteve Lawrence 			proc_usage = sys;
2974*efd4c9b6SSteve Lawrence 			TIMESTRUC_ADD_TIMESTRUC(proc_usage, user);
2975*efd4c9b6SSteve Lawrence 			TIMESTRUC_DELTA(delta, proc_usage,
2976*efd4c9b6SSteve Lawrence 			    proc->zspr_usage);
2977*efd4c9b6SSteve Lawrence 			proc->zspr_usage = delta;
2978*efd4c9b6SSteve Lawrence 			list_insert_tail(&plist, proc);
2979*efd4c9b6SSteve Lawrence 			continue;
2980*efd4c9b6SSteve Lawrence 		}
2981*efd4c9b6SSteve Lawrence 
2982*efd4c9b6SSteve Lawrence 		/* Add the zone's usage to the pset */
2983*efd4c9b6SSteve Lawrence 		usage = zsd_lookup_insert_usage(ctl, pset, zone);
2984*efd4c9b6SSteve Lawrence 		if (usage == NULL)
2985*efd4c9b6SSteve Lawrence 			goto proc_done;
2986*efd4c9b6SSteve Lawrence 
2987*efd4c9b6SSteve Lawrence 		zsd_mark_pset_usage_found(usage, sched);
2988*efd4c9b6SSteve Lawrence 
2989*efd4c9b6SSteve Lawrence 		/* compute the usage to add for the exited proc */
2990*efd4c9b6SSteve Lawrence 		proc_usage = sys;
2991*efd4c9b6SSteve Lawrence 		TIMESTRUC_ADD_TIMESTRUC(proc_usage, user);
2992*efd4c9b6SSteve Lawrence 		TIMESTRUC_DELTA(delta, proc_usage,
2993*efd4c9b6SSteve Lawrence 		    proc->zspr_usage);
2994*efd4c9b6SSteve Lawrence 
2995*efd4c9b6SSteve Lawrence 		zsd_add_usage(ctl, usage, &delta);
2996*efd4c9b6SSteve Lawrence proc_done:
2997*efd4c9b6SSteve Lawrence 		zsd_flush_proc_info(proc);
2998*efd4c9b6SSteve Lawrence 
2999*efd4c9b6SSteve Lawrence 		if (hrtime_expired == B_TRUE)
3000*efd4c9b6SSteve Lawrence 			break;
3001*efd4c9b6SSteve Lawrence 	}
3002*efd4c9b6SSteve Lawrence 	/*
3003*efd4c9b6SSteve Lawrence 	 * close next accounting file.
3004*efd4c9b6SSteve Lawrence 	 */
3005*efd4c9b6SSteve Lawrence 	if (ctl->zsctl_proc_open_next) {
3006*efd4c9b6SSteve Lawrence 		(void) ea_close(
3007*efd4c9b6SSteve Lawrence 		    &ctl->zsctl_proc_eaf_next);
3008*efd4c9b6SSteve Lawrence 		ctl->zsctl_proc_open_next = 0;
3009*efd4c9b6SSteve Lawrence 		ctl->zsctl_proc_fd_next = -1;
3010*efd4c9b6SSteve Lawrence 	}
3011*efd4c9b6SSteve Lawrence 
3012*efd4c9b6SSteve Lawrence 	/* For the remaining processes, use pset and sched of a known parent */
3013*efd4c9b6SSteve Lawrence 	proc = list_head(&plist);
3014*efd4c9b6SSteve Lawrence 	while (proc != NULL) {
3015*efd4c9b6SSteve Lawrence 		next = proc;
3016*efd4c9b6SSteve Lawrence 		for (;;) {
3017*efd4c9b6SSteve Lawrence 			if (next->zspr_ppid == 0 || next->zspr_ppid == -1) {
3018*efd4c9b6SSteve Lawrence 				/*
3019*efd4c9b6SSteve Lawrence 				 * Kernel process, or parent is unknown, skip
3020*efd4c9b6SSteve Lawrence 				 * process, remove from process list.
3021*efd4c9b6SSteve Lawrence 				 */
3022*efd4c9b6SSteve Lawrence 				tmp = proc;
3023*efd4c9b6SSteve Lawrence 				proc = list_next(&plist, proc);
3024*efd4c9b6SSteve Lawrence 				list_link_init(&tmp->zspr_next);
3025*efd4c9b6SSteve Lawrence 				break;
3026*efd4c9b6SSteve Lawrence 			}
3027*efd4c9b6SSteve Lawrence 			pproc = &(ctl->zsctl_proc_array[next->zspr_ppid]);
3028*efd4c9b6SSteve Lawrence 			if (pproc->zspr_zoneid != proc->zspr_zoneid) {
3029*efd4c9b6SSteve Lawrence 				/*
3030*efd4c9b6SSteve Lawrence 				 * Parent in different zone.  Save process and
3031*efd4c9b6SSteve Lawrence 				 * use zone's default pset and sched below
3032*efd4c9b6SSteve Lawrence 				 */
3033*efd4c9b6SSteve Lawrence 				tmp = proc;
3034*efd4c9b6SSteve Lawrence 				proc = list_next(&plist, proc);
3035*efd4c9b6SSteve Lawrence 				list_remove(&plist, tmp);
3036*efd4c9b6SSteve Lawrence 				list_insert_tail(&pplist, tmp);
3037*efd4c9b6SSteve Lawrence 				break;
3038*efd4c9b6SSteve Lawrence 			}
3039*efd4c9b6SSteve Lawrence 			/* Parent has unknown pset, Search parent's parent  */
3040*efd4c9b6SSteve Lawrence 			if (pproc->zspr_psetid == ZS_PSET_ERROR) {
3041*efd4c9b6SSteve Lawrence 				next = pproc;
3042*efd4c9b6SSteve Lawrence 				continue;
3043*efd4c9b6SSteve Lawrence 			}
3044*efd4c9b6SSteve Lawrence 			/* Found parent with known pset.  Use its info */
3045*efd4c9b6SSteve Lawrence 			proc->zspr_psetid = pproc->zspr_psetid;
3046*efd4c9b6SSteve Lawrence 			proc->zspr_sched = pproc->zspr_sched;
3047*efd4c9b6SSteve Lawrence 			next->zspr_psetid = pproc->zspr_psetid;
3048*efd4c9b6SSteve Lawrence 			next->zspr_sched = pproc->zspr_sched;
3049*efd4c9b6SSteve Lawrence 			zone = zsd_lookup_zone_byid(ctl,
3050*efd4c9b6SSteve Lawrence 			    proc->zspr_zoneid);
3051*efd4c9b6SSteve Lawrence 			if (zone == NULL) {
3052*efd4c9b6SSteve Lawrence 				tmp = proc;
3053*efd4c9b6SSteve Lawrence 				proc = list_next(&plist, proc);
3054*efd4c9b6SSteve Lawrence 				list_remove(&plist, tmp);
3055*efd4c9b6SSteve Lawrence 				list_link_init(&tmp->zspr_next);
3056*efd4c9b6SSteve Lawrence 				break;
3057*efd4c9b6SSteve Lawrence 			}
3058*efd4c9b6SSteve Lawrence 			pset = zsd_lookup_pset_byid(ctl,
3059*efd4c9b6SSteve Lawrence 			    proc->zspr_psetid);
3060*efd4c9b6SSteve Lawrence 			if (pset == NULL) {
3061*efd4c9b6SSteve Lawrence 				tmp = proc;
3062*efd4c9b6SSteve Lawrence 				proc = list_next(&plist, proc);
3063*efd4c9b6SSteve Lawrence 				list_remove(&plist, tmp);
3064*efd4c9b6SSteve Lawrence 				list_link_init(&tmp->zspr_next);
3065*efd4c9b6SSteve Lawrence 				break;
3066*efd4c9b6SSteve Lawrence 			}
3067*efd4c9b6SSteve Lawrence 			/* Add the zone's usage to the pset */
3068*efd4c9b6SSteve Lawrence 			usage = zsd_lookup_insert_usage(ctl, pset, zone);
3069*efd4c9b6SSteve Lawrence 			if (usage == NULL) {
3070*efd4c9b6SSteve Lawrence 				tmp = proc;
3071*efd4c9b6SSteve Lawrence 				proc = list_next(&plist, proc);
3072*efd4c9b6SSteve Lawrence 				list_remove(&plist, tmp);
3073*efd4c9b6SSteve Lawrence 				list_link_init(&tmp->zspr_next);
3074*efd4c9b6SSteve Lawrence 				break;
3075*efd4c9b6SSteve Lawrence 			}
3076*efd4c9b6SSteve Lawrence 			zsd_mark_pset_usage_found(usage, proc->zspr_sched);
3077*efd4c9b6SSteve Lawrence 			zsd_add_usage(ctl, usage, &proc->zspr_usage);
3078*efd4c9b6SSteve Lawrence 			zsd_flush_proc_info(proc);
3079*efd4c9b6SSteve Lawrence 			tmp = proc;
3080*efd4c9b6SSteve Lawrence 			proc = list_next(&plist, proc);
3081*efd4c9b6SSteve Lawrence 			list_remove(&plist, tmp);
3082*efd4c9b6SSteve Lawrence 			list_link_init(&tmp->zspr_next);
3083*efd4c9b6SSteve Lawrence 			break;
3084*efd4c9b6SSteve Lawrence 		}
3085*efd4c9b6SSteve Lawrence 	}
3086*efd4c9b6SSteve Lawrence 	/*
3087*efd4c9b6SSteve Lawrence 	 * Process has never been seen.  Using zone info to
3088*efd4c9b6SSteve Lawrence 	 * determine pset and scheduling class.
3089*efd4c9b6SSteve Lawrence 	 */
3090*efd4c9b6SSteve Lawrence 	proc = list_head(&pplist);
3091*efd4c9b6SSteve Lawrence 	while (proc != NULL) {
3092*efd4c9b6SSteve Lawrence 
3093*efd4c9b6SSteve Lawrence 		zone = zsd_lookup_zone_byid(ctl, proc->zspr_zoneid);
3094*efd4c9b6SSteve Lawrence 		if (zone == NULL)
3095*efd4c9b6SSteve Lawrence 			goto next;
3096*efd4c9b6SSteve Lawrence 		if (zone->zsz_psetid != ZS_PSET_ERROR &&
3097*efd4c9b6SSteve Lawrence 		    zone->zsz_psetid != ZS_PSET_MULTI) {
3098*efd4c9b6SSteve Lawrence 			prev_psetid = zone->zsz_psetid;
3099*efd4c9b6SSteve Lawrence 			pset = zsd_lookup_pset_byid(ctl, prev_psetid);
3100*efd4c9b6SSteve Lawrence 		} else {
3101*efd4c9b6SSteve Lawrence 			pset = zsd_lookup_pset(ctl, zone->zsz_pset, -1);
3102*efd4c9b6SSteve Lawrence 			if (pset != NULL)
3103*efd4c9b6SSteve Lawrence 				prev_psetid = pset->zsp_id;
3104*efd4c9b6SSteve Lawrence 		}
3105*efd4c9b6SSteve Lawrence 		if (pset == NULL)
3106*efd4c9b6SSteve Lawrence 			goto next;
3107*efd4c9b6SSteve Lawrence 
3108*efd4c9b6SSteve Lawrence 		sched = zone->zsz_scheds;
3109*efd4c9b6SSteve Lawrence 		/*
3110*efd4c9b6SSteve Lawrence 		 * Ignore FX high scheduling class if it is not the
3111*efd4c9b6SSteve Lawrence 		 * only scheduling class in the zone.
3112*efd4c9b6SSteve Lawrence 		 */
3113*efd4c9b6SSteve Lawrence 		if (sched != ZS_SCHED_FX_60)
3114*efd4c9b6SSteve Lawrence 			sched &= (~ZS_SCHED_FX_60);
3115*efd4c9b6SSteve Lawrence 		/*
3116*efd4c9b6SSteve Lawrence 		 * If more than one scheduling class has been found
3117*efd4c9b6SSteve Lawrence 		 * in the zone, use zone's default scheduling class for
3118*efd4c9b6SSteve Lawrence 		 * this process.
3119*efd4c9b6SSteve Lawrence 		 */
3120*efd4c9b6SSteve Lawrence 		if ((sched & (sched - 1)) != 0)
3121*efd4c9b6SSteve Lawrence 			sched = zone->zsz_default_sched;
3122*efd4c9b6SSteve Lawrence 
3123*efd4c9b6SSteve Lawrence 		/* Add the zone's usage to the pset */
3124*efd4c9b6SSteve Lawrence 		usage = zsd_lookup_insert_usage(ctl, pset, zone);
3125*efd4c9b6SSteve Lawrence 		if (usage == NULL)
3126*efd4c9b6SSteve Lawrence 			goto next;
3127*efd4c9b6SSteve Lawrence 
3128*efd4c9b6SSteve Lawrence 		zsd_mark_pset_usage_found(usage, sched);
3129*efd4c9b6SSteve Lawrence 		zsd_add_usage(ctl, usage, &proc->zspr_usage);
3130*efd4c9b6SSteve Lawrence next:
3131*efd4c9b6SSteve Lawrence 		tmp = proc;
3132*efd4c9b6SSteve Lawrence 		proc = list_next(&pplist, proc);
3133*efd4c9b6SSteve Lawrence 		zsd_flush_proc_info(tmp);
3134*efd4c9b6SSteve Lawrence 		list_link_init(&tmp->zspr_next);
3135*efd4c9b6SSteve Lawrence 	}
3136*efd4c9b6SSteve Lawrence 	return;
3137*efd4c9b6SSteve Lawrence ea_err:
3138*efd4c9b6SSteve Lawrence 	/*
3139*efd4c9b6SSteve Lawrence 	 * Close the next accounting file if we have not transitioned to it
3140*efd4c9b6SSteve Lawrence 	 * yet.
3141*efd4c9b6SSteve Lawrence 	 */
3142*efd4c9b6SSteve Lawrence 	if (ctl->zsctl_proc_open_next) {
3143*efd4c9b6SSteve Lawrence 		(void) ea_close(&ctl->zsctl_proc_eaf_next);
3144*efd4c9b6SSteve Lawrence 		ctl->zsctl_proc_open_next = 0;
3145*efd4c9b6SSteve Lawrence 		ctl->zsctl_proc_fd_next = -1;
3146*efd4c9b6SSteve Lawrence 	}
3147*efd4c9b6SSteve Lawrence }
3148*efd4c9b6SSteve Lawrence 
3149*efd4c9b6SSteve Lawrence /*
3150*efd4c9b6SSteve Lawrence  * getvmusage(2) uses size_t's in the passwd data structure, which differ
3151*efd4c9b6SSteve Lawrence  * in size for 32bit and 64 bit kernels.  Since this is a contracted interface,
3152*efd4c9b6SSteve Lawrence  * and zonestatd does not necessarily match the kernel's bitness, marshal
3153*efd4c9b6SSteve Lawrence  * results appropriately.
3154*efd4c9b6SSteve Lawrence  */
3155*efd4c9b6SSteve Lawrence static int
3156*efd4c9b6SSteve Lawrence zsd_getvmusage(zsd_ctl_t *ctl, uint_t flags, time_t age, zsd_vmusage64_t *buf,
3157*efd4c9b6SSteve Lawrence     uint64_t *nres)
3158*efd4c9b6SSteve Lawrence {
3159*efd4c9b6SSteve Lawrence 	zsd_vmusage32_t *vmu32;
3160*efd4c9b6SSteve Lawrence 	zsd_vmusage64_t *vmu64;
3161*efd4c9b6SSteve Lawrence 	uint32_t nres32;
3162*efd4c9b6SSteve Lawrence 	int i;
3163*efd4c9b6SSteve Lawrence 	int ret;
3164*efd4c9b6SSteve Lawrence 
3165*efd4c9b6SSteve Lawrence 	if (ctl->zsctl_kern_bits == 32)  {
3166*efd4c9b6SSteve Lawrence 		nres32 = *nres;
3167*efd4c9b6SSteve Lawrence 		ret = syscall(SYS_rusagesys, _RUSAGESYS_GETVMUSAGE,
3168*efd4c9b6SSteve Lawrence 		    flags, age, (uintptr_t)buf, (uintptr_t)&nres32);
3169*efd4c9b6SSteve Lawrence 		*nres = nres32;
3170*efd4c9b6SSteve Lawrence 		if (ret == 0 && buf != NULL) {
3171*efd4c9b6SSteve Lawrence 			/*
3172*efd4c9b6SSteve Lawrence 			 * An array of vmusage32_t's has been returned.
3173*efd4c9b6SSteve Lawrence 			 * Convert it to an array of vmusage64_t's.
3174*efd4c9b6SSteve Lawrence 			 */
3175*efd4c9b6SSteve Lawrence 			vmu32 = (zsd_vmusage32_t *)buf;
3176*efd4c9b6SSteve Lawrence 			vmu64 = (zsd_vmusage64_t *)buf;
3177*efd4c9b6SSteve Lawrence 			for (i = nres32 - 1; i >= 0; i--) {
3178*efd4c9b6SSteve Lawrence 
3179*efd4c9b6SSteve Lawrence 				vmu64[i].vmu_zoneid = vmu32[i].vmu_zoneid;
3180*efd4c9b6SSteve Lawrence 				vmu64[i].vmu_type = vmu32[i].vmu_type;
3181*efd4c9b6SSteve Lawrence 				vmu64[i].vmu_type = vmu32[i].vmu_type;
3182*efd4c9b6SSteve Lawrence 				vmu64[i].vmu_rss_all = vmu32[i].vmu_rss_all;
3183*efd4c9b6SSteve Lawrence 				vmu64[i].vmu_rss_private =
3184*efd4c9b6SSteve Lawrence 				    vmu32[i].vmu_rss_private;
3185*efd4c9b6SSteve Lawrence 				vmu64[i].vmu_rss_shared =
3186*efd4c9b6SSteve Lawrence 				    vmu32[i].vmu_rss_shared;
3187*efd4c9b6SSteve Lawrence 				vmu64[i].vmu_swap_all = vmu32[i].vmu_swap_all;
3188*efd4c9b6SSteve Lawrence 				vmu64[i].vmu_swap_private =
3189*efd4c9b6SSteve Lawrence 				    vmu32[i].vmu_swap_private;
3190*efd4c9b6SSteve Lawrence 				vmu64[i].vmu_swap_shared =
3191*efd4c9b6SSteve Lawrence 				    vmu32[i].vmu_swap_shared;
3192*efd4c9b6SSteve Lawrence 			}
3193*efd4c9b6SSteve Lawrence 		}
3194*efd4c9b6SSteve Lawrence 		return (ret);
3195*efd4c9b6SSteve Lawrence 	} else {
3196*efd4c9b6SSteve Lawrence 		/*
3197*efd4c9b6SSteve Lawrence 		 * kernel is 64 bit, so use 64 bit structures as zonestat
3198*efd4c9b6SSteve Lawrence 		 * expects.
3199*efd4c9b6SSteve Lawrence 		 */
3200*efd4c9b6SSteve Lawrence 		return (syscall(SYS_rusagesys, _RUSAGESYS_GETVMUSAGE,
3201*efd4c9b6SSteve Lawrence 		    flags, age, (uintptr_t)buf, (uintptr_t)nres));
3202*efd4c9b6SSteve Lawrence 
3203*efd4c9b6SSteve Lawrence 	}
3204*efd4c9b6SSteve Lawrence }
3205*efd4c9b6SSteve Lawrence 
3206*efd4c9b6SSteve Lawrence /*
3207*efd4c9b6SSteve Lawrence  * Update the current physical, virtual, and locked memory usage of the
3208*efd4c9b6SSteve Lawrence  * running zones.
3209*efd4c9b6SSteve Lawrence  */
3210*efd4c9b6SSteve Lawrence static void
3211*efd4c9b6SSteve Lawrence zsd_refresh_memory(zsd_ctl_t *ctl, boolean_t init)
3212*efd4c9b6SSteve Lawrence {
3213*efd4c9b6SSteve Lawrence 
3214*efd4c9b6SSteve Lawrence 	uint64_t phys_total;
3215*efd4c9b6SSteve Lawrence 	uint64_t phys_used;
3216*efd4c9b6SSteve Lawrence 	uint64_t phys_zones;
3217*efd4c9b6SSteve Lawrence 	uint64_t phys_zones_overcount;
3218*efd4c9b6SSteve Lawrence 	uint64_t phys_zones_extra;
3219*efd4c9b6SSteve Lawrence 	uint64_t phys_zones_credit;
3220*efd4c9b6SSteve Lawrence 
3221*efd4c9b6SSteve Lawrence 	uint64_t vm_free;
3222*efd4c9b6SSteve Lawrence 	uint64_t vm_used;
3223*efd4c9b6SSteve Lawrence 
3224*efd4c9b6SSteve Lawrence 	uint64_t disk_swap_total;
3225*efd4c9b6SSteve Lawrence 	uint64_t disk_swap_used;	/* disk swap with contents */
3226*efd4c9b6SSteve Lawrence 
3227*efd4c9b6SSteve Lawrence 	uint64_t physmem;
3228*efd4c9b6SSteve Lawrence 	uint64_t pp_kernel;
3229*efd4c9b6SSteve Lawrence 	uint64_t arc_size = 0;
3230*efd4c9b6SSteve Lawrence 	struct anoninfo ani;
3231*efd4c9b6SSteve Lawrence 
3232*efd4c9b6SSteve Lawrence 	int num_swap_devices;
3233*efd4c9b6SSteve Lawrence 	struct swaptable *swt;
3234*efd4c9b6SSteve Lawrence 	struct swapent *swent;
3235*efd4c9b6SSteve Lawrence 	size_t swt_size;
3236*efd4c9b6SSteve Lawrence 	char *path;
3237*efd4c9b6SSteve Lawrence 
3238*efd4c9b6SSteve Lawrence 	zsd_vmusage64_t *vmusage;
3239*efd4c9b6SSteve Lawrence 	uint64_t num_vmusage;
3240*efd4c9b6SSteve Lawrence 
3241*efd4c9b6SSteve Lawrence 	int i, ret;
3242*efd4c9b6SSteve Lawrence 
3243*efd4c9b6SSteve Lawrence 	zsd_system_t *sys;
3244*efd4c9b6SSteve Lawrence 	zsd_zone_t *zone;
3245*efd4c9b6SSteve Lawrence 	int vmu_nzones;
3246*efd4c9b6SSteve Lawrence 
3247*efd4c9b6SSteve Lawrence 	kstat_t *kstat;
3248*efd4c9b6SSteve Lawrence 	char kstat_name[KSTAT_STRLEN];
3249*efd4c9b6SSteve Lawrence 	kstat_named_t *knp;
3250*efd4c9b6SSteve Lawrence 	kid_t kid;
3251*efd4c9b6SSteve Lawrence 
3252*efd4c9b6SSteve Lawrence 	if (init)
3253*efd4c9b6SSteve Lawrence 		return;
3254*efd4c9b6SSteve Lawrence 
3255*efd4c9b6SSteve Lawrence 	sys = ctl->zsctl_system;
3256*efd4c9b6SSteve Lawrence 
3257*efd4c9b6SSteve Lawrence 	/* interrogate swap devices to find the amount of disk swap */
3258*efd4c9b6SSteve Lawrence disk_swap_again:
3259*efd4c9b6SSteve Lawrence 	num_swap_devices = swapctl(SC_GETNSWP, NULL);
3260*efd4c9b6SSteve Lawrence 
3261*efd4c9b6SSteve Lawrence 	if (num_swap_devices == 0) {
3262*efd4c9b6SSteve Lawrence 		sys->zss_swap_total = disk_swap_total = 0;
3263*efd4c9b6SSteve Lawrence 		sys->zss_swap_used = disk_swap_used = 0;
3264*efd4c9b6SSteve Lawrence 		/* No disk swap */
3265*efd4c9b6SSteve Lawrence 		goto disk_swap_done;
3266*efd4c9b6SSteve Lawrence 	}
3267*efd4c9b6SSteve Lawrence 	/* see if swap table needs to be larger */
3268*efd4c9b6SSteve Lawrence 	if (num_swap_devices > ctl->zsctl_swap_cache_num) {
3269*efd4c9b6SSteve Lawrence 		swt_size = sizeof (int) +
3270*efd4c9b6SSteve Lawrence 		    (num_swap_devices * sizeof (struct swapent)) +
3271*efd4c9b6SSteve Lawrence 		    (num_swap_devices * MAXPATHLEN);
3272*efd4c9b6SSteve Lawrence 		if (ctl->zsctl_swap_cache != NULL)
3273*efd4c9b6SSteve Lawrence 			free(ctl->zsctl_swap_cache);
3274*efd4c9b6SSteve Lawrence 
3275*efd4c9b6SSteve Lawrence 		swt = (struct swaptable *)malloc(swt_size);
3276*efd4c9b6SSteve Lawrence 		if (swt == NULL) {
3277*efd4c9b6SSteve Lawrence 			/*
3278*efd4c9b6SSteve Lawrence 			 * Could not allocate to get list of swap devices.
3279*efd4c9b6SSteve Lawrence 			 * Just use data from the most recent read, which will
3280*efd4c9b6SSteve Lawrence 			 * be zero if this is the first read.
3281*efd4c9b6SSteve Lawrence 			 */
3282*efd4c9b6SSteve Lawrence 			zsd_warn(gettext("Unable to allocate to determine "
3283*efd4c9b6SSteve Lawrence 			    "virtual memory"));
3284*efd4c9b6SSteve Lawrence 			disk_swap_total = sys->zss_swap_total;
3285*efd4c9b6SSteve Lawrence 			disk_swap_used = sys->zss_swap_used;
3286*efd4c9b6SSteve Lawrence 			goto disk_swap_done;
3287*efd4c9b6SSteve Lawrence 		}
3288*efd4c9b6SSteve Lawrence 		swent = swt->swt_ent;
3289*efd4c9b6SSteve Lawrence 		path = (char *)swt + (sizeof (int) +
3290*efd4c9b6SSteve Lawrence 		    num_swap_devices * sizeof (swapent_t));
3291*efd4c9b6SSteve Lawrence 		for (i = 0; i < num_swap_devices; i++, swent++) {
3292*efd4c9b6SSteve Lawrence 			swent->ste_path = path;
3293*efd4c9b6SSteve Lawrence 			path += MAXPATHLEN;
3294*efd4c9b6SSteve Lawrence 		}
3295*efd4c9b6SSteve Lawrence 		swt->swt_n = num_swap_devices;
3296*efd4c9b6SSteve Lawrence 		ctl->zsctl_swap_cache = swt;
3297*efd4c9b6SSteve Lawrence 		ctl->zsctl_swap_cache_size = swt_size;
3298*efd4c9b6SSteve Lawrence 		ctl->zsctl_swap_cache_num = num_swap_devices;
3299*efd4c9b6SSteve Lawrence 	}
3300*efd4c9b6SSteve Lawrence 	num_swap_devices = swapctl(SC_LIST, ctl->zsctl_swap_cache);
3301*efd4c9b6SSteve Lawrence 	if (num_swap_devices < 0) {
3302*efd4c9b6SSteve Lawrence 		/* More swap devices have arrived */
3303*efd4c9b6SSteve Lawrence 		if (errno == ENOMEM)
3304*efd4c9b6SSteve Lawrence 			goto disk_swap_again;
3305*efd4c9b6SSteve Lawrence 
3306*efd4c9b6SSteve Lawrence 		zsd_warn(gettext("Unable to determine disk swap devices"));
3307*efd4c9b6SSteve Lawrence 		/* Unexpected error.  Use existing data */
3308*efd4c9b6SSteve Lawrence 		disk_swap_total = sys->zss_swap_total;
3309*efd4c9b6SSteve Lawrence 		disk_swap_used = sys->zss_swap_used;
3310*efd4c9b6SSteve Lawrence 		goto disk_swap_done;
3311*efd4c9b6SSteve Lawrence 	}
3312*efd4c9b6SSteve Lawrence 
3313*efd4c9b6SSteve Lawrence 	/* add up the disk swap */
3314*efd4c9b6SSteve Lawrence 	disk_swap_total = 0;
3315*efd4c9b6SSteve Lawrence 	disk_swap_used = 0;
3316*efd4c9b6SSteve Lawrence 	swent = ctl->zsctl_swap_cache->swt_ent;
3317*efd4c9b6SSteve Lawrence 	for (i = 0; i < num_swap_devices; i++, swent++) {
3318*efd4c9b6SSteve Lawrence 		disk_swap_total += swent->ste_pages;
3319*efd4c9b6SSteve Lawrence 		disk_swap_used += (swent->ste_pages - swent->ste_free);
3320*efd4c9b6SSteve Lawrence 	}
3321*efd4c9b6SSteve Lawrence 	disk_swap_total *= ctl->zsctl_pagesize;
3322*efd4c9b6SSteve Lawrence 	disk_swap_used *= ctl->zsctl_pagesize;
3323*efd4c9b6SSteve Lawrence 
3324*efd4c9b6SSteve Lawrence 	sys->zss_swap_total = disk_swap_total;
3325*efd4c9b6SSteve Lawrence 	sys->zss_swap_used = disk_swap_used;
3326*efd4c9b6SSteve Lawrence 
3327*efd4c9b6SSteve Lawrence disk_swap_done:
3328*efd4c9b6SSteve Lawrence 
3329*efd4c9b6SSteve Lawrence 	/* get system pages kstat */
3330*efd4c9b6SSteve Lawrence 	kid = -1;
3331*efd4c9b6SSteve Lawrence 	kstat = kstat_lookup(ctl->zsctl_kstat_ctl, "unix", 0, "system_pages");
3332*efd4c9b6SSteve Lawrence 	if (kstat == NULL)
3333*efd4c9b6SSteve Lawrence 		zsd_warn(gettext("Unable to lookup system pages kstat"));
3334*efd4c9b6SSteve Lawrence 	else
3335*efd4c9b6SSteve Lawrence 		kid = kstat_read(ctl->zsctl_kstat_ctl, kstat, NULL);
3336*efd4c9b6SSteve Lawrence 
3337*efd4c9b6SSteve Lawrence 	if (kid == -1) {
3338*efd4c9b6SSteve Lawrence 		zsd_warn(gettext("Unable to read system pages kstat"));
3339*efd4c9b6SSteve Lawrence 		return;
3340*efd4c9b6SSteve Lawrence 	} else {
3341*efd4c9b6SSteve Lawrence 		knp = kstat_data_lookup(kstat, "physmem");
3342*efd4c9b6SSteve Lawrence 		if (knp == NULL) {
3343*efd4c9b6SSteve Lawrence 			zsd_warn(gettext("Unable to read physmem"));
3344*efd4c9b6SSteve Lawrence 		} else {
3345*efd4c9b6SSteve Lawrence 			if (knp->data_type == KSTAT_DATA_UINT64)
3346*efd4c9b6SSteve Lawrence 				physmem = knp->value.ui64;
3347*efd4c9b6SSteve Lawrence 			else if (knp->data_type == KSTAT_DATA_UINT32)
3348*efd4c9b6SSteve Lawrence 				physmem = knp->value.ui32;
3349*efd4c9b6SSteve Lawrence 			else
3350*efd4c9b6SSteve Lawrence 				return;
3351*efd4c9b6SSteve Lawrence 		}
3352*efd4c9b6SSteve Lawrence 		knp = kstat_data_lookup(kstat, "pp_kernel");
3353*efd4c9b6SSteve Lawrence 		if (knp == NULL) {
3354*efd4c9b6SSteve Lawrence 			zsd_warn(gettext("Unable to read pp_kernel"));
3355*efd4c9b6SSteve Lawrence 		} else {
3356*efd4c9b6SSteve Lawrence 			if (knp->data_type == KSTAT_DATA_UINT64)
3357*efd4c9b6SSteve Lawrence 				pp_kernel = knp->value.ui64;
3358*efd4c9b6SSteve Lawrence 			else if (knp->data_type == KSTAT_DATA_UINT32)
3359*efd4c9b6SSteve Lawrence 				pp_kernel = knp->value.ui32;
3360*efd4c9b6SSteve Lawrence 			else
3361*efd4c9b6SSteve Lawrence 				return;
3362*efd4c9b6SSteve Lawrence 		}
3363*efd4c9b6SSteve Lawrence 	}
3364*efd4c9b6SSteve Lawrence 	physmem *= ctl->zsctl_pagesize;
3365*efd4c9b6SSteve Lawrence 	pp_kernel *= ctl->zsctl_pagesize;
3366*efd4c9b6SSteve Lawrence 
3367*efd4c9b6SSteve Lawrence 	/* get the zfs arc size if available */
3368*efd4c9b6SSteve Lawrence 	arc_size = 0;
3369*efd4c9b6SSteve Lawrence 	kid = -1;
3370*efd4c9b6SSteve Lawrence 	kstat = kstat_lookup(ctl->zsctl_kstat_ctl, "zfs", 0, "arcstats");
3371*efd4c9b6SSteve Lawrence 	if (kstat != NULL)
3372*efd4c9b6SSteve Lawrence 		kid = kstat_read(ctl->zsctl_kstat_ctl, kstat, NULL);
3373*efd4c9b6SSteve Lawrence 	if (kid != -1) {
3374*efd4c9b6SSteve Lawrence 		knp = kstat_data_lookup(kstat, "size");
3375*efd4c9b6SSteve Lawrence 		if (knp != NULL)
3376*efd4c9b6SSteve Lawrence 			if (knp->data_type == KSTAT_DATA_UINT64)
3377*efd4c9b6SSteve Lawrence 				arc_size = knp->value.ui64;
3378*efd4c9b6SSteve Lawrence 	}
3379*efd4c9b6SSteve Lawrence 
3380*efd4c9b6SSteve Lawrence 	/* Try to get swap information */
3381*efd4c9b6SSteve Lawrence 	if (swapctl(SC_AINFO, &ani) < 0) {
3382*efd4c9b6SSteve Lawrence 		zsd_warn(gettext("Unable to get swap info"));
3383*efd4c9b6SSteve Lawrence 		return;
3384*efd4c9b6SSteve Lawrence 	}
3385*efd4c9b6SSteve Lawrence 
3386*efd4c9b6SSteve Lawrence vmusage_again:
3387*efd4c9b6SSteve Lawrence 	/* getvmusage to get physical memory usage */
3388*efd4c9b6SSteve Lawrence 	vmusage = ctl->zsctl_vmusage_cache;
3389*efd4c9b6SSteve Lawrence 	num_vmusage = ctl->zsctl_vmusage_cache_num;
3390*efd4c9b6SSteve Lawrence 
3391*efd4c9b6SSteve Lawrence 	ret = zsd_getvmusage(ctl, VMUSAGE_SYSTEM | VMUSAGE_ALL_ZONES, 0,
3392*efd4c9b6SSteve Lawrence 	    vmusage, &num_vmusage);
3393*efd4c9b6SSteve Lawrence 
3394*efd4c9b6SSteve Lawrence 	if (ret != 0) {
3395*efd4c9b6SSteve Lawrence 		/* Unexpected error.  Use existing data */
3396*efd4c9b6SSteve Lawrence 		if (errno != EOVERFLOW) {
3397*efd4c9b6SSteve Lawrence 			zsd_warn(gettext(
3398*efd4c9b6SSteve Lawrence 			    "Unable to read physical memory usage"));
3399*efd4c9b6SSteve Lawrence 			phys_zones = sys->zss_ram_zones;
3400*efd4c9b6SSteve Lawrence 			goto vmusage_done;
3401*efd4c9b6SSteve Lawrence 		}
3402*efd4c9b6SSteve Lawrence 	}
3403*efd4c9b6SSteve Lawrence 	/* vmusage results cache too small */
3404*efd4c9b6SSteve Lawrence 	if (num_vmusage > ctl->zsctl_vmusage_cache_num) {
3405*efd4c9b6SSteve Lawrence 
3406*efd4c9b6SSteve Lawrence 		size_t size = sizeof (zsd_vmusage64_t) * num_vmusage;
3407*efd4c9b6SSteve Lawrence 
3408*efd4c9b6SSteve Lawrence 		if (ctl->zsctl_vmusage_cache != NULL)
3409*efd4c9b6SSteve Lawrence 			free(ctl->zsctl_vmusage_cache);
3410*efd4c9b6SSteve Lawrence 		vmusage = (zsd_vmusage64_t *)malloc(size);
3411*efd4c9b6SSteve Lawrence 		if (vmusage == NULL) {
3412*efd4c9b6SSteve Lawrence 			zsd_warn(gettext("Unable to alloc to determine "
3413*efd4c9b6SSteve Lawrence 			    "physical memory usage"));
3414*efd4c9b6SSteve Lawrence 			phys_zones = sys->zss_ram_zones;
3415*efd4c9b6SSteve Lawrence 			goto vmusage_done;
3416*efd4c9b6SSteve Lawrence 		}
3417*efd4c9b6SSteve Lawrence 		ctl->zsctl_vmusage_cache = vmusage;
3418*efd4c9b6SSteve Lawrence 		ctl->zsctl_vmusage_cache_num = num_vmusage;
3419*efd4c9b6SSteve Lawrence 		goto vmusage_again;
3420*efd4c9b6SSteve Lawrence 	}
3421*efd4c9b6SSteve Lawrence 
3422*efd4c9b6SSteve Lawrence 	phys_zones_overcount = 0;
3423*efd4c9b6SSteve Lawrence 	vmu_nzones = 0;
3424*efd4c9b6SSteve Lawrence 	for (i = 0; i < num_vmusage; i++) {
3425*efd4c9b6SSteve Lawrence 		switch (vmusage[i].vmu_type) {
3426*efd4c9b6SSteve Lawrence 		case VMUSAGE_SYSTEM:
3427*efd4c9b6SSteve Lawrence 			/* total pages backing user process mappings */
3428*efd4c9b6SSteve Lawrence 			phys_zones = sys->zss_ram_zones =
3429*efd4c9b6SSteve Lawrence 			    vmusage[i].vmu_rss_all;
3430*efd4c9b6SSteve Lawrence 			break;
3431*efd4c9b6SSteve Lawrence 		case VMUSAGE_ZONE:
3432*efd4c9b6SSteve Lawrence 			vmu_nzones++;
3433*efd4c9b6SSteve Lawrence 			phys_zones_overcount += vmusage[i].vmu_rss_all;
3434*efd4c9b6SSteve Lawrence 			zone = zsd_lookup_zone_byid(ctl, vmusage[i].vmu_id);
3435*efd4c9b6SSteve Lawrence 			if (zone != NULL)
3436*efd4c9b6SSteve Lawrence 				zone->zsz_usage_ram = vmusage[i].vmu_rss_all;
3437*efd4c9b6SSteve Lawrence 			break;
3438*efd4c9b6SSteve Lawrence 		default:
3439*efd4c9b6SSteve Lawrence 			break;
3440*efd4c9b6SSteve Lawrence 		}
3441*efd4c9b6SSteve Lawrence 	}
3442*efd4c9b6SSteve Lawrence 	/*
3443*efd4c9b6SSteve Lawrence 	 * Figure how much memory was double counted due to text sharing
3444*efd4c9b6SSteve Lawrence 	 * between zones.  Credit this back so that the sum of the zones
3445*efd4c9b6SSteve Lawrence 	 * equals the total zone ram usage;
3446*efd4c9b6SSteve Lawrence 	 */
3447*efd4c9b6SSteve Lawrence 	phys_zones_extra = phys_zones_overcount - phys_zones;
3448*efd4c9b6SSteve Lawrence 	phys_zones_credit = phys_zones_extra / vmu_nzones;
3449*efd4c9b6SSteve Lawrence 
3450*efd4c9b6SSteve Lawrence vmusage_done:
3451*efd4c9b6SSteve Lawrence 
3452*efd4c9b6SSteve Lawrence 	/* walk the zones to get swap and locked kstats.  Fetch ram cap. */
3453*efd4c9b6SSteve Lawrence 	sys->zss_locked_zones = 0;
3454*efd4c9b6SSteve Lawrence 	sys->zss_vm_zones = 0;
3455*efd4c9b6SSteve Lawrence 	for (zone = list_head(&ctl->zsctl_zones); zone != NULL;
3456*efd4c9b6SSteve Lawrence 	    zone = list_next(&ctl->zsctl_zones, zone)) {
3457*efd4c9b6SSteve Lawrence 
3458*efd4c9b6SSteve Lawrence 		/* If zone halted during interval, show memory usage as none */
3459*efd4c9b6SSteve Lawrence 		if (zone->zsz_active == B_FALSE ||
3460*efd4c9b6SSteve Lawrence 		    zone->zsz_deleted == B_TRUE) {
3461*efd4c9b6SSteve Lawrence 			zone->zsz_usage_ram = 0;
3462*efd4c9b6SSteve Lawrence 			zone->zsz_usage_vm = 0;
3463*efd4c9b6SSteve Lawrence 			zone->zsz_usage_locked = 0;
3464*efd4c9b6SSteve Lawrence 			continue;
3465*efd4c9b6SSteve Lawrence 		}
3466*efd4c9b6SSteve Lawrence 
3467*efd4c9b6SSteve Lawrence 		if (phys_zones_credit > 0) {
3468*efd4c9b6SSteve Lawrence 			if (zone->zsz_usage_ram > phys_zones_credit) {
3469*efd4c9b6SSteve Lawrence 				zone->zsz_usage_ram -= phys_zones_credit;
3470*efd4c9b6SSteve Lawrence 			}
3471*efd4c9b6SSteve Lawrence 		}
3472*efd4c9b6SSteve Lawrence 		/*
3473*efd4c9b6SSteve Lawrence 		 * Get zone's swap usage.  Since zone could have halted,
3474*efd4c9b6SSteve Lawrence 		 * treats as zero if cannot read
3475*efd4c9b6SSteve Lawrence 		 */
3476*efd4c9b6SSteve Lawrence 		zone->zsz_usage_vm = 0;
3477*efd4c9b6SSteve Lawrence 		(void) snprintf(kstat_name, sizeof (kstat_name),
3478*efd4c9b6SSteve Lawrence 		    "swapresv_zone_%d", zone->zsz_id);
3479*efd4c9b6SSteve Lawrence 		kid = -1;
3480*efd4c9b6SSteve Lawrence 		kstat = kstat_lookup(ctl->zsctl_kstat_ctl, "caps",
3481*efd4c9b6SSteve Lawrence 		    zone->zsz_id, kstat_name);
3482*efd4c9b6SSteve Lawrence 		if (kstat != NULL)
3483*efd4c9b6SSteve Lawrence 			kid = kstat_read(ctl->zsctl_kstat_ctl, kstat, NULL);
3484*efd4c9b6SSteve Lawrence 		if (kid != -1) {
3485*efd4c9b6SSteve Lawrence 			knp = kstat_data_lookup(kstat, "usage");
3486*efd4c9b6SSteve Lawrence 			if (knp != NULL &&
3487*efd4c9b6SSteve Lawrence 			    knp->data_type == KSTAT_DATA_UINT64) {
3488*efd4c9b6SSteve Lawrence 				zone->zsz_usage_vm = knp->value.ui64;
3489*efd4c9b6SSteve Lawrence 				sys->zss_vm_zones += knp->value.ui64;
3490*efd4c9b6SSteve Lawrence 			}
3491*efd4c9b6SSteve Lawrence 		}
3492*efd4c9b6SSteve Lawrence 		/*
3493*efd4c9b6SSteve Lawrence 		 * Get zone's locked usage.  Since zone could have halted,
3494*efd4c9b6SSteve Lawrence 		 * treats as zero if cannot read
3495*efd4c9b6SSteve Lawrence 		 */
3496*efd4c9b6SSteve Lawrence 		zone->zsz_usage_locked = 0;
3497*efd4c9b6SSteve Lawrence 		(void) snprintf(kstat_name, sizeof (kstat_name),
3498*efd4c9b6SSteve Lawrence 		    "lockedmem_zone_%d", zone->zsz_id);
3499*efd4c9b6SSteve Lawrence 		kid = -1;
3500*efd4c9b6SSteve Lawrence 		kstat = kstat_lookup(ctl->zsctl_kstat_ctl, "caps",
3501*efd4c9b6SSteve Lawrence 		    zone->zsz_id, kstat_name);
3502*efd4c9b6SSteve Lawrence 		if (kstat != NULL)
3503*efd4c9b6SSteve Lawrence 			kid = kstat_read(ctl->zsctl_kstat_ctl, kstat, NULL);
3504*efd4c9b6SSteve Lawrence 		if (kid != -1) {
3505*efd4c9b6SSteve Lawrence 			knp = kstat_data_lookup(kstat, "usage");
3506*efd4c9b6SSteve Lawrence 			if (knp != NULL &&
3507*efd4c9b6SSteve Lawrence 			    knp->data_type == KSTAT_DATA_UINT64) {
3508*efd4c9b6SSteve Lawrence 				zone->zsz_usage_locked = knp->value.ui64;
3509*efd4c9b6SSteve Lawrence 				/*
3510*efd4c9b6SSteve Lawrence 				 * Since locked memory accounting for zones
3511*efd4c9b6SSteve Lawrence 				 * can double count ddi locked memory, cap each
3512*efd4c9b6SSteve Lawrence 				 * zone's locked usage at its ram usage.
3513*efd4c9b6SSteve Lawrence 				 */
3514*efd4c9b6SSteve Lawrence 				if (zone->zsz_usage_locked >
3515*efd4c9b6SSteve Lawrence 				    zone->zsz_usage_ram)
3516*efd4c9b6SSteve Lawrence 					zone->zsz_usage_locked =
3517*efd4c9b6SSteve Lawrence 					    zone->zsz_usage_ram;
3518*efd4c9b6SSteve Lawrence 				sys->zss_locked_zones +=
3519*efd4c9b6SSteve Lawrence 				    zone->zsz_usage_locked;
3520*efd4c9b6SSteve Lawrence 			}
3521*efd4c9b6SSteve Lawrence 		}
3522*efd4c9b6SSteve Lawrence 	}
3523*efd4c9b6SSteve Lawrence 
3524*efd4c9b6SSteve Lawrence 	phys_total =
3525*efd4c9b6SSteve Lawrence 	    sysconf(_SC_PHYS_PAGES) * ctl->zsctl_pagesize;
3526*efd4c9b6SSteve Lawrence 
3527*efd4c9b6SSteve Lawrence 	phys_used = (sysconf(_SC_PHYS_PAGES) - sysconf(_SC_AVPHYS_PAGES))
3528*efd4c9b6SSteve Lawrence 	    * ctl->zsctl_pagesize;
3529*efd4c9b6SSteve Lawrence 
3530*efd4c9b6SSteve Lawrence 	/* Compute remaining statistics */
3531*efd4c9b6SSteve Lawrence 	sys->zss_ram_total = phys_total;
3532*efd4c9b6SSteve Lawrence 	sys->zss_ram_zones = phys_zones;
3533*efd4c9b6SSteve Lawrence 	sys->zss_ram_kern = phys_used - phys_zones - arc_size;
3534*efd4c9b6SSteve Lawrence 
3535*efd4c9b6SSteve Lawrence 	/*
3536*efd4c9b6SSteve Lawrence 	 * The total for kernel locked memory should include
3537*efd4c9b6SSteve Lawrence 	 * segkp locked pages, but oh well.  The arc size is subtracted,
3538*efd4c9b6SSteve Lawrence 	 * as that physical memory is reclaimable.
3539*efd4c9b6SSteve Lawrence 	 */
3540*efd4c9b6SSteve Lawrence 	sys->zss_locked_kern = pp_kernel - arc_size;
3541*efd4c9b6SSteve Lawrence 	/* Add memory used by kernel startup and obp to kernel locked */
3542*efd4c9b6SSteve Lawrence 	if ((phys_total - physmem) > 0)
3543*efd4c9b6SSteve Lawrence 		sys->zss_locked_kern += phys_total - physmem;
3544*efd4c9b6SSteve Lawrence 
3545*efd4c9b6SSteve Lawrence 	/*
3546*efd4c9b6SSteve Lawrence 	 * Add in the portion of (RAM+DISK) that is not available as swap,
3547*efd4c9b6SSteve Lawrence 	 * and consider it swap used by the kernel.
3548*efd4c9b6SSteve Lawrence 	 */
3549*efd4c9b6SSteve Lawrence 	sys->zss_vm_total = phys_total + disk_swap_total;
3550*efd4c9b6SSteve Lawrence 	vm_free = (ani.ani_max - ani.ani_resv) * ctl->zsctl_pagesize;
3551*efd4c9b6SSteve Lawrence 	vm_used = sys->zss_vm_total - vm_free;
3552*efd4c9b6SSteve Lawrence 	sys->zss_vm_kern = vm_used - sys->zss_vm_zones - arc_size;
3553*efd4c9b6SSteve Lawrence }
3554*efd4c9b6SSteve Lawrence 
3555*efd4c9b6SSteve Lawrence /*
3556*efd4c9b6SSteve Lawrence  * Charge each cpu's usage to its processor sets.  Also add the cpu's total
3557*efd4c9b6SSteve Lawrence  * time to each zone using the processor set.  This tracks the maximum
3558*efd4c9b6SSteve Lawrence  * amount of cpu time that a zone could have used.
3559*efd4c9b6SSteve Lawrence  */
3560*efd4c9b6SSteve Lawrence static void
3561*efd4c9b6SSteve Lawrence zsd_refresh_cpu_stats(zsd_ctl_t *ctl, boolean_t init)
3562*efd4c9b6SSteve Lawrence {
3563*efd4c9b6SSteve Lawrence 	zsd_system_t *sys;
3564*efd4c9b6SSteve Lawrence 	zsd_zone_t *zone;
3565*efd4c9b6SSteve Lawrence 	zsd_pset_usage_t *usage;
3566*efd4c9b6SSteve Lawrence 	zsd_cpu_t *cpu;
3567*efd4c9b6SSteve Lawrence 	zsd_cpu_t *cpu_next;
3568*efd4c9b6SSteve Lawrence 	zsd_pset_t *pset;
3569*efd4c9b6SSteve Lawrence 	timestruc_t ts;
3570*efd4c9b6SSteve Lawrence 	uint64_t hrtime;
3571*efd4c9b6SSteve Lawrence 	timestruc_t delta;
3572*efd4c9b6SSteve Lawrence 
3573*efd4c9b6SSteve Lawrence 	/* Update the per-cpu kstat data */
3574*efd4c9b6SSteve Lawrence 	cpu_next = list_head(&ctl->zsctl_cpus);
3575*efd4c9b6SSteve Lawrence 	while (cpu_next != NULL) {
3576*efd4c9b6SSteve Lawrence 		cpu = cpu_next;
3577*efd4c9b6SSteve Lawrence 		cpu_next = list_next(&ctl->zsctl_cpus, cpu);
3578*efd4c9b6SSteve Lawrence 		zsd_update_cpu_stats(ctl, cpu);
3579*efd4c9b6SSteve Lawrence 	}
3580*efd4c9b6SSteve Lawrence 	/* Update the elapsed real time */
3581*efd4c9b6SSteve Lawrence 	hrtime = gethrtime();
3582*efd4c9b6SSteve Lawrence 	if (init) {
3583*efd4c9b6SSteve Lawrence 		/* first time around, store hrtime for future comparision */
3584*efd4c9b6SSteve Lawrence 		ctl->zsctl_hrtime = hrtime;
3585*efd4c9b6SSteve Lawrence 		ctl->zsctl_hrtime_prev = hrtime;
3586*efd4c9b6SSteve Lawrence 
3587*efd4c9b6SSteve Lawrence 	} else {
3588*efd4c9b6SSteve Lawrence 		/* Compute increase in hrtime since the most recent read */
3589*efd4c9b6SSteve Lawrence 		ctl->zsctl_hrtime_prev = ctl->zsctl_hrtime;
3590*efd4c9b6SSteve Lawrence 		ctl->zsctl_hrtime = hrtime;
3591*efd4c9b6SSteve Lawrence 		if ((hrtime = hrtime - ctl->zsctl_hrtime_prev) > 0)
3592*efd4c9b6SSteve Lawrence 			TIMESTRUC_ADD_NANOSEC(ctl->zsctl_hrtime_total, hrtime);
3593*efd4c9b6SSteve Lawrence 	}
3594*efd4c9b6SSteve Lawrence 
3595*efd4c9b6SSteve Lawrence 	/* On initialization, all psets have zero time  */
3596*efd4c9b6SSteve Lawrence 	if (init)
3597*efd4c9b6SSteve Lawrence 		return;
3598*efd4c9b6SSteve Lawrence 
3599*efd4c9b6SSteve Lawrence 	for (pset = list_head(&ctl->zsctl_psets); pset != NULL;
3600*efd4c9b6SSteve Lawrence 	    pset = list_next(&ctl->zsctl_psets, pset)) {
3601*efd4c9b6SSteve Lawrence 
3602*efd4c9b6SSteve Lawrence 		if (pset->zsp_active == B_FALSE) {
3603*efd4c9b6SSteve Lawrence 			zsd_warn(gettext("Internal error,inactive pset found"));
3604*efd4c9b6SSteve Lawrence 			continue;
3605*efd4c9b6SSteve Lawrence 		}
3606*efd4c9b6SSteve Lawrence 
3607*efd4c9b6SSteve Lawrence 		/* sum total used time for pset */
3608*efd4c9b6SSteve Lawrence 		ts.tv_sec = 0;
3609*efd4c9b6SSteve Lawrence 		ts.tv_nsec = 0;
3610*efd4c9b6SSteve Lawrence 		TIMESTRUC_ADD_TIMESTRUC(ts, pset->zsp_intr);
3611*efd4c9b6SSteve Lawrence 		TIMESTRUC_ADD_TIMESTRUC(ts, pset->zsp_kern);
3612*efd4c9b6SSteve Lawrence 		TIMESTRUC_ADD_TIMESTRUC(ts, pset->zsp_user);
3613*efd4c9b6SSteve Lawrence 		/* kernel time in pset is total time minus zone time */
3614*efd4c9b6SSteve Lawrence 		TIMESTRUC_DELTA(pset->zsp_usage_kern, ts,
3615*efd4c9b6SSteve Lawrence 		    pset->zsp_usage_zones);
3616*efd4c9b6SSteve Lawrence 		if (pset->zsp_usage_kern.tv_sec < 0 ||
3617*efd4c9b6SSteve Lawrence 		    pset->zsp_usage_kern.tv_nsec < 0) {
3618*efd4c9b6SSteve Lawrence 			pset->zsp_usage_kern.tv_sec = 0;
3619*efd4c9b6SSteve Lawrence 			pset->zsp_usage_kern.tv_nsec = 0;
3620*efd4c9b6SSteve Lawrence 		}
3621*efd4c9b6SSteve Lawrence 		/* Total pset elapsed time is used time plus idle time */
3622*efd4c9b6SSteve Lawrence 		TIMESTRUC_ADD_TIMESTRUC(ts, pset->zsp_idle);
3623*efd4c9b6SSteve Lawrence 
3624*efd4c9b6SSteve Lawrence 		TIMESTRUC_DELTA(delta, ts, pset->zsp_total_time);
3625*efd4c9b6SSteve Lawrence 
3626*efd4c9b6SSteve Lawrence 		for (usage = list_head(&pset->zsp_usage_list); usage != NULL;
3627*efd4c9b6SSteve Lawrence 		    usage = list_next(&pset->zsp_usage_list, usage)) {
3628*efd4c9b6SSteve Lawrence 
3629*efd4c9b6SSteve Lawrence 			zone = usage->zsu_zone;
3630*efd4c9b6SSteve Lawrence 			if (usage->zsu_cpu_shares != ZS_LIMIT_NONE &&
3631*efd4c9b6SSteve Lawrence 			    usage->zsu_cpu_shares != ZS_SHARES_UNLIMITED &&
3632*efd4c9b6SSteve Lawrence 			    usage->zsu_cpu_shares != 0) {
3633*efd4c9b6SSteve Lawrence 				/*
3634*efd4c9b6SSteve Lawrence 				 * Figure out how many nanoseconds of share time
3635*efd4c9b6SSteve Lawrence 				 * to give to the zone
3636*efd4c9b6SSteve Lawrence 				 */
3637*efd4c9b6SSteve Lawrence 				hrtime = delta.tv_sec;
3638*efd4c9b6SSteve Lawrence 				hrtime *= NANOSEC;
3639*efd4c9b6SSteve Lawrence 				hrtime += delta.tv_nsec;
3640*efd4c9b6SSteve Lawrence 				hrtime *= usage->zsu_cpu_shares;
3641*efd4c9b6SSteve Lawrence 				hrtime /= pset->zsp_cpu_shares;
3642*efd4c9b6SSteve Lawrence 				TIMESTRUC_ADD_NANOSEC(zone->zsz_share_time,
3643*efd4c9b6SSteve Lawrence 				    hrtime);
3644*efd4c9b6SSteve Lawrence 			}
3645*efd4c9b6SSteve Lawrence 			/* Add pset time to each zone using pset */
3646*efd4c9b6SSteve Lawrence 			TIMESTRUC_ADD_TIMESTRUC(zone->zsz_pset_time, delta);
3647*efd4c9b6SSteve Lawrence 
3648*efd4c9b6SSteve Lawrence 			zone->zsz_cpus_online += pset->zsp_online;
3649*efd4c9b6SSteve Lawrence 		}
3650*efd4c9b6SSteve Lawrence 		pset->zsp_total_time = ts;
3651*efd4c9b6SSteve Lawrence 	}
3652*efd4c9b6SSteve Lawrence 
3653*efd4c9b6SSteve Lawrence 	for (zone = list_head(&ctl->zsctl_zones); zone != NULL;
3654*efd4c9b6SSteve Lawrence 	    zone = list_next(&ctl->zsctl_zones, zone)) {
3655*efd4c9b6SSteve Lawrence 
3656*efd4c9b6SSteve Lawrence 		/* update cpu cap tracking if the zone has a cpu cap */
3657*efd4c9b6SSteve Lawrence 		if (zone->zsz_cpu_cap != ZS_LIMIT_NONE) {
3658*efd4c9b6SSteve Lawrence 			uint64_t elapsed;
3659*efd4c9b6SSteve Lawrence 
3660*efd4c9b6SSteve Lawrence 			elapsed = ctl->zsctl_hrtime - ctl->zsctl_hrtime_prev;
3661*efd4c9b6SSteve Lawrence 			elapsed *= zone->zsz_cpu_cap;
3662*efd4c9b6SSteve Lawrence 			elapsed = elapsed / 100;
3663*efd4c9b6SSteve Lawrence 			TIMESTRUC_ADD_NANOSEC(zone->zsz_cap_time, elapsed);
3664*efd4c9b6SSteve Lawrence 		}
3665*efd4c9b6SSteve Lawrence 	}
3666*efd4c9b6SSteve Lawrence 	sys = ctl->zsctl_system;
3667*efd4c9b6SSteve Lawrence 	ts.tv_sec = 0;
3668*efd4c9b6SSteve Lawrence 	ts.tv_nsec = 0;
3669*efd4c9b6SSteve Lawrence 	TIMESTRUC_ADD_TIMESTRUC(ts, sys->zss_intr);
3670*efd4c9b6SSteve Lawrence 	TIMESTRUC_ADD_TIMESTRUC(ts, sys->zss_kern);
3671*efd4c9b6SSteve Lawrence 	TIMESTRUC_ADD_TIMESTRUC(ts, sys->zss_user);
3672*efd4c9b6SSteve Lawrence 
3673*efd4c9b6SSteve Lawrence 	/* kernel time in pset is total time minus zone time */
3674*efd4c9b6SSteve Lawrence 	TIMESTRUC_DELTA(sys->zss_cpu_usage_kern, ts,
3675*efd4c9b6SSteve Lawrence 	    sys->zss_cpu_usage_zones);
3676*efd4c9b6SSteve Lawrence 	if (sys->zss_cpu_usage_kern.tv_sec < 0 ||
3677*efd4c9b6SSteve Lawrence 	    sys->zss_cpu_usage_kern.tv_nsec < 0) {
3678*efd4c9b6SSteve Lawrence 		sys->zss_cpu_usage_kern.tv_sec = 0;
3679*efd4c9b6SSteve Lawrence 		sys->zss_cpu_usage_kern.tv_nsec = 0;
3680*efd4c9b6SSteve Lawrence 	}
3681*efd4c9b6SSteve Lawrence 	/* Total pset elapsed time is used time plus idle time */
3682*efd4c9b6SSteve Lawrence 	TIMESTRUC_ADD_TIMESTRUC(ts, sys->zss_idle);
3683*efd4c9b6SSteve Lawrence 	sys->zss_cpu_total_time = ts;
3684*efd4c9b6SSteve Lawrence }
3685*efd4c9b6SSteve Lawrence 
3686*efd4c9b6SSteve Lawrence /*
3687*efd4c9b6SSteve Lawrence  * Saves current usage data to a cache that is read by libzonestat when
3688*efd4c9b6SSteve Lawrence  * calling zs_usage_read().
3689*efd4c9b6SSteve Lawrence  *
3690*efd4c9b6SSteve Lawrence  * All pointers in the cached data structure are set to NULL.  When
3691*efd4c9b6SSteve Lawrence  * libzonestat reads the cached data, it will set the pointers relative to
3692*efd4c9b6SSteve Lawrence  * its address space.
3693*efd4c9b6SSteve Lawrence  */
3694*efd4c9b6SSteve Lawrence static void
3695*efd4c9b6SSteve Lawrence zsd_usage_cache_update(zsd_ctl_t *ctl)
3696*efd4c9b6SSteve Lawrence {
3697*efd4c9b6SSteve Lawrence 	zs_usage_cache_t *cache;
3698*efd4c9b6SSteve Lawrence 	zs_usage_cache_t *old;
3699*efd4c9b6SSteve Lawrence 	zs_usage_t *usage;
3700*efd4c9b6SSteve Lawrence 
3701*efd4c9b6SSteve Lawrence 	zs_system_t *sys;
3702*efd4c9b6SSteve Lawrence 	zsd_system_t *dsys;
3703*efd4c9b6SSteve Lawrence 	zs_zone_t *zone = NULL;
3704*efd4c9b6SSteve Lawrence 	zsd_zone_t *dzone;
3705*efd4c9b6SSteve Lawrence 	zs_pset_t *pset = NULL;
3706*efd4c9b6SSteve Lawrence 	zsd_pset_t *dpset;
3707*efd4c9b6SSteve Lawrence 	zs_pset_zone_t *pusage;
3708*efd4c9b6SSteve Lawrence 	zsd_pset_usage_t *dpusage;
3709*efd4c9b6SSteve Lawrence 
3710*efd4c9b6SSteve Lawrence 	char *next;
3711*efd4c9b6SSteve Lawrence 	uint_t size, i, j;
3712*efd4c9b6SSteve Lawrence 
3713*efd4c9b6SSteve Lawrence 	size =
3714*efd4c9b6SSteve Lawrence 	    sizeof (zs_usage_cache_t) +
3715*efd4c9b6SSteve Lawrence 	    sizeof (zs_usage_t) +
3716*efd4c9b6SSteve Lawrence 	    sizeof (zs_system_t) +
3717*efd4c9b6SSteve Lawrence 	    sizeof (zs_zone_t) * ctl->zsctl_nzones +
3718*efd4c9b6SSteve Lawrence 	    sizeof (zs_pset_t) *  ctl->zsctl_npsets +
3719*efd4c9b6SSteve Lawrence 	    sizeof (zs_pset_zone_t) * ctl->zsctl_npset_usages;
3720*efd4c9b6SSteve Lawrence 
3721*efd4c9b6SSteve Lawrence 	cache = (zs_usage_cache_t *)malloc(size);
3722*efd4c9b6SSteve Lawrence 	if (cache == NULL) {
3723*efd4c9b6SSteve Lawrence 		zsd_warn(gettext("Unable to allocate usage cache\n"));
3724*efd4c9b6SSteve Lawrence 		return;
3725*efd4c9b6SSteve Lawrence 	}
3726*efd4c9b6SSteve Lawrence 
3727*efd4c9b6SSteve Lawrence 	next = (char *)cache;
3728*efd4c9b6SSteve Lawrence 	cache->zsuc_size = size - sizeof (zs_usage_cache_t);
3729*efd4c9b6SSteve Lawrence 	next += sizeof (zs_usage_cache_t);
3730*efd4c9b6SSteve Lawrence 
3731*efd4c9b6SSteve Lawrence 	/* LINTED */
3732*efd4c9b6SSteve Lawrence 	usage = cache->zsuc_usage = (zs_usage_t *)next;
3733*efd4c9b6SSteve Lawrence 	next += sizeof (zs_usage_t);
3734*efd4c9b6SSteve Lawrence 	usage->zsu_start = g_start;
3735*efd4c9b6SSteve Lawrence 	usage->zsu_hrstart = g_hrstart;
3736*efd4c9b6SSteve Lawrence 	usage->zsu_time = g_now;
3737*efd4c9b6SSteve Lawrence 	usage->zsu_hrtime = g_hrnow;
3738*efd4c9b6SSteve Lawrence 	usage->zsu_nzones = ctl->zsctl_nzones;
3739*efd4c9b6SSteve Lawrence 	usage->zsu_npsets = ctl->zsctl_npsets;
3740*efd4c9b6SSteve Lawrence 	usage->zsu_system = NULL;
3741*efd4c9b6SSteve Lawrence 
3742*efd4c9b6SSteve Lawrence 	/* LINTED */
3743*efd4c9b6SSteve Lawrence 	sys = (zs_system_t *)next;
3744*efd4c9b6SSteve Lawrence 	next += sizeof (zs_system_t);
3745*efd4c9b6SSteve Lawrence 	dsys = ctl->zsctl_system;
3746*efd4c9b6SSteve Lawrence 	sys->zss_ram_total = dsys->zss_ram_total;
3747*efd4c9b6SSteve Lawrence 	sys->zss_ram_kern = dsys->zss_ram_kern;
3748*efd4c9b6SSteve Lawrence 	sys->zss_ram_zones = dsys->zss_ram_zones;
3749*efd4c9b6SSteve Lawrence 	sys->zss_locked_kern = dsys->zss_locked_kern;
3750*efd4c9b6SSteve Lawrence 	sys->zss_locked_zones = dsys->zss_locked_zones;
3751*efd4c9b6SSteve Lawrence 	sys->zss_vm_total = dsys->zss_vm_total;
3752*efd4c9b6SSteve Lawrence 	sys->zss_vm_kern = dsys->zss_vm_kern;
3753*efd4c9b6SSteve Lawrence 	sys->zss_vm_zones = dsys->zss_vm_zones;
3754*efd4c9b6SSteve Lawrence 	sys->zss_swap_total = dsys->zss_swap_total;
3755*efd4c9b6SSteve Lawrence 	sys->zss_swap_used = dsys->zss_swap_used;
3756*efd4c9b6SSteve Lawrence 	sys->zss_ncpus = dsys->zss_ncpus;
3757*efd4c9b6SSteve Lawrence 	sys->zss_ncpus_online = dsys->zss_ncpus_online;
3758*efd4c9b6SSteve Lawrence 
3759*efd4c9b6SSteve Lawrence 	sys->zss_processes_max = dsys->zss_maxpid;
3760*efd4c9b6SSteve Lawrence 	sys->zss_lwps_max = dsys->zss_lwps_max;
3761*efd4c9b6SSteve Lawrence 	sys->zss_shm_max = dsys->zss_shm_max;
3762*efd4c9b6SSteve Lawrence 	sys->zss_shmids_max = dsys->zss_shmids_max;
3763*efd4c9b6SSteve Lawrence 	sys->zss_semids_max = dsys->zss_semids_max;
3764*efd4c9b6SSteve Lawrence 	sys->zss_msgids_max = dsys->zss_msgids_max;
3765*efd4c9b6SSteve Lawrence 	sys->zss_lofi_max = dsys->zss_lofi_max;
3766*efd4c9b6SSteve Lawrence 
3767*efd4c9b6SSteve Lawrence 	sys->zss_processes = dsys->zss_processes;
3768*efd4c9b6SSteve Lawrence 	sys->zss_lwps = dsys->zss_lwps;
3769*efd4c9b6SSteve Lawrence 	sys->zss_shm = dsys->zss_shm;
3770*efd4c9b6SSteve Lawrence 	sys->zss_shmids = dsys->zss_shmids;
3771*efd4c9b6SSteve Lawrence 	sys->zss_semids = dsys->zss_semids;
3772*efd4c9b6SSteve Lawrence 	sys->zss_msgids = dsys->zss_msgids;
3773*efd4c9b6SSteve Lawrence 	sys->zss_lofi = dsys->zss_lofi;
3774*efd4c9b6SSteve Lawrence 
3775*efd4c9b6SSteve Lawrence 	sys->zss_cpu_total_time = dsys->zss_cpu_total_time;
3776*efd4c9b6SSteve Lawrence 	sys->zss_cpu_usage_zones = dsys->zss_cpu_usage_zones;
3777*efd4c9b6SSteve Lawrence 	sys->zss_cpu_usage_kern = dsys->zss_cpu_usage_kern;
3778*efd4c9b6SSteve Lawrence 
3779*efd4c9b6SSteve Lawrence 	for (i = 0, dzone = list_head(&ctl->zsctl_zones);
3780*efd4c9b6SSteve Lawrence 	    i < ctl->zsctl_nzones;
3781*efd4c9b6SSteve Lawrence 	    i++, dzone = list_next(&ctl->zsctl_zones, dzone)) {
3782*efd4c9b6SSteve Lawrence 		/* LINTED */
3783*efd4c9b6SSteve Lawrence 		zone = (zs_zone_t *)next;
3784*efd4c9b6SSteve Lawrence 		next += sizeof (zs_zone_t);
3785*efd4c9b6SSteve Lawrence 		list_link_init(&zone->zsz_next);
3786*efd4c9b6SSteve Lawrence 		zone->zsz_system = NULL;
3787*efd4c9b6SSteve Lawrence 
3788*efd4c9b6SSteve Lawrence 		(void) strlcpy(zone->zsz_name, dzone->zsz_name,
3789*efd4c9b6SSteve Lawrence 		    sizeof (zone->zsz_name));
3790*efd4c9b6SSteve Lawrence 		(void) strlcpy(zone->zsz_pool, dzone->zsz_pool,
3791*efd4c9b6SSteve Lawrence 		    sizeof (zone->zsz_pool));
3792*efd4c9b6SSteve Lawrence 		(void) strlcpy(zone->zsz_pset, dzone->zsz_pset,
3793*efd4c9b6SSteve Lawrence 		    sizeof (zone->zsz_pset));
3794*efd4c9b6SSteve Lawrence 		zone->zsz_id = dzone->zsz_id;
3795*efd4c9b6SSteve Lawrence 		zone->zsz_cputype = dzone->zsz_cputype;
3796*efd4c9b6SSteve Lawrence 		zone->zsz_iptype = dzone->zsz_iptype;
3797*efd4c9b6SSteve Lawrence 		zone->zsz_start = dzone->zsz_start;
3798*efd4c9b6SSteve Lawrence 		zone->zsz_hrstart = dzone->zsz_hrstart;
3799*efd4c9b6SSteve Lawrence 		zone->zsz_scheds = dzone->zsz_scheds;
3800*efd4c9b6SSteve Lawrence 		zone->zsz_cpu_shares = dzone->zsz_cpu_shares;
3801*efd4c9b6SSteve Lawrence 		zone->zsz_cpu_cap = dzone->zsz_cpu_cap;
3802*efd4c9b6SSteve Lawrence 		zone->zsz_ram_cap = dzone->zsz_ram_cap;
3803*efd4c9b6SSteve Lawrence 		zone->zsz_vm_cap = dzone->zsz_vm_cap;
3804*efd4c9b6SSteve Lawrence 		zone->zsz_locked_cap = dzone->zsz_locked_cap;
3805*efd4c9b6SSteve Lawrence 		zone->zsz_cpu_usage = dzone->zsz_cpu_usage;
3806*efd4c9b6SSteve Lawrence 		zone->zsz_cpus_online = dzone->zsz_cpus_online;
3807*efd4c9b6SSteve Lawrence 		zone->zsz_pset_time = dzone->zsz_pset_time;
3808*efd4c9b6SSteve Lawrence 		zone->zsz_cap_time = dzone->zsz_cap_time;
3809*efd4c9b6SSteve Lawrence 		zone->zsz_share_time = dzone->zsz_share_time;
3810*efd4c9b6SSteve Lawrence 		zone->zsz_usage_ram = dzone->zsz_usage_ram;
3811*efd4c9b6SSteve Lawrence 		zone->zsz_usage_locked = dzone->zsz_usage_locked;
3812*efd4c9b6SSteve Lawrence 		zone->zsz_usage_vm = dzone->zsz_usage_vm;
3813*efd4c9b6SSteve Lawrence 
3814*efd4c9b6SSteve Lawrence 		zone->zsz_processes_cap = dzone->zsz_processes_cap;
3815*efd4c9b6SSteve Lawrence 		zone->zsz_lwps_cap = dzone->zsz_lwps_cap;
3816*efd4c9b6SSteve Lawrence 		zone->zsz_shm_cap = dzone->zsz_shm_cap;
3817*efd4c9b6SSteve Lawrence 		zone->zsz_shmids_cap = dzone->zsz_shmids_cap;
3818*efd4c9b6SSteve Lawrence 		zone->zsz_semids_cap = dzone->zsz_semids_cap;
3819*efd4c9b6SSteve Lawrence 		zone->zsz_msgids_cap = dzone->zsz_msgids_cap;
3820*efd4c9b6SSteve Lawrence 		zone->zsz_lofi_cap = dzone->zsz_lofi_cap;
3821*efd4c9b6SSteve Lawrence 
3822*efd4c9b6SSteve Lawrence 		zone->zsz_processes = dzone->zsz_processes;
3823*efd4c9b6SSteve Lawrence 		zone->zsz_lwps = dzone->zsz_lwps;
3824*efd4c9b6SSteve Lawrence 		zone->zsz_shm = dzone->zsz_shm;
3825*efd4c9b6SSteve Lawrence 		zone->zsz_shmids = dzone->zsz_shmids;
3826*efd4c9b6SSteve Lawrence 		zone->zsz_semids = dzone->zsz_semids;
3827*efd4c9b6SSteve Lawrence 		zone->zsz_msgids = dzone->zsz_msgids;
3828*efd4c9b6SSteve Lawrence 		zone->zsz_lofi = dzone->zsz_lofi;
3829*efd4c9b6SSteve Lawrence 	}
3830*efd4c9b6SSteve Lawrence 
3831*efd4c9b6SSteve Lawrence 	for (i = 0, dpset = list_head(&ctl->zsctl_psets);
3832*efd4c9b6SSteve Lawrence 	    i < ctl->zsctl_npsets;
3833*efd4c9b6SSteve Lawrence 	    i++, dpset = list_next(&ctl->zsctl_psets, dpset)) {
3834*efd4c9b6SSteve Lawrence 		/* LINTED */
3835*efd4c9b6SSteve Lawrence 		pset = (zs_pset_t *)next;
3836*efd4c9b6SSteve Lawrence 		next += sizeof (zs_pset_t);
3837*efd4c9b6SSteve Lawrence 		list_link_init(&pset->zsp_next);
3838*efd4c9b6SSteve Lawrence 		(void) strlcpy(pset->zsp_name, dpset->zsp_name,
3839*efd4c9b6SSteve Lawrence 		    sizeof (pset->zsp_name));
3840*efd4c9b6SSteve Lawrence 		pset->zsp_id = dpset->zsp_id;
3841*efd4c9b6SSteve Lawrence 		pset->zsp_cputype = dpset->zsp_cputype;
3842*efd4c9b6SSteve Lawrence 		pset->zsp_start = dpset->zsp_start;
3843*efd4c9b6SSteve Lawrence 		pset->zsp_hrstart = dpset->zsp_hrstart;
3844*efd4c9b6SSteve Lawrence 		pset->zsp_online = dpset->zsp_online;
3845*efd4c9b6SSteve Lawrence 		pset->zsp_size = dpset->zsp_size;
3846*efd4c9b6SSteve Lawrence 		pset->zsp_min = dpset->zsp_min;
3847*efd4c9b6SSteve Lawrence 		pset->zsp_max = dpset->zsp_max;
3848*efd4c9b6SSteve Lawrence 		pset->zsp_importance = dpset->zsp_importance;
3849*efd4c9b6SSteve Lawrence 		pset->zsp_scheds = dpset->zsp_scheds;
3850*efd4c9b6SSteve Lawrence 		pset->zsp_cpu_shares = dpset->zsp_cpu_shares;
3851*efd4c9b6SSteve Lawrence 		pset->zsp_total_time = dpset->zsp_total_time;
3852*efd4c9b6SSteve Lawrence 		pset->zsp_usage_kern = dpset->zsp_usage_kern;
3853*efd4c9b6SSteve Lawrence 		pset->zsp_usage_zones = dpset->zsp_usage_zones;
3854*efd4c9b6SSteve Lawrence 		pset->zsp_nusage = dpset->zsp_nusage;
3855*efd4c9b6SSteve Lawrence 		/* Add pset usages for pset */
3856*efd4c9b6SSteve Lawrence 		for (j = 0, dpusage = list_head(&dpset->zsp_usage_list);
3857*efd4c9b6SSteve Lawrence 		    j < dpset->zsp_nusage;
3858*efd4c9b6SSteve Lawrence 		    j++, dpusage = list_next(&dpset->zsp_usage_list, dpusage)) {
3859*efd4c9b6SSteve Lawrence 			/* LINTED */
3860*efd4c9b6SSteve Lawrence 			pusage = (zs_pset_zone_t *)next;
3861*efd4c9b6SSteve Lawrence 			next += sizeof (zs_pset_zone_t);
3862*efd4c9b6SSteve Lawrence 			/* pointers are computed by client */
3863*efd4c9b6SSteve Lawrence 			pusage->zspz_pset = NULL;
3864*efd4c9b6SSteve Lawrence 			pusage->zspz_zone = NULL;
3865*efd4c9b6SSteve Lawrence 			list_link_init(&pusage->zspz_next);
3866*efd4c9b6SSteve Lawrence 			pusage->zspz_zoneid = dpusage->zsu_zone->zsz_id;
3867*efd4c9b6SSteve Lawrence 			pusage->zspz_start = dpusage->zsu_start;
3868*efd4c9b6SSteve Lawrence 			pusage->zspz_hrstart = dpusage->zsu_hrstart;
3869*efd4c9b6SSteve Lawrence 			pusage->zspz_hrstart = dpusage->zsu_hrstart;
3870*efd4c9b6SSteve Lawrence 			pusage->zspz_cpu_shares = dpusage->zsu_cpu_shares;
3871*efd4c9b6SSteve Lawrence 			pusage->zspz_scheds = dpusage->zsu_scheds;
3872*efd4c9b6SSteve Lawrence 			pusage->zspz_cpu_usage = dpusage->zsu_cpu_usage;
3873*efd4c9b6SSteve Lawrence 		}
3874*efd4c9b6SSteve Lawrence 	}
3875*efd4c9b6SSteve Lawrence 
3876*efd4c9b6SSteve Lawrence 	/* Update the current cache pointer */
3877*efd4c9b6SSteve Lawrence 	(void) mutex_lock(&g_usage_cache_lock);
3878*efd4c9b6SSteve Lawrence 		old = g_usage_cache;
3879*efd4c9b6SSteve Lawrence 		cache->zsuc_ref = 1;
3880*efd4c9b6SSteve Lawrence 		cache->zsuc_gen = g_gen_next;
3881*efd4c9b6SSteve Lawrence 		usage->zsu_gen = g_gen_next;
3882*efd4c9b6SSteve Lawrence 		usage->zsu_size = size;
3883*efd4c9b6SSteve Lawrence 		g_usage_cache = cache;
3884*efd4c9b6SSteve Lawrence 		if (old != NULL) {
3885*efd4c9b6SSteve Lawrence 			old->zsuc_ref--;
3886*efd4c9b6SSteve Lawrence 			if (old->zsuc_ref == 0)
3887*efd4c9b6SSteve Lawrence 				free(old);
3888*efd4c9b6SSteve Lawrence 		}
3889*efd4c9b6SSteve Lawrence 		g_gen_next++;
3890*efd4c9b6SSteve Lawrence 	/* Wake up any clients that are waiting for this calculation */
3891*efd4c9b6SSteve Lawrence 	if (g_usage_cache_kickers > 0) {
3892*efd4c9b6SSteve Lawrence 		(void) cond_broadcast(&g_usage_cache_wait);
3893*efd4c9b6SSteve Lawrence 	}
3894*efd4c9b6SSteve Lawrence 	(void) mutex_unlock(&g_usage_cache_lock);
3895*efd4c9b6SSteve Lawrence }
3896*efd4c9b6SSteve Lawrence 
3897*efd4c9b6SSteve Lawrence static zs_usage_cache_t *
3898*efd4c9b6SSteve Lawrence zsd_usage_cache_hold_locked()
3899*efd4c9b6SSteve Lawrence {
3900*efd4c9b6SSteve Lawrence 	zs_usage_cache_t *ret;
3901*efd4c9b6SSteve Lawrence 
3902*efd4c9b6SSteve Lawrence 	ret = g_usage_cache;
3903*efd4c9b6SSteve Lawrence 	ret->zsuc_ref++;
3904*efd4c9b6SSteve Lawrence 	return (ret);
3905*efd4c9b6SSteve Lawrence }
3906*efd4c9b6SSteve Lawrence 
3907*efd4c9b6SSteve Lawrence void
3908*efd4c9b6SSteve Lawrence zsd_usage_cache_rele(zs_usage_cache_t *cache)
3909*efd4c9b6SSteve Lawrence {
3910*efd4c9b6SSteve Lawrence 	(void) mutex_lock(&g_usage_cache_lock);
3911*efd4c9b6SSteve Lawrence 	cache->zsuc_ref--;
3912*efd4c9b6SSteve Lawrence 	if (cache->zsuc_ref == 0)
3913*efd4c9b6SSteve Lawrence 		free(cache);
3914*efd4c9b6SSteve Lawrence 	(void) mutex_unlock(&g_usage_cache_lock);
3915*efd4c9b6SSteve Lawrence }
3916*efd4c9b6SSteve Lawrence 
3917*efd4c9b6SSteve Lawrence /* Close the handles held by zsd_open() */
3918*efd4c9b6SSteve Lawrence void
3919*efd4c9b6SSteve Lawrence zsd_close(zsd_ctl_t *ctl)
3920*efd4c9b6SSteve Lawrence {
3921*efd4c9b6SSteve Lawrence 	zsd_zone_t *zone;
3922*efd4c9b6SSteve Lawrence 	zsd_pset_t *pset;
3923*efd4c9b6SSteve Lawrence 	zsd_pset_usage_t *usage;
3924*efd4c9b6SSteve Lawrence 	zsd_cpu_t *cpu;
3925*efd4c9b6SSteve Lawrence 	int id;
3926*efd4c9b6SSteve Lawrence 
3927*efd4c9b6SSteve Lawrence 	if (ctl->zsctl_kstat_ctl) {
3928*efd4c9b6SSteve Lawrence 		(void) kstat_close(ctl->zsctl_kstat_ctl);
3929*efd4c9b6SSteve Lawrence 		ctl->zsctl_kstat_ctl = NULL;
3930*efd4c9b6SSteve Lawrence 	}
3931*efd4c9b6SSteve Lawrence 	if (ctl->zsctl_proc_open) {
3932*efd4c9b6SSteve Lawrence 		(void) ea_close(&ctl->zsctl_proc_eaf);
3933*efd4c9b6SSteve Lawrence 		ctl->zsctl_proc_open = 0;
3934*efd4c9b6SSteve Lawrence 		ctl->zsctl_proc_fd = -1;
3935*efd4c9b6SSteve Lawrence 	}
3936*efd4c9b6SSteve Lawrence 	if (ctl->zsctl_pool_conf) {
3937*efd4c9b6SSteve Lawrence 		if (ctl->zsctl_pool_status == POOL_ENABLED)
3938*efd4c9b6SSteve Lawrence 			(void) pool_conf_close(ctl->zsctl_pool_conf);
3939*efd4c9b6SSteve Lawrence 		ctl->zsctl_pool_status = POOL_DISABLED;
3940*efd4c9b6SSteve Lawrence 	}
3941*efd4c9b6SSteve Lawrence 
3942*efd4c9b6SSteve Lawrence 	while ((zone = list_head(&ctl->zsctl_zones)) != NULL) {
3943*efd4c9b6SSteve Lawrence 		list_remove(&ctl->zsctl_zones, zone);
3944*efd4c9b6SSteve Lawrence 		free(zone);
3945*efd4c9b6SSteve Lawrence 		ctl->zsctl_nzones--;
3946*efd4c9b6SSteve Lawrence 	}
3947*efd4c9b6SSteve Lawrence 
3948*efd4c9b6SSteve Lawrence 	while ((pset = list_head(&ctl->zsctl_psets)) != NULL) {
3949*efd4c9b6SSteve Lawrence 		while ((usage = list_head(&pset->zsp_usage_list))
3950*efd4c9b6SSteve Lawrence 		    != NULL) {
3951*efd4c9b6SSteve Lawrence 			list_remove(&pset->zsp_usage_list, usage);
3952*efd4c9b6SSteve Lawrence 			ctl->zsctl_npset_usages--;
3953*efd4c9b6SSteve Lawrence 			free(usage);
3954*efd4c9b6SSteve Lawrence 		}
3955*efd4c9b6SSteve Lawrence 		list_remove(&ctl->zsctl_psets, pset);
3956*efd4c9b6SSteve Lawrence 		free(pset);
3957*efd4c9b6SSteve Lawrence 		ctl->zsctl_npsets--;
3958*efd4c9b6SSteve Lawrence 	}
3959*efd4c9b6SSteve Lawrence 
3960*efd4c9b6SSteve Lawrence 	/* Release all cpus being tracked */
3961*efd4c9b6SSteve Lawrence 	while (cpu = list_head(&ctl->zsctl_cpus)) {
3962*efd4c9b6SSteve Lawrence 		list_remove(&ctl->zsctl_cpus, cpu);
3963*efd4c9b6SSteve Lawrence 		id = cpu->zsc_id;
3964*efd4c9b6SSteve Lawrence 		bzero(cpu, sizeof (zsd_cpu_t));
3965*efd4c9b6SSteve Lawrence 		cpu->zsc_id = id;
3966*efd4c9b6SSteve Lawrence 		cpu->zsc_allocated = B_FALSE;
3967*efd4c9b6SSteve Lawrence 		cpu->zsc_psetid = ZS_PSET_ERROR;
3968*efd4c9b6SSteve Lawrence 		cpu->zsc_psetid_prev = ZS_PSET_ERROR;
3969*efd4c9b6SSteve Lawrence 	}
3970*efd4c9b6SSteve Lawrence 
3971*efd4c9b6SSteve Lawrence 	assert(ctl->zsctl_npset_usages == 0);
3972*efd4c9b6SSteve Lawrence 	assert(ctl->zsctl_npsets == 0);
3973*efd4c9b6SSteve Lawrence 	assert(ctl->zsctl_nzones == 0);
3974*efd4c9b6SSteve Lawrence 	(void) zsd_disable_cpu_stats();
3975*efd4c9b6SSteve Lawrence }
3976*efd4c9b6SSteve Lawrence 
3977*efd4c9b6SSteve Lawrence 
3978*efd4c9b6SSteve Lawrence /*
3979*efd4c9b6SSteve Lawrence  * Update the utilization data for all zones and processor sets.
3980*efd4c9b6SSteve Lawrence  */
3981*efd4c9b6SSteve Lawrence static int
3982*efd4c9b6SSteve Lawrence zsd_read(zsd_ctl_t *ctl, boolean_t init, boolean_t do_memory)
3983*efd4c9b6SSteve Lawrence {
3984*efd4c9b6SSteve Lawrence 	(void) kstat_chain_update(ctl->zsctl_kstat_ctl);
3985*efd4c9b6SSteve Lawrence 	(void) gettimeofday(&(ctl->zsctl_timeofday), NULL);
3986*efd4c9b6SSteve Lawrence 
3987*efd4c9b6SSteve Lawrence 	zsd_refresh_system(ctl);
3988*efd4c9b6SSteve Lawrence 
3989*efd4c9b6SSteve Lawrence 	/*
3990*efd4c9b6SSteve Lawrence 	 * Memory calculation is expensive.  Only update it on sample
3991*efd4c9b6SSteve Lawrence 	 * intervals.
3992*efd4c9b6SSteve Lawrence 	 */
3993*efd4c9b6SSteve Lawrence 	if (do_memory == B_TRUE)
3994*efd4c9b6SSteve Lawrence 		zsd_refresh_memory(ctl, init);
3995*efd4c9b6SSteve Lawrence 	zsd_refresh_zones(ctl);
3996*efd4c9b6SSteve Lawrence 	zsd_refresh_psets(ctl);
3997*efd4c9b6SSteve Lawrence 	zsd_refresh_procs(ctl, init);
3998*efd4c9b6SSteve Lawrence 	zsd_refresh_cpu_stats(ctl, init);
3999*efd4c9b6SSteve Lawrence 
4000*efd4c9b6SSteve Lawrence 	/*
4001*efd4c9b6SSteve Lawrence 	 * Delete objects that no longer exist.
4002*efd4c9b6SSteve Lawrence 	 * Pset usages must be deleted first as they point to zone and
4003*efd4c9b6SSteve Lawrence 	 * pset objects.
4004*efd4c9b6SSteve Lawrence 	 */
4005*efd4c9b6SSteve Lawrence 	zsd_mark_pset_usages_end(ctl);
4006*efd4c9b6SSteve Lawrence 	zsd_mark_psets_end(ctl);
4007*efd4c9b6SSteve Lawrence 	zsd_mark_cpus_end(ctl);
4008*efd4c9b6SSteve Lawrence 	zsd_mark_zones_end(ctl);
4009*efd4c9b6SSteve Lawrence 
4010*efd4c9b6SSteve Lawrence 	/*
4011*efd4c9b6SSteve Lawrence 	 * Save results for clients.
4012*efd4c9b6SSteve Lawrence 	 */
4013*efd4c9b6SSteve Lawrence 	zsd_usage_cache_update(ctl);
4014*efd4c9b6SSteve Lawrence 
4015*efd4c9b6SSteve Lawrence 	/*
4016*efd4c9b6SSteve Lawrence 	 * Roll process accounting file.
4017*efd4c9b6SSteve Lawrence 	 */
4018*efd4c9b6SSteve Lawrence 	(void) zsd_roll_exacct();
4019*efd4c9b6SSteve Lawrence 	return (0);
4020*efd4c9b6SSteve Lawrence }
4021*efd4c9b6SSteve Lawrence 
/*
 * Get the system rctl, which is the upper most limit.
 *
 * Walks the value sequence of the resource control "name" from RCTL_FIRST
 * through successive RCTL_NEXT calls and returns the value of the last
 * block that was successfully read.  Returns ZS_LIMIT_NONE if the first
 * value cannot be read.
 */
static uint64_t
zsd_get_system_rctl(char *name)
{
	size_t blksz = rctlblk_size();
	rctlblk_t *last = (rctlblk_t *)alloca(blksz);
	rctlblk_t *scratch = (rctlblk_t *)alloca(blksz);
	rctlblk_t *tmp;

	if (getrctl(name, NULL, last, RCTL_FIRST) != 0)
		return (ZS_LIMIT_NONE);

	/*
	 * Each successful RCTL_NEXT fills "scratch" with the successor of
	 * "last".  Exchanging the two pointers makes the new block the
	 * current "last" and recycles the old one as the next scratch
	 * buffer, so no copy is required.
	 */
	for (;;) {
		if (getrctl(name, last, scratch, RCTL_NEXT) != 0)
			break;
		tmp = last;
		last = scratch;
		scratch = tmp;
	}

	return (rctlblk_get_value(last));
}
4041*efd4c9b6SSteve Lawrence 
4042*efd4c9b6SSteve Lawrence /*
4043*efd4c9b6SSteve Lawrence  * Open any necessary subsystems for collecting utilization data,
4044*efd4c9b6SSteve Lawrence  * allocate and initialize data structures, and get initial utilization.
4045*efd4c9b6SSteve Lawrence  *
4046*efd4c9b6SSteve Lawrence  * Errors:
4047*efd4c9b6SSteve Lawrence  *	ENOMEM	out of memory
4048*efd4c9b6SSteve Lawrence  *	EINVAL  other error
4049*efd4c9b6SSteve Lawrence  */
4050*efd4c9b6SSteve Lawrence static zsd_ctl_t *
4051*efd4c9b6SSteve Lawrence zsd_open(zsd_ctl_t *ctl)
4052*efd4c9b6SSteve Lawrence {
4053*efd4c9b6SSteve Lawrence 	zsd_system_t *system;
4054*efd4c9b6SSteve Lawrence 
4055*efd4c9b6SSteve Lawrence 	char path[MAXPATHLEN];
4056*efd4c9b6SSteve Lawrence 	long pathmax;
4057*efd4c9b6SSteve Lawrence 	struct statvfs svfs;
4058*efd4c9b6SSteve Lawrence 	int ret;
4059*efd4c9b6SSteve Lawrence 	int i;
4060*efd4c9b6SSteve Lawrence 	size_t size;
4061*efd4c9b6SSteve Lawrence 	int err;
4062*efd4c9b6SSteve Lawrence 
4063*efd4c9b6SSteve Lawrence 	if (ctl == NULL && (ctl = (zsd_ctl_t *)calloc(1,
4064*efd4c9b6SSteve Lawrence 	    sizeof (zsd_ctl_t))) == NULL) {
4065*efd4c9b6SSteve Lawrence 			zsd_warn(gettext("Out of Memory"));
4066*efd4c9b6SSteve Lawrence 			errno = ENOMEM;
4067*efd4c9b6SSteve Lawrence 			goto err;
4068*efd4c9b6SSteve Lawrence 	}
4069*efd4c9b6SSteve Lawrence 	ctl->zsctl_proc_fd = -1;
4070*efd4c9b6SSteve Lawrence 
4071*efd4c9b6SSteve Lawrence 	/* open kstats */
4072*efd4c9b6SSteve Lawrence 	if (ctl->zsctl_kstat_ctl == NULL &&
4073*efd4c9b6SSteve Lawrence 	    (ctl->zsctl_kstat_ctl = kstat_open()) == NULL) {
4074*efd4c9b6SSteve Lawrence 		err = errno;
4075*efd4c9b6SSteve Lawrence 		zsd_warn(gettext("Unable to open kstats"));
4076*efd4c9b6SSteve Lawrence 		errno = err;
4077*efd4c9b6SSteve Lawrence 		if (errno != ENOMEM)
4078*efd4c9b6SSteve Lawrence 			errno = EAGAIN;
4079*efd4c9b6SSteve Lawrence 		goto err;
4080*efd4c9b6SSteve Lawrence 	}
4081*efd4c9b6SSteve Lawrence 
4082*efd4c9b6SSteve Lawrence 	/*
4083*efd4c9b6SSteve Lawrence 	 * These are set when the accounting file is opened by
4084*efd4c9b6SSteve Lawrence 	 * zsd_update_procs()
4085*efd4c9b6SSteve Lawrence 	 */
4086*efd4c9b6SSteve Lawrence 	ctl->zsctl_proc_fd = -1;
4087*efd4c9b6SSteve Lawrence 	ctl->zsctl_proc_fd_next = -1;
4088*efd4c9b6SSteve Lawrence 	ctl->zsctl_proc_open = 0;
4089*efd4c9b6SSteve Lawrence 	ctl->zsctl_proc_open_next = 0;
4090*efd4c9b6SSteve Lawrence 
4091*efd4c9b6SSteve Lawrence check_exacct:
4092*efd4c9b6SSteve Lawrence 	(void) zsd_enable_cpu_stats();
4093*efd4c9b6SSteve Lawrence 
4094*efd4c9b6SSteve Lawrence 	/* Create structures to track usage */
4095*efd4c9b6SSteve Lawrence 	if (ctl->zsctl_system == NULL && (ctl->zsctl_system = (zsd_system_t *)
4096*efd4c9b6SSteve Lawrence 	    calloc(1, sizeof (zsd_system_t))) == NULL) {
4097*efd4c9b6SSteve Lawrence 		ret = -1;
4098*efd4c9b6SSteve Lawrence 		zsd_warn(gettext("Out of Memory"));
4099*efd4c9b6SSteve Lawrence 		errno = ENOMEM;
4100*efd4c9b6SSteve Lawrence 		goto err;
4101*efd4c9b6SSteve Lawrence 	}
4102*efd4c9b6SSteve Lawrence 	system = ctl->zsctl_system;
4103*efd4c9b6SSteve Lawrence 	/* get the kernel bitness to know structure layout for getvmusage */
4104*efd4c9b6SSteve Lawrence 	ret = sysinfo(SI_ARCHITECTURE_64, path, sizeof (path));
4105*efd4c9b6SSteve Lawrence 	if (ret < 0)
4106*efd4c9b6SSteve Lawrence 		ctl->zsctl_kern_bits = 32;
4107*efd4c9b6SSteve Lawrence 	else
4108*efd4c9b6SSteve Lawrence 		ctl->zsctl_kern_bits = 64;
4109*efd4c9b6SSteve Lawrence 	ctl->zsctl_pagesize = sysconf(_SC_PAGESIZE);
4110*efd4c9b6SSteve Lawrence 
4111*efd4c9b6SSteve Lawrence 	size = sysconf(_SC_CPUID_MAX);
4112*efd4c9b6SSteve Lawrence 	ctl->zsctl_maxcpuid = size;
4113*efd4c9b6SSteve Lawrence 	if (ctl->zsctl_cpu_array == NULL && (ctl->zsctl_cpu_array =
4114*efd4c9b6SSteve Lawrence 	    (zsd_cpu_t *)calloc(size + 1, sizeof (zsd_cpu_t))) == NULL) {
4115*efd4c9b6SSteve Lawrence 		zsd_warn(gettext("Out of Memory"));
4116*efd4c9b6SSteve Lawrence 		errno = ENOMEM;
4117*efd4c9b6SSteve Lawrence 		goto err;
4118*efd4c9b6SSteve Lawrence 	}
4119*efd4c9b6SSteve Lawrence 	for (i = 0; i <= ctl->zsctl_maxcpuid; i++) {
4120*efd4c9b6SSteve Lawrence 		ctl->zsctl_cpu_array[i].zsc_id = i;
4121*efd4c9b6SSteve Lawrence 		ctl->zsctl_cpu_array[i].zsc_allocated = B_FALSE;
4122*efd4c9b6SSteve Lawrence 		ctl->zsctl_cpu_array[i].zsc_psetid = ZS_PSET_ERROR;
4123*efd4c9b6SSteve Lawrence 		ctl->zsctl_cpu_array[i].zsc_psetid_prev = ZS_PSET_ERROR;
4124*efd4c9b6SSteve Lawrence 	}
4125*efd4c9b6SSteve Lawrence 	if (statvfs("/proc", &svfs) != 0 ||
4126*efd4c9b6SSteve Lawrence 	    strcmp("/proc", svfs.f_fstr) != 0) {
4127*efd4c9b6SSteve Lawrence 		zsd_warn(gettext("/proc not a procfs filesystem"));
4128*efd4c9b6SSteve Lawrence 		errno = EINVAL;
4129*efd4c9b6SSteve Lawrence 		goto err;
4130*efd4c9b6SSteve Lawrence 	}
4131*efd4c9b6SSteve Lawrence 
4132*efd4c9b6SSteve Lawrence 	size = sysconf(_SC_MAXPID) + 1;
4133*efd4c9b6SSteve Lawrence 	ctl->zsctl_maxproc = size;
4134*efd4c9b6SSteve Lawrence 	if (ctl->zsctl_proc_array == NULL &&
4135*efd4c9b6SSteve Lawrence 	    (ctl->zsctl_proc_array = (zsd_proc_t *)calloc(size,
4136*efd4c9b6SSteve Lawrence 	    sizeof (zsd_proc_t))) == NULL) {
4137*efd4c9b6SSteve Lawrence 		zsd_warn(gettext("Out of Memory"));
4138*efd4c9b6SSteve Lawrence 		errno = ENOMEM;
4139*efd4c9b6SSteve Lawrence 		goto err;
4140*efd4c9b6SSteve Lawrence 	}
4141*efd4c9b6SSteve Lawrence 	for (i = 0; i <= ctl->zsctl_maxproc; i++) {
4142*efd4c9b6SSteve Lawrence 		list_link_init(&(ctl->zsctl_proc_array[i].zspr_next));
4143*efd4c9b6SSteve Lawrence 		ctl->zsctl_proc_array[i].zspr_psetid = ZS_PSET_ERROR;
4144*efd4c9b6SSteve Lawrence 		ctl->zsctl_proc_array[i].zspr_zoneid = -1;
4145*efd4c9b6SSteve Lawrence 		ctl->zsctl_proc_array[i].zspr_usage.tv_sec = 0;
4146*efd4c9b6SSteve Lawrence 		ctl->zsctl_proc_array[i].zspr_usage.tv_nsec = 0;
4147*efd4c9b6SSteve Lawrence 		ctl->zsctl_proc_array[i].zspr_ppid = -1;
4148*efd4c9b6SSteve Lawrence 	}
4149*efd4c9b6SSteve Lawrence 
4150*efd4c9b6SSteve Lawrence 	list_create(&ctl->zsctl_zones, sizeof (zsd_zone_t),
4151*efd4c9b6SSteve Lawrence 	    offsetof(zsd_zone_t, zsz_next));
4152*efd4c9b6SSteve Lawrence 
4153*efd4c9b6SSteve Lawrence 	list_create(&ctl->zsctl_psets, sizeof (zsd_pset_t),
4154*efd4c9b6SSteve Lawrence 	    offsetof(zsd_pset_t, zsp_next));
4155*efd4c9b6SSteve Lawrence 
4156*efd4c9b6SSteve Lawrence 	list_create(&ctl->zsctl_cpus, sizeof (zsd_cpu_t),
4157*efd4c9b6SSteve Lawrence 	    offsetof(zsd_cpu_t, zsc_next));
4158*efd4c9b6SSteve Lawrence 
4159*efd4c9b6SSteve Lawrence 	pathmax = pathconf("/proc", _PC_NAME_MAX);
4160*efd4c9b6SSteve Lawrence 	if (pathmax < 0) {
4161*efd4c9b6SSteve Lawrence 		zsd_warn(gettext("Unable to determine max path of /proc"));
4162*efd4c9b6SSteve Lawrence 		errno = EINVAL;
4163*efd4c9b6SSteve Lawrence 		goto err;
4164*efd4c9b6SSteve Lawrence 	}
4165*efd4c9b6SSteve Lawrence 	size = sizeof (struct dirent) + pathmax + 1;
4166*efd4c9b6SSteve Lawrence 
4167*efd4c9b6SSteve Lawrence 	ctl->zsctl_procfs_dent_size = size;
4168*efd4c9b6SSteve Lawrence 	if (ctl->zsctl_procfs_dent == NULL &&
4169*efd4c9b6SSteve Lawrence 	    (ctl->zsctl_procfs_dent = (struct dirent *)calloc(1, size))
4170*efd4c9b6SSteve Lawrence 	    == NULL) {
4171*efd4c9b6SSteve Lawrence 		zsd_warn(gettext("Out of Memory"));
4172*efd4c9b6SSteve Lawrence 		errno = ENOMEM;
4173*efd4c9b6SSteve Lawrence 		goto err;
4174*efd4c9b6SSteve Lawrence 	}
4175*efd4c9b6SSteve Lawrence 
4176*efd4c9b6SSteve Lawrence 	if (ctl->zsctl_pool_conf == NULL &&
4177*efd4c9b6SSteve Lawrence 	    (ctl->zsctl_pool_conf = pool_conf_alloc()) == NULL) {
4178*efd4c9b6SSteve Lawrence 		zsd_warn(gettext("Out of Memory"));
4179*efd4c9b6SSteve Lawrence 		errno = ENOMEM;
4180*efd4c9b6SSteve Lawrence 		goto err;
4181*efd4c9b6SSteve Lawrence 	}
4182*efd4c9b6SSteve Lawrence 	ctl->zsctl_pool_status = POOL_DISABLED;
4183*efd4c9b6SSteve Lawrence 	ctl->zsctl_pool_changed = 0;
4184*efd4c9b6SSteve Lawrence 
4185*efd4c9b6SSteve Lawrence 	if (ctl->zsctl_pool_vals[0] == NULL &&
4186*efd4c9b6SSteve Lawrence 	    (ctl->zsctl_pool_vals[0] = pool_value_alloc()) == NULL) {
4187*efd4c9b6SSteve Lawrence 		zsd_warn(gettext("Out of Memory"));
4188*efd4c9b6SSteve Lawrence 		errno = ENOMEM;
4189*efd4c9b6SSteve Lawrence 		goto err;
4190*efd4c9b6SSteve Lawrence 	}
4191*efd4c9b6SSteve Lawrence 	if (ctl->zsctl_pool_vals[1] == NULL &&
4192*efd4c9b6SSteve Lawrence 	    (ctl->zsctl_pool_vals[1] = pool_value_alloc()) == NULL) {
4193*efd4c9b6SSteve Lawrence 		zsd_warn(gettext("Out of Memory"));
4194*efd4c9b6SSteve Lawrence 		errno = ENOMEM;
4195*efd4c9b6SSteve Lawrence 		goto err;
4196*efd4c9b6SSteve Lawrence 	}
4197*efd4c9b6SSteve Lawrence 	ctl->zsctl_pool_vals[2] = NULL;
4198*efd4c9b6SSteve Lawrence 
4199*efd4c9b6SSteve Lawrence 	/*
4200*efd4c9b6SSteve Lawrence 	 * get system limits
4201*efd4c9b6SSteve Lawrence 	 */
4202*efd4c9b6SSteve Lawrence 	system->zss_maxpid = size = sysconf(_SC_MAXPID);
4203*efd4c9b6SSteve Lawrence 	system->zss_processes_max = zsd_get_system_rctl("zone.max-processes");
4204*efd4c9b6SSteve Lawrence 	system->zss_lwps_max = zsd_get_system_rctl("zone.max-lwps");
4205*efd4c9b6SSteve Lawrence 	system->zss_shm_max = zsd_get_system_rctl("zone.max-shm-memory");
4206*efd4c9b6SSteve Lawrence 	system->zss_shmids_max = zsd_get_system_rctl("zone.max-shm-ids");
4207*efd4c9b6SSteve Lawrence 	system->zss_semids_max = zsd_get_system_rctl("zone.max-sem-ids");
4208*efd4c9b6SSteve Lawrence 	system->zss_msgids_max = zsd_get_system_rctl("zone.max-msg-ids");
4209*efd4c9b6SSteve Lawrence 	system->zss_lofi_max = zsd_get_system_rctl("zone.max-lofi");
4210*efd4c9b6SSteve Lawrence 
4211*efd4c9b6SSteve Lawrence 	g_gen_next = 1;
4212*efd4c9b6SSteve Lawrence 
4213*efd4c9b6SSteve Lawrence 	if (zsd_read(ctl, B_TRUE, B_FALSE) != 0)
4214*efd4c9b6SSteve Lawrence 		zsd_warn(gettext("Reading zone statistics failed"));
4215*efd4c9b6SSteve Lawrence 
4216*efd4c9b6SSteve Lawrence 	return (ctl);
4217*efd4c9b6SSteve Lawrence err:
4218*efd4c9b6SSteve Lawrence 	if (ctl)
4219*efd4c9b6SSteve Lawrence 		zsd_close(ctl);
4220*efd4c9b6SSteve Lawrence 
4221*efd4c9b6SSteve Lawrence 	return (NULL);
4222*efd4c9b6SSteve Lawrence }
4223*efd4c9b6SSteve Lawrence 
4224*efd4c9b6SSteve Lawrence /* Copy utilization data to buffer, filtering data if non-global zone. */
4225*efd4c9b6SSteve Lawrence static void
4226*efd4c9b6SSteve Lawrence zsd_usage_filter(zoneid_t zid, zs_usage_cache_t *cache, zs_usage_t *usage,
4227*efd4c9b6SSteve Lawrence     boolean_t is_gz)
4228*efd4c9b6SSteve Lawrence {
4229*efd4c9b6SSteve Lawrence 	zs_usage_t *cusage;
4230*efd4c9b6SSteve Lawrence 	zs_system_t *sys, *csys;
4231*efd4c9b6SSteve Lawrence 	zs_zone_t *zone, *czone;
4232*efd4c9b6SSteve Lawrence 	zs_pset_t *pset, *cpset;
4233*efd4c9b6SSteve Lawrence 	zs_pset_zone_t *pz, *cpz, *foundpz;
4234*efd4c9b6SSteve Lawrence 	size_t size = 0, csize = 0;
4235*efd4c9b6SSteve Lawrence 	char *start, *cstart;
4236*efd4c9b6SSteve Lawrence 	int i, j;
4237*efd4c9b6SSteve Lawrence 	timestruc_t delta;
4238*efd4c9b6SSteve Lawrence 
4239*efd4c9b6SSteve Lawrence 	/* Privileged users in the global zone get everything */
4240*efd4c9b6SSteve Lawrence 	if (is_gz) {
4241*efd4c9b6SSteve Lawrence 		cusage = cache->zsuc_usage;
4242*efd4c9b6SSteve Lawrence 		(void) bcopy(cusage, usage, cusage->zsu_size);
4243*efd4c9b6SSteve Lawrence 		return;
4244*efd4c9b6SSteve Lawrence 	}
4245*efd4c9b6SSteve Lawrence 
4246*efd4c9b6SSteve Lawrence 	/* Zones just get their own usage */
4247*efd4c9b6SSteve Lawrence 	cusage = cache->zsuc_usage;
4248*efd4c9b6SSteve Lawrence 
4249*efd4c9b6SSteve Lawrence 	start = (char *)usage;
4250*efd4c9b6SSteve Lawrence 	cstart = (char *)cusage;
4251*efd4c9b6SSteve Lawrence 	size += sizeof (zs_usage_t);
4252*efd4c9b6SSteve Lawrence 	csize += sizeof (zs_usage_t);
4253*efd4c9b6SSteve Lawrence 
4254*efd4c9b6SSteve Lawrence 	usage->zsu_start = cusage->zsu_start;
4255*efd4c9b6SSteve Lawrence 	usage->zsu_hrstart = cusage->zsu_hrstart;
4256*efd4c9b6SSteve Lawrence 	usage->zsu_time = cusage->zsu_time;
4257*efd4c9b6SSteve Lawrence 	usage->zsu_hrtime = cusage->zsu_hrtime;
4258*efd4c9b6SSteve Lawrence 	usage->zsu_gen = cusage->zsu_gen;
4259*efd4c9b6SSteve Lawrence 	usage->zsu_nzones = 1;
4260*efd4c9b6SSteve Lawrence 	usage->zsu_npsets = 0;
4261*efd4c9b6SSteve Lawrence 
4262*efd4c9b6SSteve Lawrence 	/* LINTED */
4263*efd4c9b6SSteve Lawrence 	sys = (zs_system_t *)(start + size);
4264*efd4c9b6SSteve Lawrence 	/* LINTED */
4265*efd4c9b6SSteve Lawrence 	csys = (zs_system_t *)(cstart + csize);
4266*efd4c9b6SSteve Lawrence 	size += sizeof (zs_system_t);
4267*efd4c9b6SSteve Lawrence 	csize += sizeof (zs_system_t);
4268*efd4c9b6SSteve Lawrence 
4269*efd4c9b6SSteve Lawrence 	/* Save system limits but not usage */
4270*efd4c9b6SSteve Lawrence 	*sys = *csys;
4271*efd4c9b6SSteve Lawrence 	sys->zss_ncpus = 0;
4272*efd4c9b6SSteve Lawrence 	sys->zss_ncpus_online = 0;
4273*efd4c9b6SSteve Lawrence 
4274*efd4c9b6SSteve Lawrence 	/* LINTED */
4275*efd4c9b6SSteve Lawrence 	zone = (zs_zone_t *)(start + size);
4276*efd4c9b6SSteve Lawrence 	/* LINTED */
4277*efd4c9b6SSteve Lawrence 	czone = (zs_zone_t *)(cstart + csize);
4278*efd4c9b6SSteve Lawrence 	/* Find the matching zone */
4279*efd4c9b6SSteve Lawrence 	for (i = 0; i < cusage->zsu_nzones; i++) {
4280*efd4c9b6SSteve Lawrence 		if (czone->zsz_id == zid) {
4281*efd4c9b6SSteve Lawrence 			*zone = *czone;
4282*efd4c9b6SSteve Lawrence 			size += sizeof (zs_zone_t);
4283*efd4c9b6SSteve Lawrence 		}
4284*efd4c9b6SSteve Lawrence 		csize += sizeof (zs_zone_t);
4285*efd4c9b6SSteve Lawrence 		/* LINTED */
4286*efd4c9b6SSteve Lawrence 		czone = (zs_zone_t *)(cstart + csize);
4287*efd4c9b6SSteve Lawrence 	}
4288*efd4c9b6SSteve Lawrence 	sys->zss_ram_kern += (sys->zss_ram_zones - zone->zsz_usage_ram);
4289*efd4c9b6SSteve Lawrence 	sys->zss_ram_zones = zone->zsz_usage_ram;
4290*efd4c9b6SSteve Lawrence 
4291*efd4c9b6SSteve Lawrence 	sys->zss_vm_kern += (sys->zss_vm_zones - zone->zsz_usage_vm);
4292*efd4c9b6SSteve Lawrence 	sys->zss_vm_zones = zone->zsz_usage_vm;
4293*efd4c9b6SSteve Lawrence 
4294*efd4c9b6SSteve Lawrence 	sys->zss_locked_kern += (sys->zss_locked_zones -
4295*efd4c9b6SSteve Lawrence 	    zone->zsz_usage_locked);
4296*efd4c9b6SSteve Lawrence 	sys->zss_locked_zones = zone->zsz_usage_locked;
4297*efd4c9b6SSteve Lawrence 
4298*efd4c9b6SSteve Lawrence 	TIMESTRUC_DELTA(delta, sys->zss_cpu_usage_zones, zone->zsz_cpu_usage);
4299*efd4c9b6SSteve Lawrence 	TIMESTRUC_ADD_TIMESTRUC(sys->zss_cpu_usage_kern, delta);
4300*efd4c9b6SSteve Lawrence 	sys->zss_cpu_usage_zones = zone->zsz_cpu_usage;
4301*efd4c9b6SSteve Lawrence 
4302*efd4c9b6SSteve Lawrence 	/* LINTED */
4303*efd4c9b6SSteve Lawrence 	pset = (zs_pset_t *)(start + size);
4304*efd4c9b6SSteve Lawrence 	/* LINTED */
4305*efd4c9b6SSteve Lawrence 	cpset = (zs_pset_t *)(cstart + csize);
4306*efd4c9b6SSteve Lawrence 	for (i = 0; i < cusage->zsu_npsets; i++) {
4307*efd4c9b6SSteve Lawrence 		csize += sizeof (zs_pset_t);
4308*efd4c9b6SSteve Lawrence 		/* LINTED */
4309*efd4c9b6SSteve Lawrence 		cpz = (zs_pset_zone_t *)(csize + cstart);
4310*efd4c9b6SSteve Lawrence 		foundpz = NULL;
4311*efd4c9b6SSteve Lawrence 		for (j = 0; j < cpset->zsp_nusage; j++) {
4312*efd4c9b6SSteve Lawrence 			if (cpz->zspz_zoneid == zid)
4313*efd4c9b6SSteve Lawrence 				foundpz = cpz;
4314*efd4c9b6SSteve Lawrence 
4315*efd4c9b6SSteve Lawrence 			csize += sizeof (zs_pset_zone_t);
4316*efd4c9b6SSteve Lawrence 			/* LINTED */
4317*efd4c9b6SSteve Lawrence 			cpz = (zs_pset_zone_t *)(csize + cstart);
4318*efd4c9b6SSteve Lawrence 		}
4319*efd4c9b6SSteve Lawrence 		if (foundpz != NULL) {
4320*efd4c9b6SSteve Lawrence 			size += sizeof (zs_pset_t);
4321*efd4c9b6SSteve Lawrence 			/* LINTED */
4322*efd4c9b6SSteve Lawrence 			pz = (zs_pset_zone_t *)(start + size);
4323*efd4c9b6SSteve Lawrence 			size += sizeof (zs_pset_zone_t);
4324*efd4c9b6SSteve Lawrence 
4325*efd4c9b6SSteve Lawrence 			*pset = *cpset;
4326*efd4c9b6SSteve Lawrence 			*pz = *foundpz;
4327*efd4c9b6SSteve Lawrence 
4328*efd4c9b6SSteve Lawrence 			TIMESTRUC_DELTA(delta, pset->zsp_usage_zones,
4329*efd4c9b6SSteve Lawrence 			    pz->zspz_cpu_usage);
4330*efd4c9b6SSteve Lawrence 			TIMESTRUC_ADD_TIMESTRUC(pset->zsp_usage_kern, delta);
4331*efd4c9b6SSteve Lawrence 			pset->zsp_usage_zones = pz->zspz_cpu_usage;
4332*efd4c9b6SSteve Lawrence 			pset->zsp_nusage = 1;
4333*efd4c9b6SSteve Lawrence 			usage->zsu_npsets++;
4334*efd4c9b6SSteve Lawrence 			sys->zss_ncpus += pset->zsp_size;
4335*efd4c9b6SSteve Lawrence 			sys->zss_ncpus_online += pset->zsp_online;
4336*efd4c9b6SSteve Lawrence 		}
4337*efd4c9b6SSteve Lawrence 		/* LINTED */
4338*efd4c9b6SSteve Lawrence 		cpset = (zs_pset_t *)(cstart + csize);
4339*efd4c9b6SSteve Lawrence 	}
4340*efd4c9b6SSteve Lawrence 	usage->zsu_size = size;
4341*efd4c9b6SSteve Lawrence }
4342*efd4c9b6SSteve Lawrence 
4343*efd4c9b6SSteve Lawrence /*
4344*efd4c9b6SSteve Lawrence  * Respond to new connections from libzonestat.so.  Also respond to zoneadmd,
4345*efd4c9b6SSteve Lawrence  * which reports new zones.
4346*efd4c9b6SSteve Lawrence  */
4347*efd4c9b6SSteve Lawrence /* ARGSUSED */
4348*efd4c9b6SSteve Lawrence static void
4349*efd4c9b6SSteve Lawrence zsd_server(void *cookie, char *argp, size_t arg_size,
4350*efd4c9b6SSteve Lawrence     door_desc_t *dp, uint_t n_desc)
4351*efd4c9b6SSteve Lawrence {
4352*efd4c9b6SSteve Lawrence 	int *args, cmd;
4353*efd4c9b6SSteve Lawrence 	door_desc_t door;
4354*efd4c9b6SSteve Lawrence 	ucred_t *ucred;
4355*efd4c9b6SSteve Lawrence 	const priv_set_t *eset;
4356*efd4c9b6SSteve Lawrence 
4357*efd4c9b6SSteve Lawrence 	if (argp == DOOR_UNREF_DATA) {
4358*efd4c9b6SSteve Lawrence 		(void) door_return(NULL, 0, NULL, 0);
4359*efd4c9b6SSteve Lawrence 		thr_exit(NULL);
4360*efd4c9b6SSteve Lawrence 	}
4361*efd4c9b6SSteve Lawrence 
4362*efd4c9b6SSteve Lawrence 	if (arg_size != sizeof (cmd) * 2) {
4363*efd4c9b6SSteve Lawrence 		(void) door_return(NULL, 0, NULL, 0);
4364*efd4c9b6SSteve Lawrence 		thr_exit(NULL);
4365*efd4c9b6SSteve Lawrence 	}
4366*efd4c9b6SSteve Lawrence 
4367*efd4c9b6SSteve Lawrence 	/* LINTED */
4368*efd4c9b6SSteve Lawrence 	args = (int *)argp;
4369*efd4c9b6SSteve Lawrence 	cmd = args[0];
4370*efd4c9b6SSteve Lawrence 
4371*efd4c9b6SSteve Lawrence 	/* If connection, return door to stat server */
4372*efd4c9b6SSteve Lawrence 	if (cmd == ZSD_CMD_CONNECT) {
4373*efd4c9b6SSteve Lawrence 
4374*efd4c9b6SSteve Lawrence 		/* Verify client compilation version */
4375*efd4c9b6SSteve Lawrence 		if (args[1] != ZS_VERSION) {
4376*efd4c9b6SSteve Lawrence 			args[1] = ZSD_STATUS_VERSION_MISMATCH;
4377*efd4c9b6SSteve Lawrence 			(void) door_return(argp, sizeof (cmd) * 2, NULL, 0);
4378*efd4c9b6SSteve Lawrence 			thr_exit(NULL);
4379*efd4c9b6SSteve Lawrence 		}
4380*efd4c9b6SSteve Lawrence 		ucred = alloca(ucred_size());
4381*efd4c9b6SSteve Lawrence 		/* Verify client permission */
4382*efd4c9b6SSteve Lawrence 		if (door_ucred(&ucred) != 0) {
4383*efd4c9b6SSteve Lawrence 			args[1] = ZSD_STATUS_INTERNAL_ERROR;
4384*efd4c9b6SSteve Lawrence 			(void) door_return(argp, sizeof (cmd) * 2, NULL, 0);
4385*efd4c9b6SSteve Lawrence 			thr_exit(NULL);
4386*efd4c9b6SSteve Lawrence 		}
4387*efd4c9b6SSteve Lawrence 
4388*efd4c9b6SSteve Lawrence 		eset = ucred_getprivset(ucred, PRIV_EFFECTIVE);
4389*efd4c9b6SSteve Lawrence 		if (eset == NULL) {
4390*efd4c9b6SSteve Lawrence 			args[1] = ZSD_STATUS_INTERNAL_ERROR;
4391*efd4c9b6SSteve Lawrence 			(void) door_return(argp, sizeof (cmd) * 2, NULL, 0);
4392*efd4c9b6SSteve Lawrence 			thr_exit(NULL);
4393*efd4c9b6SSteve Lawrence 		}
4394*efd4c9b6SSteve Lawrence 		if (!priv_ismember(eset, PRIV_PROC_INFO)) {
4395*efd4c9b6SSteve Lawrence 			args[1] = ZSD_STATUS_PERMISSION;
4396*efd4c9b6SSteve Lawrence 			(void) door_return(argp, sizeof (cmd) * 2, NULL, 0);
4397*efd4c9b6SSteve Lawrence 			thr_exit(NULL);
4398*efd4c9b6SSteve Lawrence 		}
4399*efd4c9b6SSteve Lawrence 
4400*efd4c9b6SSteve Lawrence 		/* Return stat server door */
4401*efd4c9b6SSteve Lawrence 		args[1] = ZSD_STATUS_OK;
4402*efd4c9b6SSteve Lawrence 		door.d_attributes = DOOR_DESCRIPTOR;
4403*efd4c9b6SSteve Lawrence 		door.d_data.d_desc.d_descriptor = g_stat_door;
4404*efd4c9b6SSteve Lawrence 		(void) door_return(argp, sizeof (cmd) * 2, &door, 1);
4405*efd4c9b6SSteve Lawrence 		thr_exit(NULL);
4406*efd4c9b6SSteve Lawrence 	}
4407*efd4c9b6SSteve Lawrence 
4408*efd4c9b6SSteve Lawrence 	/* Respond to zoneadmd informing zonestatd of a new zone */
4409*efd4c9b6SSteve Lawrence 	if (cmd == ZSD_CMD_NEW_ZONE) {
4410*efd4c9b6SSteve Lawrence 		zsd_fattach_zone(args[1], g_server_door, B_FALSE);
4411*efd4c9b6SSteve Lawrence 		(void) door_return(NULL, 0, NULL, 0);
4412*efd4c9b6SSteve Lawrence 		thr_exit(NULL);
4413*efd4c9b6SSteve Lawrence 	}
4414*efd4c9b6SSteve Lawrence 
4415*efd4c9b6SSteve Lawrence 	args[1] = ZSD_STATUS_INTERNAL_ERROR;
4416*efd4c9b6SSteve Lawrence 	(void) door_return(argp, sizeof (cmd) * 2, NULL, 0);
4417*efd4c9b6SSteve Lawrence 	thr_exit(NULL);
4418*efd4c9b6SSteve Lawrence }
4419*efd4c9b6SSteve Lawrence 
4420*efd4c9b6SSteve Lawrence /*
4421*efd4c9b6SSteve Lawrence  * Respond to libzonestat.so clients with the current utlilzation data.
4422*efd4c9b6SSteve Lawrence  */
4423*efd4c9b6SSteve Lawrence /* ARGSUSED */
4424*efd4c9b6SSteve Lawrence static void
4425*efd4c9b6SSteve Lawrence zsd_stat_server(void *cookie, char *argp, size_t arg_size,
4426*efd4c9b6SSteve Lawrence     door_desc_t *dp, uint_t n_desc)
4427*efd4c9b6SSteve Lawrence {
4428*efd4c9b6SSteve Lawrence 	uint64_t *args, cmd;
4429*efd4c9b6SSteve Lawrence 	zs_usage_cache_t *cache;
4430*efd4c9b6SSteve Lawrence 	int ret;
4431*efd4c9b6SSteve Lawrence 	char *rvalp;
4432*efd4c9b6SSteve Lawrence 	size_t rvals;
4433*efd4c9b6SSteve Lawrence 	zs_usage_t *usage;
4434*efd4c9b6SSteve Lawrence 	ucred_t *ucred;
4435*efd4c9b6SSteve Lawrence 	zoneid_t zoneid;
4436*efd4c9b6SSteve Lawrence 	const priv_set_t *eset;
4437*efd4c9b6SSteve Lawrence 	boolean_t is_gz = B_FALSE;
4438*efd4c9b6SSteve Lawrence 
4439*efd4c9b6SSteve Lawrence 	/* Tell stat thread there are no more clients */
4440*efd4c9b6SSteve Lawrence 	if (argp == DOOR_UNREF_DATA) {
4441*efd4c9b6SSteve Lawrence 		(void) mutex_lock(&g_usage_cache_lock);
4442*efd4c9b6SSteve Lawrence 		g_hasclient = B_FALSE;
4443*efd4c9b6SSteve Lawrence 		(void) cond_signal(&g_usage_cache_kick);
4444*efd4c9b6SSteve Lawrence 		(void) mutex_unlock(&g_usage_cache_lock);
4445*efd4c9b6SSteve Lawrence 		(void) door_return(NULL, 0, NULL, 0);
4446*efd4c9b6SSteve Lawrence 		thr_exit(NULL);
4447*efd4c9b6SSteve Lawrence 	}
4448*efd4c9b6SSteve Lawrence 	if (arg_size != sizeof (cmd) * 2) {
4449*efd4c9b6SSteve Lawrence 		(void) door_return(NULL, 0, NULL, 0);
4450*efd4c9b6SSteve Lawrence 		thr_exit(NULL);
4451*efd4c9b6SSteve Lawrence 	}
4452*efd4c9b6SSteve Lawrence 	/* LINTED */
4453*efd4c9b6SSteve Lawrence 	args = (uint64_t *)argp;
4454*efd4c9b6SSteve Lawrence 	cmd = args[0];
4455*efd4c9b6SSteve Lawrence 	if (cmd != ZSD_CMD_READ) {
4456*efd4c9b6SSteve Lawrence 		(void) door_return(NULL, 0, NULL, 0);
4457*efd4c9b6SSteve Lawrence 		thr_exit(NULL);
4458*efd4c9b6SSteve Lawrence 	}
4459*efd4c9b6SSteve Lawrence 	ucred = alloca(ucred_size());
4460*efd4c9b6SSteve Lawrence 	if (door_ucred(&ucred) != 0) {
4461*efd4c9b6SSteve Lawrence 		(void) door_return(NULL, 0, NULL, 0);
4462*efd4c9b6SSteve Lawrence 		thr_exit(NULL);
4463*efd4c9b6SSteve Lawrence 	}
4464*efd4c9b6SSteve Lawrence 	zoneid = ucred_getzoneid(ucred);
4465*efd4c9b6SSteve Lawrence 
4466*efd4c9b6SSteve Lawrence 	if (zoneid == GLOBAL_ZONEID)
4467*efd4c9b6SSteve Lawrence 		is_gz = B_TRUE;
4468*efd4c9b6SSteve Lawrence 
4469*efd4c9b6SSteve Lawrence 	eset = ucred_getprivset(ucred, PRIV_EFFECTIVE);
4470*efd4c9b6SSteve Lawrence 	if (eset == NULL) {
4471*efd4c9b6SSteve Lawrence 		(void) door_return(NULL, 0, NULL, 0);
4472*efd4c9b6SSteve Lawrence 		thr_exit(NULL);
4473*efd4c9b6SSteve Lawrence 	}
4474*efd4c9b6SSteve Lawrence 	if (!priv_ismember(eset, PRIV_PROC_INFO)) {
4475*efd4c9b6SSteve Lawrence 		(void) door_return(NULL, 0, NULL, 0);
4476*efd4c9b6SSteve Lawrence 		thr_exit(NULL);
4477*efd4c9b6SSteve Lawrence 	}
4478*efd4c9b6SSteve Lawrence 	(void) mutex_lock(&g_usage_cache_lock);
4479*efd4c9b6SSteve Lawrence 	g_hasclient = B_TRUE;
4480*efd4c9b6SSteve Lawrence 
4481*efd4c9b6SSteve Lawrence 	/*
4482*efd4c9b6SSteve Lawrence 	 * Force a new cpu calculation for client.  This will force a
4483*efd4c9b6SSteve Lawrence 	 * new memory calculation if the memory data is older than the
4484*efd4c9b6SSteve Lawrence 	 * sample period.
4485*efd4c9b6SSteve Lawrence 	 */
4486*efd4c9b6SSteve Lawrence 	g_usage_cache_kickers++;
4487*efd4c9b6SSteve Lawrence 	(void) cond_signal(&g_usage_cache_kick);
4488*efd4c9b6SSteve Lawrence 	ret = cond_wait(&g_usage_cache_wait, &g_usage_cache_lock);
4489*efd4c9b6SSteve Lawrence 	g_usage_cache_kickers--;
4490*efd4c9b6SSteve Lawrence 	if (ret != 0 && errno == EINTR) {
4491*efd4c9b6SSteve Lawrence 		(void) mutex_unlock(&g_usage_cache_lock);
4492*efd4c9b6SSteve Lawrence 		zsd_warn(gettext(
4493*efd4c9b6SSteve Lawrence 		    "Interrupted before writing usage size to client\n"));
4494*efd4c9b6SSteve Lawrence 		(void) door_return(NULL, 0, NULL, 0);
4495*efd4c9b6SSteve Lawrence 		thr_exit(NULL);
4496*efd4c9b6SSteve Lawrence 	}
4497*efd4c9b6SSteve Lawrence 	cache = zsd_usage_cache_hold_locked();
4498*efd4c9b6SSteve Lawrence 	if (cache == NULL) {
4499*efd4c9b6SSteve Lawrence 		zsd_warn(gettext("Usage cache empty.\n"));
4500*efd4c9b6SSteve Lawrence 		(void) door_return(NULL, 0, NULL, 0);
4501*efd4c9b6SSteve Lawrence 		thr_exit(NULL);
4502*efd4c9b6SSteve Lawrence 	}
4503*efd4c9b6SSteve Lawrence 	(void) mutex_unlock(&g_usage_cache_lock);
4504*efd4c9b6SSteve Lawrence 
4505*efd4c9b6SSteve Lawrence 	/* Copy current usage data to stack to send to client */
4506*efd4c9b6SSteve Lawrence 	usage = (zs_usage_t *)alloca(cache->zsuc_size);
4507*efd4c9b6SSteve Lawrence 
4508*efd4c9b6SSteve Lawrence 	/* Filter out results if caller is non-global zone */
4509*efd4c9b6SSteve Lawrence 	zsd_usage_filter(zoneid, cache, usage, is_gz);
4510*efd4c9b6SSteve Lawrence 
4511*efd4c9b6SSteve Lawrence 	rvalp = (void *)usage;
4512*efd4c9b6SSteve Lawrence 	rvals = usage->zsu_size;
4513*efd4c9b6SSteve Lawrence 	zsd_usage_cache_rele(cache);
4514*efd4c9b6SSteve Lawrence 
4515*efd4c9b6SSteve Lawrence 	(void) door_return(rvalp, rvals, 0, NULL);
4516*efd4c9b6SSteve Lawrence 	thr_exit(NULL);
4517*efd4c9b6SSteve Lawrence }
4518*efd4c9b6SSteve Lawrence 
4519*efd4c9b6SSteve Lawrence static volatile boolean_t g_quit;
4520*efd4c9b6SSteve Lawrence 
4521*efd4c9b6SSteve Lawrence /* ARGSUSED */
4522*efd4c9b6SSteve Lawrence static void
4523*efd4c9b6SSteve Lawrence zonestat_quithandler(int sig)
4524*efd4c9b6SSteve Lawrence {
4525*efd4c9b6SSteve Lawrence 	g_quit = B_TRUE;
4526*efd4c9b6SSteve Lawrence }
4527*efd4c9b6SSteve Lawrence 
4528*efd4c9b6SSteve Lawrence /*
4529*efd4c9b6SSteve Lawrence  * The stat thread generates new utilization data when clients request
4530*efd4c9b6SSteve Lawrence  * it.  It also manages opening and closing the subsystems used to gather
4531*efd4c9b6SSteve Lawrence  * data depending on if clients exist.
4532*efd4c9b6SSteve Lawrence  */
4533*efd4c9b6SSteve Lawrence /* ARGSUSED */
4534*efd4c9b6SSteve Lawrence void *
4535*efd4c9b6SSteve Lawrence stat_thread(void *arg)
4536*efd4c9b6SSteve Lawrence {
4537*efd4c9b6SSteve Lawrence 	time_t start;
4538*efd4c9b6SSteve Lawrence 	time_t now;
4539*efd4c9b6SSteve Lawrence 	time_t next_memory;
4540*efd4c9b6SSteve Lawrence 	boolean_t do_memory;
4541*efd4c9b6SSteve Lawrence 	boolean_t do_read;
4542*efd4c9b6SSteve Lawrence 	boolean_t do_close;
4543*efd4c9b6SSteve Lawrence 
4544*efd4c9b6SSteve Lawrence 	start = time(NULL);
4545*efd4c9b6SSteve Lawrence 	if (start < 0) {
4546*efd4c9b6SSteve Lawrence 		if (g_quit == B_TRUE)
4547*efd4c9b6SSteve Lawrence 			goto quit;
4548*efd4c9b6SSteve Lawrence 		zsd_warn(gettext("Unable to fetch current time"));
4549*efd4c9b6SSteve Lawrence 		g_quit = B_TRUE;
4550*efd4c9b6SSteve Lawrence 		goto quit;
4551*efd4c9b6SSteve Lawrence 	}
4552*efd4c9b6SSteve Lawrence 
4553*efd4c9b6SSteve Lawrence 	next_memory = start;
4554*efd4c9b6SSteve Lawrence 	while (g_quit == B_FALSE) {
4555*efd4c9b6SSteve Lawrence 		for (;;) {
4556*efd4c9b6SSteve Lawrence 			/*
4557*efd4c9b6SSteve Lawrence 			 * These are used to decide if the most recent memory
4558*efd4c9b6SSteve Lawrence 			 * calculation was within a sample interval,
4559*efd4c9b6SSteve Lawrence 			 * and weather or not the usage collection needs to
4560*efd4c9b6SSteve Lawrence 			 * be opened or closed.
4561*efd4c9b6SSteve Lawrence 			 */
4562*efd4c9b6SSteve Lawrence 			do_memory = B_FALSE;
4563*efd4c9b6SSteve Lawrence 			do_read = B_FALSE;
4564*efd4c9b6SSteve Lawrence 			do_close = B_FALSE;
4565*efd4c9b6SSteve Lawrence 
4566*efd4c9b6SSteve Lawrence 			/*
4567*efd4c9b6SSteve Lawrence 			 * If all clients have gone, close usage collecting
4568*efd4c9b6SSteve Lawrence 			 */
4569*efd4c9b6SSteve Lawrence 			(void) mutex_lock(&g_usage_cache_lock);
4570*efd4c9b6SSteve Lawrence 			if (!g_hasclient && g_open == B_TRUE) {
4571*efd4c9b6SSteve Lawrence 				do_close = B_TRUE;
4572*efd4c9b6SSteve Lawrence 				(void) mutex_unlock(&g_usage_cache_lock);
4573*efd4c9b6SSteve Lawrence 				break;
4574*efd4c9b6SSteve Lawrence 			}
4575*efd4c9b6SSteve Lawrence 			if (g_quit == B_TRUE) {
4576*efd4c9b6SSteve Lawrence 				(void) mutex_unlock(
4577*efd4c9b6SSteve Lawrence 				    &g_usage_cache_lock);
4578*efd4c9b6SSteve Lawrence 				break;
4579*efd4c9b6SSteve Lawrence 			}
4580*efd4c9b6SSteve Lawrence 			/*
4581*efd4c9b6SSteve Lawrence 			 * Wait for a usage data request
4582*efd4c9b6SSteve Lawrence 			 */
4583*efd4c9b6SSteve Lawrence 			if (g_usage_cache_kickers == 0) {
4584*efd4c9b6SSteve Lawrence 				(void) cond_wait(&g_usage_cache_kick,
4585*efd4c9b6SSteve Lawrence 				    &g_usage_cache_lock);
4586*efd4c9b6SSteve Lawrence 			}
4587*efd4c9b6SSteve Lawrence 			now = time(NULL);
4588*efd4c9b6SSteve Lawrence 			if (now < 0) {
4589*efd4c9b6SSteve Lawrence 				if (g_quit == B_TRUE) {
4590*efd4c9b6SSteve Lawrence 					(void) mutex_unlock(
4591*efd4c9b6SSteve Lawrence 					    &g_usage_cache_lock);
4592*efd4c9b6SSteve Lawrence 					goto quit;
4593*efd4c9b6SSteve Lawrence 				}
4594*efd4c9b6SSteve Lawrence 				g_quit = B_TRUE;
4595*efd4c9b6SSteve Lawrence 				(void) mutex_unlock(&g_usage_cache_lock);
4596*efd4c9b6SSteve Lawrence 				zsd_warn(gettext(
4597*efd4c9b6SSteve Lawrence 				    "Unable to fetch current time"));
4598*efd4c9b6SSteve Lawrence 				goto quit;
4599*efd4c9b6SSteve Lawrence 			}
4600*efd4c9b6SSteve Lawrence 			if (g_hasclient) {
4601*efd4c9b6SSteve Lawrence 				do_read = B_TRUE;
4602*efd4c9b6SSteve Lawrence 				if (now >= next_memory) {
4603*efd4c9b6SSteve Lawrence 					do_memory = B_TRUE;
4604*efd4c9b6SSteve Lawrence 					next_memory = now + g_interval;
4605*efd4c9b6SSteve Lawrence 				}
4606*efd4c9b6SSteve Lawrence 			} else {
4607*efd4c9b6SSteve Lawrence 				do_close = B_TRUE;
4608*efd4c9b6SSteve Lawrence 			}
4609*efd4c9b6SSteve Lawrence 			(void) mutex_unlock(&g_usage_cache_lock);
4610*efd4c9b6SSteve Lawrence 			if (do_read || do_close)
4611*efd4c9b6SSteve Lawrence 				break;
4612*efd4c9b6SSteve Lawrence 		}
4613*efd4c9b6SSteve Lawrence 		g_now = now;
4614*efd4c9b6SSteve Lawrence 		g_hrnow = gethrtime();
4615*efd4c9b6SSteve Lawrence 		if (g_hasclient && g_open == B_FALSE) {
4616*efd4c9b6SSteve Lawrence 			g_start = g_now;
4617*efd4c9b6SSteve Lawrence 			g_hrstart = g_hrnow;
4618*efd4c9b6SSteve Lawrence 			g_ctl = zsd_open(g_ctl);
4619*efd4c9b6SSteve Lawrence 			if (g_ctl == NULL)
4620*efd4c9b6SSteve Lawrence 				zsd_warn(gettext(
4621*efd4c9b6SSteve Lawrence 				    "Unable to open zone statistics"));
4622*efd4c9b6SSteve Lawrence 			else
4623*efd4c9b6SSteve Lawrence 				g_open = B_TRUE;
4624*efd4c9b6SSteve Lawrence 		}
4625*efd4c9b6SSteve Lawrence 		if (do_read && g_ctl) {
4626*efd4c9b6SSteve Lawrence 			if (zsd_read(g_ctl, B_FALSE, do_memory) != 0) {
4627*efd4c9b6SSteve Lawrence 				zsd_warn(gettext(
4628*efd4c9b6SSteve Lawrence 				    "Unable to read zone statistics"));
4629*efd4c9b6SSteve Lawrence 				g_quit = B_TRUE;
4630*efd4c9b6SSteve Lawrence 				return (NULL);
4631*efd4c9b6SSteve Lawrence 			}
4632*efd4c9b6SSteve Lawrence 		}
4633*efd4c9b6SSteve Lawrence 		(void) mutex_lock(&g_usage_cache_lock);
4634*efd4c9b6SSteve Lawrence 		if (!g_hasclient && g_open == B_TRUE && g_ctl) {
4635*efd4c9b6SSteve Lawrence 			(void) mutex_unlock(&g_usage_cache_lock);
4636*efd4c9b6SSteve Lawrence 			zsd_close(g_ctl);
4637*efd4c9b6SSteve Lawrence 			g_open = B_FALSE;
4638*efd4c9b6SSteve Lawrence 		} else {
4639*efd4c9b6SSteve Lawrence 			(void) mutex_unlock(&g_usage_cache_lock);
4640*efd4c9b6SSteve Lawrence 		}
4641*efd4c9b6SSteve Lawrence 	}
4642*efd4c9b6SSteve Lawrence quit:
4643*efd4c9b6SSteve Lawrence 	if (g_open)
4644*efd4c9b6SSteve Lawrence 		zsd_close(g_ctl);
4645*efd4c9b6SSteve Lawrence 
4646*efd4c9b6SSteve Lawrence 	(void) thr_kill(g_main, SIGINT);
4647*efd4c9b6SSteve Lawrence 	thr_exit(NULL);
4648*efd4c9b6SSteve Lawrence 	return (NULL);
4649*efd4c9b6SSteve Lawrence }
4650*efd4c9b6SSteve Lawrence 
4651*efd4c9b6SSteve Lawrence void
4652*efd4c9b6SSteve Lawrence zsd_set_fx()
4653*efd4c9b6SSteve Lawrence {
4654*efd4c9b6SSteve Lawrence 	pcinfo_t pcinfo;
4655*efd4c9b6SSteve Lawrence 	pcparms_t pcparms;
4656*efd4c9b6SSteve Lawrence 
4657*efd4c9b6SSteve Lawrence 	(void) strlcpy(pcinfo.pc_clname, "FX", sizeof (pcinfo.pc_clname));
4658*efd4c9b6SSteve Lawrence 	if (priocntl(0, 0, PC_GETCID, (caddr_t)&pcinfo) == -1) {
4659*efd4c9b6SSteve Lawrence 		zsd_warn(gettext("cannot get FX class parameters"));
4660*efd4c9b6SSteve Lawrence 		return;
4661*efd4c9b6SSteve Lawrence 	}
4662*efd4c9b6SSteve Lawrence 	pcparms.pc_cid = pcinfo.pc_cid;
4663*efd4c9b6SSteve Lawrence 	((fxparms_t *)pcparms.pc_clparms)->fx_upri = 60;
4664*efd4c9b6SSteve Lawrence 	((fxparms_t *)pcparms.pc_clparms)->fx_uprilim = 60;
4665*efd4c9b6SSteve Lawrence 	((fxparms_t *)pcparms.pc_clparms)->fx_tqsecs = 0;
4666*efd4c9b6SSteve Lawrence 	((fxparms_t *)pcparms.pc_clparms)->fx_tqnsecs = FX_NOCHANGE;
4667*efd4c9b6SSteve Lawrence 	if (priocntl(P_PID, getpid(), PC_SETPARMS, (caddr_t)&pcparms) == -1)
4668*efd4c9b6SSteve Lawrence 		zsd_warn(gettext("cannot enter the FX class"));
4669*efd4c9b6SSteve Lawrence }
4670*efd4c9b6SSteve Lawrence 
/*
 * Write end of the startup pipe.  daemonize_start() stores it here in the
 * child; daemonize_ready() consumes and closes it.
 */
static int pipe_fd;

/*
 * Report the daemon's startup status to the waiting parent.
 *
 * A single byte holding `status` is written to pipe_fd and the descriptor
 * is then closed.  Both calls are best effort: their results are ignored.
 */
static void
daemonize_ready(char status)
{
	const char code = status;

	/* wake the parent with a clue */
	(void) write(pipe_fd, &code, sizeof (code));
	(void) close(pipe_fd);
}
4682*efd4c9b6SSteve Lawrence 
4683*efd4c9b6SSteve Lawrence static int
4684*efd4c9b6SSteve Lawrence daemonize_start(void)
4685*efd4c9b6SSteve Lawrence {
4686*efd4c9b6SSteve Lawrence 	char data;
4687*efd4c9b6SSteve Lawrence 	int status;
4688*efd4c9b6SSteve Lawrence 
4689*efd4c9b6SSteve Lawrence 	int filedes[2];
4690*efd4c9b6SSteve Lawrence 	pid_t pid;
4691*efd4c9b6SSteve Lawrence 
4692*efd4c9b6SSteve Lawrence 	(void) close(0);
4693*efd4c9b6SSteve Lawrence 	(void) dup2(2, 1);
4694*efd4c9b6SSteve Lawrence 
4695*efd4c9b6SSteve Lawrence 	if (pipe(filedes) < 0)
4696*efd4c9b6SSteve Lawrence 		return (-1);
4697*efd4c9b6SSteve Lawrence 
4698*efd4c9b6SSteve Lawrence 	(void) fflush(NULL);
4699*efd4c9b6SSteve Lawrence 
4700*efd4c9b6SSteve Lawrence 	if ((pid = fork1()) < 0)
4701*efd4c9b6SSteve Lawrence 		return (-1);
4702*efd4c9b6SSteve Lawrence 
4703*efd4c9b6SSteve Lawrence 	if (pid != 0) {
4704*efd4c9b6SSteve Lawrence 		/*
4705*efd4c9b6SSteve Lawrence 		 * parent
4706*efd4c9b6SSteve Lawrence 		 */
4707*efd4c9b6SSteve Lawrence 		struct sigaction act;
4708*efd4c9b6SSteve Lawrence 
4709*efd4c9b6SSteve Lawrence 		act.sa_sigaction = SIG_DFL;
4710*efd4c9b6SSteve Lawrence 		(void) sigemptyset(&act.sa_mask);
4711*efd4c9b6SSteve Lawrence 		act.sa_flags = 0;
4712*efd4c9b6SSteve Lawrence 
4713*efd4c9b6SSteve Lawrence 		(void) sigaction(SIGPIPE, &act, NULL);  /* ignore SIGPIPE */
4714*efd4c9b6SSteve Lawrence 
4715*efd4c9b6SSteve Lawrence 		(void) close(filedes[1]);
4716*efd4c9b6SSteve Lawrence 		if (read(filedes[0], &data, 1) == 1) {
4717*efd4c9b6SSteve Lawrence 			/* forward ready code via exit status */
4718*efd4c9b6SSteve Lawrence 			exit(data);
4719*efd4c9b6SSteve Lawrence 		}
4720*efd4c9b6SSteve Lawrence 		status = -1;
4721*efd4c9b6SSteve Lawrence 		(void) wait4(pid, &status, 0, NULL);
4722*efd4c9b6SSteve Lawrence 		/* daemon process exited before becoming ready */
4723*efd4c9b6SSteve Lawrence 		if (WIFEXITED(status)) {
4724*efd4c9b6SSteve Lawrence 			/* assume daemon process printed useful message */
4725*efd4c9b6SSteve Lawrence 			exit(WEXITSTATUS(status));
4726*efd4c9b6SSteve Lawrence 		} else {
4727*efd4c9b6SSteve Lawrence 			zsd_warn(gettext("daemon process killed or died"));
4728*efd4c9b6SSteve Lawrence 			exit(1);
4729*efd4c9b6SSteve Lawrence 		}
4730*efd4c9b6SSteve Lawrence 	}
4731*efd4c9b6SSteve Lawrence 
4732*efd4c9b6SSteve Lawrence 	/*
4733*efd4c9b6SSteve Lawrence 	 * child
4734*efd4c9b6SSteve Lawrence 	 */
4735*efd4c9b6SSteve Lawrence 	pipe_fd = filedes[1];
4736*efd4c9b6SSteve Lawrence 	(void) close(filedes[0]);
4737*efd4c9b6SSteve Lawrence 
4738*efd4c9b6SSteve Lawrence 	/*
4739*efd4c9b6SSteve Lawrence 	 * generic Unix setup
4740*efd4c9b6SSteve Lawrence 	 */
4741*efd4c9b6SSteve Lawrence 	(void) setsid();
4742*efd4c9b6SSteve Lawrence 	(void) umask(0000);
4743*efd4c9b6SSteve Lawrence 
4744*efd4c9b6SSteve Lawrence 	return (0);
4745*efd4c9b6SSteve Lawrence }
4746*efd4c9b6SSteve Lawrence 
4747*efd4c9b6SSteve Lawrence static void
4748*efd4c9b6SSteve Lawrence fattach_all_zones(boolean_t detach_only)
4749*efd4c9b6SSteve Lawrence {
4750*efd4c9b6SSteve Lawrence 	zoneid_t *zids;
4751*efd4c9b6SSteve Lawrence 	uint_t nzids, nzids_last;
4752*efd4c9b6SSteve Lawrence 	int i;
4753*efd4c9b6SSteve Lawrence 
4754*efd4c9b6SSteve Lawrence again:
4755*efd4c9b6SSteve Lawrence 	(void) zone_list(NULL, &nzids);
4756*efd4c9b6SSteve Lawrence 	nzids_last = nzids;
4757*efd4c9b6SSteve Lawrence 	zids = (zoneid_t *)malloc(sizeof (zoneid_t) * nzids_last);
4758*efd4c9b6SSteve Lawrence 	if (zids == NULL)
4759*efd4c9b6SSteve Lawrence 		zsd_error(gettext("Out of memory"));
4760*efd4c9b6SSteve Lawrence 
4761*efd4c9b6SSteve Lawrence 	(void) zone_list(zids, &nzids);
4762*efd4c9b6SSteve Lawrence 	if (nzids > nzids_last) {
4763*efd4c9b6SSteve Lawrence 		free(zids);
4764*efd4c9b6SSteve Lawrence 		goto again;
4765*efd4c9b6SSteve Lawrence 	}
4766*efd4c9b6SSteve Lawrence 	for (i = 0; i < nzids; i++)
4767*efd4c9b6SSteve Lawrence 		zsd_fattach_zone(zids[i], g_server_door, detach_only);
4768*efd4c9b6SSteve Lawrence 
4769*efd4c9b6SSteve Lawrence 	free(zids);
4770*efd4c9b6SSteve Lawrence }
4771*efd4c9b6SSteve Lawrence 
4772*efd4c9b6SSteve Lawrence int
4773*efd4c9b6SSteve Lawrence main(int argc, char *argv[])
4774*efd4c9b6SSteve Lawrence {
4775*efd4c9b6SSteve Lawrence 
4776*efd4c9b6SSteve Lawrence 	int arg;
4777*efd4c9b6SSteve Lawrence 	thread_t tid;
4778*efd4c9b6SSteve Lawrence 	scf_simple_prop_t *prop;
4779*efd4c9b6SSteve Lawrence 	uint64_t *intervalp;
4780*efd4c9b6SSteve Lawrence 	boolean_t opt_cleanup = B_FALSE;
4781*efd4c9b6SSteve Lawrence 
4782*efd4c9b6SSteve Lawrence 	g_main = thr_self();
4783*efd4c9b6SSteve Lawrence 	g_quit = B_FALSE;
4784*efd4c9b6SSteve Lawrence 	(void) signal(SIGINT, zonestat_quithandler);
4785*efd4c9b6SSteve Lawrence 	(void) signal(SIGTERM, zonestat_quithandler);
4786*efd4c9b6SSteve Lawrence 	(void) signal(SIGHUP, zonestat_quithandler);
4787*efd4c9b6SSteve Lawrence /*	(void) sigignore(SIGCHLD); */
4788*efd4c9b6SSteve Lawrence 	(void) sigignore(SIGPIPE);
4789*efd4c9b6SSteve Lawrence 
4790*efd4c9b6SSteve Lawrence 	if (getzoneid() != GLOBAL_ZONEID)
4791*efd4c9b6SSteve Lawrence 		zsd_error(gettext("Must be run from global zone only"));
4792*efd4c9b6SSteve Lawrence 
4793*efd4c9b6SSteve Lawrence 	while ((arg = getopt(argc, argv, "c"))
4794*efd4c9b6SSteve Lawrence 	    != EOF) {
4795*efd4c9b6SSteve Lawrence 		switch (arg) {
4796*efd4c9b6SSteve Lawrence 		case 'c':
4797*efd4c9b6SSteve Lawrence 			opt_cleanup = B_TRUE;
4798*efd4c9b6SSteve Lawrence 			break;
4799*efd4c9b6SSteve Lawrence 		default:
4800*efd4c9b6SSteve Lawrence 			zsd_error(gettext("Invalid option"));
4801*efd4c9b6SSteve Lawrence 		}
4802*efd4c9b6SSteve Lawrence 	}
4803*efd4c9b6SSteve Lawrence 
4804*efd4c9b6SSteve Lawrence 	if (opt_cleanup) {
4805*efd4c9b6SSteve Lawrence 		if (zsd_disable_cpu_stats() != 0)
4806*efd4c9b6SSteve Lawrence 			exit(1);
4807*efd4c9b6SSteve Lawrence 		else
4808*efd4c9b6SSteve Lawrence 			exit(0);
4809*efd4c9b6SSteve Lawrence 	}
4810*efd4c9b6SSteve Lawrence 
4811*efd4c9b6SSteve Lawrence 	/* Get the configured sample interval */
4812*efd4c9b6SSteve Lawrence 	prop = scf_simple_prop_get(NULL, "svc:/system/zones-monitoring:default",
4813*efd4c9b6SSteve Lawrence 	    "config", "sample_interval");
4814*efd4c9b6SSteve Lawrence 	if (prop == NULL)
4815*efd4c9b6SSteve Lawrence 		zsd_error(gettext("Unable to fetch SMF property "
4816*efd4c9b6SSteve Lawrence 		    "\"config/sample_interval\""));
4817*efd4c9b6SSteve Lawrence 
4818*efd4c9b6SSteve Lawrence 	if (scf_simple_prop_type(prop) != SCF_TYPE_COUNT)
4819*efd4c9b6SSteve Lawrence 		zsd_error(gettext("Malformed SMF property "
4820*efd4c9b6SSteve Lawrence 		    "\"config/sample_interval\".  Must be of type \"count\""));
4821*efd4c9b6SSteve Lawrence 
4822*efd4c9b6SSteve Lawrence 	intervalp = scf_simple_prop_next_count(prop);
4823*efd4c9b6SSteve Lawrence 	g_interval = *intervalp;
4824*efd4c9b6SSteve Lawrence 	if (g_interval == 0)
4825*efd4c9b6SSteve Lawrence 		zsd_error(gettext("Malformed SMF property "
4826*efd4c9b6SSteve Lawrence 		    "\"config/sample_interval\".  Must be greater than zero"));
4827*efd4c9b6SSteve Lawrence 
4828*efd4c9b6SSteve Lawrence 	scf_simple_prop_free(prop);
4829*efd4c9b6SSteve Lawrence 
4830*efd4c9b6SSteve Lawrence 	if (daemonize_start() < 0)
4831*efd4c9b6SSteve Lawrence 		zsd_error(gettext("Unable to start daemon\n"));
4832*efd4c9b6SSteve Lawrence 
4833*efd4c9b6SSteve Lawrence 	/* Run at high priority */
4834*efd4c9b6SSteve Lawrence 	zsd_set_fx();
4835*efd4c9b6SSteve Lawrence 
4836*efd4c9b6SSteve Lawrence 	(void) mutex_init(&g_usage_cache_lock, USYNC_THREAD, NULL);
4837*efd4c9b6SSteve Lawrence 	(void) cond_init(&g_usage_cache_kick, USYNC_THREAD, NULL);
4838*efd4c9b6SSteve Lawrence 	(void) cond_init(&g_usage_cache_wait, USYNC_THREAD, NULL);
4839*efd4c9b6SSteve Lawrence 
4840*efd4c9b6SSteve Lawrence 	g_server_door = door_create(zsd_server, NULL,
4841*efd4c9b6SSteve Lawrence 	    DOOR_REFUSE_DESC | DOOR_NO_CANCEL);
4842*efd4c9b6SSteve Lawrence 	if (g_server_door < 0)
4843*efd4c9b6SSteve Lawrence 		zsd_error(gettext("Unable to create server door\n"));
4844*efd4c9b6SSteve Lawrence 
4845*efd4c9b6SSteve Lawrence 
4846*efd4c9b6SSteve Lawrence 	g_stat_door = door_create(zsd_stat_server, NULL, DOOR_UNREF_MULTI |
4847*efd4c9b6SSteve Lawrence 	    DOOR_REFUSE_DESC | DOOR_NO_CANCEL);
4848*efd4c9b6SSteve Lawrence 	if (g_stat_door < 0)
4849*efd4c9b6SSteve Lawrence 		zsd_error(gettext("Unable to create statistics door\n"));
4850*efd4c9b6SSteve Lawrence 
4851*efd4c9b6SSteve Lawrence 	fattach_all_zones(B_FALSE);
4852*efd4c9b6SSteve Lawrence 
4853*efd4c9b6SSteve Lawrence 	if (thr_create(NULL, 0, stat_thread, NULL, 0, &tid) != 0)
4854*efd4c9b6SSteve Lawrence 		zsd_error(gettext("Unable to create statistics thread\n"));
4855*efd4c9b6SSteve Lawrence 
4856*efd4c9b6SSteve Lawrence 	daemonize_ready(0);
4857*efd4c9b6SSteve Lawrence 
4858*efd4c9b6SSteve Lawrence 	/* Wait for signal to quit */
4859*efd4c9b6SSteve Lawrence 	while (g_quit == B_FALSE)
4860*efd4c9b6SSteve Lawrence 		(void) pause();
4861*efd4c9b6SSteve Lawrence 
4862*efd4c9b6SSteve Lawrence 	/* detach doors */
4863*efd4c9b6SSteve Lawrence 	fattach_all_zones(B_TRUE);
4864*efd4c9b6SSteve Lawrence 
4865*efd4c9b6SSteve Lawrence 	(void) door_revoke(g_server_door);
4866*efd4c9b6SSteve Lawrence 	(void) door_revoke(g_stat_door);
4867*efd4c9b6SSteve Lawrence 
4868*efd4c9b6SSteve Lawrence 	/* kick stat thread and wait for it to close the statistics */
4869*efd4c9b6SSteve Lawrence 	(void) mutex_lock(&g_usage_cache_lock);
4870*efd4c9b6SSteve Lawrence 	g_quit = B_TRUE;
4871*efd4c9b6SSteve Lawrence 	(void) cond_signal(&g_usage_cache_kick);
4872*efd4c9b6SSteve Lawrence 	(void) mutex_unlock(&g_usage_cache_lock);
4873*efd4c9b6SSteve Lawrence end:
4874*efd4c9b6SSteve Lawrence 	(void) thr_join(tid, NULL, NULL);
4875*efd4c9b6SSteve Lawrence 	return (0);
4876*efd4c9b6SSteve Lawrence }
4877