1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25 #include <alloca.h>
26 #include <assert.h>
27 #include <dirent.h>
28 #include <dlfcn.h>
29 #include <door.h>
30 #include <errno.h>
31 #include <exacct.h>
32 #include <ctype.h>
33 #include <fcntl.h>
34 #include <kstat.h>
35 #include <libcontract.h>
36 #include <libintl.h>
37 #include <libscf.h>
38 #include <zonestat.h>
39 #include <zonestat_impl.h>
40 #include <limits.h>
41 #include <pool.h>
42 #include <procfs.h>
43 #include <rctl.h>
44 #include <thread.h>
45 #include <signal.h>
46 #include <stdarg.h>
47 #include <stddef.h>
48 #include <stdio.h>
49 #include <stdlib.h>
50 #include <strings.h>
51 #include <synch.h>
52 #include <sys/acctctl.h>
53 #include <sys/contract/process.h>
54 #include <sys/ctfs.h>
55 #include <sys/fork.h>
56 #include <sys/param.h>
57 #include <sys/priocntl.h>
58 #include <sys/fxpriocntl.h>
59 #include <sys/processor.h>
60 #include <sys/pset.h>
61 #include <sys/socket.h>
62 #include <sys/stat.h>
63 #include <sys/statvfs.h>
64 #include <sys/swap.h>
65 #include <sys/systeminfo.h>
66 #include <thread.h>
67 #include <sys/list.h>
68 #include <sys/time.h>
69 #include <sys/types.h>
70 #include <sys/vm_usage.h>
71 #include <sys/wait.h>
72 #include <sys/zone.h>
73 #include <time.h>
74 #include <ucred.h>
75 #include <unistd.h>
76 #include <vm/anon.h>
77 #include <zone.h>
78 #include <zonestat.h>
79
80 #define MAX_PSET_NAME 1024 /* Taken from PV_NAME_MAX_LEN */
81 #define ZSD_PSET_UNLIMITED UINT16_MAX
82 #define ZONESTAT_EXACCT_FILE "/var/adm/exacct/zonestat-process"
83
84 /*
85 * zonestatd implements gathering cpu and memory utilization data for
86 * running zones. It has these components:
87 *
88 * zsd_server:
89 * Door server to respond to client connections. Each client
90 * will connect using libzonestat.so, which will open and
91 * call /var/tmp/.zonestat_door. Each connecting client is given
92 * a file descriptor to the stat server.
93 *
94 * The zsd_server also responds to zoneadmd, which reports when a
95 * new zone is booted. This is used to fattach the zsd_server door
96 * into the new zone.
97 *
98 * zsd_stat_server:
99 * Receives client requests for the current utilization data. Each
100 * client request will cause zonestatd to update the current utilization
101 * data by kicking the stat_thread.
102 *
103 * If the client is in a non-global zone, the utilization data will
104 * be filtered to only show the given zone. The usage by all other zones
105 * will be added to the system utilization.
106 *
107 * stat_thread:
108 * The stat thread implements querying the system to determine the
109 * current utilization data for each running zone. This includes
110 * inspecting the system's processor set configuration, as well as details
111 * of each zone, such as their configured limits, and which processor
112 * sets they are running in.
113 *
114 * The stat_thread will only update memory utilization data as often as
115 * the configured config/sample_interval on the zones-monitoring service.
116 */
117
118 /*
119 * The private vmusage structure unfortunately uses size_t types, and assumes
120 * the caller's bitness matches the kernel's bitness. Since the getvmusage()
121 * system call is contracted, and zonestatd is 32 bit, the following structures
122 * are used to interact with a 32bit or 64 bit kernel.
123 */
/* 32-bit layout of the kernel's vmusage results (see comment above). */
typedef struct zsd_vmusage32 {
	id_t vmu_zoneid;
	uint_t vmu_type;
	id_t vmu_id;

	/* 32-bit (size_t) usage counters as returned by a 32-bit kernel */
	uint32_t vmu_rss_all;
	uint32_t vmu_rss_private;
	uint32_t vmu_rss_shared;
	uint32_t vmu_swap_all;
	uint32_t vmu_swap_private;
	uint32_t vmu_swap_shared;
} zsd_vmusage32_t;
136
/* 64-bit layout of the kernel's vmusage results (see comment above). */
typedef struct zsd_vmusage64 {
	id_t vmu_zoneid;
	uint_t vmu_type;
	id_t vmu_id;
	/*
	 * An amd64 kernel will align the following uint64_t members, but a
	 * 32bit i386 process will not without help.
	 */
	int vmu_align_next_members_on_8_bytes;
	/* 64-bit (size_t) usage counters as returned by a 64-bit kernel */
	uint64_t vmu_rss_all;
	uint64_t vmu_rss_private;
	uint64_t vmu_rss_shared;
	uint64_t vmu_swap_all;
	uint64_t vmu_swap_private;
	uint64_t vmu_swap_shared;
} zsd_vmusage64_t;
153
154 struct zsd_zone;
155
/* Used to store a zone's usage of a pset */
typedef struct zsd_pset_usage {
	struct zsd_zone *zsu_zone;	/* zone using the pset */
	struct zsd_pset *zsu_pset;	/* pset being used */

	list_node_t zsu_next;		/* link in pset's usage list */

	zoneid_t zsu_zoneid;
	boolean_t zsu_found;	/* zone bound at end of interval */
	boolean_t zsu_active;	/* zone was bound during interval */
	boolean_t zsu_new;	/* zone newly bound in this interval */
	boolean_t zsu_deleted;	/* zone was unbound in this interval */
	boolean_t zsu_empty;	/* no procs in pset in this interval */
	time_t zsu_start;	/* time when zone was found in pset */
	hrtime_t zsu_hrstart;	/* hrtime when zone was found in pset */
	uint64_t zsu_cpu_shares;	/* zone's FSS shares, if any */
	uint_t zsu_scheds;	/* schedulers found in this pass */
	timestruc_t zsu_cpu_usage;	/* cpu time used */
} zsd_pset_usage_t;
175
/* Used to store a pset's utilization */
typedef struct zsd_pset {
	psetid_t zsp_id;		/* pset id */
	list_node_t zsp_next;		/* link in ctl's pset list */
	char zsp_name[ZS_PSETNAME_MAX];

	uint_t zsp_cputype;	/* default, dedicated or shared */
	boolean_t zsp_found;	/* pset found at end of interval */
	boolean_t zsp_new;	/* pset new in this interval */
	boolean_t zsp_deleted;	/* pset deleted in this interval */
	boolean_t zsp_active;	/* pset existed during interval */
	boolean_t zsp_empty;	/* no processes in pset */
	time_t zsp_start;	/* time when pset was first seen */
	hrtime_t zsp_hrstart;	/* hrtime when pset was first seen */

	uint64_t zsp_online;	/* online cpus in interval */
	uint64_t zsp_size;	/* size in this interval */
	uint64_t zsp_min;	/* configured min in this interval */
	uint64_t zsp_max;	/* configured max in this interval */
	int64_t zsp_importance;	/* configured importance in this interval */

	uint_t zsp_scheds;	/* scheds of processes found in pset */
	uint64_t zsp_cpu_shares; /* total shares in this interval */

	timestruc_t zsp_total_time;
	timestruc_t zsp_usage_kern;
	timestruc_t zsp_usage_zones;

	/* Individual zone usages of pset */
	list_t zsp_usage_list;
	int zsp_nusage;

	/* Summed kstat values from individual cpus in pset */
	timestruc_t zsp_idle;
	timestruc_t zsp_intr;
	timestruc_t zsp_kern;
	timestruc_t zsp_user;

} zsd_pset_t;
215
/* Used to track an individual cpu's utilization as reported by kstats */
typedef struct zsd_cpu {
	processorid_t zsc_id;		/* cpu id */
	list_node_t zsc_next;		/* link in ctl's cpu list */
	psetid_t zsc_psetid;		/* pset cpu currently belongs to */
	psetid_t zsc_psetid_prev;	/* pset cpu belonged to last pass */
	zsd_pset_t *zsc_pset;		/* pset cpu currently belongs to */

	boolean_t zsc_found;	/* cpu online in this interval */
	boolean_t zsc_onlined;	/* cpu onlined during this interval */
	boolean_t zsc_offlined;	/* cpu offlined during this interval */
	boolean_t zsc_active;	/* cpu online during this interval */
	boolean_t zsc_allocated; /* True if cpu has ever been found */

	/* kstats this interval */
	uint64_t zsc_nsec_idle;
	uint64_t zsc_nsec_intr;
	uint64_t zsc_nsec_kern;
	uint64_t zsc_nsec_user;

	/* kstats in most recent interval */
	uint64_t zsc_nsec_idle_prev;
	uint64_t zsc_nsec_intr_prev;
	uint64_t zsc_nsec_kern_prev;
	uint64_t zsc_nsec_user_prev;

	/* Total kstat increases since zonestatd started reading kstats */
	timestruc_t zsc_idle;
	timestruc_t zsc_intr;
	timestruc_t zsc_kern;
	timestruc_t zsc_user;

} zsd_cpu_t;
249
/* Used to describe an individual zone and its utilization */
typedef struct zsd_zone {
	zoneid_t zsz_id;		/* zone id (may change across boots) */
	list_node_t zsz_next;		/* link in ctl's zone list */
	char zsz_name[ZS_ZONENAME_MAX];
	uint_t zsz_cputype;
	uint_t zsz_iptype;
	time_t zsz_start;		/* time when zone was first seen */
	hrtime_t zsz_hrstart;		/* hrtime when zone was first seen */

	char zsz_pool[ZS_POOLNAME_MAX];
	char zsz_pset[ZS_PSETNAME_MAX];
	int zsz_default_sched;
	/* These are deduced by inspecting processes */
	psetid_t zsz_psetid;
	uint_t zsz_scheds;

	boolean_t zsz_new;	/* zone booted during this interval */
	boolean_t zsz_deleted;	/* halted during this interval */
	boolean_t zsz_active;	/* running in this interval */
	boolean_t zsz_empty;	/* no processes in this interval */
	boolean_t zsz_gone;	/* not installed in this interval */
	boolean_t zsz_found;	/* Running at end of this interval */

	/* Configured limits; ZS_LIMIT_NONE when uncapped */
	uint64_t zsz_cpu_shares;
	uint64_t zsz_cpu_cap;
	uint64_t zsz_ram_cap;
	uint64_t zsz_locked_cap;
	uint64_t zsz_vm_cap;

	uint64_t zsz_cpus_online;
	timestruc_t zsz_cpu_usage;	/* cpu time used by zone */
	timestruc_t zsz_cap_time;	/* cpu time of cpu cap */
	timestruc_t zsz_share_time;	/* cpu time of share of cpu */
	timestruc_t zsz_pset_time;	/* time of all psets zone is bound to */

	uint64_t zsz_usage_ram;
	uint64_t zsz_usage_locked;
	uint64_t zsz_usage_vm;

	/* Configured resource-control caps */
	uint64_t zsz_processes_cap;
	uint64_t zsz_lwps_cap;
	uint64_t zsz_shm_cap;
	uint64_t zsz_shmids_cap;
	uint64_t zsz_semids_cap;
	uint64_t zsz_msgids_cap;
	uint64_t zsz_lofi_cap;

	/* Current resource-control usages */
	uint64_t zsz_processes;
	uint64_t zsz_lwps;
	uint64_t zsz_shm;
	uint64_t zsz_shmids;
	uint64_t zsz_semids;
	uint64_t zsz_msgids;
	uint64_t zsz_lofi;

} zsd_zone_t;
307
/*
 * Used to track the cpu usage of an individual process.
 *
 * zonestatd sweeps /proc each interval and charges the cpu usage of processes
 * to their zone. As processes exit, their extended accounting records are
 * read and the difference of their total and known usage is charged to their
 * zone.
 *
 * If a process is never seen in /proc, the total usage on its extended
 * accounting record will be charged to its zone.
 */
typedef struct zsd_proc {
	list_node_t zspr_next;		/* link in proc list */
	pid_t zspr_ppid;		/* parent pid */
	psetid_t zspr_psetid;		/* pset process was seen in */
	zoneid_t zspr_zoneid;		/* zone process belongs to */
	int zspr_sched;			/* scheduling class */
	timestruc_t zspr_usage;		/* known cpu usage so far */
} zsd_proc_t;
327
/* Used to track the overall resource usage of the system */
typedef struct zsd_system {

	/* Physical memory totals and usage */
	uint64_t zss_ram_total;
	uint64_t zss_ram_kern;
	uint64_t zss_ram_zones;

	/* Locked memory usage */
	uint64_t zss_locked_kern;
	uint64_t zss_locked_zones;

	/* Virtual memory totals and usage */
	uint64_t zss_vm_total;
	uint64_t zss_vm_kern;
	uint64_t zss_vm_zones;

	/* Swap totals and usage */
	uint64_t zss_swap_total;
	uint64_t zss_swap_used;

	/* System-wide cpu time breakdown */
	timestruc_t zss_idle;
	timestruc_t zss_intr;
	timestruc_t zss_kern;
	timestruc_t zss_user;

	timestruc_t zss_cpu_total_time;
	timestruc_t zss_cpu_usage_kern;
	timestruc_t zss_cpu_usage_zones;

	/* System-wide resource-control maximums */
	uint64_t zss_maxpid;
	uint64_t zss_processes_max;
	uint64_t zss_lwps_max;
	uint64_t zss_shm_max;
	uint64_t zss_shmids_max;
	uint64_t zss_semids_max;
	uint64_t zss_msgids_max;
	uint64_t zss_lofi_max;

	/* System-wide resource-control usages */
	uint64_t zss_processes;
	uint64_t zss_lwps;
	uint64_t zss_shm;
	uint64_t zss_shmids;
	uint64_t zss_semids;
	uint64_t zss_msgids;
	uint64_t zss_lofi;

	uint64_t zss_ncpus;
	uint64_t zss_ncpus_online;

} zsd_system_t;
375
/*
 * A dumping ground for various information and structures used to compute
 * utilization.
 *
 * This structure is used to track the system while clients are connected.
 * When the first client connects, a zsd_ctl is allocated and configured by
 * zsd_open(). When all clients disconnect, the zsd_ctl is closed.
 */
typedef struct zsd_ctl {
	kstat_ctl_t *zsctl_kstat_ctl;

	/* To track extended accounting */
	int zsctl_proc_fd;		/* Log currently being used */
	ea_file_t zsctl_proc_eaf;
	struct stat64 zsctl_proc_stat;
	int zsctl_proc_open;
	int zsctl_proc_fd_next;		/* Log file to use next */
	ea_file_t zsctl_proc_eaf_next;
	struct stat64 zsctl_proc_stat_next;
	int zsctl_proc_open_next;

	/* pool configuration handle */
	pool_conf_t *zsctl_pool_conf;
	int zsctl_pool_status;
	int zsctl_pool_changed;

	/* The above usage tracking structures */
	zsd_system_t *zsctl_system;
	list_t zsctl_zones;
	list_t zsctl_psets;
	list_t zsctl_cpus;
	zsd_cpu_t *zsctl_cpu_array;
	zsd_proc_t *zsctl_proc_array;

	/* Various system info */
	uint64_t zsctl_maxcpuid;
	uint64_t zsctl_maxproc;
	uint64_t zsctl_kern_bits;
	uint64_t zsctl_pagesize;

	/* Used to track time available under a cpu cap. */
	uint64_t zsctl_hrtime;
	uint64_t zsctl_hrtime_prev;
	timestruc_t zsctl_hrtime_total;

	struct timeval zsctl_timeofday;

	/* Caches for arrays allocated for use by various system calls */
	psetid_t *zsctl_pset_cache;
	uint_t zsctl_pset_ncache;
	processorid_t *zsctl_cpu_cache;
	uint_t zsctl_cpu_ncache;
	zoneid_t *zsctl_zone_cache;
	uint_t zsctl_zone_ncache;
	struct swaptable *zsctl_swap_cache;
	uint64_t zsctl_swap_cache_size;
	uint64_t zsctl_swap_cache_num;
	zsd_vmusage64_t *zsctl_vmusage_cache;
	uint64_t zsctl_vmusage_cache_num;

	/* Info about procfs for scanning /proc */
	struct dirent *zsctl_procfs_dent;
	long zsctl_procfs_dent_size;
	pool_value_t *zsctl_pool_vals[3];

	/* Counts on tracked entities */
	uint_t zsctl_nzones;
	uint_t zsctl_npsets;
	uint_t zsctl_npset_usages;
} zsd_ctl_t;
446
/* Control state; allocated when the first client connects */
zsd_ctl_t *g_ctl;
boolean_t g_open;		/* True if g_ctl is open */
int g_hasclient;		/* True if any clients are connected */

/*
 * The usage cache is updated by the stat_thread, and copied to clients by
 * the zsd_stat_server.  Mutex and cond are to synchronize between the
 * stat_thread and the stat_server.
 */
zs_usage_cache_t *g_usage_cache;
mutex_t g_usage_cache_lock;
cond_t g_usage_cache_kick;
uint_t g_usage_cache_kickers;
cond_t g_usage_cache_wait;
char *g_usage_cache_buf;
uint_t g_usage_cache_bufsz;
uint64_t g_gen_next;

/* fds of door servers */
int g_server_door;
int g_stat_door;

/*
 * Starting and current time.  Used to throttle memory calculation, and to
 * mark new zones and psets with their boot and creation time.
 */
time_t g_now;
time_t g_start;
hrtime_t g_hrnow;
hrtime_t g_hrstart;
uint64_t g_interval;

/*
 * main() thread.
 */
thread_t g_main;
483
/*
 * Print a non-fatal warning to stderr, prefixed with "zonestat: Warning: "
 * and terminated with a newline.
 */
/* PRINTFLIKE1 */
static void
zsd_warn(const char *fmt, ...)
{
	va_list ap;

	(void) fprintf(stderr, gettext("zonestat: Warning: "));

	va_start(ap, fmt);
	(void) vfprintf(stderr, fmt, ap);
	va_end(ap);

	(void) fputc('\n', stderr);
}
497
/*
 * Print a fatal error to stderr, prefixed with "zonestat: Error: ", then
 * exit with status 1.  Does not return.
 */
/* PRINTFLIKE1 */
static void
zsd_error(const char *fmt, ...)
{
	va_list ap;

	(void) fprintf(stderr, gettext("zonestat: Error: "));

	va_start(ap, fmt);
	(void) vfprintf(stderr, fmt, ap);
	va_end(ap);

	(void) fputc('\n', stderr);
	exit(1);
}
512
513 /* Turns on extended accounting if not configured externally */
514 int
zsd_enable_cpu_stats()515 zsd_enable_cpu_stats()
516 {
517 char *path = ZONESTAT_EXACCT_FILE;
518 char oldfile[MAXPATHLEN];
519 int ret, state = AC_ON;
520 ac_res_t res[6];
521
522 /*
523 * Start a new accounting file if accounting not configured
524 * externally.
525 */
526
527 res[0].ar_id = AC_PROC_PID;
528 res[0].ar_state = AC_ON;
529 res[1].ar_id = AC_PROC_ANCPID;
530 res[1].ar_state = AC_ON;
531 res[2].ar_id = AC_PROC_CPU;
532 res[2].ar_state = AC_ON;
533 res[3].ar_id = AC_PROC_TIME;
534 res[3].ar_state = AC_ON;
535 res[4].ar_id = AC_PROC_ZONENAME;
536 res[4].ar_state = AC_ON;
537 res[5].ar_id = AC_NONE;
538 res[5].ar_state = AC_ON;
539 if (acctctl(AC_PROC | AC_RES_SET, res, sizeof (res)) != 0) {
540 zsd_warn(gettext("Unable to set accounting resources"));
541 return (-1);
542 }
543 /* Only set accounting file if none is configured */
544 ret = acctctl(AC_PROC | AC_FILE_GET, oldfile, sizeof (oldfile));
545 if (ret < 0) {
546
547 (void) unlink(path);
548 if (acctctl(AC_PROC | AC_FILE_SET, path, strlen(path) + 1)
549 == -1) {
550 zsd_warn(gettext("Unable to set accounting file"));
551 return (-1);
552 }
553 }
554 if (acctctl(AC_PROC | AC_STATE_SET, &state, sizeof (state)) == -1) {
555 zsd_warn(gettext("Unable to enable accounting"));
556 return (-1);
557 }
558 return (0);
559 }
560
561 /* Turns off extended accounting if not configured externally */
562 int
zsd_disable_cpu_stats()563 zsd_disable_cpu_stats()
564 {
565 char *path = ZONESTAT_EXACCT_FILE;
566 int ret, state = AC_OFF;
567 ac_res_t res[6];
568 char oldfile[MAXPATHLEN];
569
570 /* If accounting file is externally configured, leave it alone */
571 ret = acctctl(AC_PROC | AC_FILE_GET, oldfile, sizeof (oldfile));
572 if (ret == 0 && strcmp(oldfile, path) != 0)
573 return (0);
574
575 res[0].ar_id = AC_PROC_PID;
576 res[0].ar_state = AC_OFF;
577 res[1].ar_id = AC_PROC_ANCPID;
578 res[1].ar_state = AC_OFF;
579 res[2].ar_id = AC_PROC_CPU;
580 res[2].ar_state = AC_OFF;
581 res[3].ar_id = AC_PROC_TIME;
582 res[3].ar_state = AC_OFF;
583 res[4].ar_id = AC_PROC_ZONENAME;
584 res[4].ar_state = AC_OFF;
585 res[5].ar_id = AC_NONE;
586 res[5].ar_state = AC_OFF;
587 if (acctctl(AC_PROC | AC_RES_SET, res, sizeof (res)) != 0) {
588 zsd_warn(gettext("Unable to clear accounting resources"));
589 return (-1);
590 }
591 if (acctctl(AC_PROC | AC_FILE_SET, NULL, 0) == -1) {
592 zsd_warn(gettext("Unable to clear accounting file"));
593 return (-1);
594 }
595 if (acctctl(AC_PROC | AC_STATE_SET, &state, sizeof (state)) == -1) {
596 zsd_warn(gettext("Unable to diable accounting"));
597 return (-1);
598 }
599
600 (void) unlink(path);
601 return (0);
602 }
603
604 /*
605 * If not configured externally, deletes the current extended accounting file
606 * and starts a new one.
607 *
608 * Since the stat_thread holds an open handle to the accounting file, it will
609 * read all remaining entries from the old file before switching to
610 * read the new one.
611 */
612 int
zsd_roll_exacct(void)613 zsd_roll_exacct(void)
614 {
615 int ret;
616 char *path = ZONESTAT_EXACCT_FILE;
617 char oldfile[MAXPATHLEN];
618
619 /* If accounting file is externally configured, leave it alone */
620 ret = acctctl(AC_PROC | AC_FILE_GET, oldfile, sizeof (oldfile));
621 if (ret == 0 && strcmp(oldfile, path) != 0)
622 return (0);
623
624 if (unlink(path) != 0)
625 /* Roll it next time */
626 return (0);
627
628 if (acctctl(AC_PROC | AC_FILE_SET, path, strlen(path) + 1) == -1) {
629 zsd_warn(gettext("Unable to set accounting file"));
630 return (-1);
631 }
632 return (0);
633 }
634
635 /* Contract stuff for zone_enter() */
636 int
init_template(void)637 init_template(void)
638 {
639 int fd;
640 int err = 0;
641
642 fd = open64(CTFS_ROOT "/process/template", O_RDWR);
643 if (fd == -1)
644 return (-1);
645
646 /*
647 * For now, zoneadmd doesn't do anything with the contract.
648 * Deliver no events, don't inherit, and allow it to be orphaned.
649 */
650 err |= ct_tmpl_set_critical(fd, 0);
651 err |= ct_tmpl_set_informative(fd, 0);
652 err |= ct_pr_tmpl_set_fatal(fd, CT_PR_EV_HWERR);
653 err |= ct_pr_tmpl_set_param(fd, CT_PR_PGRPONLY | CT_PR_REGENT);
654 if (err || ct_tmpl_activate(fd)) {
655 (void) close(fd);
656 return (-1);
657 }
658
659 return (fd);
660 }
661
662 /*
663 * Contract stuff for zone_enter()
664 */
665 int
contract_latest(ctid_t * id)666 contract_latest(ctid_t *id)
667 {
668 int cfd, r;
669 ct_stathdl_t st;
670 ctid_t result;
671
672 if ((cfd = open64(CTFS_ROOT "/process/latest", O_RDONLY)) == -1)
673 return (errno);
674
675 if ((r = ct_status_read(cfd, CTD_COMMON, &st)) != 0) {
676 (void) close(cfd);
677 return (r);
678 }
679
680 result = ct_status_get_id(st);
681 ct_status_free(st);
682 (void) close(cfd);
683
684 *id = result;
685 return (0);
686 }
687
/* Set FD_CLOEXEC on fd.  Returns 0 on success, -1 on failure. */
static int
close_on_exec(int fd)
{
	int fdflags;

	fdflags = fcntl(fd, F_GETFD, 0);
	if (fdflags == -1)
		return (-1);
	if (fcntl(fd, F_SETFD, fdflags | FD_CLOEXEC) == -1)
		return (-1);
	return (0);
}
696
697 int
contract_open(ctid_t ctid,const char * type,const char * file,int oflag)698 contract_open(ctid_t ctid, const char *type, const char *file, int oflag)
699 {
700 char path[PATH_MAX];
701 int n, fd;
702
703 if (type == NULL)
704 type = "all";
705
706 n = snprintf(path, PATH_MAX, CTFS_ROOT "/%s/%ld/%s", type, ctid, file);
707 if (n >= sizeof (path)) {
708 errno = ENAMETOOLONG;
709 return (-1);
710 }
711
712 fd = open64(path, oflag);
713 if (fd != -1) {
714 if (close_on_exec(fd) == -1) {
715 int err = errno;
716 (void) close(fd);
717 errno = err;
718 return (-1);
719 }
720 }
721 return (fd);
722 }
723
724 int
contract_abandon_id(ctid_t ctid)725 contract_abandon_id(ctid_t ctid)
726 {
727 int fd, err;
728
729 fd = contract_open(ctid, "all", "ctl", O_WRONLY);
730 if (fd == -1)
731 return (errno);
732
733 err = ct_ctl_abandon(fd);
734 (void) close(fd);
735
736 return (err);
737 }
738 /*
739 * Attach the zsd_server to a zone. Called for each zone when zonestatd
740 * starts, and for each newly booted zone when zoneadmd contacts the zsd_server
741 *
742 * Zone_enter is used to avoid reaching into zone to fattach door.
743 */
744 static void
zsd_fattach_zone(zoneid_t zid,int door,boolean_t detach_only)745 zsd_fattach_zone(zoneid_t zid, int door, boolean_t detach_only)
746 {
747 char *path = ZS_DOOR_PATH;
748 int fd, pid, stat, tmpl_fd;
749 ctid_t ct;
750
751 if ((tmpl_fd = init_template()) == -1) {
752 zsd_warn("Unable to init template");
753 return;
754 }
755
756 pid = forkx(0);
757 if (pid < 0) {
758 (void) ct_tmpl_clear(tmpl_fd);
759 zsd_warn(gettext(
760 "Unable to fork to add zonestat to zoneid %d\n"), zid);
761 return;
762 }
763
764 if (pid == 0) {
765 (void) ct_tmpl_clear(tmpl_fd);
766 (void) close(tmpl_fd);
767 if (zid != 0 && zone_enter(zid) != 0) {
768 if (errno == EINVAL) {
769 _exit(0);
770 }
771 _exit(1);
772 }
773 (void) fdetach(path);
774 (void) unlink(path);
775 if (detach_only)
776 _exit(0);
777 fd = open(path, O_CREAT|O_RDWR, 0644);
778 if (fd < 0)
779 _exit(2);
780 if (fattach(door, path) != 0)
781 _exit(3);
782 _exit(0);
783 }
784 if (contract_latest(&ct) == -1)
785 ct = -1;
786 (void) ct_tmpl_clear(tmpl_fd);
787 (void) close(tmpl_fd);
788 (void) contract_abandon_id(ct);
789 while (waitpid(pid, &stat, 0) != pid)
790 ;
791 if (WIFEXITED(stat) && WEXITSTATUS(stat) == 0)
792 return;
793
794 zsd_warn(gettext("Unable to attach door to zoneid: %d"), zid);
795
796 if (WEXITSTATUS(stat) == 1)
797 zsd_warn(gettext("Cannot entering zone"));
798 else if (WEXITSTATUS(stat) == 2)
799 zsd_warn(gettext("Unable to create door file: %s"), path);
800 else if (WEXITSTATUS(stat) == 3)
801 zsd_warn(gettext("Unable to fattach file: %s"), path);
802
803 zsd_warn(gettext("Internal error entering zone: %d"), zid);
804 }
805
806 /*
807 * Zone lookup and allocation functions to manage list of currently running
808 * zones.
809 */
810 static zsd_zone_t *
zsd_lookup_zone(zsd_ctl_t * ctl,char * zonename,zoneid_t zoneid)811 zsd_lookup_zone(zsd_ctl_t *ctl, char *zonename, zoneid_t zoneid)
812 {
813 zsd_zone_t *zone;
814
815 for (zone = list_head(&ctl->zsctl_zones); zone != NULL;
816 zone = list_next(&ctl->zsctl_zones, zone)) {
817 if (strcmp(zone->zsz_name, zonename) == 0) {
818 if (zoneid != -1)
819 zone->zsz_id = zoneid;
820 return (zone);
821 }
822 }
823 return (NULL);
824 }
825
826 static zsd_zone_t *
zsd_lookup_zone_byid(zsd_ctl_t * ctl,zoneid_t zoneid)827 zsd_lookup_zone_byid(zsd_ctl_t *ctl, zoneid_t zoneid)
828 {
829 zsd_zone_t *zone;
830
831 for (zone = list_head(&ctl->zsctl_zones); zone != NULL;
832 zone = list_next(&ctl->zsctl_zones, zone)) {
833 if (zone->zsz_id == zoneid)
834 return (zone);
835 }
836 return (NULL);
837 }
838
839 static zsd_zone_t *
zsd_allocate_zone(zsd_ctl_t * ctl,char * zonename,zoneid_t zoneid)840 zsd_allocate_zone(zsd_ctl_t *ctl, char *zonename, zoneid_t zoneid)
841 {
842 zsd_zone_t *zone;
843
844 if ((zone = (zsd_zone_t *)calloc(1, sizeof (zsd_zone_t))) == NULL)
845 return (NULL);
846
847 (void) strlcpy(zone->zsz_name, zonename, sizeof (zone->zsz_name));
848 zone->zsz_id = zoneid;
849 zone->zsz_found = B_FALSE;
850
851 /*
852 * Allocate as deleted so if not found in first pass, zone is deleted
853 * from list. This can happen if zone is returned by zone_list, but
854 * exits before first attempt to fetch zone details.
855 */
856 zone->zsz_start = g_now;
857 zone->zsz_hrstart = g_hrnow;
858 zone->zsz_deleted = B_TRUE;
859
860 zone->zsz_cpu_shares = ZS_LIMIT_NONE;
861 zone->zsz_cpu_cap = ZS_LIMIT_NONE;
862 zone->zsz_ram_cap = ZS_LIMIT_NONE;
863 zone->zsz_locked_cap = ZS_LIMIT_NONE;
864 zone->zsz_vm_cap = ZS_LIMIT_NONE;
865
866 zone->zsz_processes_cap = ZS_LIMIT_NONE;
867 zone->zsz_lwps_cap = ZS_LIMIT_NONE;
868 zone->zsz_shm_cap = ZS_LIMIT_NONE;
869 zone->zsz_shmids_cap = ZS_LIMIT_NONE;
870 zone->zsz_semids_cap = ZS_LIMIT_NONE;
871 zone->zsz_msgids_cap = ZS_LIMIT_NONE;
872 zone->zsz_lofi_cap = ZS_LIMIT_NONE;
873
874 ctl->zsctl_nzones++;
875
876 return (zone);
877 }
878
879 static zsd_zone_t *
zsd_lookup_insert_zone(zsd_ctl_t * ctl,char * zonename,zoneid_t zoneid)880 zsd_lookup_insert_zone(zsd_ctl_t *ctl, char *zonename, zoneid_t zoneid)
881 {
882 zsd_zone_t *zone, *tmp;
883
884 if ((zone = zsd_lookup_zone(ctl, zonename, zoneid)) != NULL)
885 return (zone);
886
887 if ((zone = zsd_allocate_zone(ctl, zonename, zoneid)) == NULL)
888 return (NULL);
889
890 /* Insert sorted by zonename */
891 tmp = list_head(&ctl->zsctl_zones);
892 while (tmp != NULL && strcmp(zonename, tmp->zsz_name) > 0)
893 tmp = list_next(&ctl->zsctl_zones, tmp);
894
895 list_insert_before(&ctl->zsctl_zones, tmp, zone);
896 return (zone);
897 }
898
899 /*
900 * Mark all zones as not existing. As zones are found, they will
901 * be marked as existing. If a zone is not found, then it must have
902 * halted.
903 */
904 static void
zsd_mark_zones_start(zsd_ctl_t * ctl)905 zsd_mark_zones_start(zsd_ctl_t *ctl)
906 {
907
908 zsd_zone_t *zone;
909
910 for (zone = list_head(&ctl->zsctl_zones); zone != NULL;
911 zone = list_next(&ctl->zsctl_zones, zone)) {
912 zone->zsz_found = B_FALSE;
913 }
914 }
915
916 /*
917 * Mark each zone as not using pset. If processes are found using the
918 * pset, the zone will remain bound to the pset. If none of a zones
919 * processes are bound to the pset, the zone's usage of the pset will
920 * be deleted.
921 *
922 */
923 static void
zsd_mark_pset_usage_start(zsd_pset_t * pset)924 zsd_mark_pset_usage_start(zsd_pset_t *pset)
925 {
926 zsd_pset_usage_t *usage;
927
928 for (usage = list_head(&pset->zsp_usage_list);
929 usage != NULL;
930 usage = list_next(&pset->zsp_usage_list, usage)) {
931 usage->zsu_found = B_FALSE;
932 usage->zsu_empty = B_TRUE;
933 }
934 }
935
936 /*
937 * Mark each pset as not existing. If a pset is found, it will be marked
938 * as existing. If a pset is not found, it wil be deleted.
939 */
940 static void
zsd_mark_psets_start(zsd_ctl_t * ctl)941 zsd_mark_psets_start(zsd_ctl_t *ctl)
942 {
943 zsd_pset_t *pset;
944
945 for (pset = list_head(&ctl->zsctl_psets); pset != NULL;
946 pset = list_next(&ctl->zsctl_psets, pset)) {
947 pset->zsp_found = B_FALSE;
948 zsd_mark_pset_usage_start(pset);
949 }
950 }
951
952 /*
953 * A pset was found. Update its information
954 */
955 static void
zsd_mark_pset_found(zsd_pset_t * pset,uint_t type,uint64_t online,uint64_t size,uint64_t min,uint64_t max,int64_t importance)956 zsd_mark_pset_found(zsd_pset_t *pset, uint_t type, uint64_t online,
957 uint64_t size, uint64_t min, uint64_t max, int64_t importance)
958 {
959 pset->zsp_empty = B_TRUE;
960 pset->zsp_deleted = B_FALSE;
961
962 assert(pset->zsp_found == B_FALSE);
963
964 /* update pset flags */
965 if (pset->zsp_active == B_FALSE)
966 /* pset not seen on previous interval. It is new. */
967 pset->zsp_new = B_TRUE;
968 else
969 pset->zsp_new = B_FALSE;
970
971 pset->zsp_found = B_TRUE;
972 pset->zsp_cputype = type;
973 pset->zsp_online = online;
974 pset->zsp_size = size;
975 pset->zsp_min = min;
976 pset->zsp_max = max;
977 pset->zsp_importance = importance;
978 pset->zsp_cpu_shares = 0;
979 pset->zsp_scheds = 0;
980 pset->zsp_active = B_TRUE;
981 }
982
983 /*
984 * A zone's process was found using a pset. Charge the process to the pset and
985 * the per-zone data for the pset.
986 */
987 static void
zsd_mark_pset_usage_found(zsd_pset_usage_t * usage,uint_t sched)988 zsd_mark_pset_usage_found(zsd_pset_usage_t *usage, uint_t sched)
989 {
990 zsd_zone_t *zone = usage->zsu_zone;
991 zsd_pset_t *pset = usage->zsu_pset;
992
993 /* Nothing to do if already found */
994 if (usage->zsu_found == B_TRUE)
995 goto add_stats;
996
997 usage->zsu_found = B_TRUE;
998 usage->zsu_empty = B_FALSE;
999
1000 usage->zsu_deleted = B_FALSE;
1001 /* update usage flags */
1002 if (usage->zsu_active == B_FALSE)
1003 usage->zsu_new = B_TRUE;
1004 else
1005 usage->zsu_new = B_FALSE;
1006
1007 usage->zsu_scheds = 0;
1008 usage->zsu_cpu_shares = ZS_LIMIT_NONE;
1009 usage->zsu_active = B_TRUE;
1010 pset->zsp_empty = B_FALSE;
1011 zone->zsz_empty = B_FALSE;
1012
1013 add_stats:
1014 /* Detect zone's pset id, and if it is bound to multiple psets */
1015 if (zone->zsz_psetid == ZS_PSET_ERROR)
1016 zone->zsz_psetid = pset->zsp_id;
1017 else if (zone->zsz_psetid != pset->zsp_id)
1018 zone->zsz_psetid = ZS_PSET_MULTI;
1019
1020 usage->zsu_scheds |= sched;
1021 pset->zsp_scheds |= sched;
1022 zone->zsz_scheds |= sched;
1023
1024 /* Record if FSS is co-habitating with conflicting scheduler */
1025 if ((pset->zsp_scheds & ZS_SCHED_FSS) &&
1026 usage->zsu_scheds & (
1027 ZS_SCHED_TS | ZS_SCHED_IA | ZS_SCHED_FX)) {
1028 usage->zsu_scheds |= ZS_SCHED_CONFLICT;
1029
1030 pset->zsp_scheds |= ZS_SCHED_CONFLICT;
1031 }
1032
1033 }
1034
1035 /* Add cpu time for a process to a pset, zone, and system totals */
1036 static void
zsd_add_usage(zsd_ctl_t * ctl,zsd_pset_usage_t * usage,timestruc_t * delta)1037 zsd_add_usage(zsd_ctl_t *ctl, zsd_pset_usage_t *usage, timestruc_t *delta)
1038 {
1039 zsd_system_t *system = ctl->zsctl_system;
1040 zsd_zone_t *zone = usage->zsu_zone;
1041 zsd_pset_t *pset = usage->zsu_pset;
1042
1043 TIMESTRUC_ADD_TIMESTRUC(usage->zsu_cpu_usage, *delta);
1044 TIMESTRUC_ADD_TIMESTRUC(pset->zsp_usage_zones, *delta);
1045 TIMESTRUC_ADD_TIMESTRUC(zone->zsz_cpu_usage, *delta);
1046 TIMESTRUC_ADD_TIMESTRUC(system->zss_cpu_usage_zones, *delta);
1047 }
1048
1049 /* Determine which processor sets have been deleted */
1050 static void
zsd_mark_psets_end(zsd_ctl_t * ctl)1051 zsd_mark_psets_end(zsd_ctl_t *ctl)
1052 {
1053 zsd_pset_t *pset, *tmp;
1054
1055 /*
1056 * Mark pset as not exists, and deleted if it existed
1057 * previous interval.
1058 */
1059 pset = list_head(&ctl->zsctl_psets);
1060 while (pset != NULL) {
1061 if (pset->zsp_found == B_FALSE) {
1062 pset->zsp_empty = B_TRUE;
1063 if (pset->zsp_deleted == B_TRUE) {
1064 tmp = pset;
1065 pset = list_next(&ctl->zsctl_psets, pset);
1066 list_remove(&ctl->zsctl_psets, tmp);
1067 free(tmp);
1068 ctl->zsctl_npsets--;
1069 continue;
1070 } else {
1071 /* Pset vanished during this interval */
1072 pset->zsp_new = B_FALSE;
1073 pset->zsp_deleted = B_TRUE;
1074 pset->zsp_active = B_TRUE;
1075 }
1076 }
1077 pset = list_next(&ctl->zsctl_psets, pset);
1078 }
1079 }
1080
1081 /* Determine which zones are no longer bound to processor sets */
1082 static void
zsd_mark_pset_usages_end(zsd_ctl_t * ctl)1083 zsd_mark_pset_usages_end(zsd_ctl_t *ctl)
1084 {
1085 zsd_pset_t *pset;
1086 zsd_zone_t *zone;
1087 zsd_pset_usage_t *usage, *tmp;
1088
1089 /*
1090 * Mark pset as not exists, and deleted if it existed previous
1091 * interval.
1092 */
1093 for (pset = list_head(&ctl->zsctl_psets); pset != NULL;
1094 pset = list_next(&ctl->zsctl_psets, pset)) {
1095 usage = list_head(&pset->zsp_usage_list);
1096 while (usage != NULL) {
1097 /*
1098 * Mark pset as not exists, and deleted if it existed
1099 * previous interval.
1100 */
1101 if (usage->zsu_found == B_FALSE ||
1102 usage->zsu_zone->zsz_deleted == B_TRUE ||
1103 usage->zsu_pset->zsp_deleted == B_TRUE) {
1104 tmp = usage;
1105 usage = list_next(&pset->zsp_usage_list,
1106 usage);
1107 list_remove(&pset->zsp_usage_list, tmp);
1108 free(tmp);
1109 pset->zsp_nusage--;
1110 ctl->zsctl_npset_usages--;
1111 continue;
1112 } else {
1113 usage->zsu_new = B_FALSE;
1114 usage->zsu_deleted = B_TRUE;
1115 usage->zsu_active = B_TRUE;
1116 }
1117 /* Add cpu shares for usages that are in FSS */
1118 zone = usage->zsu_zone;
1119 if (usage->zsu_scheds & ZS_SCHED_FSS &&
1120 zone->zsz_cpu_shares != ZS_SHARES_UNLIMITED &&
1121 zone->zsz_cpu_shares != 0) {
1122 zone = usage->zsu_zone;
1123 usage->zsu_cpu_shares = zone->zsz_cpu_shares;
1124 pset->zsp_cpu_shares += zone->zsz_cpu_shares;
1125 }
1126 usage = list_next(&pset->zsp_usage_list,
1127 usage);
1128 }
1129 }
1130 }
1131
/*
 * A zone has been found.  Update its information.
 *
 * Records the zone's configuration as sampled this interval: resource
 * caps and current usage counts (processes, lwps, shm, ipc ids, lofi),
 * pool and pset binding, default scheduling class, cpu type and ip type.
 * The per-zone counts are also folded into the system-wide totals.
 * Fields that are recomputed as processes/psets are scanned (schedulers,
 * online cpus, pset id) are reset here.
 */
static void
zsd_mark_zone_found(zsd_ctl_t *ctl, zsd_zone_t *zone, uint64_t cpu_shares,
    uint64_t cpu_cap, uint64_t ram_cap, uint64_t locked_cap,
    uint64_t vm_cap, uint64_t processes_cap, uint64_t processes,
    uint64_t lwps_cap, uint64_t lwps, uint64_t shm_cap, uint64_t shm,
    uint64_t shmids_cap, uint64_t shmids, uint64_t semids_cap,
    uint64_t semids, uint64_t msgids_cap, uint64_t msgids, uint64_t lofi_cap,
    uint64_t lofi, char *poolname, char *psetname, uint_t sched, uint_t cputype,
    uint_t iptype)
{
	zsd_system_t *sys = ctl->zsctl_system;

	/* A zone must be found at most once per interval */
	assert(zone->zsz_found == B_FALSE);

	/*
	 * Mark zone as exists, and new if it did not exist in previous
	 * interval.
	 */
	zone->zsz_found = B_TRUE;
	zone->zsz_empty = B_TRUE;
	zone->zsz_deleted = B_FALSE;

	/*
	 * Zone is new. Assume zone's properties are the same over entire
	 * interval.
	 */
	if (zone->zsz_active == B_FALSE)
		zone->zsz_new = B_TRUE;
	else
		zone->zsz_new = B_FALSE;

	(void) strlcpy(zone->zsz_pool, poolname, sizeof (zone->zsz_pool));
	(void) strlcpy(zone->zsz_pset, psetname, sizeof (zone->zsz_pset));
	zone->zsz_default_sched = sched;

	/* Schedulers updated later as processes are found */
	zone->zsz_scheds = 0;

	/* Cpus updated later as psets bound are identified */
	zone->zsz_cpus_online = 0;

	zone->zsz_cputype = cputype;
	zone->zsz_iptype = iptype;
	zone->zsz_psetid = ZS_PSET_ERROR;
	zone->zsz_cpu_cap = cpu_cap;
	zone->zsz_cpu_shares = cpu_shares;
	zone->zsz_ram_cap = ram_cap;
	zone->zsz_locked_cap = locked_cap;
	zone->zsz_vm_cap = vm_cap;
	zone->zsz_processes_cap = processes_cap;
	zone->zsz_processes = processes;
	zone->zsz_lwps_cap = lwps_cap;
	zone->zsz_lwps = lwps;
	zone->zsz_shm_cap = shm_cap;
	zone->zsz_shm = shm;
	zone->zsz_shmids_cap = shmids_cap;
	zone->zsz_shmids = shmids;
	zone->zsz_semids_cap = semids_cap;
	zone->zsz_semids = semids;
	zone->zsz_msgids_cap = msgids_cap;
	zone->zsz_msgids = msgids;
	zone->zsz_lofi_cap = lofi_cap;
	zone->zsz_lofi = lofi;

	/* Fold this zone's counts into the system-wide totals */
	sys->zss_processes += processes;
	sys->zss_lwps += lwps;
	sys->zss_shm += shm;
	sys->zss_shmids += shmids;
	sys->zss_semids += semids;
	sys->zss_msgids += msgids;
	sys->zss_lofi += lofi;
	zone->zsz_active = B_TRUE;
}
1206
1207
1208 /* Determine which zones have halted */
1209 static void
zsd_mark_zones_end(zsd_ctl_t * ctl)1210 zsd_mark_zones_end(zsd_ctl_t *ctl)
1211 {
1212 zsd_zone_t *zone, *tmp;
1213
1214 /*
1215 * Mark zone as not existing, or delete if it did not exist in
1216 * previous interval.
1217 */
1218 zone = list_head(&ctl->zsctl_zones);
1219 while (zone != NULL) {
1220 if (zone->zsz_found == B_FALSE) {
1221 zone->zsz_empty = B_TRUE;
1222 if (zone->zsz_deleted == B_TRUE) {
1223 /*
1224 * Zone deleted in prior interval,
1225 * so it no longer exists.
1226 */
1227 tmp = zone;
1228 zone = list_next(&ctl->zsctl_zones, zone);
1229 list_remove(&ctl->zsctl_zones, tmp);
1230 free(tmp);
1231 ctl->zsctl_nzones--;
1232 continue;
1233 } else {
1234 zone->zsz_new = B_FALSE;
1235 zone->zsz_deleted = B_TRUE;
1236 zone->zsz_active = B_TRUE;
1237 }
1238 }
1239 zone = list_next(&ctl->zsctl_zones, zone);
1240 }
1241 }
1242
1243 /*
1244 * Mark cpus as not existing. If a cpu is found, it will be updated. If
1245 * a cpu is not found, then it must have gone offline, so it will be
1246 * deleted.
1247 *
1248 * The kstat tracking data is rolled so that the usage since the previous
1249 * interval can be determined.
1250 */
1251 static void
zsd_mark_cpus_start(zsd_ctl_t * ctl,boolean_t roll)1252 zsd_mark_cpus_start(zsd_ctl_t *ctl, boolean_t roll)
1253 {
1254 zsd_cpu_t *cpu;
1255
1256 /*
1257 * Mark all cpus as not existing. As cpus are found, they will
1258 * be marked as existing.
1259 */
1260 for (cpu = list_head(&ctl->zsctl_cpus); cpu != NULL;
1261 cpu = list_next(&ctl->zsctl_cpus, cpu)) {
1262 cpu->zsc_found = B_FALSE;
1263 if (cpu->zsc_active == B_TRUE && roll) {
1264 cpu->zsc_psetid_prev = cpu->zsc_psetid;
1265 cpu->zsc_nsec_idle_prev = cpu->zsc_nsec_idle;
1266 cpu->zsc_nsec_intr_prev = cpu->zsc_nsec_intr;
1267 cpu->zsc_nsec_kern_prev = cpu->zsc_nsec_kern;
1268 cpu->zsc_nsec_user_prev = cpu->zsc_nsec_user;
1269 }
1270 }
1271 }
1272
1273 /*
1274 * An array the size of the maximum number of cpus is kept. Within this array
1275 * a list of the online cpus is maintained.
1276 */
1277 zsd_cpu_t *
zsd_lookup_insert_cpu(zsd_ctl_t * ctl,processorid_t cpuid)1278 zsd_lookup_insert_cpu(zsd_ctl_t *ctl, processorid_t cpuid)
1279 {
1280 zsd_cpu_t *cpu;
1281
1282 assert(cpuid < ctl->zsctl_maxcpuid);
1283 cpu = &(ctl->zsctl_cpu_array[cpuid]);
1284 assert(cpuid == cpu->zsc_id);
1285
1286 if (cpu->zsc_allocated == B_FALSE) {
1287 cpu->zsc_allocated = B_TRUE;
1288 list_insert_tail(&ctl->zsctl_cpus, cpu);
1289 }
1290 return (cpu);
1291 }
1292
1293 /* A cpu has been found. Update its information */
1294 static void
zsd_mark_cpu_found(zsd_cpu_t * cpu,zsd_pset_t * pset,psetid_t psetid)1295 zsd_mark_cpu_found(zsd_cpu_t *cpu, zsd_pset_t *pset, psetid_t psetid)
1296 {
1297 /*
1298 * legacy processor sets, the cpu may move while zonestatd is
1299 * inspecting, causing it to be found twice. In this case, just
1300 * leave cpu in the first processor set in which it was found.
1301 */
1302 if (cpu->zsc_found == B_TRUE)
1303 return;
1304
1305 /* Mark cpu as online */
1306 cpu->zsc_found = B_TRUE;
1307 cpu->zsc_offlined = B_FALSE;
1308 cpu->zsc_pset = pset;
1309 /*
1310 * cpu is newly online.
1311 */
1312 if (cpu->zsc_active == B_FALSE) {
1313 /*
1314 * Cpu is newly online.
1315 */
1316 cpu->zsc_onlined = B_TRUE;
1317 cpu->zsc_psetid = psetid;
1318 cpu->zsc_psetid_prev = psetid;
1319 } else {
1320 /*
1321 * cpu online during previous interval. Save properties at
1322 * start of interval
1323 */
1324 cpu->zsc_onlined = B_FALSE;
1325 cpu->zsc_psetid = psetid;
1326
1327 }
1328 cpu->zsc_active = B_TRUE;
1329 }
1330
1331 /* Remove all offlined cpus from the list of tracked cpus */
1332 static void
zsd_mark_cpus_end(zsd_ctl_t * ctl)1333 zsd_mark_cpus_end(zsd_ctl_t *ctl)
1334 {
1335 zsd_cpu_t *cpu, *tmp;
1336 int id;
1337
1338 /* Mark cpu as online or offline */
1339 cpu = list_head(&ctl->zsctl_cpus);
1340 while (cpu != NULL) {
1341 if (cpu->zsc_found == B_FALSE) {
1342 if (cpu->zsc_offlined == B_TRUE) {
1343 /*
1344 * cpu offlined in prior interval. It is gone.
1345 */
1346 tmp = cpu;
1347 cpu = list_next(&ctl->zsctl_cpus, cpu);
1348 list_remove(&ctl->zsctl_cpus, tmp);
1349 /* Clear structure for future use */
1350 id = tmp->zsc_id;
1351 bzero(tmp, sizeof (zsd_cpu_t));
1352 tmp->zsc_id = id;
1353 tmp->zsc_allocated = B_FALSE;
1354 tmp->zsc_psetid = ZS_PSET_ERROR;
1355 tmp->zsc_psetid_prev = ZS_PSET_ERROR;
1356
1357 } else {
1358 /*
1359 * cpu online at start of interval. Treat
1360 * as still online, since it was online for
1361 * some portion of the interval.
1362 */
1363 cpu->zsc_offlined = B_TRUE;
1364 cpu->zsc_onlined = B_FALSE;
1365 cpu->zsc_active = B_TRUE;
1366 cpu->zsc_psetid = cpu->zsc_psetid_prev;
1367 cpu->zsc_pset = NULL;
1368 }
1369 }
1370 cpu = list_next(&ctl->zsctl_cpus, cpu);
1371 }
1372 }
1373
1374 /* Some utility functions for managing the list of processor sets */
1375 static zsd_pset_t *
zsd_lookup_pset_byid(zsd_ctl_t * ctl,psetid_t psetid)1376 zsd_lookup_pset_byid(zsd_ctl_t *ctl, psetid_t psetid)
1377 {
1378 zsd_pset_t *pset;
1379
1380 for (pset = list_head(&ctl->zsctl_psets); pset != NULL;
1381 pset = list_next(&ctl->zsctl_psets, pset)) {
1382 if (pset->zsp_id == psetid)
1383 return (pset);
1384 }
1385 return (NULL);
1386 }
1387
1388 static zsd_pset_t *
zsd_lookup_pset(zsd_ctl_t * ctl,char * psetname,psetid_t psetid)1389 zsd_lookup_pset(zsd_ctl_t *ctl, char *psetname, psetid_t psetid)
1390 {
1391 zsd_pset_t *pset;
1392
1393 for (pset = list_head(&ctl->zsctl_psets); pset != NULL;
1394 pset = list_next(&ctl->zsctl_psets, pset)) {
1395 if (strcmp(pset->zsp_name, psetname) == 0) {
1396 if (psetid != -1)
1397 pset->zsp_id = psetid;
1398 return (pset);
1399 }
1400 }
1401 return (NULL);
1402 }
1403
1404 static zsd_pset_t *
zsd_allocate_pset(zsd_ctl_t * ctl,char * psetname,psetid_t psetid)1405 zsd_allocate_pset(zsd_ctl_t *ctl, char *psetname, psetid_t psetid)
1406 {
1407 zsd_pset_t *pset;
1408
1409 if ((pset = (zsd_pset_t *)calloc(1, sizeof (zsd_pset_t))) == NULL)
1410 return (NULL);
1411
1412 (void) strlcpy(pset->zsp_name, psetname, sizeof (pset->zsp_name));
1413 pset->zsp_id = psetid;
1414 pset->zsp_found = B_FALSE;
1415 /*
1416 * Allocate as deleted so if not found in first pass, pset is deleted
1417 * from list. This can happen if pset is returned by pset_list, but
1418 * is destroyed before first attempt to fetch pset details.
1419 */
1420 list_create(&pset->zsp_usage_list, sizeof (zsd_pset_usage_t),
1421 offsetof(zsd_pset_usage_t, zsu_next));
1422
1423 pset->zsp_hrstart = g_hrnow;
1424 pset->zsp_deleted = B_TRUE;
1425 pset->zsp_empty = B_TRUE;
1426 ctl->zsctl_npsets++;
1427
1428 return (pset);
1429 }
1430
1431 static zsd_pset_t *
zsd_lookup_insert_pset(zsd_ctl_t * ctl,char * psetname,psetid_t psetid)1432 zsd_lookup_insert_pset(zsd_ctl_t *ctl, char *psetname, psetid_t psetid)
1433 {
1434 zsd_pset_t *pset, *tmp;
1435
1436 if ((pset = zsd_lookup_pset(ctl, psetname, psetid)) != NULL)
1437 return (pset);
1438
1439 if ((pset = zsd_allocate_pset(ctl, psetname, psetid)) == NULL)
1440 return (NULL);
1441
1442 /* Insert sorted by psetname */
1443 tmp = list_head(&ctl->zsctl_psets);
1444 while (tmp != NULL && strcmp(psetname, tmp->zsp_name) > 0)
1445 tmp = list_next(&ctl->zsctl_psets, tmp);
1446
1447 list_insert_before(&ctl->zsctl_psets, tmp, pset);
1448 return (pset);
1449 }
1450
1451 /* Some utility functions for managing the list of zones using each pset */
1452 static zsd_pset_usage_t *
zsd_lookup_usage(zsd_pset_t * pset,zsd_zone_t * zone)1453 zsd_lookup_usage(zsd_pset_t *pset, zsd_zone_t *zone)
1454 {
1455 zsd_pset_usage_t *usage;
1456
1457 for (usage = list_head(&pset->zsp_usage_list); usage != NULL;
1458 usage = list_next(&pset->zsp_usage_list, usage))
1459 if (usage->zsu_zone == zone)
1460 return (usage);
1461
1462 return (NULL);
1463 }
1464
1465 static zsd_pset_usage_t *
zsd_allocate_pset_usage(zsd_ctl_t * ctl,zsd_pset_t * pset,zsd_zone_t * zone)1466 zsd_allocate_pset_usage(zsd_ctl_t *ctl, zsd_pset_t *pset, zsd_zone_t *zone)
1467 {
1468 zsd_pset_usage_t *usage;
1469
1470 if ((usage = (zsd_pset_usage_t *)calloc(1, sizeof (zsd_pset_usage_t)))
1471 == NULL)
1472 return (NULL);
1473
1474 list_link_init(&usage->zsu_next);
1475 usage->zsu_zone = zone;
1476 usage->zsu_zoneid = zone->zsz_id;
1477 usage->zsu_pset = pset;
1478 usage->zsu_found = B_FALSE;
1479 usage->zsu_active = B_FALSE;
1480 usage->zsu_new = B_FALSE;
1481 /*
1482 * Allocate as not deleted. If a process is found in a pset for
1483 * a zone, the usage will not be deleted until at least the next
1484 * interval.
1485 */
1486 usage->zsu_start = g_now;
1487 usage->zsu_hrstart = g_hrnow;
1488 usage->zsu_deleted = B_FALSE;
1489 usage->zsu_empty = B_TRUE;
1490 usage->zsu_scheds = 0;
1491 usage->zsu_cpu_shares = ZS_LIMIT_NONE;
1492
1493 ctl->zsctl_npset_usages++;
1494 pset->zsp_nusage++;
1495
1496 return (usage);
1497 }
1498
1499 static zsd_pset_usage_t *
zsd_lookup_insert_usage(zsd_ctl_t * ctl,zsd_pset_t * pset,zsd_zone_t * zone)1500 zsd_lookup_insert_usage(zsd_ctl_t *ctl, zsd_pset_t *pset, zsd_zone_t *zone)
1501 {
1502 zsd_pset_usage_t *usage, *tmp;
1503
1504 if ((usage = zsd_lookup_usage(pset, zone))
1505 != NULL)
1506 return (usage);
1507
1508 if ((usage = zsd_allocate_pset_usage(ctl, pset, zone)) == NULL)
1509 return (NULL);
1510
1511 tmp = list_head(&pset->zsp_usage_list);
1512 while (tmp != NULL && strcmp(zone->zsz_name, tmp->zsu_zone->zsz_name)
1513 > 0)
1514 tmp = list_next(&pset->zsp_usage_list, tmp);
1515
1516 list_insert_before(&pset->zsp_usage_list, tmp, usage);
1517 return (usage);
1518 }
1519
1520 static void
zsd_refresh_system(zsd_ctl_t * ctl)1521 zsd_refresh_system(zsd_ctl_t *ctl)
1522 {
1523 zsd_system_t *system = ctl->zsctl_system;
1524
1525 /* Re-count these values each interval */
1526 system->zss_processes = 0;
1527 system->zss_lwps = 0;
1528 system->zss_shm = 0;
1529 system->zss_shmids = 0;
1530 system->zss_semids = 0;
1531 system->zss_msgids = 0;
1532 system->zss_lofi = 0;
1533 }
1534
1535
/*
 * Reads each cpu's kstats, and adds the usage to the cpu's pset.
 *
 * The cpu's "sys" kstat supplies cumulative nanosecond counters
 * (cpu_nsec_idle/kernel/user).  The delta against the values saved at
 * the previous refresh is accumulated into the cpu, its pset(s), and the
 * system totals.  Any kstat lookup/read failure causes a silent return;
 * the cpu simply contributes nothing this interval.
 */
static void
zsd_update_cpu_stats(zsd_ctl_t *ctl, zsd_cpu_t *cpu)
{
	zsd_system_t *sys;
	processorid_t cpuid;
	zsd_pset_t *pset_prev;
	zsd_pset_t *pset;
	kstat_t *kstat;
	kstat_named_t *knp;
	kid_t kid;
	uint64_t idle, intr, kern, user;

	sys = ctl->zsctl_system;
	pset = cpu->zsc_pset;
	knp = NULL;
	kid = -1;
	cpuid = cpu->zsc_id;

	/* Get the cpu time totals for this cpu */
	kstat = kstat_lookup(ctl->zsctl_kstat_ctl, "cpu", cpuid, "sys");
	if (kstat == NULL)
		return;

	kid = kstat_read(ctl->zsctl_kstat_ctl, kstat, NULL);
	if (kid == -1)
		return;

	knp = kstat_data_lookup(kstat, "cpu_nsec_idle");
	if (knp == NULL || knp->data_type != KSTAT_DATA_UINT64)
		return;

	idle = knp->value.ui64;

	knp = kstat_data_lookup(kstat, "cpu_nsec_kernel");
	if (knp == NULL || knp->data_type != KSTAT_DATA_UINT64)
		return;

	kern = knp->value.ui64;

	knp = kstat_data_lookup(kstat, "cpu_nsec_user");
	if (knp == NULL || knp->data_type != KSTAT_DATA_UINT64)
		return;

	user = knp->value.ui64;

	/*
	 * Tracking intr time per cpu just exists for future enhancements.
	 * The value is presently always zero.
	 */
	intr = 0;
	cpu->zsc_nsec_idle = idle;
	cpu->zsc_nsec_intr = intr;
	cpu->zsc_nsec_kern = kern;
	cpu->zsc_nsec_user = user;

	if (cpu->zsc_onlined == B_TRUE) {
		/*
		 * cpu is newly online. There is no reference value,
		 * so just record its current stats for comparison
		 * on next stat read.
		 */
		cpu->zsc_nsec_idle_prev = cpu->zsc_nsec_idle;
		cpu->zsc_nsec_intr_prev = cpu->zsc_nsec_intr;
		cpu->zsc_nsec_kern_prev = cpu->zsc_nsec_kern;
		cpu->zsc_nsec_user_prev = cpu->zsc_nsec_user;
		return;
	}

	/*
	 * Calculate relative time since previous refresh.
	 * Paranoia. Don't let time go backwards.
	 */
	idle = intr = kern = user = 0;
	if (cpu->zsc_nsec_idle > cpu->zsc_nsec_idle_prev)
		idle = cpu->zsc_nsec_idle - cpu->zsc_nsec_idle_prev;

	if (cpu->zsc_nsec_intr > cpu->zsc_nsec_intr_prev)
		intr = cpu->zsc_nsec_intr - cpu->zsc_nsec_intr_prev;

	if (cpu->zsc_nsec_kern > cpu->zsc_nsec_kern_prev)
		kern = cpu->zsc_nsec_kern - cpu->zsc_nsec_kern_prev;

	if (cpu->zsc_nsec_user > cpu->zsc_nsec_user_prev)
		user = cpu->zsc_nsec_user - cpu->zsc_nsec_user_prev;

	/* Update totals for cpu usage */
	TIMESTRUC_ADD_NANOSEC(cpu->zsc_idle, idle);
	TIMESTRUC_ADD_NANOSEC(cpu->zsc_intr, intr);
	TIMESTRUC_ADD_NANOSEC(cpu->zsc_kern, kern);
	TIMESTRUC_ADD_NANOSEC(cpu->zsc_user, user);

	/*
	 * Add cpu's stats to its pset if it is known to be in
	 * the pset since previous read.  All of the delta is charged to
	 * the current pset when the cpu did not move, when the previous
	 * pset is unknown, or when the previous pset no longer exists.
	 */
	if (cpu->zsc_psetid == cpu->zsc_psetid_prev ||
	    cpu->zsc_psetid_prev == ZS_PSET_ERROR ||
	    (pset_prev = zsd_lookup_pset_byid(ctl,
	    cpu->zsc_psetid_prev)) == NULL) {
		TIMESTRUC_ADD_NANOSEC(pset->zsp_idle, idle);
		TIMESTRUC_ADD_NANOSEC(pset->zsp_intr, intr);
		TIMESTRUC_ADD_NANOSEC(pset->zsp_kern, kern);
		TIMESTRUC_ADD_NANOSEC(pset->zsp_user, user);
	} else {
		/*
		 * Last pset was different than current pset.
		 * Best guess is to split usage between the two.
		 * The current pset also gets the odd nanosecond so the
		 * two halves sum exactly to the delta.
		 */
		TIMESTRUC_ADD_NANOSEC(pset_prev->zsp_idle, idle / 2);
		TIMESTRUC_ADD_NANOSEC(pset_prev->zsp_intr, intr / 2);
		TIMESTRUC_ADD_NANOSEC(pset_prev->zsp_kern, kern / 2);
		TIMESTRUC_ADD_NANOSEC(pset_prev->zsp_user, user / 2);

		TIMESTRUC_ADD_NANOSEC(pset->zsp_idle,
		    (idle / 2) + (idle % 2));
		TIMESTRUC_ADD_NANOSEC(pset->zsp_intr,
		    (intr / 2) + (intr % 2));
		TIMESTRUC_ADD_NANOSEC(pset->zsp_kern,
		    (kern / 2) + (kern % 2));
		TIMESTRUC_ADD_NANOSEC(pset->zsp_user,
		    (user / 2) + (user % 2));
	}
	TIMESTRUC_ADD_NANOSEC(sys->zss_idle, idle);
	TIMESTRUC_ADD_NANOSEC(sys->zss_intr, intr);
	TIMESTRUC_ADD_NANOSEC(sys->zss_kern, kern);
	TIMESTRUC_ADD_NANOSEC(sys->zss_user, user);
}
1664
/*
 * Determine the details of a processor set by pset_id.
 *
 * Returns 0 on success, with the pset's name, cpu type, online/size/
 * min/max counts and importance filled into the out parameters, and the
 * ids of its online cpus left in ctl->zsctl_cpu_cache.  On failure
 * returns -1 with errno set to EINTR if the pset vanished while being
 * inspected (caller should retry), ENOMEM if the cpu list could not be
 * grown, or EINVAL for a libpool lookup failure (no retry; the conf is
 * a consistent snapshot).
 *
 * When the pools facility is disabled, legacy psets (pset_info) are
 * inspected instead, and min/max are reported as the pset's size.
 */
static int
zsd_get_pool_pset(zsd_ctl_t *ctl, psetid_t psetid, char *psetname,
    size_t namelen, uint_t *cputype, uint64_t *online, uint64_t *size,
    uint64_t *min, uint64_t *max, int64_t *importance)
{
	uint_t old, num;

	pool_conf_t *conf = ctl->zsctl_pool_conf;
	pool_value_t **vals = ctl->zsctl_pool_vals;
	pool_resource_t **res_list = NULL;
	pool_resource_t *pset;
	pool_component_t **cpus = NULL;
	processorid_t *cache;
	const char *string;
	uint64_t uint64;
	int64_t int64;
	int i, ret, type;

	if (ctl->zsctl_pool_status == POOL_DISABLED) {

		/*
		 * Inspect legacy psets
		 */
		for (;;) {
			old = num = ctl->zsctl_cpu_ncache;
			ret = pset_info(psetid, &type, &num,
			    ctl->zsctl_cpu_cache);
			if (ret < 0) {
				/* pset is gone. Tell caller to retry */
				errno = EINTR;
				return (-1);
			}
			if (num <= old) {
				/* Success */
				break;
			}
			/* Cache too small; grow it and query again */
			if ((cache = (processorid_t *)realloc(
			    ctl->zsctl_cpu_cache, num *
			    sizeof (processorid_t))) != NULL) {
				ctl->zsctl_cpu_ncache = num;
				ctl->zsctl_cpu_cache = cache;
			} else {
				/*
				 * Could not allocate to get new cpu list.
				 */
				zsd_warn(gettext(
				    "Could not allocate for cpu list"));
				errno = ENOMEM;
				return (-1);
			}
		}
		/*
		 * Old school pset. Just make min and max equal
		 * to its size
		 */
		if (psetid == ZS_PSET_DEFAULT) {
			*cputype = ZS_CPUTYPE_DEFAULT_PSET;
			(void) strlcpy(psetname, "pset_default", namelen);
		} else {
			*cputype = ZS_CPUTYPE_PSRSET_PSET;
			(void) snprintf(psetname, namelen,
			    "SUNWlegacy_pset_%d", psetid);
		}

		/*
		 * Just treat legacy pset as a simple pool pset
		 */
		*online = num;
		*size = num;
		*min = num;
		*max = num;
		*importance = 1;

		return (0);
	}

	/* Look up the pool pset using the pset id */
	res_list = NULL;
	pool_value_set_int64(vals[1], psetid);
	if (pool_value_set_name(vals[1], "pset.sys_id")
	    != PO_SUCCESS)
		goto err;

	if (pool_value_set_name(vals[0], "type") != PO_SUCCESS)
		goto err;
	if (pool_value_set_string(vals[0], "pset") != PO_SUCCESS)
		goto err;
	if ((res_list = pool_query_resources(conf, &num, vals)) == NULL)
		goto err;
	if (num != 1)
		goto err;
	pset = res_list[0];
	free(res_list);
	res_list = NULL;
	if (pool_get_property(conf, pool_resource_to_elem(conf, pset),
	    "pset.name", vals[0]) != POC_STRING ||
	    pool_value_get_string(vals[0], &string) != PO_SUCCESS)
		goto err;

	(void) strlcpy(psetname, string, namelen);
	if (strncmp(psetname, "SUNWtmp", strlen("SUNWtmp")) == 0)
		*cputype = ZS_CPUTYPE_DEDICATED;
	else if (psetid == ZS_PSET_DEFAULT)
		*cputype = ZS_CPUTYPE_DEFAULT_PSET;
	else
		*cputype = ZS_CPUTYPE_POOL_PSET;

	/* Get size, min, max, and importance */
	if (pool_get_property(conf, pool_resource_to_elem(conf,
	    pset), "pset.size", vals[0]) == POC_UINT &&
	    pool_value_get_uint64(vals[0], &uint64) == PO_SUCCESS)
		*size = uint64;
	else
		*size = 0;

	/* Values at or above ZSD_PSET_UNLIMITED mean "no limit" */
	if (pool_get_property(conf, pool_resource_to_elem(conf,
	    pset), "pset.min", vals[0]) == POC_UINT &&
	    pool_value_get_uint64(vals[0], &uint64) == PO_SUCCESS)
		*min = uint64;
	else
		*min = 0;
	if (*min >= ZSD_PSET_UNLIMITED)
		*min = ZS_LIMIT_NONE;

	if (pool_get_property(conf, pool_resource_to_elem(conf,
	    pset), "pset.max", vals[0]) == POC_UINT &&
	    pool_value_get_uint64(vals[0], &uint64) == PO_SUCCESS)
		*max = uint64;
	else
		*max = ZS_LIMIT_NONE;

	if (*max >= ZSD_PSET_UNLIMITED)
		*max = ZS_LIMIT_NONE;

	if (pool_get_property(conf, pool_resource_to_elem(conf,
	    pset), "pset.importance", vals[0]) == POC_INT &&
	    pool_value_get_int64(vals[0], &int64) == PO_SUCCESS)
		*importance = int64;
	else
		*importance = (uint64_t)1;

	*online = 0;
	if (*size == 0)
		return (0);

	/* get cpus */
	cpus = pool_query_resource_components(conf, pset, &num, NULL);
	if (cpus == NULL)
		goto err;

	/* Make sure there is space for cpu id list */
	if (num > ctl->zsctl_cpu_ncache) {
		if ((cache = (processorid_t *)realloc(
		    ctl->zsctl_cpu_cache, num *
		    sizeof (processorid_t))) != NULL) {
			ctl->zsctl_cpu_ncache = num;
			ctl->zsctl_cpu_cache = cache;
		} else {
			/*
			 * Could not allocate to get new cpu list.
			 */
			zsd_warn(gettext(
			    "Could not allocate for cpu list"));
			goto err;
		}
	}

	/* count the online cpus */
	for (i = 0; i < num; i++) {
		if (pool_get_property(conf, pool_component_to_elem(
		    conf, cpus[i]), "cpu.status", vals[0]) != POC_STRING ||
		    pool_value_get_string(vals[0], &string) != PO_SUCCESS)
			goto err;

		if (strcmp(string, "on-line") != 0 &&
		    strcmp(string, "no-intr") != 0)
			continue;

		if (pool_get_property(conf, pool_component_to_elem(
		    conf, cpus[i]), "cpu.sys_id", vals[0]) != POC_INT ||
		    pool_value_get_int64(vals[0], &int64) != PO_SUCCESS)
			goto err;

		(*online)++;
		/*
		 * NOTE(review): the id is stored at the component index i,
		 * not at the running online count, so when some cpus are
		 * offline the first *online cache slots may contain stale
		 * entries from a prior pass.  Confirm intended against the
		 * consumer in zsd_refresh_psets.
		 */
		ctl->zsctl_cpu_cache[i] = (psetid_t)int64;
	}
	free(cpus);
	return (0);
err:
	if (res_list != NULL)
		free(res_list);
	if (cpus != NULL)
		free(cpus);

	/*
	 * The pools operations should succeed since the conf is a consistent
	 * snapshot. Tell caller there is no need to retry.
	 */
	errno = EINVAL;
	return (-1);
}
1868
/*
 * Update the current list of processor sets.
 * This also updates the list of online cpus, and each cpu's pset membership.
 *
 * When the pools facility is enabled the pset list is taken from the
 * libpool configuration; when disabled, from legacy pset_list().  Each
 * pset is then inspected by zsd_get_pool_pset(); an EINTR from that
 * lookup means a pset vanished mid-pass, so the whole marking pass is
 * restarted (without re-rolling the saved per-cpu kstat values).
 */
static void
zsd_refresh_psets(zsd_ctl_t *ctl)
{
	int i, j, ret, state;
	uint_t old, num;
	uint_t cputype;
	int64_t sys_id, importance;
	uint64_t online, size, min, max;
	zsd_system_t *system;
	zsd_pset_t *pset;
	zsd_cpu_t *cpu;
	psetid_t *cache;
	char psetname[ZS_PSETNAME_MAX];
	processorid_t cpuid;
	pool_value_t *pv_save = NULL;
	pool_resource_t **res_list = NULL;
	pool_resource_t *res;
	pool_value_t **vals;
	pool_conf_t *conf;
	boolean_t roll_cpus = B_TRUE;

	/* Zero cpu counters to recount them */
	system = ctl->zsctl_system;
	system->zss_ncpus = 0;
	system->zss_ncpus_online = 0;
retry:
	ret = pool_get_status(&state);
	if (ret == 0 && state == POOL_ENABLED) {

		conf = ctl->zsctl_pool_conf;
		vals = ctl->zsctl_pool_vals;
		/* vals[1] must be NULL to terminate the query value list */
		pv_save = vals[1];
		vals[1] = NULL;

		if (ctl->zsctl_pool_status == POOL_DISABLED) {
			/* Pools newly enabled: open the dynamic conf */
			if (pool_conf_open(ctl->zsctl_pool_conf,
			    pool_dynamic_location(), PO_RDONLY) == 0) {
				ctl->zsctl_pool_status = POOL_ENABLED;
				ctl->zsctl_pool_changed = POU_PSET;
			}
		} else {
			ctl->zsctl_pool_changed = 0;
			ret = pool_conf_update(ctl->zsctl_pool_conf,
			    &(ctl->zsctl_pool_changed));
			if (ret < 0) {
				/* Pools must have become disabled */
				(void) pool_conf_close(ctl->zsctl_pool_conf);
				ctl->zsctl_pool_status = POOL_DISABLED;
				if (pool_error() == POE_SYSTEM && errno ==
				    ENOTACTIVE)
					goto retry;

				zsd_warn(gettext(
				    "Unable to update pool configuration"));
				/* Not able to get pool info. Don't update. */
				goto err;
			}
		}
		/* Get the list of psets using libpool */
		if (pool_value_set_name(vals[0], "type") != PO_SUCCESS)
			goto err;

		if (pool_value_set_string(vals[0], "pset") != PO_SUCCESS)
			goto err;
		if ((res_list = pool_query_resources(conf, &num, vals))
		    == NULL)
			goto err;

		/* Grow the pset id cache if needed */
		if (num > ctl->zsctl_pset_ncache) {
			if ((cache = (psetid_t *)realloc(ctl->zsctl_pset_cache,
			    (num) * sizeof (psetid_t))) == NULL) {
				goto err;
			}
			ctl->zsctl_pset_ncache = num;
			ctl->zsctl_pset_cache = cache;
		}
		/* Save the pset id of each pset */
		for (i = 0; i < num; i++) {
			res = res_list[i];
			if (pool_get_property(conf, pool_resource_to_elem(conf,
			    res), "pset.sys_id", vals[0]) != POC_INT ||
			    pool_value_get_int64(vals[0], &sys_id)
			    != PO_SUCCESS)
				goto err;
			ctl->zsctl_pset_cache[i] = (int)sys_id;
		}
		vals[1] = pv_save;
		pv_save = NULL;
	} else {
		/* Pools disabled; drop any previously open conf */
		if (ctl->zsctl_pool_status == POOL_ENABLED) {
			(void) pool_conf_close(ctl->zsctl_pool_conf);
			ctl->zsctl_pool_status = POOL_DISABLED;
		}
		/* Get the pset list using legacy psets */
		for (;;) {
			old = num = ctl->zsctl_pset_ncache;
			(void) pset_list(ctl->zsctl_pset_cache, &num);
			/* +1 leaves room for the default pset added below */
			if ((num + 1) <= old) {
				break;
			}
			if ((cache = (psetid_t *)realloc(ctl->zsctl_pset_cache,
			    (num + 1) * sizeof (psetid_t))) != NULL) {
				ctl->zsctl_pset_ncache = num + 1;
				ctl->zsctl_pset_cache = cache;
			} else {
				/*
				 * Could not allocate to get new pset list.
				 * Give up
				 */
				return;
			}
		}
		/* Add the default pset to list (swapped into slot 0) */
		ctl->zsctl_pset_cache[num] = ctl->zsctl_pset_cache[0];
		ctl->zsctl_pset_cache[0] = ZS_PSET_DEFAULT;
		num++;
	}
psets_changed:
	/*
	 * Begin the marking pass.  The saved per-cpu kstat values are only
	 * rolled once, even if a pset disappears and the pass restarts.
	 */
	zsd_mark_cpus_start(ctl, roll_cpus);
	zsd_mark_psets_start(ctl);
	roll_cpus = B_FALSE;

	/* Refresh cpu membership of all psets */
	for (i = 0; i < num; i++) {

		/* Get pool pset information */
		sys_id = ctl->zsctl_pset_cache[i];
		if (zsd_get_pool_pset(ctl, sys_id, psetname, sizeof (psetname),
		    &cputype, &online, &size, &min, &max, &importance)
		    != 0) {
			if (errno == EINTR)
				goto psets_changed;
			zsd_warn(gettext("Failed to get info for pset %d"),
			    sys_id);
			continue;
		}

		system->zss_ncpus += size;
		system->zss_ncpus_online += online;

		pset = zsd_lookup_insert_pset(ctl, psetname,
		    ctl->zsctl_pset_cache[i]);

		/* update pset info */
		zsd_mark_pset_found(pset, cputype, online, size, min,
		    max, importance);

		/* update each cpu in pset (ids cached by zsd_get_pool_pset) */
		for (j = 0; j < pset->zsp_online; j++) {
			cpuid = ctl->zsctl_cpu_cache[j];
			cpu = zsd_lookup_insert_cpu(ctl, cpuid);
			zsd_mark_cpu_found(cpu, pset, sys_id);
		}
	}
err:
	if (res_list != NULL)
		free(res_list);
	if (pv_save != NULL)
		vals[1] = pv_save;
}
2033
2034
2035
2036 /*
2037 * Fetch the current pool and pset name for the given zone.
2038 */
2039 static void
zsd_get_zone_pool_pset(zsd_ctl_t * ctl,zsd_zone_t * zone,char * pool,int poollen,char * pset,int psetlen,uint_t * cputype)2040 zsd_get_zone_pool_pset(zsd_ctl_t *ctl, zsd_zone_t *zone,
2041 char *pool, int poollen, char *pset, int psetlen, uint_t *cputype)
2042 {
2043 poolid_t poolid;
2044 pool_t **pools = NULL;
2045 pool_resource_t **res_list = NULL;
2046 char poolname[ZS_POOLNAME_MAX];
2047 char psetname[ZS_PSETNAME_MAX];
2048 pool_conf_t *conf = ctl->zsctl_pool_conf;
2049 pool_value_t *pv_save = NULL;
2050 pool_value_t **vals = ctl->zsctl_pool_vals;
2051 const char *string;
2052 int ret;
2053 int64_t int64;
2054 uint_t num;
2055
2056 ret = zone_getattr(zone->zsz_id, ZONE_ATTR_POOLID,
2057 &poolid, sizeof (poolid));
2058 if (ret < 0)
2059 goto lookup_done;
2060
2061 pv_save = vals[1];
2062 vals[1] = NULL;
2063 pools = NULL;
2064 res_list = NULL;
2065
2066 /* Default values if lookup fails */
2067 (void) strlcpy(poolname, "pool_default", sizeof (poolname));
2068 (void) strlcpy(psetname, "pset_default", sizeof (poolname));
2069 *cputype = ZS_CPUTYPE_DEFAULT_PSET;
2070
2071 /* no dedicated cpu if pools are disabled */
2072 if (ctl->zsctl_pool_status == POOL_DISABLED)
2073 goto lookup_done;
2074
2075 /* Get the pool name using the id */
2076 pool_value_set_int64(vals[0], poolid);
2077 if (pool_value_set_name(vals[0], "pool.sys_id") != PO_SUCCESS)
2078 goto lookup_done;
2079
2080 if ((pools = pool_query_pools(conf, &num, vals)) == NULL)
2081 goto lookup_done;
2082
2083 if (num != 1)
2084 goto lookup_done;
2085
2086 if (pool_get_property(conf, pool_to_elem(conf, pools[0]),
2087 "pool.name", vals[0]) != POC_STRING ||
2088 pool_value_get_string(vals[0], &string) != PO_SUCCESS)
2089 goto lookup_done;
2090 (void) strlcpy(poolname, (char *)string, sizeof (poolname));
2091
2092 /* Get the name of the pset for the pool */
2093 if (pool_value_set_name(vals[0], "type") != PO_SUCCESS)
2094 goto lookup_done;
2095
2096 if (pool_value_set_string(vals[0], "pset") != PO_SUCCESS)
2097 goto lookup_done;
2098
2099 if ((res_list = pool_query_pool_resources(conf, pools[0], &num, vals))
2100 == NULL)
2101 goto lookup_done;
2102
2103 if (num != 1)
2104 goto lookup_done;
2105
2106 if (pool_get_property(conf, pool_resource_to_elem(conf,
2107 res_list[0]), "pset.sys_id", vals[0]) != POC_INT ||
2108 pool_value_get_int64(vals[0], &int64) != PO_SUCCESS)
2109 goto lookup_done;
2110
2111 if (int64 == ZS_PSET_DEFAULT)
2112 *cputype = ZS_CPUTYPE_DEFAULT_PSET;
2113
2114 if (pool_get_property(conf, pool_resource_to_elem(conf,
2115 res_list[0]), "pset.name", vals[0]) != POC_STRING ||
2116 pool_value_get_string(vals[0], &string) != PO_SUCCESS)
2117 goto lookup_done;
2118
2119 (void) strlcpy(psetname, (char *)string, sizeof (psetname));
2120
2121 if (strncmp(psetname, "SUNWtmp_", strlen("SUNWtmp_")) == 0)
2122 *cputype = ZS_CPUTYPE_DEDICATED;
2123 if (strncmp(psetname, "SUNW_legacy_", strlen("SUNW_legacy_")) == 0)
2124 *cputype = ZS_CPUTYPE_PSRSET_PSET;
2125 else
2126 *cputype = ZS_CPUTYPE_POOL_PSET;
2127
2128 lookup_done:
2129
2130 if (pv_save != NULL)
2131 vals[1] = pv_save;
2132
2133 if (res_list)
2134 free(res_list);
2135 if (pools)
2136 free(pools);
2137
2138 (void) strlcpy(pool, poolname, poollen);
2139 (void) strlcpy(pset, psetname, psetlen);
2140 }
2141
2142 /* Convert scheduler names to ZS_* scheduler flags */
2143 static uint_t
zsd_schedname2int(char * clname,int pri)2144 zsd_schedname2int(char *clname, int pri)
2145 {
2146 uint_t sched = 0;
2147
2148 if (strcmp(clname, "TS") == 0) {
2149 sched = ZS_SCHED_TS;
2150 } else if (strcmp(clname, "IA") == 0) {
2151 sched = ZS_SCHED_IA;
2152 } else if (strcmp(clname, "FX") == 0) {
2153 if (pri > 59) {
2154 sched = ZS_SCHED_FX_60;
2155 } else {
2156 sched = ZS_SCHED_FX;
2157 }
2158 } else if (strcmp(clname, "RT") == 0) {
2159 sched = ZS_SCHED_RT;
2160
2161 } else if (strcmp(clname, "FSS") == 0) {
2162 sched = ZS_SCHED_FSS;
2163 }
2164 return (sched);
2165 }
2166
2167 static uint64_t
zsd_get_zone_rctl_limit(char * name)2168 zsd_get_zone_rctl_limit(char *name)
2169 {
2170 rctlblk_t *rblk;
2171
2172 rblk = (rctlblk_t *)alloca(rctlblk_size());
2173 if (getrctl(name, NULL, rblk, RCTL_FIRST)
2174 != 0) {
2175 return (ZS_LIMIT_NONE);
2176 }
2177 return (rctlblk_get_value(rblk));
2178 }
2179
2180 static uint64_t
zsd_get_zone_rctl_usage(char * name)2181 zsd_get_zone_rctl_usage(char *name)
2182 {
2183 rctlblk_t *rblk;
2184
2185 rblk = (rctlblk_t *)alloca(rctlblk_size());
2186 if (getrctl(name, NULL, rblk, RCTL_USAGE)
2187 != 0) {
2188 return (0);
2189 }
2190 return (rctlblk_get_value(rblk));
2191 }
2192
2193 #define ZSD_NUM_RCTL_VALS 19
2194
2195 /*
2196 * Fetch the limit information for a zone. This uses zone_enter() as the
2197 * getrctl(2) system call only returns rctl information for the zone of
2198 * the caller.
2199 */
2200 static int
zsd_get_zone_caps(zsd_ctl_t * ctl,zsd_zone_t * zone,uint64_t * cpu_shares,uint64_t * cpu_cap,uint64_t * ram_cap,uint64_t * locked_cap,uint64_t * vm_cap,uint64_t * processes_cap,uint64_t * processes,uint64_t * lwps_cap,uint64_t * lwps,uint64_t * shm_cap,uint64_t * shm,uint64_t * shmids_cap,uint64_t * shmids,uint64_t * semids_cap,uint64_t * semids,uint64_t * msgids_cap,uint64_t * msgids,uint64_t * lofi_cap,uint64_t * lofi,uint_t * sched)2201 zsd_get_zone_caps(zsd_ctl_t *ctl, zsd_zone_t *zone, uint64_t *cpu_shares,
2202 uint64_t *cpu_cap, uint64_t *ram_cap, uint64_t *locked_cap,
2203 uint64_t *vm_cap, uint64_t *processes_cap, uint64_t *processes,
2204 uint64_t *lwps_cap, uint64_t *lwps, uint64_t *shm_cap, uint64_t *shm,
2205 uint64_t *shmids_cap, uint64_t *shmids, uint64_t *semids_cap,
2206 uint64_t *semids, uint64_t *msgids_cap, uint64_t *msgids,
2207 uint64_t *lofi_cap, uint64_t *lofi, uint_t *sched)
2208 {
2209 int p[2], pid, tmpl_fd, ret;
2210 ctid_t ct;
2211 char class[PC_CLNMSZ];
2212 uint64_t vals[ZSD_NUM_RCTL_VALS];
2213 zsd_system_t *sys = ctl->zsctl_system;
2214 int i = 0;
2215 int res = 0;
2216
2217 /* Treat all caps as no cap on error */
2218 *cpu_shares = ZS_LIMIT_NONE;
2219 *cpu_cap = ZS_LIMIT_NONE;
2220 *ram_cap = ZS_LIMIT_NONE;
2221 *locked_cap = ZS_LIMIT_NONE;
2222 *vm_cap = ZS_LIMIT_NONE;
2223
2224 *processes_cap = ZS_LIMIT_NONE;
2225 *lwps_cap = ZS_LIMIT_NONE;
2226 *shm_cap = ZS_LIMIT_NONE;
2227 *shmids_cap = ZS_LIMIT_NONE;
2228 *semids_cap = ZS_LIMIT_NONE;
2229 *msgids_cap = ZS_LIMIT_NONE;
2230 *lofi_cap = ZS_LIMIT_NONE;
2231
2232 *processes = 0;
2233 *lwps = 0;
2234 *shm = 0;
2235 *shmids = 0;
2236 *semids = 0;
2237 *msgids = 0;
2238 *lofi = 0;
2239
2240 /* Get the ram cap first since it is a zone attr */
2241 ret = zone_getattr(zone->zsz_id, ZONE_ATTR_PHYS_MCAP,
2242 ram_cap, sizeof (*ram_cap));
2243 if (ret < 0 || *ram_cap == 0)
2244 *ram_cap = ZS_LIMIT_NONE;
2245
2246 /* Get the zone's default scheduling class */
2247 ret = zone_getattr(zone->zsz_id, ZONE_ATTR_SCHED_CLASS,
2248 class, sizeof (class));
2249 if (ret < 0)
2250 return (-1);
2251
2252 *sched = zsd_schedname2int(class, 0);
2253
2254 /* rctl caps must be fetched from within the zone */
2255 if (pipe(p) != 0)
2256 return (-1);
2257
2258 if ((tmpl_fd = init_template()) == -1) {
2259 (void) close(p[0]);
2260 (void) close(p[1]);
2261 return (-1);
2262 }
2263 pid = forkx(0);
2264 if (pid < 0) {
2265 (void) ct_tmpl_clear(tmpl_fd);
2266 (void) close(p[0]);
2267 (void) close(p[1]);
2268 return (-1);
2269 }
2270 if (pid == 0) {
2271
2272 (void) ct_tmpl_clear(tmpl_fd);
2273 (void) close(tmpl_fd);
2274 (void) close(p[0]);
2275 if (zone->zsz_id != getzoneid()) {
2276 if (zone_enter(zone->zsz_id) < 0) {
2277 (void) close(p[1]);
2278 _exit(0);
2279 }
2280 }
2281
2282 /* Get caps for zone, and write them to zonestatd parent. */
2283 vals[i++] = zsd_get_zone_rctl_limit("zone.cpu-shares");
2284 vals[i++] = zsd_get_zone_rctl_limit("zone.cpu-cap");
2285 vals[i++] = zsd_get_zone_rctl_limit("zone.max-locked-memory");
2286 vals[i++] = zsd_get_zone_rctl_limit("zone.max-swap");
2287 vals[i++] = zsd_get_zone_rctl_limit("zone.max-processes");
2288 vals[i++] = zsd_get_zone_rctl_usage("zone.max-processes");
2289 vals[i++] = zsd_get_zone_rctl_limit("zone.max-lwps");
2290 vals[i++] = zsd_get_zone_rctl_usage("zone.max-lwps");
2291 vals[i++] = zsd_get_zone_rctl_limit("zone.max-shm-memory");
2292 vals[i++] = zsd_get_zone_rctl_usage("zone.max-shm-memory");
2293 vals[i++] = zsd_get_zone_rctl_limit("zone.max-shm-ids");
2294 vals[i++] = zsd_get_zone_rctl_usage("zone.max-shm-ids");
2295 vals[i++] = zsd_get_zone_rctl_limit("zone.max-sem-ids");
2296 vals[i++] = zsd_get_zone_rctl_usage("zone.max-sem-ids");
2297 vals[i++] = zsd_get_zone_rctl_limit("zone.max-msg-ids");
2298 vals[i++] = zsd_get_zone_rctl_usage("zone.max-msg-ids");
2299 vals[i++] = zsd_get_zone_rctl_limit("zone.max-lofi");
2300 vals[i++] = zsd_get_zone_rctl_usage("zone.max-lofi");
2301
2302 if (write(p[1], vals, ZSD_NUM_RCTL_VALS * sizeof (uint64_t)) !=
2303 ZSD_NUM_RCTL_VALS * sizeof (uint64_t)) {
2304 (void) close(p[1]);
2305 _exit(1);
2306 }
2307
2308 (void) close(p[1]);
2309 _exit(0);
2310 }
2311 if (contract_latest(&ct) == -1)
2312 ct = -1;
2313
2314 (void) ct_tmpl_clear(tmpl_fd);
2315 (void) close(tmpl_fd);
2316 (void) close(p[1]);
2317 while (waitpid(pid, NULL, 0) != pid)
2318 ;
2319
2320 /* Read cap from child in zone */
2321 if (read(p[0], vals, ZSD_NUM_RCTL_VALS * sizeof (uint64_t)) !=
2322 ZSD_NUM_RCTL_VALS * sizeof (uint64_t)) {
2323 res = -1;
2324 goto cleanup;
2325 }
2326 i = 0;
2327 *cpu_shares = vals[i++];
2328 *cpu_cap = vals[i++];
2329 *locked_cap = vals[i++];
2330 *vm_cap = vals[i++];
2331 *processes_cap = vals[i++];
2332 *processes = vals[i++];
2333 *lwps_cap = vals[i++];
2334 *lwps = vals[i++];
2335 *shm_cap = vals[i++];
2336 *shm = vals[i++];
2337 *shmids_cap = vals[i++];
2338 *shmids = vals[i++];
2339 *semids_cap = vals[i++];
2340 *semids = vals[i++];
2341 *msgids_cap = vals[i++];
2342 *msgids = vals[i++];
2343 *lofi_cap = vals[i++];
2344 *lofi = vals[i++];
2345
2346 /* Interpret maximum values as no cap */
2347 if (*cpu_cap == UINT32_MAX || *cpu_cap == 0)
2348 *cpu_cap = ZS_LIMIT_NONE;
2349 if (*processes_cap == sys->zss_processes_max)
2350 *processes_cap = ZS_LIMIT_NONE;
2351 if (*lwps_cap == sys->zss_lwps_max)
2352 *lwps_cap = ZS_LIMIT_NONE;
2353 if (*shm_cap == sys->zss_shm_max)
2354 *shm_cap = ZS_LIMIT_NONE;
2355 if (*shmids_cap == sys->zss_shmids_max)
2356 *shmids_cap = ZS_LIMIT_NONE;
2357 if (*semids_cap == sys->zss_semids_max)
2358 *semids_cap = ZS_LIMIT_NONE;
2359 if (*msgids_cap == sys->zss_msgids_max)
2360 *msgids_cap = ZS_LIMIT_NONE;
2361 if (*lofi_cap == sys->zss_lofi_max)
2362 *lofi_cap = ZS_LIMIT_NONE;
2363
2364
2365 cleanup:
2366 (void) close(p[0]);
2367 (void) ct_tmpl_clear(tmpl_fd);
2368 (void) close(tmpl_fd);
2369 (void) contract_abandon_id(ct);
2370
2371 return (res);
2372 }
2373
/*
 * Update the current list of running zones.  For each running zone, look
 * up (or insert) its tracking structure, fetch its pool/pset binding,
 * ip-type, caps, and usages, and mark it as found for this interval.
 */
static void
zsd_refresh_zones(zsd_ctl_t *ctl)
{
	zsd_zone_t *zone;
	uint_t old, num;
	ushort_t flags;
	int i, ret;
	zoneid_t *cache;
	uint64_t cpu_shares;
	uint64_t cpu_cap;
	uint64_t ram_cap;
	uint64_t locked_cap;
	uint64_t vm_cap;
	uint64_t processes_cap;
	uint64_t processes;
	uint64_t lwps_cap;
	uint64_t lwps;
	uint64_t shm_cap;
	uint64_t shm;
	uint64_t shmids_cap;
	uint64_t shmids;
	uint64_t semids_cap;
	uint64_t semids;
	uint64_t msgids_cap;
	uint64_t msgids;
	uint64_t lofi_cap;
	uint64_t lofi;

	char zonename[ZS_ZONENAME_MAX];
	char poolname[ZS_POOLNAME_MAX];
	char psetname[ZS_PSETNAME_MAX];
	uint_t sched;
	uint_t cputype;
	uint_t iptype;

	/*
	 * Get the current list of running zones.  zone_list() reports the
	 * number of zones; if it exceeds the cached buffer size, grow the
	 * buffer and retry until the whole list fits.
	 */
	for (;;) {
		old = num = ctl->zsctl_zone_ncache;
		(void) zone_list(ctl->zsctl_zone_cache, &num);
		if (num <= old)
			break;
		if ((cache = (zoneid_t *)realloc(ctl->zsctl_zone_cache,
		    (num) * sizeof (zoneid_t))) != NULL) {
			ctl->zsctl_zone_ncache = num;
			ctl->zsctl_zone_cache = cache;
		} else {
			/* Could not allocate to get new zone list. Give up */
			return;
		}
	}

	zsd_mark_zones_start(ctl);

	for (i = 0; i < num; i++) {

		/* Zone may have halted since zone_list(); skip it if so */
		ret = getzonenamebyid(ctl->zsctl_zone_cache[i],
		    zonename, sizeof (zonename));
		if (ret < 0)
			continue;

		zone = zsd_lookup_insert_zone(ctl, zonename,
		    ctl->zsctl_zone_cache[i]);

		ret = zone_getattr(ctl->zsctl_zone_cache[i], ZONE_ATTR_FLAGS,
		    &flags, sizeof (flags));
		if (ret < 0)
			continue;

		if (flags & ZF_NET_EXCL)
			iptype = ZS_IPTYPE_EXCLUSIVE;
		else
			iptype = ZS_IPTYPE_SHARED;

		zsd_get_zone_pool_pset(ctl, zone, poolname, sizeof (poolname),
		    psetname, sizeof (psetname), &cputype);

		/* Caps could not be fetched; don't mark the zone found */
		if (zsd_get_zone_caps(ctl, zone, &cpu_shares, &cpu_cap,
		    &ram_cap, &locked_cap, &vm_cap, &processes_cap, &processes,
		    &lwps_cap, &lwps, &shm_cap, &shm, &shmids_cap, &shmids,
		    &semids_cap, &semids, &msgids_cap, &msgids, &lofi_cap,
		    &lofi, &sched) != 0)
			continue;

		zsd_mark_zone_found(ctl, zone, cpu_shares, cpu_cap, ram_cap,
		    locked_cap, vm_cap, processes_cap, processes, lwps_cap,
		    lwps, shm_cap, shm, shmids_cap, shmids, semids_cap,
		    semids, msgids_cap, msgids, lofi_cap, lofi, poolname,
		    psetname, sched, cputype, iptype);
	}
}
2465
2466 /* Fetch the details of a process from its psinfo_t */
2467 static void
zsd_get_proc_info(zsd_ctl_t * ctl,psinfo_t * psinfo,psetid_t * psetid,psetid_t * prev_psetid,zoneid_t * zoneid,zoneid_t * prev_zoneid,timestruc_t * delta,uint_t * sched)2468 zsd_get_proc_info(zsd_ctl_t *ctl, psinfo_t *psinfo, psetid_t *psetid,
2469 psetid_t *prev_psetid, zoneid_t *zoneid, zoneid_t *prev_zoneid,
2470 timestruc_t *delta, uint_t *sched)
2471 {
2472 timestruc_t d;
2473 zsd_proc_t *proc;
2474
2475 /* Get cached data for proc */
2476 proc = &(ctl->zsctl_proc_array[psinfo->pr_pid]);
2477 *psetid = psinfo->pr_lwp.pr_bindpset;
2478
2479 if (proc->zspr_psetid == ZS_PSET_ERROR)
2480 *prev_psetid = *psetid;
2481 else
2482 *prev_psetid = proc->zspr_psetid;
2483
2484 *zoneid = psinfo->pr_zoneid;
2485 if (proc->zspr_zoneid == -1)
2486 *prev_zoneid = *zoneid;
2487 else
2488 *prev_zoneid = proc->zspr_zoneid;
2489
2490 TIMESTRUC_DELTA(d, psinfo->pr_time, proc->zspr_usage);
2491 *delta = d;
2492
2493 *sched = zsd_schedname2int(psinfo->pr_lwp.pr_clname,
2494 psinfo->pr_lwp.pr_pri);
2495
2496 /* Update cached data for proc */
2497 proc->zspr_psetid = psinfo->pr_lwp.pr_bindpset;
2498 proc->zspr_zoneid = psinfo->pr_zoneid;
2499 proc->zspr_sched = *sched;
2500 proc->zspr_usage.tv_sec = psinfo->pr_time.tv_sec;
2501 proc->zspr_usage.tv_nsec = psinfo->pr_time.tv_nsec;
2502 proc->zspr_ppid = psinfo->pr_ppid;
2503 }
2504
2505 /*
2506 * Reset the known cpu usage of a process. This is done after a process
2507 * exits so that if the pid is recycled, data from its previous life is
2508 * not reused
2509 */
2510 static void
zsd_flush_proc_info(zsd_proc_t * proc)2511 zsd_flush_proc_info(zsd_proc_t *proc)
2512 {
2513 proc->zspr_usage.tv_sec = 0;
2514 proc->zspr_usage.tv_nsec = 0;
2515 }
2516
2517 /*
2518 * Open the current extended accounting file. On initialization, open the
2519 * file as the current file to be used. Otherwise, open the file as the
2520 * next file to use of the current file reaches EOF.
2521 */
2522 static int
zsd_open_exacct(zsd_ctl_t * ctl,boolean_t init)2523 zsd_open_exacct(zsd_ctl_t *ctl, boolean_t init)
2524 {
2525 int ret, oret, state, trys = 0, flags;
2526 int *fd, *open;
2527 ea_file_t *eaf;
2528 struct stat64 *stat;
2529 char path[MAXPATHLEN];
2530
2531 /*
2532 * The accounting file is first opened at the tail. Following
2533 * opens to new accounting files are opened at the head.
2534 */
2535 if (init == B_TRUE) {
2536 flags = EO_NO_VALID_HDR | EO_TAIL;
2537 fd = &ctl->zsctl_proc_fd;
2538 eaf = &ctl->zsctl_proc_eaf;
2539 stat = &ctl->zsctl_proc_stat;
2540 open = &ctl->zsctl_proc_open;
2541 } else {
2542 flags = EO_NO_VALID_HDR | EO_HEAD;
2543 fd = &ctl->zsctl_proc_fd_next;
2544 eaf = &ctl->zsctl_proc_eaf_next;
2545 stat = &ctl->zsctl_proc_stat_next;
2546 open = &ctl->zsctl_proc_open_next;
2547 }
2548
2549 *fd = -1;
2550 *open = 0;
2551 retry:
2552 /* open accounting files for cpu consumption */
2553 ret = acctctl(AC_STATE_GET | AC_PROC, &state, sizeof (state));
2554 if (ret != 0) {
2555 zsd_warn(gettext("Unable to get process accounting state"));
2556 goto err;
2557 }
2558 if (state != AC_ON) {
2559 if (trys > 0) {
2560 zsd_warn(gettext(
2561 "Unable to enable process accounting"));
2562 goto err;
2563 }
2564 (void) zsd_enable_cpu_stats();
2565 trys++;
2566 goto retry;
2567 }
2568
2569 ret = acctctl(AC_FILE_GET | AC_PROC, path, sizeof (path));
2570 if (ret != 0) {
2571 zsd_warn(gettext("Unable to get process accounting file"));
2572 goto err;
2573 }
2574
2575 if ((*fd = open64(path, O_RDONLY, 0)) >= 0 &&
2576 (oret = ea_fdopen(eaf, *fd, NULL, flags, O_RDONLY)) == 0)
2577 ret = fstat64(*fd, stat);
2578
2579 if (*fd < 0 || oret < 0 || ret < 0) {
2580 struct timespec ts;
2581
2582 /*
2583 * It is possible the accounting file is momentarily unavailable
2584 * because it is being rolled. Try for up to half a second.
2585 *
2586 * If failure to open accounting file persists, give up.
2587 */
2588 if (oret == 0)
2589 (void) ea_close(eaf);
2590 else if (*fd >= 0)
2591 (void) close(*fd);
2592 if (trys > 500) {
2593 zsd_warn(gettext(
2594 "Unable to open process accounting file"));
2595 goto err;
2596 }
2597 /* wait one millisecond */
2598 ts.tv_sec = 0;
2599 ts.tv_nsec = NANOSEC / 1000;
2600 (void) nanosleep(&ts, NULL);
2601 goto retry;
2602 }
2603 *open = 1;
2604 return (0);
2605 err:
2606 if (*fd >= 0)
2607 (void) close(*fd);
2608 *open = 0;
2609 *fd = -1;
2610 return (-1);
2611 }
2612
2613 /*
2614 * Walk /proc and charge each process to its zone and processor set.
2615 * Then read exacct data for exited processes, and charge them as well.
2616 */
2617 static void
zsd_refresh_procs(zsd_ctl_t * ctl,boolean_t init)2618 zsd_refresh_procs(zsd_ctl_t *ctl, boolean_t init)
2619 {
2620 DIR *dir;
2621 struct dirent *dent;
2622 psinfo_t psinfo;
2623 int fd, ret;
2624 zsd_proc_t *proc, *pproc, *tmp, *next;
2625 list_t pplist, plist;
2626 zsd_zone_t *zone, *prev_zone;
2627 zsd_pset_t *pset, *prev_pset;
2628 psetid_t psetid, prev_psetid;
2629 zoneid_t zoneid, prev_zoneid;
2630 zsd_pset_usage_t *usage, *prev_usage;
2631 char path[MAXPATHLEN];
2632
2633 ea_object_t object;
2634 ea_object_t pobject;
2635 boolean_t hrtime_expired = B_FALSE;
2636 struct timeval interval_end;
2637
2638 timestruc_t delta, d1, d2;
2639 uint_t sched = 0;
2640
2641 /*
2642 * Get the current accounting file. The current accounting file
2643 * may be different than the file in use, as the accounting file
2644 * may have been rolled, or manually changed by an admin.
2645 */
2646 ret = zsd_open_exacct(ctl, init);
2647 if (ret != 0) {
2648 zsd_warn(gettext("Unable to track process accounting"));
2649 return;
2650 }
2651
2652 /*
2653 * Mark the current time as the interval end time. Don't track
2654 * processes that exit after this time.
2655 */
2656 (void) gettimeofday(&interval_end, NULL);
2657
2658 dir = opendir("/proc");
2659 if (dir == NULL) {
2660 zsd_warn(gettext("Unable to open /proc"));
2661 return;
2662 }
2663
2664 dent = ctl->zsctl_procfs_dent;
2665
2666 (void) memset(dent, 0, ctl->zsctl_procfs_dent_size);
2667
2668 /* Walk all processes and compute each zone's usage on each pset. */
2669 while (readdir_r(dir, dent) != 0) {
2670
2671 if (strcmp(dent->d_name, ".") == 0 ||
2672 strcmp(dent->d_name, "..") == 0)
2673 continue;
2674
2675 (void) snprintf(path, sizeof (path), "/proc/%s/psinfo",
2676 dent->d_name);
2677
2678 fd = open(path, O_RDONLY);
2679 if (fd < 0)
2680 continue;
2681
2682 if (read(fd, &psinfo, sizeof (psinfo)) != sizeof (psinfo)) {
2683 (void) close(fd);
2684 continue;
2685 }
2686 (void) close(fd);
2687
2688 zsd_get_proc_info(ctl, &psinfo, &psetid, &prev_psetid,
2689 &zoneid, &prev_zoneid, &delta, &sched);
2690
2691 d1.tv_sec = delta.tv_sec / 2;
2692 d1.tv_nsec = delta.tv_nsec / 2;
2693 d2.tv_sec = (delta.tv_sec / 2) + (delta.tv_sec % 2);
2694 d2.tv_nsec = (delta.tv_nsec / 2) + (delta.tv_nsec % 2);
2695
2696 /* Get the zone and pset this process is running in */
2697 zone = zsd_lookup_zone_byid(ctl, zoneid);
2698 if (zone == NULL)
2699 continue;
2700 pset = zsd_lookup_pset_byid(ctl, psetid);
2701 if (pset == NULL)
2702 continue;
2703 usage = zsd_lookup_insert_usage(ctl, pset, zone);
2704 if (usage == NULL)
2705 continue;
2706
2707 /*
2708 * Get the usage of the previous zone and pset if they were
2709 * different.
2710 */
2711 if (zoneid != prev_zoneid)
2712 prev_zone = zsd_lookup_zone_byid(ctl, prev_zoneid);
2713 else
2714 prev_zone = NULL;
2715
2716 if (psetid != prev_psetid)
2717 prev_pset = zsd_lookup_pset_byid(ctl, prev_psetid);
2718 else
2719 prev_pset = NULL;
2720
2721 prev_usage = NULL;
2722 if (prev_zone != NULL || prev_pset != NULL) {
2723 if (prev_zone == NULL)
2724 prev_zone = zone;
2725 if (prev_pset == NULL)
2726 prev_pset = pset;
2727
2728 prev_usage = zsd_lookup_insert_usage(ctl, prev_pset,
2729 prev_zone);
2730 }
2731
2732 /* Update the usage with the processes info */
2733 if (prev_usage == NULL) {
2734 zsd_mark_pset_usage_found(usage, sched);
2735 } else {
2736 zsd_mark_pset_usage_found(usage, sched);
2737 zsd_mark_pset_usage_found(prev_usage, sched);
2738 }
2739
2740 /*
2741 * First time around is just to get a starting point. All
2742 * usages will be zero.
2743 */
2744 if (init == B_TRUE)
2745 continue;
2746
2747 if (prev_usage == NULL) {
2748 zsd_add_usage(ctl, usage, &delta);
2749 } else {
2750 zsd_add_usage(ctl, usage, &d1);
2751 zsd_add_usage(ctl, prev_usage, &d2);
2752 }
2753 }
2754 (void) closedir(dir);
2755
2756 /*
2757 * No need to collect exited proc data on initialization. Just
2758 * caching the usage of the known processes to get a zero starting
2759 * point.
2760 */
2761 if (init == B_TRUE)
2762 return;
2763
2764 /*
2765 * Add accounting records to account for processes which have
2766 * exited.
2767 */
2768 list_create(&plist, sizeof (zsd_proc_t),
2769 offsetof(zsd_proc_t, zspr_next));
2770 list_create(&pplist, sizeof (zsd_proc_t),
2771 offsetof(zsd_proc_t, zspr_next));
2772
2773 for (;;) {
2774 pid_t pid;
2775 pid_t ppid;
2776 timestruc_t user, sys, proc_usage;
2777 timestruc_t finish;
2778 int numfound = 0;
2779
2780 bzero(&object, sizeof (object));
2781 proc = NULL;
2782 zone = NULL;
2783 pset = NULL;
2784 usage = NULL;
2785 ret = ea_get_object(&ctl->zsctl_proc_eaf, &object);
2786 if (ret == EO_ERROR) {
2787 if (ea_error() == EXR_EOF) {
2788
2789 struct stat64 *stat;
2790 struct stat64 *stat_next;
2791
2792 /*
2793 * See if the next accounting file is the
2794 * same as the current accounting file.
2795 */
2796 stat = &(ctl->zsctl_proc_stat);
2797 stat_next = &(ctl->zsctl_proc_stat_next);
2798 if (stat->st_ino == stat_next->st_ino &&
2799 stat->st_dev == stat_next->st_dev) {
2800 /*
2801 * End of current accounting file is
2802 * reached, so finished. Clear EOF
2803 * bit for next time around.
2804 */
2805 ea_clear(&ctl->zsctl_proc_eaf);
2806 break;
2807 } else {
2808 /*
2809 * Accounting file has changed. Move
2810 * to current accounting file.
2811 */
2812 (void) ea_close(&ctl->zsctl_proc_eaf);
2813
2814 ctl->zsctl_proc_fd =
2815 ctl->zsctl_proc_fd_next;
2816 ctl->zsctl_proc_eaf =
2817 ctl->zsctl_proc_eaf_next;
2818 ctl->zsctl_proc_stat =
2819 ctl->zsctl_proc_stat_next;
2820
2821 ctl->zsctl_proc_fd_next = -1;
2822 ctl->zsctl_proc_open_next = 0;
2823 continue;
2824 }
2825 } else {
2826 /*
2827 * Other accounting error. Give up on
2828 * accounting.
2829 */
2830 goto ea_err;
2831 }
2832 }
2833 /* Skip if not a process group */
2834 if ((object.eo_catalog & EXT_TYPE_MASK) != EXT_GROUP ||
2835 (object.eo_catalog & EXD_DATA_MASK) != EXD_GROUP_PROC) {
2836 (void) ea_free_item(&object, EUP_ALLOC);
2837 continue;
2838 }
2839
2840 /* The process group entry should be complete */
2841 while (numfound < 9) {
2842 bzero(&pobject, sizeof (pobject));
2843 ret = ea_get_object(&ctl->zsctl_proc_eaf,
2844 &pobject);
2845 if (ret < 0) {
2846 (void) ea_free_item(&object, EUP_ALLOC);
2847 zsd_warn(
2848 "unable to get process accounting data");
2849 goto ea_err;
2850 }
2851 /* Next entries should be process data */
2852 if ((pobject.eo_catalog & EXT_TYPE_MASK) ==
2853 EXT_GROUP) {
2854 (void) ea_free_item(&object, EUP_ALLOC);
2855 (void) ea_free_item(&pobject, EUP_ALLOC);
2856 zsd_warn(
2857 "process data of wrong type");
2858 goto ea_err;
2859 }
2860 switch (pobject.eo_catalog & EXD_DATA_MASK) {
2861 case EXD_PROC_PID:
2862 pid = pobject.eo_item.ei_uint32;
2863 proc = &(ctl->zsctl_proc_array[pid]);
2864 /*
2865 * This process should not be currently in
2866 * the list of processes to process.
2867 */
2868 assert(!list_link_active(&proc->zspr_next));
2869 numfound++;
2870 break;
2871 case EXD_PROC_ANCPID:
2872 ppid = pobject.eo_item.ei_uint32;
2873 pproc = &(ctl->zsctl_proc_array[ppid]);
2874 numfound++;
2875 break;
2876 case EXD_PROC_ZONENAME:
2877 zone = zsd_lookup_zone(ctl,
2878 pobject.eo_item.ei_string, -1);
2879 numfound++;
2880 break;
2881 case EXD_PROC_CPU_USER_SEC:
2882 user.tv_sec =
2883 pobject.eo_item.ei_uint64;
2884 numfound++;
2885 break;
2886 case EXD_PROC_CPU_USER_NSEC:
2887 user.tv_nsec =
2888 pobject.eo_item.ei_uint64;
2889 numfound++;
2890 break;
2891 case EXD_PROC_CPU_SYS_SEC:
2892 sys.tv_sec =
2893 pobject.eo_item.ei_uint64;
2894 numfound++;
2895 break;
2896 case EXD_PROC_CPU_SYS_NSEC:
2897 sys.tv_nsec =
2898 pobject.eo_item.ei_uint64;
2899 numfound++;
2900 break;
2901 case EXD_PROC_FINISH_SEC:
2902 finish.tv_sec =
2903 pobject.eo_item.ei_uint64;
2904 numfound++;
2905 break;
2906 case EXD_PROC_FINISH_NSEC:
2907 finish.tv_nsec =
2908 pobject.eo_item.ei_uint64;
2909 numfound++;
2910 break;
2911 }
2912 (void) ea_free_item(&pobject, EUP_ALLOC);
2913 }
2914 (void) ea_free_item(&object, EUP_ALLOC);
2915 if (numfound != 9) {
2916 zsd_warn(gettext(
2917 "Malformed process accounting entry found"));
2918 goto proc_done;
2919 }
2920
2921 if (finish.tv_sec > interval_end.tv_sec ||
2922 (finish.tv_sec == interval_end.tv_sec &&
2923 finish.tv_nsec > (interval_end.tv_usec * 1000)))
2924 hrtime_expired = B_TRUE;
2925
2926 /*
2927 * Try to identify the zone and pset to which this
2928 * exited process belongs.
2929 */
2930 if (zone == NULL)
2931 goto proc_done;
2932
2933 /* Save proc info */
2934 proc->zspr_ppid = ppid;
2935 proc->zspr_zoneid = zone->zsz_id;
2936
2937 prev_psetid = ZS_PSET_ERROR;
2938 sched = 0;
2939
2940 /*
2941 * The following tries to deduce the processes pset.
2942 *
2943 * First choose pset and sched using cached value from the
2944 * most recent time the process has been seen.
2945 *
2946 * pset and sched can change across zone_enter, so make sure
2947 * most recent sighting of this process was in the same
2948 * zone before using most recent known value.
2949 *
2950 * If there is no known value, use value of processes
2951 * parent. If parent is unknown, walk parents until a known
2952 * parent is found.
2953 *
2954 * If no parent in the zone is found, use the zone's default
2955 * pset and scheduling class.
2956 */
2957 if (proc->zspr_psetid != ZS_PSET_ERROR) {
2958 prev_psetid = proc->zspr_psetid;
2959 pset = zsd_lookup_pset_byid(ctl, prev_psetid);
2960 sched = proc->zspr_sched;
2961 } else if (pproc->zspr_zoneid == zone->zsz_id &&
2962 pproc->zspr_psetid != ZS_PSET_ERROR) {
2963 prev_psetid = pproc->zspr_psetid;
2964 pset = zsd_lookup_pset_byid(ctl, prev_psetid);
2965 sched = pproc->zspr_sched;
2966 }
2967
2968 if (pset == NULL) {
2969 /*
2970 * Process or processes parent has never been seen.
2971 * Save to deduce a known parent later.
2972 */
2973 proc_usage = sys;
2974 TIMESTRUC_ADD_TIMESTRUC(proc_usage, user);
2975 TIMESTRUC_DELTA(delta, proc_usage,
2976 proc->zspr_usage);
2977 proc->zspr_usage = delta;
2978 list_insert_tail(&plist, proc);
2979 continue;
2980 }
2981
2982 /* Add the zone's usage to the pset */
2983 usage = zsd_lookup_insert_usage(ctl, pset, zone);
2984 if (usage == NULL)
2985 goto proc_done;
2986
2987 zsd_mark_pset_usage_found(usage, sched);
2988
2989 /* compute the usage to add for the exited proc */
2990 proc_usage = sys;
2991 TIMESTRUC_ADD_TIMESTRUC(proc_usage, user);
2992 TIMESTRUC_DELTA(delta, proc_usage,
2993 proc->zspr_usage);
2994
2995 zsd_add_usage(ctl, usage, &delta);
2996 proc_done:
2997 zsd_flush_proc_info(proc);
2998
2999 if (hrtime_expired == B_TRUE)
3000 break;
3001 }
3002 /*
3003 * close next accounting file.
3004 */
3005 if (ctl->zsctl_proc_open_next) {
3006 (void) ea_close(
3007 &ctl->zsctl_proc_eaf_next);
3008 ctl->zsctl_proc_open_next = 0;
3009 ctl->zsctl_proc_fd_next = -1;
3010 }
3011
3012 /* For the remaining processes, use pset and sched of a known parent */
3013 proc = list_head(&plist);
3014 while (proc != NULL) {
3015 next = proc;
3016 for (;;) {
3017 if (next->zspr_ppid == 0 || next->zspr_ppid == -1) {
3018 /*
3019 * Kernel process, or parent is unknown, skip
3020 * process, remove from process list.
3021 */
3022 tmp = proc;
3023 proc = list_next(&plist, proc);
3024 list_link_init(&tmp->zspr_next);
3025 break;
3026 }
3027 pproc = &(ctl->zsctl_proc_array[next->zspr_ppid]);
3028 if (pproc->zspr_zoneid != proc->zspr_zoneid) {
3029 /*
3030 * Parent in different zone. Save process and
3031 * use zone's default pset and sched below
3032 */
3033 tmp = proc;
3034 proc = list_next(&plist, proc);
3035 list_remove(&plist, tmp);
3036 list_insert_tail(&pplist, tmp);
3037 break;
3038 }
3039 /* Parent has unknown pset, Search parent's parent */
3040 if (pproc->zspr_psetid == ZS_PSET_ERROR) {
3041 next = pproc;
3042 continue;
3043 }
3044 /* Found parent with known pset. Use its info */
3045 proc->zspr_psetid = pproc->zspr_psetid;
3046 proc->zspr_sched = pproc->zspr_sched;
3047 next->zspr_psetid = pproc->zspr_psetid;
3048 next->zspr_sched = pproc->zspr_sched;
3049 zone = zsd_lookup_zone_byid(ctl,
3050 proc->zspr_zoneid);
3051 if (zone == NULL) {
3052 tmp = proc;
3053 proc = list_next(&plist, proc);
3054 list_remove(&plist, tmp);
3055 list_link_init(&tmp->zspr_next);
3056 break;
3057 }
3058 pset = zsd_lookup_pset_byid(ctl,
3059 proc->zspr_psetid);
3060 if (pset == NULL) {
3061 tmp = proc;
3062 proc = list_next(&plist, proc);
3063 list_remove(&plist, tmp);
3064 list_link_init(&tmp->zspr_next);
3065 break;
3066 }
3067 /* Add the zone's usage to the pset */
3068 usage = zsd_lookup_insert_usage(ctl, pset, zone);
3069 if (usage == NULL) {
3070 tmp = proc;
3071 proc = list_next(&plist, proc);
3072 list_remove(&plist, tmp);
3073 list_link_init(&tmp->zspr_next);
3074 break;
3075 }
3076 zsd_mark_pset_usage_found(usage, proc->zspr_sched);
3077 zsd_add_usage(ctl, usage, &proc->zspr_usage);
3078 zsd_flush_proc_info(proc);
3079 tmp = proc;
3080 proc = list_next(&plist, proc);
3081 list_remove(&plist, tmp);
3082 list_link_init(&tmp->zspr_next);
3083 break;
3084 }
3085 }
3086 /*
3087 * Process has never been seen. Using zone info to
3088 * determine pset and scheduling class.
3089 */
3090 proc = list_head(&pplist);
3091 while (proc != NULL) {
3092
3093 zone = zsd_lookup_zone_byid(ctl, proc->zspr_zoneid);
3094 if (zone == NULL)
3095 goto next;
3096 if (zone->zsz_psetid != ZS_PSET_ERROR &&
3097 zone->zsz_psetid != ZS_PSET_MULTI) {
3098 prev_psetid = zone->zsz_psetid;
3099 pset = zsd_lookup_pset_byid(ctl, prev_psetid);
3100 } else {
3101 pset = zsd_lookup_pset(ctl, zone->zsz_pset, -1);
3102 if (pset != NULL)
3103 prev_psetid = pset->zsp_id;
3104 }
3105 if (pset == NULL)
3106 goto next;
3107
3108 sched = zone->zsz_scheds;
3109 /*
3110 * Ignore FX high scheduling class if it is not the
3111 * only scheduling class in the zone.
3112 */
3113 if (sched != ZS_SCHED_FX_60)
3114 sched &= (~ZS_SCHED_FX_60);
3115 /*
3116 * If more than one scheduling class has been found
3117 * in the zone, use zone's default scheduling class for
3118 * this process.
3119 */
3120 if ((sched & (sched - 1)) != 0)
3121 sched = zone->zsz_default_sched;
3122
3123 /* Add the zone's usage to the pset */
3124 usage = zsd_lookup_insert_usage(ctl, pset, zone);
3125 if (usage == NULL)
3126 goto next;
3127
3128 zsd_mark_pset_usage_found(usage, sched);
3129 zsd_add_usage(ctl, usage, &proc->zspr_usage);
3130 next:
3131 tmp = proc;
3132 proc = list_next(&pplist, proc);
3133 zsd_flush_proc_info(tmp);
3134 list_link_init(&tmp->zspr_next);
3135 }
3136 return;
3137 ea_err:
3138 /*
3139 * Close the next accounting file if we have not transitioned to it
3140 * yet.
3141 */
3142 if (ctl->zsctl_proc_open_next) {
3143 (void) ea_close(&ctl->zsctl_proc_eaf_next);
3144 ctl->zsctl_proc_open_next = 0;
3145 ctl->zsctl_proc_fd_next = -1;
3146 }
3147 }
3148
3149 /*
3150 * getvmusage(2) uses size_t's in the passwd data structure, which differ
3151 * in size for 32bit and 64 bit kernels. Since this is a contracted interface,
3152 * and zonestatd does not necessarily match the kernel's bitness, marshal
3153 * results appropriately.
3154 */
3155 static int
zsd_getvmusage(zsd_ctl_t * ctl,uint_t flags,time_t age,zsd_vmusage64_t * buf,uint64_t * nres)3156 zsd_getvmusage(zsd_ctl_t *ctl, uint_t flags, time_t age, zsd_vmusage64_t *buf,
3157 uint64_t *nres)
3158 {
3159 zsd_vmusage32_t *vmu32;
3160 zsd_vmusage64_t *vmu64;
3161 uint32_t nres32;
3162 int i;
3163 int ret;
3164
3165 if (ctl->zsctl_kern_bits == 32) {
3166 nres32 = *nres;
3167 ret = syscall(SYS_rusagesys, _RUSAGESYS_GETVMUSAGE,
3168 flags, age, (uintptr_t)buf, (uintptr_t)&nres32);
3169 *nres = nres32;
3170 if (ret == 0 && buf != NULL) {
3171 /*
3172 * An array of vmusage32_t's has been returned.
3173 * Convert it to an array of vmusage64_t's.
3174 */
3175 vmu32 = (zsd_vmusage32_t *)buf;
3176 vmu64 = (zsd_vmusage64_t *)buf;
3177 for (i = nres32 - 1; i >= 0; i--) {
3178
3179 vmu64[i].vmu_zoneid = vmu32[i].vmu_zoneid;
3180 vmu64[i].vmu_type = vmu32[i].vmu_type;
3181 vmu64[i].vmu_type = vmu32[i].vmu_type;
3182 vmu64[i].vmu_rss_all = vmu32[i].vmu_rss_all;
3183 vmu64[i].vmu_rss_private =
3184 vmu32[i].vmu_rss_private;
3185 vmu64[i].vmu_rss_shared =
3186 vmu32[i].vmu_rss_shared;
3187 vmu64[i].vmu_swap_all = vmu32[i].vmu_swap_all;
3188 vmu64[i].vmu_swap_private =
3189 vmu32[i].vmu_swap_private;
3190 vmu64[i].vmu_swap_shared =
3191 vmu32[i].vmu_swap_shared;
3192 }
3193 }
3194 return (ret);
3195 } else {
3196 /*
3197 * kernel is 64 bit, so use 64 bit structures as zonestat
3198 * expects.
3199 */
3200 return (syscall(SYS_rusagesys, _RUSAGESYS_GETVMUSAGE,
3201 flags, age, (uintptr_t)buf, (uintptr_t)nres));
3202
3203 }
3204 }
3205
3206 /*
3207 * Update the current physical, virtual, and locked memory usage of the
3208 * running zones.
3209 */
3210 static void
zsd_refresh_memory(zsd_ctl_t * ctl,boolean_t init)3211 zsd_refresh_memory(zsd_ctl_t *ctl, boolean_t init)
3212 {
3213
3214 uint64_t phys_total;
3215 uint64_t phys_used;
3216 uint64_t phys_zones;
3217 uint64_t phys_zones_overcount;
3218 uint64_t phys_zones_extra;
3219 uint64_t phys_zones_credit;
3220
3221 uint64_t vm_free;
3222 uint64_t vm_used;
3223
3224 uint64_t disk_swap_total;
3225 uint64_t disk_swap_used; /* disk swap with contents */
3226
3227 uint64_t physmem;
3228 uint64_t pp_kernel;
3229 uint64_t arc_size = 0;
3230 struct anoninfo ani;
3231
3232 int num_swap_devices;
3233 struct swaptable *swt;
3234 struct swapent *swent;
3235 size_t swt_size;
3236 char *path;
3237
3238 zsd_vmusage64_t *vmusage;
3239 uint64_t num_vmusage;
3240
3241 int i, ret;
3242
3243 zsd_system_t *sys;
3244 zsd_zone_t *zone;
3245 int vmu_nzones;
3246
3247 kstat_t *kstat;
3248 char kstat_name[KSTAT_STRLEN];
3249 kstat_named_t *knp;
3250 kid_t kid;
3251
3252 if (init)
3253 return;
3254
3255 sys = ctl->zsctl_system;
3256
3257 /* interrogate swap devices to find the amount of disk swap */
3258 disk_swap_again:
3259 num_swap_devices = swapctl(SC_GETNSWP, NULL);
3260
3261 if (num_swap_devices == 0) {
3262 sys->zss_swap_total = disk_swap_total = 0;
3263 sys->zss_swap_used = disk_swap_used = 0;
3264 /* No disk swap */
3265 goto disk_swap_done;
3266 }
3267 /* see if swap table needs to be larger */
3268 if (num_swap_devices > ctl->zsctl_swap_cache_num) {
3269 swt_size = sizeof (int) +
3270 (num_swap_devices * sizeof (struct swapent)) +
3271 (num_swap_devices * MAXPATHLEN);
3272 if (ctl->zsctl_swap_cache != NULL)
3273 free(ctl->zsctl_swap_cache);
3274
3275 swt = (struct swaptable *)malloc(swt_size);
3276 if (swt == NULL) {
3277 /*
3278 * Could not allocate to get list of swap devices.
3279 * Just use data from the most recent read, which will
3280 * be zero if this is the first read.
3281 */
3282 zsd_warn(gettext("Unable to allocate to determine "
3283 "virtual memory"));
3284 disk_swap_total = sys->zss_swap_total;
3285 disk_swap_used = sys->zss_swap_used;
3286 goto disk_swap_done;
3287 }
3288 swent = swt->swt_ent;
3289 path = (char *)swt + (sizeof (int) +
3290 num_swap_devices * sizeof (swapent_t));
3291 for (i = 0; i < num_swap_devices; i++, swent++) {
3292 swent->ste_path = path;
3293 path += MAXPATHLEN;
3294 }
3295 swt->swt_n = num_swap_devices;
3296 ctl->zsctl_swap_cache = swt;
3297 ctl->zsctl_swap_cache_size = swt_size;
3298 ctl->zsctl_swap_cache_num = num_swap_devices;
3299 }
3300 num_swap_devices = swapctl(SC_LIST, ctl->zsctl_swap_cache);
3301 if (num_swap_devices < 0) {
3302 /* More swap devices have arrived */
3303 if (errno == ENOMEM)
3304 goto disk_swap_again;
3305
3306 zsd_warn(gettext("Unable to determine disk swap devices"));
3307 /* Unexpected error. Use existing data */
3308 disk_swap_total = sys->zss_swap_total;
3309 disk_swap_used = sys->zss_swap_used;
3310 goto disk_swap_done;
3311 }
3312
3313 /* add up the disk swap */
3314 disk_swap_total = 0;
3315 disk_swap_used = 0;
3316 swent = ctl->zsctl_swap_cache->swt_ent;
3317 for (i = 0; i < num_swap_devices; i++, swent++) {
3318 disk_swap_total += swent->ste_pages;
3319 disk_swap_used += (swent->ste_pages - swent->ste_free);
3320 }
3321 disk_swap_total *= ctl->zsctl_pagesize;
3322 disk_swap_used *= ctl->zsctl_pagesize;
3323
3324 sys->zss_swap_total = disk_swap_total;
3325 sys->zss_swap_used = disk_swap_used;
3326
3327 disk_swap_done:
3328
3329 /* get system pages kstat */
3330 kid = -1;
3331 kstat = kstat_lookup(ctl->zsctl_kstat_ctl, "unix", 0, "system_pages");
3332 if (kstat == NULL)
3333 zsd_warn(gettext("Unable to lookup system pages kstat"));
3334 else
3335 kid = kstat_read(ctl->zsctl_kstat_ctl, kstat, NULL);
3336
3337 if (kid == -1) {
3338 zsd_warn(gettext("Unable to read system pages kstat"));
3339 return;
3340 } else {
3341 knp = kstat_data_lookup(kstat, "physmem");
3342 if (knp == NULL) {
3343 zsd_warn(gettext("Unable to read physmem"));
3344 } else {
3345 if (knp->data_type == KSTAT_DATA_UINT64)
3346 physmem = knp->value.ui64;
3347 else if (knp->data_type == KSTAT_DATA_UINT32)
3348 physmem = knp->value.ui32;
3349 else
3350 return;
3351 }
3352 knp = kstat_data_lookup(kstat, "pp_kernel");
3353 if (knp == NULL) {
3354 zsd_warn(gettext("Unable to read pp_kernel"));
3355 } else {
3356 if (knp->data_type == KSTAT_DATA_UINT64)
3357 pp_kernel = knp->value.ui64;
3358 else if (knp->data_type == KSTAT_DATA_UINT32)
3359 pp_kernel = knp->value.ui32;
3360 else
3361 return;
3362 }
3363 }
3364 physmem *= ctl->zsctl_pagesize;
3365 pp_kernel *= ctl->zsctl_pagesize;
3366
3367 /* get the zfs arc size if available */
3368 arc_size = 0;
3369 kid = -1;
3370 kstat = kstat_lookup(ctl->zsctl_kstat_ctl, "zfs", 0, "arcstats");
3371 if (kstat != NULL)
3372 kid = kstat_read(ctl->zsctl_kstat_ctl, kstat, NULL);
3373 if (kid != -1) {
3374 knp = kstat_data_lookup(kstat, "size");
3375 if (knp != NULL)
3376 if (knp->data_type == KSTAT_DATA_UINT64)
3377 arc_size = knp->value.ui64;
3378 }
3379
3380 /* Try to get swap information */
3381 if (swapctl(SC_AINFO, &ani) < 0) {
3382 zsd_warn(gettext("Unable to get swap info"));
3383 return;
3384 }
3385
3386 vmusage_again:
3387 /* getvmusage to get physical memory usage */
3388 vmusage = ctl->zsctl_vmusage_cache;
3389 num_vmusage = ctl->zsctl_vmusage_cache_num;
3390
3391 ret = zsd_getvmusage(ctl, VMUSAGE_SYSTEM | VMUSAGE_ALL_ZONES, 0,
3392 vmusage, &num_vmusage);
3393
3394 if (ret != 0) {
3395 /* Unexpected error. Use existing data */
3396 if (errno != EOVERFLOW) {
3397 zsd_warn(gettext(
3398 "Unable to read physical memory usage"));
3399 phys_zones = sys->zss_ram_zones;
3400 goto vmusage_done;
3401 }
3402 }
3403 /* vmusage results cache too small */
3404 if (num_vmusage > ctl->zsctl_vmusage_cache_num) {
3405
3406 size_t size = sizeof (zsd_vmusage64_t) * num_vmusage;
3407
3408 if (ctl->zsctl_vmusage_cache != NULL)
3409 free(ctl->zsctl_vmusage_cache);
3410 vmusage = (zsd_vmusage64_t *)malloc(size);
3411 if (vmusage == NULL) {
3412 zsd_warn(gettext("Unable to alloc to determine "
3413 "physical memory usage"));
3414 phys_zones = sys->zss_ram_zones;
3415 goto vmusage_done;
3416 }
3417 ctl->zsctl_vmusage_cache = vmusage;
3418 ctl->zsctl_vmusage_cache_num = num_vmusage;
3419 goto vmusage_again;
3420 }
3421
3422 phys_zones_overcount = 0;
3423 vmu_nzones = 0;
3424 for (i = 0; i < num_vmusage; i++) {
3425 switch (vmusage[i].vmu_type) {
3426 case VMUSAGE_SYSTEM:
3427 /* total pages backing user process mappings */
3428 phys_zones = sys->zss_ram_zones =
3429 vmusage[i].vmu_rss_all;
3430 break;
3431 case VMUSAGE_ZONE:
3432 vmu_nzones++;
3433 phys_zones_overcount += vmusage[i].vmu_rss_all;
3434 zone = zsd_lookup_zone_byid(ctl, vmusage[i].vmu_id);
3435 if (zone != NULL)
3436 zone->zsz_usage_ram = vmusage[i].vmu_rss_all;
3437 break;
3438 default:
3439 break;
3440 }
3441 }
3442 /*
3443 * Figure how much memory was double counted due to text sharing
3444 * between zones. Credit this back so that the sum of the zones
3445 * equals the total zone ram usage;
3446 */
3447 phys_zones_extra = phys_zones_overcount - phys_zones;
3448 phys_zones_credit = phys_zones_extra / vmu_nzones;
3449
3450 vmusage_done:
3451
3452 /* walk the zones to get swap and locked kstats. Fetch ram cap. */
3453 sys->zss_locked_zones = 0;
3454 sys->zss_vm_zones = 0;
3455 for (zone = list_head(&ctl->zsctl_zones); zone != NULL;
3456 zone = list_next(&ctl->zsctl_zones, zone)) {
3457
3458 /* If zone halted during interval, show memory usage as none */
3459 if (zone->zsz_active == B_FALSE ||
3460 zone->zsz_deleted == B_TRUE) {
3461 zone->zsz_usage_ram = 0;
3462 zone->zsz_usage_vm = 0;
3463 zone->zsz_usage_locked = 0;
3464 continue;
3465 }
3466
3467 if (phys_zones_credit > 0) {
3468 if (zone->zsz_usage_ram > phys_zones_credit) {
3469 zone->zsz_usage_ram -= phys_zones_credit;
3470 }
3471 }
3472 /*
3473 * Get zone's swap usage. Since zone could have halted,
3474 * treats as zero if cannot read
3475 */
3476 zone->zsz_usage_vm = 0;
3477 (void) snprintf(kstat_name, sizeof (kstat_name),
3478 "swapresv_zone_%d", zone->zsz_id);
3479 kid = -1;
3480 kstat = kstat_lookup(ctl->zsctl_kstat_ctl, "caps",
3481 zone->zsz_id, kstat_name);
3482 if (kstat != NULL)
3483 kid = kstat_read(ctl->zsctl_kstat_ctl, kstat, NULL);
3484 if (kid != -1) {
3485 knp = kstat_data_lookup(kstat, "usage");
3486 if (knp != NULL &&
3487 knp->data_type == KSTAT_DATA_UINT64) {
3488 zone->zsz_usage_vm = knp->value.ui64;
3489 sys->zss_vm_zones += knp->value.ui64;
3490 }
3491 }
3492 /*
3493 * Get zone's locked usage. Since zone could have halted,
3494 * treats as zero if cannot read
3495 */
3496 zone->zsz_usage_locked = 0;
3497 (void) snprintf(kstat_name, sizeof (kstat_name),
3498 "lockedmem_zone_%d", zone->zsz_id);
3499 kid = -1;
3500 kstat = kstat_lookup(ctl->zsctl_kstat_ctl, "caps",
3501 zone->zsz_id, kstat_name);
3502 if (kstat != NULL)
3503 kid = kstat_read(ctl->zsctl_kstat_ctl, kstat, NULL);
3504 if (kid != -1) {
3505 knp = kstat_data_lookup(kstat, "usage");
3506 if (knp != NULL &&
3507 knp->data_type == KSTAT_DATA_UINT64) {
3508 zone->zsz_usage_locked = knp->value.ui64;
3509 /*
3510 * Since locked memory accounting for zones
3511 * can double count ddi locked memory, cap each
3512 * zone's locked usage at its ram usage.
3513 */
3514 if (zone->zsz_usage_locked >
3515 zone->zsz_usage_ram)
3516 zone->zsz_usage_locked =
3517 zone->zsz_usage_ram;
3518 sys->zss_locked_zones +=
3519 zone->zsz_usage_locked;
3520 }
3521 }
3522 }
3523
3524 phys_total =
3525 sysconf(_SC_PHYS_PAGES) * ctl->zsctl_pagesize;
3526
3527 phys_used = (sysconf(_SC_PHYS_PAGES) - sysconf(_SC_AVPHYS_PAGES))
3528 * ctl->zsctl_pagesize;
3529
3530 /* Compute remaining statistics */
3531 sys->zss_ram_total = phys_total;
3532 sys->zss_ram_zones = phys_zones;
3533 sys->zss_ram_kern = phys_used - phys_zones - arc_size;
3534
3535 /*
3536 * The total for kernel locked memory should include
3537 * segkp locked pages, but oh well. The arc size is subtracted,
3538 * as that physical memory is reclaimable.
3539 */
3540 sys->zss_locked_kern = pp_kernel - arc_size;
3541 /* Add memory used by kernel startup and obp to kernel locked */
3542 if ((phys_total - physmem) > 0)
3543 sys->zss_locked_kern += phys_total - physmem;
3544
3545 /*
3546 * Add in the portion of (RAM+DISK) that is not available as swap,
3547 * and consider it swap used by the kernel.
3548 */
3549 sys->zss_vm_total = phys_total + disk_swap_total;
3550 vm_free = (ani.ani_max - ani.ani_resv) * ctl->zsctl_pagesize;
3551 vm_used = sys->zss_vm_total - vm_free;
3552 sys->zss_vm_kern = vm_used - sys->zss_vm_zones - arc_size;
3553 }
3554
/*
 * Charge each cpu's usage to its processor sets.  Also add the cpu's total
 * time to each zone using the processor set.  This tracks the maximum
 * amount of cpu time that a zone could have used.
 *
 * On the init pass only the hrtime baseline is recorded; psets and zones
 * are not charged, since there is no prior sample to diff against.
 */
static void
zsd_refresh_cpu_stats(zsd_ctl_t *ctl, boolean_t init)
{
	zsd_system_t *sys;
	zsd_zone_t *zone;
	zsd_pset_usage_t *usage;
	zsd_cpu_t *cpu;
	zsd_cpu_t *cpu_next;
	zsd_pset_t *pset;
	timestruc_t ts;
	uint64_t hrtime;
	timestruc_t delta;

	/* Update the per-cpu kstat data */
	cpu_next = list_head(&ctl->zsctl_cpus);
	while (cpu_next != NULL) {
		cpu = cpu_next;
		cpu_next = list_next(&ctl->zsctl_cpus, cpu);
		zsd_update_cpu_stats(ctl, cpu);
	}
	/* Update the elapsed real time */
	hrtime = gethrtime();
	if (init) {
		/* first time around, store hrtime for future comparison */
		ctl->zsctl_hrtime = hrtime;
		ctl->zsctl_hrtime_prev = hrtime;

	} else {
		/* Compute increase in hrtime since the most recent read */
		ctl->zsctl_hrtime_prev = ctl->zsctl_hrtime;
		ctl->zsctl_hrtime = hrtime;
		/* hrtime is reused here to hold the elapsed nanoseconds */
		if ((hrtime = hrtime - ctl->zsctl_hrtime_prev) > 0)
			TIMESTRUC_ADD_NANOSEC(ctl->zsctl_hrtime_total, hrtime);
	}

	/* On initialization, all psets have zero time */
	if (init)
		return;

	for (pset = list_head(&ctl->zsctl_psets); pset != NULL;
	    pset = list_next(&ctl->zsctl_psets, pset)) {

		if (pset->zsp_active == B_FALSE) {
			zsd_warn(gettext("Internal error,inactive pset found"));
			continue;
		}

		/* sum total used time for pset */
		ts.tv_sec = 0;
		ts.tv_nsec = 0;
		TIMESTRUC_ADD_TIMESTRUC(ts, pset->zsp_intr);
		TIMESTRUC_ADD_TIMESTRUC(ts, pset->zsp_kern);
		TIMESTRUC_ADD_TIMESTRUC(ts, pset->zsp_user);
		/* kernel time in pset is total time minus zone time */
		TIMESTRUC_DELTA(pset->zsp_usage_kern, ts,
		    pset->zsp_usage_zones);
		/* clamp at zero; rounding can make the delta go negative */
		if (pset->zsp_usage_kern.tv_sec < 0 ||
		    pset->zsp_usage_kern.tv_nsec < 0) {
			pset->zsp_usage_kern.tv_sec = 0;
			pset->zsp_usage_kern.tv_nsec = 0;
		}
		/* Total pset elapsed time is used time plus idle time */
		TIMESTRUC_ADD_TIMESTRUC(ts, pset->zsp_idle);

		/* delta is pset elapsed time since the previous sample */
		TIMESTRUC_DELTA(delta, ts, pset->zsp_total_time);

		for (usage = list_head(&pset->zsp_usage_list); usage != NULL;
		    usage = list_next(&pset->zsp_usage_list, usage)) {

			zone = usage->zsu_zone;
			if (usage->zsu_cpu_shares != ZS_LIMIT_NONE &&
			    usage->zsu_cpu_shares != ZS_SHARES_UNLIMITED &&
			    usage->zsu_cpu_shares != 0) {
				/*
				 * Figure out how many nanoseconds of share
				 * time to give to the zone: the pset's
				 * elapsed time scaled by the zone's fraction
				 * of the pset's total shares.
				 */
				hrtime = delta.tv_sec;
				hrtime *= NANOSEC;
				hrtime += delta.tv_nsec;
				hrtime *= usage->zsu_cpu_shares;
				hrtime /= pset->zsp_cpu_shares;
				TIMESTRUC_ADD_NANOSEC(zone->zsz_share_time,
				    hrtime);
			}
			/* Add pset time to each zone using pset */
			TIMESTRUC_ADD_TIMESTRUC(zone->zsz_pset_time, delta);

			zone->zsz_cpus_online += pset->zsp_online;
		}
		pset->zsp_total_time = ts;
	}

	for (zone = list_head(&ctl->zsctl_zones); zone != NULL;
	    zone = list_next(&ctl->zsctl_zones, zone)) {

		/* update cpu cap tracking if the zone has a cpu cap */
		if (zone->zsz_cpu_cap != ZS_LIMIT_NONE) {
			uint64_t elapsed;

			/* cap is in percent-of-a-cpu units, hence / 100 */
			elapsed = ctl->zsctl_hrtime - ctl->zsctl_hrtime_prev;
			elapsed *= zone->zsz_cpu_cap;
			elapsed = elapsed / 100;
			TIMESTRUC_ADD_NANOSEC(zone->zsz_cap_time, elapsed);
		}
	}
	/* Repeat the pset computation for the system-wide aggregates */
	sys = ctl->zsctl_system;
	ts.tv_sec = 0;
	ts.tv_nsec = 0;
	TIMESTRUC_ADD_TIMESTRUC(ts, sys->zss_intr);
	TIMESTRUC_ADD_TIMESTRUC(ts, sys->zss_kern);
	TIMESTRUC_ADD_TIMESTRUC(ts, sys->zss_user);

	/* kernel time in pset is total time minus zone time */
	TIMESTRUC_DELTA(sys->zss_cpu_usage_kern, ts,
	    sys->zss_cpu_usage_zones);
	if (sys->zss_cpu_usage_kern.tv_sec < 0 ||
	    sys->zss_cpu_usage_kern.tv_nsec < 0) {
		sys->zss_cpu_usage_kern.tv_sec = 0;
		sys->zss_cpu_usage_kern.tv_nsec = 0;
	}
	/* Total pset elapsed time is used time plus idle time */
	TIMESTRUC_ADD_TIMESTRUC(ts, sys->zss_idle);
	sys->zss_cpu_total_time = ts;
}
3685
3686 /*
3687 * Saves current usage data to a cache that is read by libzonestat when
3688 * calling zs_usage_read().
3689 *
3690 * All pointers in the cached data structure are set to NULL. When
3691 * libzonestat reads the cached data, it will set the pointers relative to
3692 * its address space.
3693 */
3694 static void
zsd_usage_cache_update(zsd_ctl_t * ctl)3695 zsd_usage_cache_update(zsd_ctl_t *ctl)
3696 {
3697 zs_usage_cache_t *cache;
3698 zs_usage_cache_t *old;
3699 zs_usage_t *usage;
3700
3701 zs_system_t *sys;
3702 zsd_system_t *dsys;
3703 zs_zone_t *zone = NULL;
3704 zsd_zone_t *dzone;
3705 zs_pset_t *pset = NULL;
3706 zsd_pset_t *dpset;
3707 zs_pset_zone_t *pusage;
3708 zsd_pset_usage_t *dpusage;
3709
3710 char *next;
3711 uint_t size, i, j;
3712
3713 size =
3714 sizeof (zs_usage_cache_t) +
3715 sizeof (zs_usage_t) +
3716 sizeof (zs_system_t) +
3717 sizeof (zs_zone_t) * ctl->zsctl_nzones +
3718 sizeof (zs_pset_t) * ctl->zsctl_npsets +
3719 sizeof (zs_pset_zone_t) * ctl->zsctl_npset_usages;
3720
3721 cache = (zs_usage_cache_t *)malloc(size);
3722 if (cache == NULL) {
3723 zsd_warn(gettext("Unable to allocate usage cache\n"));
3724 return;
3725 }
3726
3727 next = (char *)cache;
3728 cache->zsuc_size = size - sizeof (zs_usage_cache_t);
3729 next += sizeof (zs_usage_cache_t);
3730
3731 /* LINTED */
3732 usage = cache->zsuc_usage = (zs_usage_t *)next;
3733 next += sizeof (zs_usage_t);
3734 usage->zsu_start = g_start;
3735 usage->zsu_hrstart = g_hrstart;
3736 usage->zsu_time = g_now;
3737 usage->zsu_hrtime = g_hrnow;
3738 usage->zsu_nzones = ctl->zsctl_nzones;
3739 usage->zsu_npsets = ctl->zsctl_npsets;
3740 usage->zsu_system = NULL;
3741
3742 /* LINTED */
3743 sys = (zs_system_t *)next;
3744 next += sizeof (zs_system_t);
3745 dsys = ctl->zsctl_system;
3746 sys->zss_ram_total = dsys->zss_ram_total;
3747 sys->zss_ram_kern = dsys->zss_ram_kern;
3748 sys->zss_ram_zones = dsys->zss_ram_zones;
3749 sys->zss_locked_kern = dsys->zss_locked_kern;
3750 sys->zss_locked_zones = dsys->zss_locked_zones;
3751 sys->zss_vm_total = dsys->zss_vm_total;
3752 sys->zss_vm_kern = dsys->zss_vm_kern;
3753 sys->zss_vm_zones = dsys->zss_vm_zones;
3754 sys->zss_swap_total = dsys->zss_swap_total;
3755 sys->zss_swap_used = dsys->zss_swap_used;
3756 sys->zss_ncpus = dsys->zss_ncpus;
3757 sys->zss_ncpus_online = dsys->zss_ncpus_online;
3758
3759 sys->zss_processes_max = dsys->zss_maxpid;
3760 sys->zss_lwps_max = dsys->zss_lwps_max;
3761 sys->zss_shm_max = dsys->zss_shm_max;
3762 sys->zss_shmids_max = dsys->zss_shmids_max;
3763 sys->zss_semids_max = dsys->zss_semids_max;
3764 sys->zss_msgids_max = dsys->zss_msgids_max;
3765 sys->zss_lofi_max = dsys->zss_lofi_max;
3766
3767 sys->zss_processes = dsys->zss_processes;
3768 sys->zss_lwps = dsys->zss_lwps;
3769 sys->zss_shm = dsys->zss_shm;
3770 sys->zss_shmids = dsys->zss_shmids;
3771 sys->zss_semids = dsys->zss_semids;
3772 sys->zss_msgids = dsys->zss_msgids;
3773 sys->zss_lofi = dsys->zss_lofi;
3774
3775 sys->zss_cpu_total_time = dsys->zss_cpu_total_time;
3776 sys->zss_cpu_usage_zones = dsys->zss_cpu_usage_zones;
3777 sys->zss_cpu_usage_kern = dsys->zss_cpu_usage_kern;
3778
3779 for (i = 0, dzone = list_head(&ctl->zsctl_zones);
3780 i < ctl->zsctl_nzones;
3781 i++, dzone = list_next(&ctl->zsctl_zones, dzone)) {
3782 /* LINTED */
3783 zone = (zs_zone_t *)next;
3784 next += sizeof (zs_zone_t);
3785 list_link_init(&zone->zsz_next);
3786 zone->zsz_system = NULL;
3787
3788 (void) strlcpy(zone->zsz_name, dzone->zsz_name,
3789 sizeof (zone->zsz_name));
3790 (void) strlcpy(zone->zsz_pool, dzone->zsz_pool,
3791 sizeof (zone->zsz_pool));
3792 (void) strlcpy(zone->zsz_pset, dzone->zsz_pset,
3793 sizeof (zone->zsz_pset));
3794 zone->zsz_id = dzone->zsz_id;
3795 zone->zsz_cputype = dzone->zsz_cputype;
3796 zone->zsz_iptype = dzone->zsz_iptype;
3797 zone->zsz_start = dzone->zsz_start;
3798 zone->zsz_hrstart = dzone->zsz_hrstart;
3799 zone->zsz_scheds = dzone->zsz_scheds;
3800 zone->zsz_cpu_shares = dzone->zsz_cpu_shares;
3801 zone->zsz_cpu_cap = dzone->zsz_cpu_cap;
3802 zone->zsz_ram_cap = dzone->zsz_ram_cap;
3803 zone->zsz_vm_cap = dzone->zsz_vm_cap;
3804 zone->zsz_locked_cap = dzone->zsz_locked_cap;
3805 zone->zsz_cpu_usage = dzone->zsz_cpu_usage;
3806 zone->zsz_cpus_online = dzone->zsz_cpus_online;
3807 zone->zsz_pset_time = dzone->zsz_pset_time;
3808 zone->zsz_cap_time = dzone->zsz_cap_time;
3809 zone->zsz_share_time = dzone->zsz_share_time;
3810 zone->zsz_usage_ram = dzone->zsz_usage_ram;
3811 zone->zsz_usage_locked = dzone->zsz_usage_locked;
3812 zone->zsz_usage_vm = dzone->zsz_usage_vm;
3813
3814 zone->zsz_processes_cap = dzone->zsz_processes_cap;
3815 zone->zsz_lwps_cap = dzone->zsz_lwps_cap;
3816 zone->zsz_shm_cap = dzone->zsz_shm_cap;
3817 zone->zsz_shmids_cap = dzone->zsz_shmids_cap;
3818 zone->zsz_semids_cap = dzone->zsz_semids_cap;
3819 zone->zsz_msgids_cap = dzone->zsz_msgids_cap;
3820 zone->zsz_lofi_cap = dzone->zsz_lofi_cap;
3821
3822 zone->zsz_processes = dzone->zsz_processes;
3823 zone->zsz_lwps = dzone->zsz_lwps;
3824 zone->zsz_shm = dzone->zsz_shm;
3825 zone->zsz_shmids = dzone->zsz_shmids;
3826 zone->zsz_semids = dzone->zsz_semids;
3827 zone->zsz_msgids = dzone->zsz_msgids;
3828 zone->zsz_lofi = dzone->zsz_lofi;
3829 }
3830
3831 for (i = 0, dpset = list_head(&ctl->zsctl_psets);
3832 i < ctl->zsctl_npsets;
3833 i++, dpset = list_next(&ctl->zsctl_psets, dpset)) {
3834 /* LINTED */
3835 pset = (zs_pset_t *)next;
3836 next += sizeof (zs_pset_t);
3837 list_link_init(&pset->zsp_next);
3838 (void) strlcpy(pset->zsp_name, dpset->zsp_name,
3839 sizeof (pset->zsp_name));
3840 pset->zsp_id = dpset->zsp_id;
3841 pset->zsp_cputype = dpset->zsp_cputype;
3842 pset->zsp_start = dpset->zsp_start;
3843 pset->zsp_hrstart = dpset->zsp_hrstart;
3844 pset->zsp_online = dpset->zsp_online;
3845 pset->zsp_size = dpset->zsp_size;
3846 pset->zsp_min = dpset->zsp_min;
3847 pset->zsp_max = dpset->zsp_max;
3848 pset->zsp_importance = dpset->zsp_importance;
3849 pset->zsp_scheds = dpset->zsp_scheds;
3850 pset->zsp_cpu_shares = dpset->zsp_cpu_shares;
3851 pset->zsp_total_time = dpset->zsp_total_time;
3852 pset->zsp_usage_kern = dpset->zsp_usage_kern;
3853 pset->zsp_usage_zones = dpset->zsp_usage_zones;
3854 pset->zsp_nusage = dpset->zsp_nusage;
3855 /* Add pset usages for pset */
3856 for (j = 0, dpusage = list_head(&dpset->zsp_usage_list);
3857 j < dpset->zsp_nusage;
3858 j++, dpusage = list_next(&dpset->zsp_usage_list, dpusage)) {
3859 /* LINTED */
3860 pusage = (zs_pset_zone_t *)next;
3861 next += sizeof (zs_pset_zone_t);
3862 /* pointers are computed by client */
3863 pusage->zspz_pset = NULL;
3864 pusage->zspz_zone = NULL;
3865 list_link_init(&pusage->zspz_next);
3866 pusage->zspz_zoneid = dpusage->zsu_zone->zsz_id;
3867 pusage->zspz_start = dpusage->zsu_start;
3868 pusage->zspz_hrstart = dpusage->zsu_hrstart;
3869 pusage->zspz_hrstart = dpusage->zsu_hrstart;
3870 pusage->zspz_cpu_shares = dpusage->zsu_cpu_shares;
3871 pusage->zspz_scheds = dpusage->zsu_scheds;
3872 pusage->zspz_cpu_usage = dpusage->zsu_cpu_usage;
3873 }
3874 }
3875
3876 /* Update the current cache pointer */
3877 (void) mutex_lock(&g_usage_cache_lock);
3878 old = g_usage_cache;
3879 cache->zsuc_ref = 1;
3880 cache->zsuc_gen = g_gen_next;
3881 usage->zsu_gen = g_gen_next;
3882 usage->zsu_size = size;
3883 g_usage_cache = cache;
3884 if (old != NULL) {
3885 old->zsuc_ref--;
3886 if (old->zsuc_ref == 0)
3887 free(old);
3888 }
3889 g_gen_next++;
3890 /* Wake up any clients that are waiting for this calculation */
3891 if (g_usage_cache_kickers > 0) {
3892 (void) cond_broadcast(&g_usage_cache_wait);
3893 }
3894 (void) mutex_unlock(&g_usage_cache_lock);
3895 }
3896
3897 static zs_usage_cache_t *
zsd_usage_cache_hold_locked()3898 zsd_usage_cache_hold_locked()
3899 {
3900 zs_usage_cache_t *ret;
3901
3902 ret = g_usage_cache;
3903 ret->zsuc_ref++;
3904 return (ret);
3905 }
3906
3907 void
zsd_usage_cache_rele(zs_usage_cache_t * cache)3908 zsd_usage_cache_rele(zs_usage_cache_t *cache)
3909 {
3910 (void) mutex_lock(&g_usage_cache_lock);
3911 cache->zsuc_ref--;
3912 if (cache->zsuc_ref == 0)
3913 free(cache);
3914 (void) mutex_unlock(&g_usage_cache_lock);
3915 }
3916
3917 /* Close the handles held by zsd_open() */
3918 void
zsd_close(zsd_ctl_t * ctl)3919 zsd_close(zsd_ctl_t *ctl)
3920 {
3921 zsd_zone_t *zone;
3922 zsd_pset_t *pset;
3923 zsd_pset_usage_t *usage;
3924 zsd_cpu_t *cpu;
3925 int id;
3926
3927 if (ctl->zsctl_kstat_ctl) {
3928 (void) kstat_close(ctl->zsctl_kstat_ctl);
3929 ctl->zsctl_kstat_ctl = NULL;
3930 }
3931 if (ctl->zsctl_proc_open) {
3932 (void) ea_close(&ctl->zsctl_proc_eaf);
3933 ctl->zsctl_proc_open = 0;
3934 ctl->zsctl_proc_fd = -1;
3935 }
3936 if (ctl->zsctl_pool_conf) {
3937 if (ctl->zsctl_pool_status == POOL_ENABLED)
3938 (void) pool_conf_close(ctl->zsctl_pool_conf);
3939 ctl->zsctl_pool_status = POOL_DISABLED;
3940 }
3941
3942 while ((zone = list_head(&ctl->zsctl_zones)) != NULL) {
3943 list_remove(&ctl->zsctl_zones, zone);
3944 free(zone);
3945 ctl->zsctl_nzones--;
3946 }
3947
3948 while ((pset = list_head(&ctl->zsctl_psets)) != NULL) {
3949 while ((usage = list_head(&pset->zsp_usage_list))
3950 != NULL) {
3951 list_remove(&pset->zsp_usage_list, usage);
3952 ctl->zsctl_npset_usages--;
3953 free(usage);
3954 }
3955 list_remove(&ctl->zsctl_psets, pset);
3956 free(pset);
3957 ctl->zsctl_npsets--;
3958 }
3959
3960 /* Release all cpus being tracked */
3961 while (cpu = list_head(&ctl->zsctl_cpus)) {
3962 list_remove(&ctl->zsctl_cpus, cpu);
3963 id = cpu->zsc_id;
3964 bzero(cpu, sizeof (zsd_cpu_t));
3965 cpu->zsc_id = id;
3966 cpu->zsc_allocated = B_FALSE;
3967 cpu->zsc_psetid = ZS_PSET_ERROR;
3968 cpu->zsc_psetid_prev = ZS_PSET_ERROR;
3969 }
3970
3971 assert(ctl->zsctl_npset_usages == 0);
3972 assert(ctl->zsctl_npsets == 0);
3973 assert(ctl->zsctl_nzones == 0);
3974 (void) zsd_disable_cpu_stats();
3975 }
3976
3977
3978 /*
3979 * Update the utilization data for all zones and processor sets.
3980 */
3981 static int
zsd_read(zsd_ctl_t * ctl,boolean_t init,boolean_t do_memory)3982 zsd_read(zsd_ctl_t *ctl, boolean_t init, boolean_t do_memory)
3983 {
3984 (void) kstat_chain_update(ctl->zsctl_kstat_ctl);
3985 (void) gettimeofday(&(ctl->zsctl_timeofday), NULL);
3986
3987 zsd_refresh_system(ctl);
3988
3989 /*
3990 * Memory calculation is expensive. Only update it on sample
3991 * intervals.
3992 */
3993 if (do_memory == B_TRUE)
3994 zsd_refresh_memory(ctl, init);
3995 zsd_refresh_zones(ctl);
3996 zsd_refresh_psets(ctl);
3997 zsd_refresh_procs(ctl, init);
3998 zsd_refresh_cpu_stats(ctl, init);
3999
4000 /*
4001 * Delete objects that no longer exist.
4002 * Pset usages must be deleted first as they point to zone and
4003 * pset objects.
4004 */
4005 zsd_mark_pset_usages_end(ctl);
4006 zsd_mark_psets_end(ctl);
4007 zsd_mark_cpus_end(ctl);
4008 zsd_mark_zones_end(ctl);
4009
4010 /*
4011 * Save results for clients.
4012 */
4013 zsd_usage_cache_update(ctl);
4014
4015 /*
4016 * Roll process accounting file.
4017 */
4018 (void) zsd_roll_exacct();
4019 return (0);
4020 }
4021
4022 /*
4023 * Get the system rctl, which is the upper most limit
4024 */
4025 static uint64_t
zsd_get_system_rctl(char * name)4026 zsd_get_system_rctl(char *name)
4027 {
4028 rctlblk_t *rblk, *rblk_last;
4029
4030 rblk = (rctlblk_t *)alloca(rctlblk_size());
4031 rblk_last = (rctlblk_t *)alloca(rctlblk_size());
4032
4033 if (getrctl(name, NULL, rblk_last, RCTL_FIRST) != 0)
4034 return (ZS_LIMIT_NONE);
4035
4036 while (getrctl(name, rblk_last, rblk, RCTL_NEXT) == 0)
4037 (void) bcopy(rblk, rblk_last, rctlblk_size());
4038
4039 return (rctlblk_get_value(rblk_last));
4040 }
4041
4042 /*
4043 * Open any necessary subsystems for collecting utilization data,
4044 * allocate and initialize data structures, and get initial utilization.
4045 *
4046 * Errors:
4047 * ENOMEM out of memory
4048 * EINVAL other error
4049 */
4050 static zsd_ctl_t *
zsd_open(zsd_ctl_t * ctl)4051 zsd_open(zsd_ctl_t *ctl)
4052 {
4053 zsd_system_t *system;
4054
4055 char path[MAXPATHLEN];
4056 long pathmax;
4057 struct statvfs svfs;
4058 int ret;
4059 int i;
4060 size_t size;
4061 int err;
4062
4063 if (ctl == NULL && (ctl = (zsd_ctl_t *)calloc(1,
4064 sizeof (zsd_ctl_t))) == NULL) {
4065 zsd_warn(gettext("Out of Memory"));
4066 errno = ENOMEM;
4067 goto err;
4068 }
4069 ctl->zsctl_proc_fd = -1;
4070
4071 /* open kstats */
4072 if (ctl->zsctl_kstat_ctl == NULL &&
4073 (ctl->zsctl_kstat_ctl = kstat_open()) == NULL) {
4074 err = errno;
4075 zsd_warn(gettext("Unable to open kstats"));
4076 errno = err;
4077 if (errno != ENOMEM)
4078 errno = EAGAIN;
4079 goto err;
4080 }
4081
4082 /*
4083 * These are set when the accounting file is opened by
4084 * zsd_update_procs()
4085 */
4086 ctl->zsctl_proc_fd = -1;
4087 ctl->zsctl_proc_fd_next = -1;
4088 ctl->zsctl_proc_open = 0;
4089 ctl->zsctl_proc_open_next = 0;
4090
4091 check_exacct:
4092 (void) zsd_enable_cpu_stats();
4093
4094 /* Create structures to track usage */
4095 if (ctl->zsctl_system == NULL && (ctl->zsctl_system = (zsd_system_t *)
4096 calloc(1, sizeof (zsd_system_t))) == NULL) {
4097 ret = -1;
4098 zsd_warn(gettext("Out of Memory"));
4099 errno = ENOMEM;
4100 goto err;
4101 }
4102 system = ctl->zsctl_system;
4103 /* get the kernel bitness to know structure layout for getvmusage */
4104 ret = sysinfo(SI_ARCHITECTURE_64, path, sizeof (path));
4105 if (ret < 0)
4106 ctl->zsctl_kern_bits = 32;
4107 else
4108 ctl->zsctl_kern_bits = 64;
4109 ctl->zsctl_pagesize = sysconf(_SC_PAGESIZE);
4110
4111 size = sysconf(_SC_CPUID_MAX);
4112 ctl->zsctl_maxcpuid = size;
4113 if (ctl->zsctl_cpu_array == NULL && (ctl->zsctl_cpu_array =
4114 (zsd_cpu_t *)calloc(size + 1, sizeof (zsd_cpu_t))) == NULL) {
4115 zsd_warn(gettext("Out of Memory"));
4116 errno = ENOMEM;
4117 goto err;
4118 }
4119 for (i = 0; i <= ctl->zsctl_maxcpuid; i++) {
4120 ctl->zsctl_cpu_array[i].zsc_id = i;
4121 ctl->zsctl_cpu_array[i].zsc_allocated = B_FALSE;
4122 ctl->zsctl_cpu_array[i].zsc_psetid = ZS_PSET_ERROR;
4123 ctl->zsctl_cpu_array[i].zsc_psetid_prev = ZS_PSET_ERROR;
4124 }
4125 if (statvfs("/proc", &svfs) != 0 ||
4126 strcmp("/proc", svfs.f_fstr) != 0) {
4127 zsd_warn(gettext("/proc not a procfs filesystem"));
4128 errno = EINVAL;
4129 goto err;
4130 }
4131
4132 size = sysconf(_SC_MAXPID) + 1;
4133 ctl->zsctl_maxproc = size;
4134 if (ctl->zsctl_proc_array == NULL &&
4135 (ctl->zsctl_proc_array = (zsd_proc_t *)calloc(size,
4136 sizeof (zsd_proc_t))) == NULL) {
4137 zsd_warn(gettext("Out of Memory"));
4138 errno = ENOMEM;
4139 goto err;
4140 }
4141 for (i = 0; i <= ctl->zsctl_maxproc; i++) {
4142 list_link_init(&(ctl->zsctl_proc_array[i].zspr_next));
4143 ctl->zsctl_proc_array[i].zspr_psetid = ZS_PSET_ERROR;
4144 ctl->zsctl_proc_array[i].zspr_zoneid = -1;
4145 ctl->zsctl_proc_array[i].zspr_usage.tv_sec = 0;
4146 ctl->zsctl_proc_array[i].zspr_usage.tv_nsec = 0;
4147 ctl->zsctl_proc_array[i].zspr_ppid = -1;
4148 }
4149
4150 list_create(&ctl->zsctl_zones, sizeof (zsd_zone_t),
4151 offsetof(zsd_zone_t, zsz_next));
4152
4153 list_create(&ctl->zsctl_psets, sizeof (zsd_pset_t),
4154 offsetof(zsd_pset_t, zsp_next));
4155
4156 list_create(&ctl->zsctl_cpus, sizeof (zsd_cpu_t),
4157 offsetof(zsd_cpu_t, zsc_next));
4158
4159 pathmax = pathconf("/proc", _PC_NAME_MAX);
4160 if (pathmax < 0) {
4161 zsd_warn(gettext("Unable to determine max path of /proc"));
4162 errno = EINVAL;
4163 goto err;
4164 }
4165 size = sizeof (struct dirent) + pathmax + 1;
4166
4167 ctl->zsctl_procfs_dent_size = size;
4168 if (ctl->zsctl_procfs_dent == NULL &&
4169 (ctl->zsctl_procfs_dent = (struct dirent *)calloc(1, size))
4170 == NULL) {
4171 zsd_warn(gettext("Out of Memory"));
4172 errno = ENOMEM;
4173 goto err;
4174 }
4175
4176 if (ctl->zsctl_pool_conf == NULL &&
4177 (ctl->zsctl_pool_conf = pool_conf_alloc()) == NULL) {
4178 zsd_warn(gettext("Out of Memory"));
4179 errno = ENOMEM;
4180 goto err;
4181 }
4182 ctl->zsctl_pool_status = POOL_DISABLED;
4183 ctl->zsctl_pool_changed = 0;
4184
4185 if (ctl->zsctl_pool_vals[0] == NULL &&
4186 (ctl->zsctl_pool_vals[0] = pool_value_alloc()) == NULL) {
4187 zsd_warn(gettext("Out of Memory"));
4188 errno = ENOMEM;
4189 goto err;
4190 }
4191 if (ctl->zsctl_pool_vals[1] == NULL &&
4192 (ctl->zsctl_pool_vals[1] = pool_value_alloc()) == NULL) {
4193 zsd_warn(gettext("Out of Memory"));
4194 errno = ENOMEM;
4195 goto err;
4196 }
4197 ctl->zsctl_pool_vals[2] = NULL;
4198
4199 /*
4200 * get system limits
4201 */
4202 system->zss_maxpid = size = sysconf(_SC_MAXPID);
4203 system->zss_processes_max = zsd_get_system_rctl("zone.max-processes");
4204 system->zss_lwps_max = zsd_get_system_rctl("zone.max-lwps");
4205 system->zss_shm_max = zsd_get_system_rctl("zone.max-shm-memory");
4206 system->zss_shmids_max = zsd_get_system_rctl("zone.max-shm-ids");
4207 system->zss_semids_max = zsd_get_system_rctl("zone.max-sem-ids");
4208 system->zss_msgids_max = zsd_get_system_rctl("zone.max-msg-ids");
4209 system->zss_lofi_max = zsd_get_system_rctl("zone.max-lofi");
4210
4211 g_gen_next = 1;
4212
4213 if (zsd_read(ctl, B_TRUE, B_FALSE) != 0)
4214 zsd_warn(gettext("Reading zone statistics failed"));
4215
4216 return (ctl);
4217 err:
4218 if (ctl)
4219 zsd_close(ctl);
4220
4221 return (NULL);
4222 }
4223
/*
 * Copy utilization data to buffer, filtering data if non-global zone.
 *
 * The usage buffers are flat serializations laid out as:
 *
 *	zs_usage_t, zs_system_t, zs_zone_t[nzones],
 *	{ zs_pset_t, zs_pset_zone_t[nusage] } per pset
 *
 * A privileged global-zone caller gets the cached buffer verbatim.  A
 * non-global zone caller gets only its own zone record and the pset
 * records it appears in, with the system-wide totals rewritten so that
 * all other zones' usage is folded into the kernel figures.
 *
 * zid:		zone id of the calling client
 * cache:	current cached usage snapshot (source)
 * usage:	destination buffer; sized from cache->zsuc_size by caller
 * is_gz:	B_TRUE if the caller is in the global zone
 */
static void
zsd_usage_filter(zoneid_t zid, zs_usage_cache_t *cache, zs_usage_t *usage,
    boolean_t is_gz)
{
	zs_usage_t *cusage;
	zs_system_t *sys, *csys;
	zs_zone_t *zone, *czone;
	zs_pset_t *pset, *cpset;
	zs_pset_zone_t *pz, *cpz, *foundpz;
	size_t size = 0, csize = 0;	/* write/read offsets into buffers */
	char *start, *cstart;
	int i, j;
	timestruc_t delta;

	/* Privileged users in the global zone get everything */
	if (is_gz) {
		cusage = cache->zsuc_usage;
		(void) bcopy(cusage, usage, cusage->zsu_size);
		return;
	}

	/* Zones just get their own usage */
	cusage = cache->zsuc_usage;

	start = (char *)usage;
	cstart = (char *)cusage;
	size += sizeof (zs_usage_t);
	csize += sizeof (zs_usage_t);

	/* Copy snapshot times/generation; output holds exactly one zone. */
	usage->zsu_start = cusage->zsu_start;
	usage->zsu_hrstart = cusage->zsu_hrstart;
	usage->zsu_time = cusage->zsu_time;
	usage->zsu_hrtime = cusage->zsu_hrtime;
	usage->zsu_gen = cusage->zsu_gen;
	usage->zsu_nzones = 1;
	usage->zsu_npsets = 0;

	/* LINTED */
	sys = (zs_system_t *)(start + size);
	/* LINTED */
	csys = (zs_system_t *)(cstart + csize);
	size += sizeof (zs_system_t);
	csize += sizeof (zs_system_t);

	/* Save system limits but not usage */
	*sys = *csys;
	sys->zss_ncpus = 0;
	sys->zss_ncpus_online = 0;

	/* LINTED */
	zone = (zs_zone_t *)(start + size);
	/* LINTED */
	czone = (zs_zone_t *)(cstart + csize);
	/* Find the matching zone */
	for (i = 0; i < cusage->zsu_nzones; i++) {
		if (czone->zsz_id == zid) {
			*zone = *czone;
			size += sizeof (zs_zone_t);
		}
		csize += sizeof (zs_zone_t);
		/* LINTED */
		czone = (zs_zone_t *)(cstart + csize);
	}
	/*
	 * NOTE(review): if no cached zone matched zid, *zone was never
	 * written and the reads below see uninitialized memory.
	 * Presumably the caller's zone is always present in the cache —
	 * confirm.
	 */

	/* Attribute all other zones' memory usage to the kernel. */
	sys->zss_ram_kern += (sys->zss_ram_zones - zone->zsz_usage_ram);
	sys->zss_ram_zones = zone->zsz_usage_ram;

	sys->zss_vm_kern += (sys->zss_vm_zones - zone->zsz_usage_vm);
	sys->zss_vm_zones = zone->zsz_usage_vm;

	sys->zss_locked_kern += (sys->zss_locked_zones -
	    zone->zsz_usage_locked);
	sys->zss_locked_zones = zone->zsz_usage_locked;

	/* Likewise fold all other zones' cpu time into the kernel total. */
	TIMESTRUC_DELTA(delta, sys->zss_cpu_usage_zones, zone->zsz_cpu_usage);
	TIMESTRUC_ADD_TIMESTRUC(sys->zss_cpu_usage_kern, delta);
	sys->zss_cpu_usage_zones = zone->zsz_cpu_usage;

	/* LINTED */
	pset = (zs_pset_t *)(start + size);
	/* LINTED */
	cpset = (zs_pset_t *)(cstart + csize);
	/* Emit only the psets in which this zone has a usage record. */
	for (i = 0; i < cusage->zsu_npsets; i++) {
		csize += sizeof (zs_pset_t);
		/* LINTED */
		cpz = (zs_pset_zone_t *)(csize + cstart);
		foundpz = NULL;
		for (j = 0; j < cpset->zsp_nusage; j++) {
			if (cpz->zspz_zoneid == zid)
				foundpz = cpz;

			csize += sizeof (zs_pset_zone_t);
			/* LINTED */
			cpz = (zs_pset_zone_t *)(csize + cstart);
		}
		if (foundpz != NULL) {
			size += sizeof (zs_pset_t);
			/* LINTED */
			pz = (zs_pset_zone_t *)(start + size);
			size += sizeof (zs_pset_zone_t);

			*pset = *cpset;
			*pz = *foundpz;

			/* Fold other zones' pset cpu time into the kernel. */
			TIMESTRUC_DELTA(delta, pset->zsp_usage_zones,
			    pz->zspz_cpu_usage);
			TIMESTRUC_ADD_TIMESTRUC(pset->zsp_usage_kern, delta);
			pset->zsp_usage_zones = pz->zspz_cpu_usage;
			pset->zsp_nusage = 1;
			usage->zsu_npsets++;
			sys->zss_ncpus += pset->zsp_size;
			sys->zss_ncpus_online += pset->zsp_online;
		}
		/*
		 * NOTE(review): "pset" is not recomputed after a match, so
		 * a second matching pset would overwrite the first output
		 * record — presumably a zone appears in at most one pset
		 * per snapshot; confirm.
		 */
		/* LINTED */
		cpset = (zs_pset_t *)(cstart + csize);
	}
	/* Total number of bytes actually written to the output buffer. */
	usage->zsu_size = size;
}
4342
4343 /*
4344 * Respond to new connections from libzonestat.so. Also respond to zoneadmd,
4345 * which reports new zones.
4346 */
4347 /* ARGSUSED */
4348 static void
zsd_server(void * cookie,char * argp,size_t arg_size,door_desc_t * dp,uint_t n_desc)4349 zsd_server(void *cookie, char *argp, size_t arg_size,
4350 door_desc_t *dp, uint_t n_desc)
4351 {
4352 int *args, cmd;
4353 door_desc_t door;
4354 ucred_t *ucred;
4355 const priv_set_t *eset;
4356
4357 if (argp == DOOR_UNREF_DATA) {
4358 (void) door_return(NULL, 0, NULL, 0);
4359 thr_exit(NULL);
4360 }
4361
4362 if (arg_size != sizeof (cmd) * 2) {
4363 (void) door_return(NULL, 0, NULL, 0);
4364 thr_exit(NULL);
4365 }
4366
4367 /* LINTED */
4368 args = (int *)argp;
4369 cmd = args[0];
4370
4371 /* If connection, return door to stat server */
4372 if (cmd == ZSD_CMD_CONNECT) {
4373
4374 /* Verify client compilation version */
4375 if (args[1] != ZS_VERSION) {
4376 args[1] = ZSD_STATUS_VERSION_MISMATCH;
4377 (void) door_return(argp, sizeof (cmd) * 2, NULL, 0);
4378 thr_exit(NULL);
4379 }
4380 ucred = alloca(ucred_size());
4381 /* Verify client permission */
4382 if (door_ucred(&ucred) != 0) {
4383 args[1] = ZSD_STATUS_INTERNAL_ERROR;
4384 (void) door_return(argp, sizeof (cmd) * 2, NULL, 0);
4385 thr_exit(NULL);
4386 }
4387
4388 eset = ucred_getprivset(ucred, PRIV_EFFECTIVE);
4389 if (eset == NULL) {
4390 args[1] = ZSD_STATUS_INTERNAL_ERROR;
4391 (void) door_return(argp, sizeof (cmd) * 2, NULL, 0);
4392 thr_exit(NULL);
4393 }
4394 if (!priv_ismember(eset, PRIV_PROC_INFO)) {
4395 args[1] = ZSD_STATUS_PERMISSION;
4396 (void) door_return(argp, sizeof (cmd) * 2, NULL, 0);
4397 thr_exit(NULL);
4398 }
4399
4400 /* Return stat server door */
4401 args[1] = ZSD_STATUS_OK;
4402 door.d_attributes = DOOR_DESCRIPTOR;
4403 door.d_data.d_desc.d_descriptor = g_stat_door;
4404 (void) door_return(argp, sizeof (cmd) * 2, &door, 1);
4405 thr_exit(NULL);
4406 }
4407
4408 /* Respond to zoneadmd informing zonestatd of a new zone */
4409 if (cmd == ZSD_CMD_NEW_ZONE) {
4410 zsd_fattach_zone(args[1], g_server_door, B_FALSE);
4411 (void) door_return(NULL, 0, NULL, 0);
4412 thr_exit(NULL);
4413 }
4414
4415 args[1] = ZSD_STATUS_INTERNAL_ERROR;
4416 (void) door_return(argp, sizeof (cmd) * 2, NULL, 0);
4417 thr_exit(NULL);
4418 }
4419
4420 /*
 * Respond to libzonestat.so clients with the current utilization data.
4422 */
/* ARGSUSED */
static void
zsd_stat_server(void *cookie, char *argp, size_t arg_size,
    door_desc_t *dp, uint_t n_desc)
{
	uint64_t *args, cmd;
	zs_usage_cache_t *cache;
	int ret;
	char *rvalp;
	size_t rvals;
	zs_usage_t *usage;
	ucred_t *ucred;
	zoneid_t zoneid;
	const priv_set_t *eset;
	boolean_t is_gz = B_FALSE;

	/* Tell stat thread there are no more clients */
	if (argp == DOOR_UNREF_DATA) {
		(void) mutex_lock(&g_usage_cache_lock);
		g_hasclient = B_FALSE;
		(void) cond_signal(&g_usage_cache_kick);
		(void) mutex_unlock(&g_usage_cache_lock);
		(void) door_return(NULL, 0, NULL, 0);
		thr_exit(NULL);
	}
	/* Requests are exactly two uint64_ts: command and argument. */
	if (arg_size != sizeof (cmd) * 2) {
		(void) door_return(NULL, 0, NULL, 0);
		thr_exit(NULL);
	}
	/* LINTED */
	args = (uint64_t *)argp;
	cmd = args[0];
	if (cmd != ZSD_CMD_READ) {
		(void) door_return(NULL, 0, NULL, 0);
		thr_exit(NULL);
	}
	/* Identify and authorize the caller via its door credentials. */
	ucred = alloca(ucred_size());
	if (door_ucred(&ucred) != 0) {
		(void) door_return(NULL, 0, NULL, 0);
		thr_exit(NULL);
	}
	zoneid = ucred_getzoneid(ucred);

	/* Global-zone callers get unfiltered data from the cache. */
	if (zoneid == GLOBAL_ZONEID)
		is_gz = B_TRUE;

	/* Caller must have proc_info privilege in its effective set. */
	eset = ucred_getprivset(ucred, PRIV_EFFECTIVE);
	if (eset == NULL) {
		(void) door_return(NULL, 0, NULL, 0);
		thr_exit(NULL);
	}
	if (!priv_ismember(eset, PRIV_PROC_INFO)) {
		(void) door_return(NULL, 0, NULL, 0);
		thr_exit(NULL);
	}
	(void) mutex_lock(&g_usage_cache_lock);
	g_hasclient = B_TRUE;

	/*
	 * Force a new cpu calculation for client. This will force a
	 * new memory calculation if the memory data is older than the
	 * sample period.
	 */
	g_usage_cache_kickers++;
	(void) cond_signal(&g_usage_cache_kick);
	ret = cond_wait(&g_usage_cache_wait, &g_usage_cache_lock);
	g_usage_cache_kickers--;
	if (ret != 0 && errno == EINTR) {
		(void) mutex_unlock(&g_usage_cache_lock);
		zsd_warn(gettext(
		    "Interrupted before writing usage size to client\n"));
		(void) door_return(NULL, 0, NULL, 0);
		thr_exit(NULL);
	}
	/* Take a hold so the cache can't be freed while we copy it. */
	cache = zsd_usage_cache_hold_locked();
	if (cache == NULL) {
		/*
		 * NOTE(review): g_usage_cache_lock is still held on this
		 * path, and door_return() does not return, so the lock is
		 * never dropped here — confirm whether this can deadlock
		 * the stat thread.
		 */
		zsd_warn(gettext("Usage cache empty.\n"));
		(void) door_return(NULL, 0, NULL, 0);
		thr_exit(NULL);
	}
	(void) mutex_unlock(&g_usage_cache_lock);

	/* Copy current usage data to stack to send to client */
	usage = (zs_usage_t *)alloca(cache->zsuc_size);

	/* Filter out results if caller is non-global zone */
	zsd_usage_filter(zoneid, cache, usage, is_gz);

	rvalp = (void *)usage;
	rvals = usage->zsu_size;
	zsd_usage_cache_rele(cache);

	/*
	 * NOTE(review): the trailing arguments read (0, NULL) while
	 * door_return(3C) takes (door_desc_t *, uint_t); both are zero so
	 * no descriptors are passed, but (NULL, 0) would match the
	 * prototype.
	 */
	(void) door_return(rvalp, rvals, 0, NULL);
	thr_exit(NULL);
}
4518
/* Set asynchronously by zonestat_quithandler() to request daemon shutdown. */
static volatile boolean_t g_quit;
4520
/*
 * Handler for SIGINT/SIGTERM/SIGHUP (registered in main()): flag all
 * loops to exit.  Only sets a volatile flag, which is async-signal-safe.
 */
/* ARGSUSED */
static void
zonestat_quithandler(int sig)
{
	g_quit = B_TRUE;
}
4527
4528 /*
4529 * The stat thread generates new utilization data when clients request
4530 * it. It also manages opening and closing the subsystems used to gather
4531 * data depending on if clients exist.
4532 */
4533 /* ARGSUSED */
4534 void *
stat_thread(void * arg)4535 stat_thread(void *arg)
4536 {
4537 time_t start;
4538 time_t now;
4539 time_t next_memory;
4540 boolean_t do_memory;
4541 boolean_t do_read;
4542 boolean_t do_close;
4543
4544 start = time(NULL);
4545 if (start < 0) {
4546 if (g_quit == B_TRUE)
4547 goto quit;
4548 zsd_warn(gettext("Unable to fetch current time"));
4549 g_quit = B_TRUE;
4550 goto quit;
4551 }
4552
4553 next_memory = start;
4554 while (g_quit == B_FALSE) {
4555 for (;;) {
4556 /*
4557 * These are used to decide if the most recent memory
4558 * calculation was within a sample interval,
4559 * and weather or not the usage collection needs to
4560 * be opened or closed.
4561 */
4562 do_memory = B_FALSE;
4563 do_read = B_FALSE;
4564 do_close = B_FALSE;
4565
4566 /*
4567 * If all clients have gone, close usage collecting
4568 */
4569 (void) mutex_lock(&g_usage_cache_lock);
4570 if (!g_hasclient && g_open == B_TRUE) {
4571 do_close = B_TRUE;
4572 (void) mutex_unlock(&g_usage_cache_lock);
4573 break;
4574 }
4575 if (g_quit == B_TRUE) {
4576 (void) mutex_unlock(
4577 &g_usage_cache_lock);
4578 break;
4579 }
4580 /*
4581 * Wait for a usage data request
4582 */
4583 if (g_usage_cache_kickers == 0) {
4584 (void) cond_wait(&g_usage_cache_kick,
4585 &g_usage_cache_lock);
4586 }
4587 now = time(NULL);
4588 if (now < 0) {
4589 if (g_quit == B_TRUE) {
4590 (void) mutex_unlock(
4591 &g_usage_cache_lock);
4592 goto quit;
4593 }
4594 g_quit = B_TRUE;
4595 (void) mutex_unlock(&g_usage_cache_lock);
4596 zsd_warn(gettext(
4597 "Unable to fetch current time"));
4598 goto quit;
4599 }
4600 if (g_hasclient) {
4601 do_read = B_TRUE;
4602 if (now >= next_memory) {
4603 do_memory = B_TRUE;
4604 next_memory = now + g_interval;
4605 }
4606 } else {
4607 do_close = B_TRUE;
4608 }
4609 (void) mutex_unlock(&g_usage_cache_lock);
4610 if (do_read || do_close)
4611 break;
4612 }
4613 g_now = now;
4614 g_hrnow = gethrtime();
4615 if (g_hasclient && g_open == B_FALSE) {
4616 g_start = g_now;
4617 g_hrstart = g_hrnow;
4618 g_ctl = zsd_open(g_ctl);
4619 if (g_ctl == NULL)
4620 zsd_warn(gettext(
4621 "Unable to open zone statistics"));
4622 else
4623 g_open = B_TRUE;
4624 }
4625 if (do_read && g_ctl) {
4626 if (zsd_read(g_ctl, B_FALSE, do_memory) != 0) {
4627 zsd_warn(gettext(
4628 "Unable to read zone statistics"));
4629 g_quit = B_TRUE;
4630 return (NULL);
4631 }
4632 }
4633 (void) mutex_lock(&g_usage_cache_lock);
4634 if (!g_hasclient && g_open == B_TRUE && g_ctl) {
4635 (void) mutex_unlock(&g_usage_cache_lock);
4636 zsd_close(g_ctl);
4637 g_open = B_FALSE;
4638 } else {
4639 (void) mutex_unlock(&g_usage_cache_lock);
4640 }
4641 }
4642 quit:
4643 if (g_open)
4644 zsd_close(g_ctl);
4645
4646 (void) thr_kill(g_main, SIGINT);
4647 thr_exit(NULL);
4648 return (NULL);
4649 }
4650
4651 void
zsd_set_fx()4652 zsd_set_fx()
4653 {
4654 pcinfo_t pcinfo;
4655 pcparms_t pcparms;
4656
4657 (void) strlcpy(pcinfo.pc_clname, "FX", sizeof (pcinfo.pc_clname));
4658 if (priocntl(0, 0, PC_GETCID, (caddr_t)&pcinfo) == -1) {
4659 zsd_warn(gettext("cannot get FX class parameters"));
4660 return;
4661 }
4662 pcparms.pc_cid = pcinfo.pc_cid;
4663 ((fxparms_t *)pcparms.pc_clparms)->fx_upri = 60;
4664 ((fxparms_t *)pcparms.pc_clparms)->fx_uprilim = 60;
4665 ((fxparms_t *)pcparms.pc_clparms)->fx_tqsecs = 0;
4666 ((fxparms_t *)pcparms.pc_clparms)->fx_tqnsecs = FX_NOCHANGE;
4667 if (priocntl(P_PID, getpid(), PC_SETPARMS, (caddr_t)&pcparms) == -1)
4668 zsd_warn(gettext("cannot enter the FX class"));
4669 }
4670
/* Write end of the startup pipe back to the waiting parent process. */
static int pipe_fd;

/*
 * Report the ready/status byte to the parent blocked in
 * daemonize_start(), then close our end of the pipe.
 */
static void
daemonize_ready(char status)
{
	char msg = status;

	(void) write(pipe_fd, &msg, sizeof (msg));
	(void) close(pipe_fd);
}
4682
4683 static int
daemonize_start(void)4684 daemonize_start(void)
4685 {
4686 char data;
4687 int status;
4688
4689 int filedes[2];
4690 pid_t pid;
4691
4692 (void) close(0);
4693 (void) dup2(2, 1);
4694
4695 if (pipe(filedes) < 0)
4696 return (-1);
4697
4698 (void) fflush(NULL);
4699
4700 if ((pid = fork1()) < 0)
4701 return (-1);
4702
4703 if (pid != 0) {
4704 /*
4705 * parent
4706 */
4707 struct sigaction act;
4708
4709 act.sa_sigaction = SIG_DFL;
4710 (void) sigemptyset(&act.sa_mask);
4711 act.sa_flags = 0;
4712
4713 (void) sigaction(SIGPIPE, &act, NULL); /* ignore SIGPIPE */
4714
4715 (void) close(filedes[1]);
4716 if (read(filedes[0], &data, 1) == 1) {
4717 /* forward ready code via exit status */
4718 exit(data);
4719 }
4720 status = -1;
4721 (void) wait4(pid, &status, 0, NULL);
4722 /* daemon process exited before becoming ready */
4723 if (WIFEXITED(status)) {
4724 /* assume daemon process printed useful message */
4725 exit(WEXITSTATUS(status));
4726 } else {
4727 zsd_warn(gettext("daemon process killed or died"));
4728 exit(1);
4729 }
4730 }
4731
4732 /*
4733 * child
4734 */
4735 pipe_fd = filedes[1];
4736 (void) close(filedes[0]);
4737
4738 /*
4739 * generic Unix setup
4740 */
4741 (void) setsid();
4742 (void) umask(0000);
4743
4744 return (0);
4745 }
4746
4747 static void
fattach_all_zones(boolean_t detach_only)4748 fattach_all_zones(boolean_t detach_only)
4749 {
4750 zoneid_t *zids;
4751 uint_t nzids, nzids_last;
4752 int i;
4753
4754 again:
4755 (void) zone_list(NULL, &nzids);
4756 nzids_last = nzids;
4757 zids = (zoneid_t *)malloc(sizeof (zoneid_t) * nzids_last);
4758 if (zids == NULL)
4759 zsd_error(gettext("Out of memory"));
4760
4761 (void) zone_list(zids, &nzids);
4762 if (nzids > nzids_last) {
4763 free(zids);
4764 goto again;
4765 }
4766 for (i = 0; i < nzids; i++)
4767 zsd_fattach_zone(zids[i], g_server_door, detach_only);
4768
4769 free(zids);
4770 }
4771
4772 int
main(int argc,char * argv[])4773 main(int argc, char *argv[])
4774 {
4775
4776 int arg;
4777 thread_t tid;
4778 scf_simple_prop_t *prop;
4779 uint64_t *intervalp;
4780 boolean_t opt_cleanup = B_FALSE;
4781
4782 g_main = thr_self();
4783 g_quit = B_FALSE;
4784 (void) signal(SIGINT, zonestat_quithandler);
4785 (void) signal(SIGTERM, zonestat_quithandler);
4786 (void) signal(SIGHUP, zonestat_quithandler);
4787 /* (void) sigignore(SIGCHLD); */
4788 (void) sigignore(SIGPIPE);
4789
4790 if (getzoneid() != GLOBAL_ZONEID)
4791 zsd_error(gettext("Must be run from global zone only"));
4792
4793 while ((arg = getopt(argc, argv, "c"))
4794 != EOF) {
4795 switch (arg) {
4796 case 'c':
4797 opt_cleanup = B_TRUE;
4798 break;
4799 default:
4800 zsd_error(gettext("Invalid option"));
4801 }
4802 }
4803
4804 if (opt_cleanup) {
4805 if (zsd_disable_cpu_stats() != 0)
4806 exit(1);
4807 else
4808 exit(0);
4809 }
4810
4811 /* Get the configured sample interval */
4812 prop = scf_simple_prop_get(NULL, "svc:/system/zones-monitoring:default",
4813 "config", "sample_interval");
4814 if (prop == NULL)
4815 zsd_error(gettext("Unable to fetch SMF property "
4816 "\"config/sample_interval\""));
4817
4818 if (scf_simple_prop_type(prop) != SCF_TYPE_COUNT)
4819 zsd_error(gettext("Malformed SMF property "
4820 "\"config/sample_interval\". Must be of type \"count\""));
4821
4822 intervalp = scf_simple_prop_next_count(prop);
4823 g_interval = *intervalp;
4824 if (g_interval == 0)
4825 zsd_error(gettext("Malformed SMF property "
4826 "\"config/sample_interval\". Must be greater than zero"));
4827
4828 scf_simple_prop_free(prop);
4829
4830 if (daemonize_start() < 0)
4831 zsd_error(gettext("Unable to start daemon\n"));
4832
4833 /* Run at high priority */
4834 zsd_set_fx();
4835
4836 (void) mutex_init(&g_usage_cache_lock, USYNC_THREAD, NULL);
4837 (void) cond_init(&g_usage_cache_kick, USYNC_THREAD, NULL);
4838 (void) cond_init(&g_usage_cache_wait, USYNC_THREAD, NULL);
4839
4840 g_server_door = door_create(zsd_server, NULL,
4841 DOOR_REFUSE_DESC | DOOR_NO_CANCEL);
4842 if (g_server_door < 0)
4843 zsd_error(gettext("Unable to create server door\n"));
4844
4845
4846 g_stat_door = door_create(zsd_stat_server, NULL, DOOR_UNREF_MULTI |
4847 DOOR_REFUSE_DESC | DOOR_NO_CANCEL);
4848 if (g_stat_door < 0)
4849 zsd_error(gettext("Unable to create statistics door\n"));
4850
4851 fattach_all_zones(B_FALSE);
4852
4853 if (thr_create(NULL, 0, stat_thread, NULL, 0, &tid) != 0)
4854 zsd_error(gettext("Unable to create statistics thread\n"));
4855
4856 daemonize_ready(0);
4857
4858 /* Wait for signal to quit */
4859 while (g_quit == B_FALSE)
4860 (void) pause();
4861
4862 /* detach doors */
4863 fattach_all_zones(B_TRUE);
4864
4865 (void) door_revoke(g_server_door);
4866 (void) door_revoke(g_stat_door);
4867
4868 /* kick stat thread and wait for it to close the statistics */
4869 (void) mutex_lock(&g_usage_cache_lock);
4870 g_quit = B_TRUE;
4871 (void) cond_signal(&g_usage_cache_kick);
4872 (void) mutex_unlock(&g_usage_cache_lock);
4873 end:
4874 (void) thr_join(tid, NULL, NULL);
4875 return (0);
4876 }
4877