/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
 */
#include <alloca.h>
#include <assert.h>
#include <dirent.h>
#include <dlfcn.h>
#include <door.h>
#include <errno.h>
#include <exacct.h>
#include <ctype.h>
#include <fcntl.h>
#include <kstat.h>
#include <libcontract.h>
#include <libintl.h>
#include <libscf.h>
#include <zonestat.h>
#include <zonestat_impl.h>
#include <limits.h>
#include <pool.h>
#include <procfs.h>
#include <rctl.h>
#include <thread.h>
#include <signal.h>
#include <stdarg.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <synch.h>
#include <sys/acctctl.h>
#include <sys/contract/process.h>
#include <sys/ctfs.h>
#include <sys/fork.h>
#include <sys/param.h>
#include <sys/priocntl.h>
#include <sys/fxpriocntl.h>
#include <sys/processor.h>
#include <sys/pset.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/statvfs.h>
#include <sys/swap.h>
#include <sys/systeminfo.h>
#include <thread.h>
#include <sys/list.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/vm_usage.h>
#include <sys/wait.h>
#include <sys/zone.h>
#include <time.h>
#include <ucred.h>
#include <unistd.h>
#include <vm/anon.h>
#include <zone.h>
#include <zonestat.h>

#define	MAX_PSET_NAME	1024 /* Taken from PV_NAME_MAX_LEN */
#define	ZSD_PSET_UNLIMITED	UINT16_MAX
#define	ZONESTAT_EXACCT_FILE	"/var/adm/exacct/zonestat-process"

/*
 * zonestatd implements gathering cpu and memory utilization data for
 * running zones. It has these components:
 *
 * zsd_server:
 *	Door server to respond to client connections. Each client
 *	will connect using libzonestat.so, which will open and
 *	call /var/tmp/.zonestat_door. Each connecting client is given
 *	a file descriptor to the stat server.
 *
 *	The zsd_server also responds to zoneadmd, which reports when a
 *	new zone is booted. This is used to fattach the zsd_server door
 *	into the new zone.
 *
 * zsd_stat_server:
 *	Receives client requests for the current utilization data. Each
 *	client request will cause zonestatd to update the current utilization
 *	data by kicking the stat_thread.
 *
 *	If the client is in a non-global zone, the utilization data will
 *	be filtered to only show the given zone. The usage by all other zones
 *	will be added to the system utilization.
 *
 * stat_thread:
 *	The stat thread implements querying the system to determine the
 *	current utilization data for each running zone. This includes
 *	inspecting the system's processor set configuration, as well as details
 *	of each zone, such as their configured limits, and which processor
 *	sets they are running in.
 *
 *	The stat_thread will only update memory utilization data as often as
 *	the configured config/sample_interval on the zones-monitoring service.
 */

/*
 * The private vmusage structure unfortunately uses size_t types, and assumes
 * the caller's bitness matches the kernel's bitness. Since the getvmusage()
 * system call is contracted, and zonestatd is 32 bit, the following structures
 * are used to interact with a 32bit or 64 bit kernel.
 */

/* vmusage result record laid out with 32 bit counters. */
typedef struct zsd_vmusage32 {
	id_t vmu_zoneid;
	uint_t vmu_type;
	id_t vmu_id;

	uint32_t vmu_rss_all;
	uint32_t vmu_rss_private;
	uint32_t vmu_rss_shared;
	uint32_t vmu_swap_all;
	uint32_t vmu_swap_private;
	uint32_t vmu_swap_shared;
} zsd_vmusage32_t;

/* vmusage result record laid out with 64 bit counters. */
typedef struct zsd_vmusage64 {
	id_t vmu_zoneid;
	uint_t vmu_type;
	id_t vmu_id;
	/*
	 * An amd64 kernel will align the following uint64_t members, but a
	 * 32bit i386 process will not without help.
	 */
	int vmu_align_next_members_on_8_bytes;
	uint64_t vmu_rss_all;
	uint64_t vmu_rss_private;
	uint64_t vmu_rss_shared;
	uint64_t vmu_swap_all;
	uint64_t vmu_swap_private;
	uint64_t vmu_swap_shared;
} zsd_vmusage64_t;

/* Forward declaration; zsd_pset_usage refers to its zone by pointer. */
struct zsd_zone;

/* Used to store a zone's usage of a pset */
typedef struct zsd_pset_usage {
	struct zsd_zone	*zsu_zone;	/* zone this usage is charged to */
	struct zsd_pset	*zsu_pset;	/* pset the zone is using */

	list_node_t	zsu_next;	/* linkage on zsp_usage_list */

	zoneid_t	zsu_zoneid;
	boolean_t	zsu_found;	/* zone bound at end of interval */
	boolean_t	zsu_active;	/* zone was bound during interval */
	boolean_t	zsu_new;	/* zone newly bound in this interval */
	boolean_t	zsu_deleted;	/* zone was unbound in this interval */
	boolean_t	zsu_empty;	/* no procs in pset in this interval */
	time_t		zsu_start;	/* time when zone was found in pset */
	hrtime_t	zsu_hrstart;	/* same instant, as an hrtime_t */
	uint64_t	zsu_cpu_shares;
	uint_t		zsu_scheds;	/* schedulers found in this pass */
	timestruc_t	zsu_cpu_usage;	/* cpu time used */
} zsd_pset_usage_t;

/* Used to store a pset's utilization */
typedef struct zsd_pset {
	psetid_t	zsp_id;
	list_node_t	zsp_next;	/* linkage on zsctl_psets */
	char		zsp_name[ZS_PSETNAME_MAX];

	uint_t		zsp_cputype;	/* default, dedicated or shared */
	boolean_t	zsp_found;	/* pset found at end of interval */
	boolean_t	zsp_new;	/* pset new in this interval */
	boolean_t	zsp_deleted;	/* pset deleted in this interval */
	boolean_t	zsp_active;	/* pset existed during interval */
	boolean_t	zsp_empty;	/* no processes in pset */
	time_t		zsp_start;
	hrtime_t	zsp_hrstart;

	uint64_t	zsp_online;	/* online cpus in interval */
	uint64_t	zsp_size;	/* size in this interval */
	uint64_t	zsp_min;	/* configured min in this interval */
	uint64_t	zsp_max;	/* configured max in this interval */
	int64_t		zsp_importance;	/* configured importance in interval */

	uint_t		zsp_scheds;	/* scheds of processes found in pset */
	uint64_t	zsp_cpu_shares;	/* total shares in this interval */

	timestruc_t	zsp_total_time;
	timestruc_t	zsp_usage_kern;
	timestruc_t	zsp_usage_zones; /* summed by zsd_add_usage() */

	/* Individual zone usages of pset */
	list_t		zsp_usage_list;
	int		zsp_nusage;

	/* Summed kstat values from individual cpus in pset */
	timestruc_t	zsp_idle;
	timestruc_t	zsp_intr;
	timestruc_t	zsp_kern;
	timestruc_t	zsp_user;

} zsd_pset_t;

/* Used to track an individual cpu's utilization as reported by kstats */
typedef struct zsd_cpu {
	processorid_t	zsc_id;
	list_node_t	zsc_next;	/* linkage on zsctl_cpus */
	psetid_t	zsc_psetid;
	psetid_t	zsc_psetid_prev; /* zsc_psetid saved at last roll */
	zsd_pset_t	*zsc_pset;

	boolean_t	zsc_found;	/* cpu online in this interval */
	boolean_t	zsc_onlined;	/* cpu onlined during this interval */
	boolean_t	zsc_offlined;	/* cpu offlined during this interval */
	boolean_t	zsc_active;	/* cpu online during this interval */
	boolean_t	zsc_allocated;	/* True if cpu has ever been found */

	/* kstats this interval */
	uint64_t	zsc_nsec_idle;
	uint64_t	zsc_nsec_intr;
	uint64_t	zsc_nsec_kern;
	uint64_t	zsc_nsec_user;

	/*
	 * kstats in most recent interval.  zsd_mark_cpus_start() copies the
	 * values above into these when asked to roll.
	 */
	uint64_t	zsc_nsec_idle_prev;
	uint64_t	zsc_nsec_intr_prev;
	uint64_t	zsc_nsec_kern_prev;
	uint64_t	zsc_nsec_user_prev;

	/* Total kstat increases since zonestatd started reading kstats */
	timestruc_t	zsc_idle;
	timestruc_t	zsc_intr;
	timestruc_t	zsc_kern;
	timestruc_t	zsc_user;

} zsd_cpu_t;

/* Used to describe an individual zone and its utilization */
typedef struct zsd_zone {
	zoneid_t	zsz_id;
	list_node_t	zsz_next;	/* linkage on zsctl_zones */
	char		zsz_name[ZS_ZONENAME_MAX];
	uint_t		zsz_cputype;
	uint_t		zsz_iptype;
	time_t		zsz_start;	/* g_now when the zone was allocated */
	hrtime_t	zsz_hrstart;	/* g_hrnow when the zone was allocated */

	char		zsz_pool[ZS_POOLNAME_MAX];
	char		zsz_pset[ZS_PSETNAME_MAX];
	int		zsz_default_sched;
	/* These are deduced by inspecting processes */
	psetid_t	zsz_psetid;
	uint_t		zsz_scheds;

	boolean_t	zsz_new;	/* zone booted during this interval */
	boolean_t	zsz_deleted;	/* halted during this interval */
	boolean_t	zsz_active;	/* running in this interval */
	boolean_t	zsz_empty;	/* no processes in this interval */
	boolean_t	zsz_gone;	/* not installed in this interval */
	boolean_t	zsz_found;	/* Running at end of this interval */

	uint64_t	zsz_cpu_shares;
	uint64_t	zsz_cpu_cap;
	uint64_t	zsz_ram_cap;
	uint64_t	zsz_locked_cap;
	uint64_t	zsz_vm_cap;

	uint64_t	zsz_cpus_online;
	timestruc_t	zsz_cpu_usage;	/* cpu time used by the zone */
	timestruc_t	zsz_cap_time;	/* cpu time of cpu cap */
	timestruc_t	zsz_share_time;	/* cpu time of share of cpu */
	timestruc_t	zsz_pset_time;	/* time of all psets zone is bound to */

	uint64_t	zsz_usage_ram;
	uint64_t	zsz_usage_locked;
	uint64_t	zsz_usage_vm;

	/* Configured limits, ZS_LIMIT_NONE until a limit is found */
	uint64_t	zsz_processes_cap;
	uint64_t	zsz_lwps_cap;
	uint64_t	zsz_shm_cap;
	uint64_t	zsz_shmids_cap;
	uint64_t	zsz_semids_cap;
	uint64_t	zsz_msgids_cap;
	uint64_t	zsz_lofi_cap;

	/* Current usage of the resources limited above */
	uint64_t	zsz_processes;
	uint64_t	zsz_lwps;
	uint64_t	zsz_shm;
	uint64_t	zsz_shmids;
	uint64_t	zsz_semids;
	uint64_t	zsz_msgids;
	uint64_t	zsz_lofi;

} zsd_zone_t;

/*
 * Used to track the cpu usage of an individual process.
 *
 * zonestatd sweeps /proc each interval and charges the cpu usage of processes
 * to their zone. As processes exit, their extended accounting records are
 * read and the difference of their total and known usage is charged to their
 * zone.
 *
 * If a process is never seen in /proc, the total usage on its extended
 * accounting record will be charged to its zone.
 */
typedef struct zsd_proc {
	list_node_t	zspr_next;
	pid_t		zspr_ppid;
	psetid_t	zspr_psetid;
	zoneid_t	zspr_zoneid;
	int		zspr_sched;
	timestruc_t	zspr_usage;
} zsd_proc_t;

/* Used to track the overall resource usage of the system */
typedef struct zsd_system {

	uint64_t zss_ram_total;
	uint64_t zss_ram_kern;
	uint64_t zss_ram_zones;

	uint64_t zss_locked_kern;
	uint64_t zss_locked_zones;

	uint64_t zss_vm_total;
	uint64_t zss_vm_kern;
	uint64_t zss_vm_zones;

	uint64_t zss_swap_total;
	uint64_t zss_swap_used;

	timestruc_t zss_idle;
	timestruc_t zss_intr;
	timestruc_t zss_kern;
	timestruc_t zss_user;

	timestruc_t zss_cpu_total_time;
	timestruc_t zss_cpu_usage_kern;
	timestruc_t zss_cpu_usage_zones;	/* summed by zsd_add_usage() */

	uint64_t zss_maxpid;
	uint64_t zss_processes_max;
	uint64_t zss_lwps_max;
	uint64_t zss_shm_max;
	uint64_t zss_shmids_max;
	uint64_t zss_semids_max;
	uint64_t zss_msgids_max;
	uint64_t zss_lofi_max;

	/* Totals accumulated per zone by zsd_mark_zone_found() */
	uint64_t zss_processes;
	uint64_t zss_lwps;
	uint64_t zss_shm;
	uint64_t zss_shmids;
	uint64_t zss_semids;
	uint64_t zss_msgids;
	uint64_t zss_lofi;

	uint64_t zss_ncpus;
	uint64_t zss_ncpus_online;

} zsd_system_t;
for various information and structures used to compute 378 * utilization. 379 * 380 * This structure is used to track the system while clients are connected. 381 * When The first client connects, a zsd_ctl is allocated and configured by 382 * zsd_open(). When all clients disconnect, the zsd_ctl is closed. 383 */ 384 typedef struct zsd_ctl { 385 kstat_ctl_t *zsctl_kstat_ctl; 386 387 /* To track extended accounting */ 388 int zsctl_proc_fd; /* Log currently being used */ 389 ea_file_t zsctl_proc_eaf; 390 struct stat64 zsctl_proc_stat; 391 int zsctl_proc_open; 392 int zsctl_proc_fd_next; /* Log file to use next */ 393 ea_file_t zsctl_proc_eaf_next; 394 struct stat64 zsctl_proc_stat_next; 395 int zsctl_proc_open_next; 396 397 /* pool configuration handle */ 398 pool_conf_t *zsctl_pool_conf; 399 int zsctl_pool_status; 400 int zsctl_pool_changed; 401 402 /* The above usage tacking structures */ 403 zsd_system_t *zsctl_system; 404 list_t zsctl_zones; 405 list_t zsctl_psets; 406 list_t zsctl_cpus; 407 zsd_cpu_t *zsctl_cpu_array; 408 zsd_proc_t *zsctl_proc_array; 409 410 /* Various system info */ 411 uint64_t zsctl_maxcpuid; 412 uint64_t zsctl_maxproc; 413 uint64_t zsctl_kern_bits; 414 uint64_t zsctl_pagesize; 415 416 /* Used to track time available under a cpu cap. 
*/ 417 uint64_t zsctl_hrtime; 418 uint64_t zsctl_hrtime_prev; 419 timestruc_t zsctl_hrtime_total; 420 421 struct timeval zsctl_timeofday; 422 423 /* Caches for arrays allocated for use by various system calls */ 424 psetid_t *zsctl_pset_cache; 425 uint_t zsctl_pset_ncache; 426 processorid_t *zsctl_cpu_cache; 427 uint_t zsctl_cpu_ncache; 428 zoneid_t *zsctl_zone_cache; 429 uint_t zsctl_zone_ncache; 430 struct swaptable *zsctl_swap_cache; 431 uint64_t zsctl_swap_cache_size; 432 uint64_t zsctl_swap_cache_num; 433 zsd_vmusage64_t *zsctl_vmusage_cache; 434 uint64_t zsctl_vmusage_cache_num; 435 436 /* Info about procfs for scanning /proc */ 437 struct dirent *zsctl_procfs_dent; 438 long zsctl_procfs_dent_size; 439 pool_value_t *zsctl_pool_vals[3]; 440 441 /* Counts on tracked entities */ 442 uint_t zsctl_nzones; 443 uint_t zsctl_npsets; 444 uint_t zsctl_npset_usages; 445 } zsd_ctl_t; 446 447 zsd_ctl_t *g_ctl; 448 boolean_t g_open; /* True if g_ctl is open */ 449 int g_hasclient; /* True if any clients are connected */ 450 451 /* 452 * The usage cache is updated by the stat_thread, and copied to clients by 453 * the zsd_stat_server. Mutex and cond are to synchronize between the 454 * stat_thread and the stat_server. 455 */ 456 zs_usage_cache_t *g_usage_cache; 457 mutex_t g_usage_cache_lock; 458 cond_t g_usage_cache_kick; 459 uint_t g_usage_cache_kickers; 460 cond_t g_usage_cache_wait; 461 char *g_usage_cache_buf; 462 uint_t g_usage_cache_bufsz; 463 uint64_t g_gen_next; 464 465 /* fds of door servers */ 466 int g_server_door; 467 int g_stat_door; 468 469 /* 470 * Starting and current time. Used to throttle memory calculation, and to 471 * mark new zones and psets with their boot and creation time. 472 */ 473 time_t g_now; 474 time_t g_start; 475 hrtime_t g_hrnow; 476 hrtime_t g_hrstart; 477 uint64_t g_interval; 478 479 /* 480 * main() thread. 481 */ 482 thread_t g_main; 483 484 /* PRINTFLIKE1 */ 485 static void 486 zsd_warn(const char *fmt, ...) 
487 { 488 va_list alist; 489 490 va_start(alist, fmt); 491 492 (void) fprintf(stderr, gettext("zonestat: Warning: ")); 493 (void) vfprintf(stderr, fmt, alist); 494 (void) fprintf(stderr, "\n"); 495 va_end(alist); 496 } 497 498 /* PRINTFLIKE1 */ 499 static void 500 zsd_error(const char *fmt, ...) 501 { 502 va_list alist; 503 504 va_start(alist, fmt); 505 506 (void) fprintf(stderr, gettext("zonestat: Error: ")); 507 (void) vfprintf(stderr, fmt, alist); 508 (void) fprintf(stderr, "\n"); 509 va_end(alist); 510 exit(1); 511 } 512 513 /* Turns on extended accounting if not configured externally */ 514 int 515 zsd_enable_cpu_stats() 516 { 517 char *path = ZONESTAT_EXACCT_FILE; 518 char oldfile[MAXPATHLEN]; 519 int ret, state = AC_ON; 520 ac_res_t res[6]; 521 522 /* 523 * Start a new accounting file if accounting not configured 524 * externally. 525 */ 526 527 res[0].ar_id = AC_PROC_PID; 528 res[0].ar_state = AC_ON; 529 res[1].ar_id = AC_PROC_ANCPID; 530 res[1].ar_state = AC_ON; 531 res[2].ar_id = AC_PROC_CPU; 532 res[2].ar_state = AC_ON; 533 res[3].ar_id = AC_PROC_TIME; 534 res[3].ar_state = AC_ON; 535 res[4].ar_id = AC_PROC_ZONENAME; 536 res[4].ar_state = AC_ON; 537 res[5].ar_id = AC_NONE; 538 res[5].ar_state = AC_ON; 539 if (acctctl(AC_PROC | AC_RES_SET, res, sizeof (res)) != 0) { 540 zsd_warn(gettext("Unable to set accounting resources")); 541 return (-1); 542 } 543 /* Only set accounting file if none is configured */ 544 ret = acctctl(AC_PROC | AC_FILE_GET, oldfile, sizeof (oldfile)); 545 if (ret < 0) { 546 547 (void) unlink(path); 548 if (acctctl(AC_PROC | AC_FILE_SET, path, strlen(path) + 1) 549 == -1) { 550 zsd_warn(gettext("Unable to set accounting file")); 551 return (-1); 552 } 553 } 554 if (acctctl(AC_PROC | AC_STATE_SET, &state, sizeof (state)) == -1) { 555 zsd_warn(gettext("Unable to enable accounting")); 556 return (-1); 557 } 558 return (0); 559 } 560 561 /* Turns off extended accounting if not configured externally */ 562 int 563 zsd_disable_cpu_stats() 
564 { 565 char *path = ZONESTAT_EXACCT_FILE; 566 int ret, state = AC_OFF; 567 ac_res_t res[6]; 568 char oldfile[MAXPATHLEN]; 569 570 /* If accounting file is externally configured, leave it alone */ 571 ret = acctctl(AC_PROC | AC_FILE_GET, oldfile, sizeof (oldfile)); 572 if (ret == 0 && strcmp(oldfile, path) != 0) 573 return (0); 574 575 res[0].ar_id = AC_PROC_PID; 576 res[0].ar_state = AC_OFF; 577 res[1].ar_id = AC_PROC_ANCPID; 578 res[1].ar_state = AC_OFF; 579 res[2].ar_id = AC_PROC_CPU; 580 res[2].ar_state = AC_OFF; 581 res[3].ar_id = AC_PROC_TIME; 582 res[3].ar_state = AC_OFF; 583 res[4].ar_id = AC_PROC_ZONENAME; 584 res[4].ar_state = AC_OFF; 585 res[5].ar_id = AC_NONE; 586 res[5].ar_state = AC_OFF; 587 if (acctctl(AC_PROC | AC_RES_SET, res, sizeof (res)) != 0) { 588 zsd_warn(gettext("Unable to clear accounting resources")); 589 return (-1); 590 } 591 if (acctctl(AC_PROC | AC_FILE_SET, NULL, 0) == -1) { 592 zsd_warn(gettext("Unable to clear accounting file")); 593 return (-1); 594 } 595 if (acctctl(AC_PROC | AC_STATE_SET, &state, sizeof (state)) == -1) { 596 zsd_warn(gettext("Unable to diable accounting")); 597 return (-1); 598 } 599 600 (void) unlink(path); 601 return (0); 602 } 603 604 /* 605 * If not configured externally, deletes the current extended accounting file 606 * and starts a new one. 607 * 608 * Since the stat_thread holds an open handle to the accounting file, it will 609 * read all remaining entries from the old file before switching to 610 * read the new one. 
611 */ 612 int 613 zsd_roll_exacct(void) 614 { 615 int ret; 616 char *path = ZONESTAT_EXACCT_FILE; 617 char oldfile[MAXPATHLEN]; 618 619 /* If accounting file is externally configured, leave it alone */ 620 ret = acctctl(AC_PROC | AC_FILE_GET, oldfile, sizeof (oldfile)); 621 if (ret == 0 && strcmp(oldfile, path) != 0) 622 return (0); 623 624 if (unlink(path) != 0) 625 /* Roll it next time */ 626 return (0); 627 628 if (acctctl(AC_PROC | AC_FILE_SET, path, strlen(path) + 1) == -1) { 629 zsd_warn(gettext("Unable to set accounting file")); 630 return (-1); 631 } 632 return (0); 633 } 634 635 /* Contract stuff for zone_enter() */ 636 int 637 init_template(void) 638 { 639 int fd; 640 int err = 0; 641 642 fd = open64(CTFS_ROOT "/process/template", O_RDWR); 643 if (fd == -1) 644 return (-1); 645 646 /* 647 * For now, zoneadmd doesn't do anything with the contract. 648 * Deliver no events, don't inherit, and allow it to be orphaned. 649 */ 650 err |= ct_tmpl_set_critical(fd, 0); 651 err |= ct_tmpl_set_informative(fd, 0); 652 err |= ct_pr_tmpl_set_fatal(fd, CT_PR_EV_HWERR); 653 err |= ct_pr_tmpl_set_param(fd, CT_PR_PGRPONLY | CT_PR_REGENT); 654 if (err || ct_tmpl_activate(fd)) { 655 (void) close(fd); 656 return (-1); 657 } 658 659 return (fd); 660 } 661 662 /* 663 * Contract stuff for zone_enter() 664 */ 665 int 666 contract_latest(ctid_t *id) 667 { 668 int cfd, r; 669 ct_stathdl_t st; 670 ctid_t result; 671 672 if ((cfd = open64(CTFS_ROOT "/process/latest", O_RDONLY)) == -1) 673 return (errno); 674 675 if ((r = ct_status_read(cfd, CTD_COMMON, &st)) != 0) { 676 (void) close(cfd); 677 return (r); 678 } 679 680 result = ct_status_get_id(st); 681 ct_status_free(st); 682 (void) close(cfd); 683 684 *id = result; 685 return (0); 686 } 687 688 static int 689 close_on_exec(int fd) 690 { 691 int flags = fcntl(fd, F_GETFD, 0); 692 if ((flags != -1) && (fcntl(fd, F_SETFD, flags | FD_CLOEXEC) != -1)) 693 return (0); 694 return (-1); 695 } 696 697 int 698 contract_open(ctid_t ctid, 
const char *type, const char *file, int oflag) 699 { 700 char path[PATH_MAX]; 701 int n, fd; 702 703 if (type == NULL) 704 type = "all"; 705 706 n = snprintf(path, PATH_MAX, CTFS_ROOT "/%s/%ld/%s", type, ctid, file); 707 if (n >= sizeof (path)) { 708 errno = ENAMETOOLONG; 709 return (-1); 710 } 711 712 fd = open64(path, oflag); 713 if (fd != -1) { 714 if (close_on_exec(fd) == -1) { 715 int err = errno; 716 (void) close(fd); 717 errno = err; 718 return (-1); 719 } 720 } 721 return (fd); 722 } 723 724 int 725 contract_abandon_id(ctid_t ctid) 726 { 727 int fd, err; 728 729 fd = contract_open(ctid, "all", "ctl", O_WRONLY); 730 if (fd == -1) 731 return (errno); 732 733 err = ct_ctl_abandon(fd); 734 (void) close(fd); 735 736 return (err); 737 } 738 /* 739 * Attach the zsd_server to a zone. Called for each zone when zonestatd 740 * starts, and for each newly booted zone when zoneadmd contacts the zsd_server 741 * 742 * Zone_enter is used to avoid reaching into zone to fattach door. 743 */ 744 static void 745 zsd_fattach_zone(zoneid_t zid, int door, boolean_t detach_only) 746 { 747 char *path = ZS_DOOR_PATH; 748 int fd, pid, stat, tmpl_fd; 749 ctid_t ct; 750 751 if ((tmpl_fd = init_template()) == -1) { 752 zsd_warn("Unable to init template"); 753 return; 754 } 755 756 pid = forkx(0); 757 if (pid < 0) { 758 (void) ct_tmpl_clear(tmpl_fd); 759 zsd_warn(gettext( 760 "Unable to fork to add zonestat to zoneid %d\n"), zid); 761 return; 762 } 763 764 if (pid == 0) { 765 (void) ct_tmpl_clear(tmpl_fd); 766 (void) close(tmpl_fd); 767 if (zid != 0 && zone_enter(zid) != 0) { 768 if (errno == EINVAL) { 769 _exit(0); 770 } 771 _exit(1); 772 } 773 (void) fdetach(path); 774 (void) unlink(path); 775 if (detach_only) 776 _exit(0); 777 fd = open(path, O_CREAT|O_RDWR, 0644); 778 if (fd < 0) 779 _exit(2); 780 if (fattach(door, path) != 0) 781 _exit(3); 782 _exit(0); 783 } 784 if (contract_latest(&ct) == -1) 785 ct = -1; 786 (void) ct_tmpl_clear(tmpl_fd); 787 (void) close(tmpl_fd); 788 (void) 
contract_abandon_id(ct); 789 while (waitpid(pid, &stat, 0) != pid) 790 ; 791 if (WIFEXITED(stat) && WEXITSTATUS(stat) == 0) 792 return; 793 794 zsd_warn(gettext("Unable to attach door to zoneid: %d"), zid); 795 796 if (WEXITSTATUS(stat) == 1) 797 zsd_warn(gettext("Cannot entering zone")); 798 else if (WEXITSTATUS(stat) == 2) 799 zsd_warn(gettext("Unable to create door file: %s"), path); 800 else if (WEXITSTATUS(stat) == 3) 801 zsd_warn(gettext("Unable to fattach file: %s"), path); 802 803 zsd_warn(gettext("Internal error entering zone: %d"), zid); 804 } 805 806 /* 807 * Zone lookup and allocation functions to manage list of currently running 808 * zones. 809 */ 810 static zsd_zone_t * 811 zsd_lookup_zone(zsd_ctl_t *ctl, char *zonename, zoneid_t zoneid) 812 { 813 zsd_zone_t *zone; 814 815 for (zone = list_head(&ctl->zsctl_zones); zone != NULL; 816 zone = list_next(&ctl->zsctl_zones, zone)) { 817 if (strcmp(zone->zsz_name, zonename) == 0) { 818 if (zoneid != -1) 819 zone->zsz_id = zoneid; 820 return (zone); 821 } 822 } 823 return (NULL); 824 } 825 826 static zsd_zone_t * 827 zsd_lookup_zone_byid(zsd_ctl_t *ctl, zoneid_t zoneid) 828 { 829 zsd_zone_t *zone; 830 831 for (zone = list_head(&ctl->zsctl_zones); zone != NULL; 832 zone = list_next(&ctl->zsctl_zones, zone)) { 833 if (zone->zsz_id == zoneid) 834 return (zone); 835 } 836 return (NULL); 837 } 838 839 static zsd_zone_t * 840 zsd_allocate_zone(zsd_ctl_t *ctl, char *zonename, zoneid_t zoneid) 841 { 842 zsd_zone_t *zone; 843 844 if ((zone = (zsd_zone_t *)calloc(1, sizeof (zsd_zone_t))) == NULL) 845 return (NULL); 846 847 (void) strlcpy(zone->zsz_name, zonename, sizeof (zone->zsz_name)); 848 zone->zsz_id = zoneid; 849 zone->zsz_found = B_FALSE; 850 851 /* 852 * Allocate as deleted so if not found in first pass, zone is deleted 853 * from list. This can happen if zone is returned by zone_list, but 854 * exits before first attempt to fetch zone details. 
855 */ 856 zone->zsz_start = g_now; 857 zone->zsz_hrstart = g_hrnow; 858 zone->zsz_deleted = B_TRUE; 859 860 zone->zsz_cpu_shares = ZS_LIMIT_NONE; 861 zone->zsz_cpu_cap = ZS_LIMIT_NONE; 862 zone->zsz_ram_cap = ZS_LIMIT_NONE; 863 zone->zsz_locked_cap = ZS_LIMIT_NONE; 864 zone->zsz_vm_cap = ZS_LIMIT_NONE; 865 866 zone->zsz_processes_cap = ZS_LIMIT_NONE; 867 zone->zsz_lwps_cap = ZS_LIMIT_NONE; 868 zone->zsz_shm_cap = ZS_LIMIT_NONE; 869 zone->zsz_shmids_cap = ZS_LIMIT_NONE; 870 zone->zsz_semids_cap = ZS_LIMIT_NONE; 871 zone->zsz_msgids_cap = ZS_LIMIT_NONE; 872 zone->zsz_lofi_cap = ZS_LIMIT_NONE; 873 874 ctl->zsctl_nzones++; 875 876 return (zone); 877 } 878 879 static zsd_zone_t * 880 zsd_lookup_insert_zone(zsd_ctl_t *ctl, char *zonename, zoneid_t zoneid) 881 { 882 zsd_zone_t *zone, *tmp; 883 884 if ((zone = zsd_lookup_zone(ctl, zonename, zoneid)) != NULL) 885 return (zone); 886 887 if ((zone = zsd_allocate_zone(ctl, zonename, zoneid)) == NULL) 888 return (NULL); 889 890 /* Insert sorted by zonename */ 891 tmp = list_head(&ctl->zsctl_zones); 892 while (tmp != NULL && strcmp(zonename, tmp->zsz_name) > 0) 893 tmp = list_next(&ctl->zsctl_zones, tmp); 894 895 list_insert_before(&ctl->zsctl_zones, tmp, zone); 896 return (zone); 897 } 898 899 /* 900 * Mark all zones as not existing. As zones are found, they will 901 * be marked as existing. If a zone is not found, then it must have 902 * halted. 903 */ 904 static void 905 zsd_mark_zones_start(zsd_ctl_t *ctl) 906 { 907 908 zsd_zone_t *zone; 909 910 for (zone = list_head(&ctl->zsctl_zones); zone != NULL; 911 zone = list_next(&ctl->zsctl_zones, zone)) { 912 zone->zsz_found = B_FALSE; 913 } 914 } 915 916 /* 917 * Mark each zone as not using pset. If processes are found using the 918 * pset, the zone will remain bound to the pset. If none of a zones 919 * processes are bound to the pset, the zone's usage of the pset will 920 * be deleted. 
921 * 922 */ 923 static void 924 zsd_mark_pset_usage_start(zsd_pset_t *pset) 925 { 926 zsd_pset_usage_t *usage; 927 928 for (usage = list_head(&pset->zsp_usage_list); 929 usage != NULL; 930 usage = list_next(&pset->zsp_usage_list, usage)) { 931 usage->zsu_found = B_FALSE; 932 usage->zsu_empty = B_TRUE; 933 } 934 } 935 936 /* 937 * Mark each pset as not existing. If a pset is found, it will be marked 938 * as existing. If a pset is not found, it wil be deleted. 939 */ 940 static void 941 zsd_mark_psets_start(zsd_ctl_t *ctl) 942 { 943 zsd_pset_t *pset; 944 945 for (pset = list_head(&ctl->zsctl_psets); pset != NULL; 946 pset = list_next(&ctl->zsctl_psets, pset)) { 947 pset->zsp_found = B_FALSE; 948 zsd_mark_pset_usage_start(pset); 949 } 950 } 951 952 /* 953 * A pset was found. Update its information 954 */ 955 static void 956 zsd_mark_pset_found(zsd_pset_t *pset, uint_t type, uint64_t online, 957 uint64_t size, uint64_t min, uint64_t max, int64_t importance) 958 { 959 pset->zsp_empty = B_TRUE; 960 pset->zsp_deleted = B_FALSE; 961 962 assert(pset->zsp_found == B_FALSE); 963 964 /* update pset flags */ 965 if (pset->zsp_active == B_FALSE) 966 /* pset not seen on previous interval. It is new. */ 967 pset->zsp_new = B_TRUE; 968 else 969 pset->zsp_new = B_FALSE; 970 971 pset->zsp_found = B_TRUE; 972 pset->zsp_cputype = type; 973 pset->zsp_online = online; 974 pset->zsp_size = size; 975 pset->zsp_min = min; 976 pset->zsp_max = max; 977 pset->zsp_importance = importance; 978 pset->zsp_cpu_shares = 0; 979 pset->zsp_scheds = 0; 980 pset->zsp_active = B_TRUE; 981 } 982 983 /* 984 * A zone's process was found using a pset. Charge the process to the pset and 985 * the per-zone data for the pset. 
986 */ 987 static void 988 zsd_mark_pset_usage_found(zsd_pset_usage_t *usage, uint_t sched) 989 { 990 zsd_zone_t *zone = usage->zsu_zone; 991 zsd_pset_t *pset = usage->zsu_pset; 992 993 /* Nothing to do if already found */ 994 if (usage->zsu_found == B_TRUE) 995 goto add_stats; 996 997 usage->zsu_found = B_TRUE; 998 usage->zsu_empty = B_FALSE; 999 1000 usage->zsu_deleted = B_FALSE; 1001 /* update usage flags */ 1002 if (usage->zsu_active == B_FALSE) 1003 usage->zsu_new = B_TRUE; 1004 else 1005 usage->zsu_new = B_FALSE; 1006 1007 usage->zsu_scheds = 0; 1008 usage->zsu_cpu_shares = ZS_LIMIT_NONE; 1009 usage->zsu_active = B_TRUE; 1010 pset->zsp_empty = B_FALSE; 1011 zone->zsz_empty = B_FALSE; 1012 1013 add_stats: 1014 /* Detect zone's pset id, and if it is bound to multiple psets */ 1015 if (zone->zsz_psetid == ZS_PSET_ERROR) 1016 zone->zsz_psetid = pset->zsp_id; 1017 else if (zone->zsz_psetid != pset->zsp_id) 1018 zone->zsz_psetid = ZS_PSET_MULTI; 1019 1020 usage->zsu_scheds |= sched; 1021 pset->zsp_scheds |= sched; 1022 zone->zsz_scheds |= sched; 1023 1024 /* Record if FSS is co-habitating with conflicting scheduler */ 1025 if ((pset->zsp_scheds & ZS_SCHED_FSS) && 1026 usage->zsu_scheds & ( 1027 ZS_SCHED_TS | ZS_SCHED_IA | ZS_SCHED_FX)) { 1028 usage->zsu_scheds |= ZS_SCHED_CONFLICT; 1029 1030 pset->zsp_scheds |= ZS_SCHED_CONFLICT; 1031 } 1032 1033 } 1034 1035 /* Add cpu time for a process to a pset, zone, and system totals */ 1036 static void 1037 zsd_add_usage(zsd_ctl_t *ctl, zsd_pset_usage_t *usage, timestruc_t *delta) 1038 { 1039 zsd_system_t *system = ctl->zsctl_system; 1040 zsd_zone_t *zone = usage->zsu_zone; 1041 zsd_pset_t *pset = usage->zsu_pset; 1042 1043 TIMESTRUC_ADD_TIMESTRUC(usage->zsu_cpu_usage, *delta); 1044 TIMESTRUC_ADD_TIMESTRUC(pset->zsp_usage_zones, *delta); 1045 TIMESTRUC_ADD_TIMESTRUC(zone->zsz_cpu_usage, *delta); 1046 TIMESTRUC_ADD_TIMESTRUC(system->zss_cpu_usage_zones, *delta); 1047 } 1048 1049 /* Determine which processor sets have been 
deleted */ 1050 static void 1051 zsd_mark_psets_end(zsd_ctl_t *ctl) 1052 { 1053 zsd_pset_t *pset, *tmp; 1054 1055 /* 1056 * Mark pset as not exists, and deleted if it existed 1057 * previous interval. 1058 */ 1059 pset = list_head(&ctl->zsctl_psets); 1060 while (pset != NULL) { 1061 if (pset->zsp_found == B_FALSE) { 1062 pset->zsp_empty = B_TRUE; 1063 if (pset->zsp_deleted == B_TRUE) { 1064 tmp = pset; 1065 pset = list_next(&ctl->zsctl_psets, pset); 1066 list_remove(&ctl->zsctl_psets, tmp); 1067 free(tmp); 1068 ctl->zsctl_npsets--; 1069 continue; 1070 } else { 1071 /* Pset vanished during this interval */ 1072 pset->zsp_new = B_FALSE; 1073 pset->zsp_deleted = B_TRUE; 1074 pset->zsp_active = B_TRUE; 1075 } 1076 } 1077 pset = list_next(&ctl->zsctl_psets, pset); 1078 } 1079 } 1080 1081 /* Determine which zones are no longer bound to processor sets */ 1082 static void 1083 zsd_mark_pset_usages_end(zsd_ctl_t *ctl) 1084 { 1085 zsd_pset_t *pset; 1086 zsd_zone_t *zone; 1087 zsd_pset_usage_t *usage, *tmp; 1088 1089 /* 1090 * Mark pset as not exists, and deleted if it existed previous 1091 * interval. 1092 */ 1093 for (pset = list_head(&ctl->zsctl_psets); pset != NULL; 1094 pset = list_next(&ctl->zsctl_psets, pset)) { 1095 usage = list_head(&pset->zsp_usage_list); 1096 while (usage != NULL) { 1097 /* 1098 * Mark pset as not exists, and deleted if it existed 1099 * previous interval. 
1100 */ 1101 if (usage->zsu_found == B_FALSE || 1102 usage->zsu_zone->zsz_deleted == B_TRUE || 1103 usage->zsu_pset->zsp_deleted == B_TRUE) { 1104 tmp = usage; 1105 usage = list_next(&pset->zsp_usage_list, 1106 usage); 1107 list_remove(&pset->zsp_usage_list, tmp); 1108 free(tmp); 1109 pset->zsp_nusage--; 1110 ctl->zsctl_npset_usages--; 1111 continue; 1112 } else { 1113 usage->zsu_new = B_FALSE; 1114 usage->zsu_deleted = B_TRUE; 1115 usage->zsu_active = B_TRUE; 1116 } 1117 /* Add cpu shares for usages that are in FSS */ 1118 zone = usage->zsu_zone; 1119 if (usage->zsu_scheds & ZS_SCHED_FSS && 1120 zone->zsz_cpu_shares != ZS_SHARES_UNLIMITED && 1121 zone->zsz_cpu_shares != 0) { 1122 zone = usage->zsu_zone; 1123 usage->zsu_cpu_shares = zone->zsz_cpu_shares; 1124 pset->zsp_cpu_shares += zone->zsz_cpu_shares; 1125 } 1126 usage = list_next(&pset->zsp_usage_list, 1127 usage); 1128 } 1129 } 1130 } 1131 1132 /* A zone has been found. Update its information */ 1133 static void 1134 zsd_mark_zone_found(zsd_ctl_t *ctl, zsd_zone_t *zone, uint64_t cpu_shares, 1135 uint64_t cpu_cap, uint64_t ram_cap, uint64_t locked_cap, 1136 uint64_t vm_cap, uint64_t processes_cap, uint64_t processes, 1137 uint64_t lwps_cap, uint64_t lwps, uint64_t shm_cap, uint64_t shm, 1138 uint64_t shmids_cap, uint64_t shmids, uint64_t semids_cap, 1139 uint64_t semids, uint64_t msgids_cap, uint64_t msgids, uint64_t lofi_cap, 1140 uint64_t lofi, char *poolname, char *psetname, uint_t sched, uint_t cputype, 1141 uint_t iptype) 1142 { 1143 zsd_system_t *sys = ctl->zsctl_system; 1144 1145 assert(zone->zsz_found == B_FALSE); 1146 1147 /* 1148 * Mark zone as exists, and new if it did not exist in previous 1149 * interval. 1150 */ 1151 zone->zsz_found = B_TRUE; 1152 zone->zsz_empty = B_TRUE; 1153 zone->zsz_deleted = B_FALSE; 1154 1155 /* 1156 * Zone is new. Assume zone's properties are the same over entire 1157 * interval. 
1158 */ 1159 if (zone->zsz_active == B_FALSE) 1160 zone->zsz_new = B_TRUE; 1161 else 1162 zone->zsz_new = B_FALSE; 1163 1164 (void) strlcpy(zone->zsz_pool, poolname, sizeof (zone->zsz_pool)); 1165 (void) strlcpy(zone->zsz_pset, psetname, sizeof (zone->zsz_pset)); 1166 zone->zsz_default_sched = sched; 1167 1168 /* Schedulers updated later as processes are found */ 1169 zone->zsz_scheds = 0; 1170 1171 /* Cpus updated later as psets bound are identified */ 1172 zone->zsz_cpus_online = 0; 1173 1174 zone->zsz_cputype = cputype; 1175 zone->zsz_iptype = iptype; 1176 zone->zsz_psetid = ZS_PSET_ERROR; 1177 zone->zsz_cpu_cap = cpu_cap; 1178 zone->zsz_cpu_shares = cpu_shares; 1179 zone->zsz_ram_cap = ram_cap; 1180 zone->zsz_locked_cap = locked_cap; 1181 zone->zsz_vm_cap = vm_cap; 1182 zone->zsz_processes_cap = processes_cap; 1183 zone->zsz_processes = processes; 1184 zone->zsz_lwps_cap = lwps_cap; 1185 zone->zsz_lwps = lwps; 1186 zone->zsz_shm_cap = shm_cap; 1187 zone->zsz_shm = shm; 1188 zone->zsz_shmids_cap = shmids_cap; 1189 zone->zsz_shmids = shmids; 1190 zone->zsz_semids_cap = semids_cap; 1191 zone->zsz_semids = semids; 1192 zone->zsz_msgids_cap = msgids_cap; 1193 zone->zsz_msgids = msgids; 1194 zone->zsz_lofi_cap = lofi_cap; 1195 zone->zsz_lofi = lofi; 1196 1197 sys->zss_processes += processes; 1198 sys->zss_lwps += lwps; 1199 sys->zss_shm += shm; 1200 sys->zss_shmids += shmids; 1201 sys->zss_semids += semids; 1202 sys->zss_msgids += msgids; 1203 sys->zss_lofi += lofi; 1204 zone->zsz_active = B_TRUE; 1205 } 1206 1207 1208 /* Determine which zones have halted */ 1209 static void 1210 zsd_mark_zones_end(zsd_ctl_t *ctl) 1211 { 1212 zsd_zone_t *zone, *tmp; 1213 1214 /* 1215 * Mark zone as not existing, or delete if it did not exist in 1216 * previous interval. 
1217 */ 1218 zone = list_head(&ctl->zsctl_zones); 1219 while (zone != NULL) { 1220 if (zone->zsz_found == B_FALSE) { 1221 zone->zsz_empty = B_TRUE; 1222 if (zone->zsz_deleted == B_TRUE) { 1223 /* 1224 * Zone deleted in prior interval, 1225 * so it no longer exists. 1226 */ 1227 tmp = zone; 1228 zone = list_next(&ctl->zsctl_zones, zone); 1229 list_remove(&ctl->zsctl_zones, tmp); 1230 free(tmp); 1231 ctl->zsctl_nzones--; 1232 continue; 1233 } else { 1234 zone->zsz_new = B_FALSE; 1235 zone->zsz_deleted = B_TRUE; 1236 zone->zsz_active = B_TRUE; 1237 } 1238 } 1239 zone = list_next(&ctl->zsctl_zones, zone); 1240 } 1241 } 1242 1243 /* 1244 * Mark cpus as not existing. If a cpu is found, it will be updated. If 1245 * a cpu is not found, then it must have gone offline, so it will be 1246 * deleted. 1247 * 1248 * The kstat tracking data is rolled so that the usage since the previous 1249 * interval can be determined. 1250 */ 1251 static void 1252 zsd_mark_cpus_start(zsd_ctl_t *ctl, boolean_t roll) 1253 { 1254 zsd_cpu_t *cpu; 1255 1256 /* 1257 * Mark all cpus as not existing. As cpus are found, they will 1258 * be marked as existing. 1259 */ 1260 for (cpu = list_head(&ctl->zsctl_cpus); cpu != NULL; 1261 cpu = list_next(&ctl->zsctl_cpus, cpu)) { 1262 cpu->zsc_found = B_FALSE; 1263 if (cpu->zsc_active == B_TRUE && roll) { 1264 cpu->zsc_psetid_prev = cpu->zsc_psetid; 1265 cpu->zsc_nsec_idle_prev = cpu->zsc_nsec_idle; 1266 cpu->zsc_nsec_intr_prev = cpu->zsc_nsec_intr; 1267 cpu->zsc_nsec_kern_prev = cpu->zsc_nsec_kern; 1268 cpu->zsc_nsec_user_prev = cpu->zsc_nsec_user; 1269 } 1270 } 1271 } 1272 1273 /* 1274 * An array the size of the maximum number of cpus is kept. Within this array 1275 * a list of the online cpus is maintained. 
1276 */ 1277 zsd_cpu_t * 1278 zsd_lookup_insert_cpu(zsd_ctl_t *ctl, processorid_t cpuid) 1279 { 1280 zsd_cpu_t *cpu; 1281 1282 assert(cpuid < ctl->zsctl_maxcpuid); 1283 cpu = &(ctl->zsctl_cpu_array[cpuid]); 1284 assert(cpuid == cpu->zsc_id); 1285 1286 if (cpu->zsc_allocated == B_FALSE) { 1287 cpu->zsc_allocated = B_TRUE; 1288 list_insert_tail(&ctl->zsctl_cpus, cpu); 1289 } 1290 return (cpu); 1291 } 1292 1293 /* A cpu has been found. Update its information */ 1294 static void 1295 zsd_mark_cpu_found(zsd_cpu_t *cpu, zsd_pset_t *pset, psetid_t psetid) 1296 { 1297 /* 1298 * legacy processor sets, the cpu may move while zonestatd is 1299 * inspecting, causing it to be found twice. In this case, just 1300 * leave cpu in the first processor set in which it was found. 1301 */ 1302 if (cpu->zsc_found == B_TRUE) 1303 return; 1304 1305 /* Mark cpu as online */ 1306 cpu->zsc_found = B_TRUE; 1307 cpu->zsc_offlined = B_FALSE; 1308 cpu->zsc_pset = pset; 1309 /* 1310 * cpu is newly online. 1311 */ 1312 if (cpu->zsc_active == B_FALSE) { 1313 /* 1314 * Cpu is newly online. 1315 */ 1316 cpu->zsc_onlined = B_TRUE; 1317 cpu->zsc_psetid = psetid; 1318 cpu->zsc_psetid_prev = psetid; 1319 } else { 1320 /* 1321 * cpu online during previous interval. Save properties at 1322 * start of interval 1323 */ 1324 cpu->zsc_onlined = B_FALSE; 1325 cpu->zsc_psetid = psetid; 1326 1327 } 1328 cpu->zsc_active = B_TRUE; 1329 } 1330 1331 /* Remove all offlined cpus from the list of tracked cpus */ 1332 static void 1333 zsd_mark_cpus_end(zsd_ctl_t *ctl) 1334 { 1335 zsd_cpu_t *cpu, *tmp; 1336 int id; 1337 1338 /* Mark cpu as online or offline */ 1339 cpu = list_head(&ctl->zsctl_cpus); 1340 while (cpu != NULL) { 1341 if (cpu->zsc_found == B_FALSE) { 1342 if (cpu->zsc_offlined == B_TRUE) { 1343 /* 1344 * cpu offlined in prior interval. It is gone. 
1345 */ 1346 tmp = cpu; 1347 cpu = list_next(&ctl->zsctl_cpus, cpu); 1348 list_remove(&ctl->zsctl_cpus, tmp); 1349 /* Clear structure for future use */ 1350 id = tmp->zsc_id; 1351 bzero(tmp, sizeof (zsd_cpu_t)); 1352 tmp->zsc_id = id; 1353 tmp->zsc_allocated = B_FALSE; 1354 tmp->zsc_psetid = ZS_PSET_ERROR; 1355 tmp->zsc_psetid_prev = ZS_PSET_ERROR; 1356 1357 } else { 1358 /* 1359 * cpu online at start of interval. Treat 1360 * as still online, since it was online for 1361 * some portion of the interval. 1362 */ 1363 cpu->zsc_offlined = B_TRUE; 1364 cpu->zsc_onlined = B_FALSE; 1365 cpu->zsc_active = B_TRUE; 1366 cpu->zsc_psetid = cpu->zsc_psetid_prev; 1367 cpu->zsc_pset = NULL; 1368 } 1369 } 1370 cpu = list_next(&ctl->zsctl_cpus, cpu); 1371 } 1372 } 1373 1374 /* Some utility functions for managing the list of processor sets */ 1375 static zsd_pset_t * 1376 zsd_lookup_pset_byid(zsd_ctl_t *ctl, psetid_t psetid) 1377 { 1378 zsd_pset_t *pset; 1379 1380 for (pset = list_head(&ctl->zsctl_psets); pset != NULL; 1381 pset = list_next(&ctl->zsctl_psets, pset)) { 1382 if (pset->zsp_id == psetid) 1383 return (pset); 1384 } 1385 return (NULL); 1386 } 1387 1388 static zsd_pset_t * 1389 zsd_lookup_pset(zsd_ctl_t *ctl, char *psetname, psetid_t psetid) 1390 { 1391 zsd_pset_t *pset; 1392 1393 for (pset = list_head(&ctl->zsctl_psets); pset != NULL; 1394 pset = list_next(&ctl->zsctl_psets, pset)) { 1395 if (strcmp(pset->zsp_name, psetname) == 0) { 1396 if (psetid != -1) 1397 pset->zsp_id = psetid; 1398 return (pset); 1399 } 1400 } 1401 return (NULL); 1402 } 1403 1404 static zsd_pset_t * 1405 zsd_allocate_pset(zsd_ctl_t *ctl, char *psetname, psetid_t psetid) 1406 { 1407 zsd_pset_t *pset; 1408 1409 if ((pset = (zsd_pset_t *)calloc(1, sizeof (zsd_pset_t))) == NULL) 1410 return (NULL); 1411 1412 (void) strlcpy(pset->zsp_name, psetname, sizeof (pset->zsp_name)); 1413 pset->zsp_id = psetid; 1414 pset->zsp_found = B_FALSE; 1415 /* 1416 * Allocate as deleted so if not found in first pass, pset 
is deleted 1417 * from list. This can happen if pset is returned by pset_list, but 1418 * is destroyed before first attempt to fetch pset details. 1419 */ 1420 list_create(&pset->zsp_usage_list, sizeof (zsd_pset_usage_t), 1421 offsetof(zsd_pset_usage_t, zsu_next)); 1422 1423 pset->zsp_hrstart = g_hrnow; 1424 pset->zsp_deleted = B_TRUE; 1425 pset->zsp_empty = B_TRUE; 1426 ctl->zsctl_npsets++; 1427 1428 return (pset); 1429 } 1430 1431 static zsd_pset_t * 1432 zsd_lookup_insert_pset(zsd_ctl_t *ctl, char *psetname, psetid_t psetid) 1433 { 1434 zsd_pset_t *pset, *tmp; 1435 1436 if ((pset = zsd_lookup_pset(ctl, psetname, psetid)) != NULL) 1437 return (pset); 1438 1439 if ((pset = zsd_allocate_pset(ctl, psetname, psetid)) == NULL) 1440 return (NULL); 1441 1442 /* Insert sorted by psetname */ 1443 tmp = list_head(&ctl->zsctl_psets); 1444 while (tmp != NULL && strcmp(psetname, tmp->zsp_name) > 0) 1445 tmp = list_next(&ctl->zsctl_psets, tmp); 1446 1447 list_insert_before(&ctl->zsctl_psets, tmp, pset); 1448 return (pset); 1449 } 1450 1451 /* Some utility functions for managing the list of zones using each pset */ 1452 static zsd_pset_usage_t * 1453 zsd_lookup_usage(zsd_pset_t *pset, zsd_zone_t *zone) 1454 { 1455 zsd_pset_usage_t *usage; 1456 1457 for (usage = list_head(&pset->zsp_usage_list); usage != NULL; 1458 usage = list_next(&pset->zsp_usage_list, usage)) 1459 if (usage->zsu_zone == zone) 1460 return (usage); 1461 1462 return (NULL); 1463 } 1464 1465 static zsd_pset_usage_t * 1466 zsd_allocate_pset_usage(zsd_ctl_t *ctl, zsd_pset_t *pset, zsd_zone_t *zone) 1467 { 1468 zsd_pset_usage_t *usage; 1469 1470 if ((usage = (zsd_pset_usage_t *)calloc(1, sizeof (zsd_pset_usage_t))) 1471 == NULL) 1472 return (NULL); 1473 1474 list_link_init(&usage->zsu_next); 1475 usage->zsu_zone = zone; 1476 usage->zsu_zoneid = zone->zsz_id; 1477 usage->zsu_pset = pset; 1478 usage->zsu_found = B_FALSE; 1479 usage->zsu_active = B_FALSE; 1480 usage->zsu_new = B_FALSE; 1481 /* 1482 * Allocate as not 
deleted. If a process is found in a pset for 1483 * a zone, the usage will not be deleted until at least the next 1484 * interval. 1485 */ 1486 usage->zsu_start = g_now; 1487 usage->zsu_hrstart = g_hrnow; 1488 usage->zsu_deleted = B_FALSE; 1489 usage->zsu_empty = B_TRUE; 1490 usage->zsu_scheds = 0; 1491 usage->zsu_cpu_shares = ZS_LIMIT_NONE; 1492 1493 ctl->zsctl_npset_usages++; 1494 pset->zsp_nusage++; 1495 1496 return (usage); 1497 } 1498 1499 static zsd_pset_usage_t * 1500 zsd_lookup_insert_usage(zsd_ctl_t *ctl, zsd_pset_t *pset, zsd_zone_t *zone) 1501 { 1502 zsd_pset_usage_t *usage, *tmp; 1503 1504 if ((usage = zsd_lookup_usage(pset, zone)) 1505 != NULL) 1506 return (usage); 1507 1508 if ((usage = zsd_allocate_pset_usage(ctl, pset, zone)) == NULL) 1509 return (NULL); 1510 1511 tmp = list_head(&pset->zsp_usage_list); 1512 while (tmp != NULL && strcmp(zone->zsz_name, tmp->zsu_zone->zsz_name) 1513 > 0) 1514 tmp = list_next(&pset->zsp_usage_list, tmp); 1515 1516 list_insert_before(&pset->zsp_usage_list, tmp, usage); 1517 return (usage); 1518 } 1519 1520 static void 1521 zsd_refresh_system(zsd_ctl_t *ctl) 1522 { 1523 zsd_system_t *system = ctl->zsctl_system; 1524 1525 /* Re-count these values each interval */ 1526 system->zss_processes = 0; 1527 system->zss_lwps = 0; 1528 system->zss_shm = 0; 1529 system->zss_shmids = 0; 1530 system->zss_semids = 0; 1531 system->zss_msgids = 0; 1532 system->zss_lofi = 0; 1533 } 1534 1535 1536 /* Reads each cpu's kstats, and adds the usage to the cpu's pset */ 1537 static void 1538 zsd_update_cpu_stats(zsd_ctl_t *ctl, zsd_cpu_t *cpu) 1539 { 1540 zsd_system_t *sys; 1541 processorid_t cpuid; 1542 zsd_pset_t *pset_prev; 1543 zsd_pset_t *pset; 1544 kstat_t *kstat; 1545 kstat_named_t *knp; 1546 kid_t kid; 1547 uint64_t idle, intr, kern, user; 1548 1549 sys = ctl->zsctl_system; 1550 pset = cpu->zsc_pset; 1551 knp = NULL; 1552 kid = -1; 1553 cpuid = cpu->zsc_id; 1554 1555 /* Get the cpu time totals for this cpu */ 1556 kstat = 
kstat_lookup(ctl->zsctl_kstat_ctl, "cpu", cpuid, "sys"); 1557 if (kstat == NULL) 1558 return; 1559 1560 kid = kstat_read(ctl->zsctl_kstat_ctl, kstat, NULL); 1561 if (kid == -1) 1562 return; 1563 1564 knp = kstat_data_lookup(kstat, "cpu_nsec_idle"); 1565 if (knp == NULL || knp->data_type != KSTAT_DATA_UINT64) 1566 return; 1567 1568 idle = knp->value.ui64; 1569 1570 knp = kstat_data_lookup(kstat, "cpu_nsec_kernel"); 1571 if (knp == NULL || knp->data_type != KSTAT_DATA_UINT64) 1572 return; 1573 1574 kern = knp->value.ui64; 1575 1576 knp = kstat_data_lookup(kstat, "cpu_nsec_user"); 1577 if (knp == NULL || knp->data_type != KSTAT_DATA_UINT64) 1578 return; 1579 1580 user = knp->value.ui64; 1581 1582 /* 1583 * Tracking intr time per cpu just exists for future enhancements. 1584 * The value is presently always zero. 1585 */ 1586 intr = 0; 1587 cpu->zsc_nsec_idle = idle; 1588 cpu->zsc_nsec_intr = intr; 1589 cpu->zsc_nsec_kern = kern; 1590 cpu->zsc_nsec_user = user; 1591 1592 if (cpu->zsc_onlined == B_TRUE) { 1593 /* 1594 * cpu is newly online. There is no reference value, 1595 * so just record its current stats for comparison 1596 * on next stat read. 1597 */ 1598 cpu->zsc_nsec_idle_prev = cpu->zsc_nsec_idle; 1599 cpu->zsc_nsec_intr_prev = cpu->zsc_nsec_intr; 1600 cpu->zsc_nsec_kern_prev = cpu->zsc_nsec_kern; 1601 cpu->zsc_nsec_user_prev = cpu->zsc_nsec_user; 1602 return; 1603 } 1604 1605 /* 1606 * Calculate relative time since previous refresh. 1607 * Paranoia. Don't let time go backwards. 
1608 */ 1609 idle = intr = kern = user = 0; 1610 if (cpu->zsc_nsec_idle > cpu->zsc_nsec_idle_prev) 1611 idle = cpu->zsc_nsec_idle - cpu->zsc_nsec_idle_prev; 1612 1613 if (cpu->zsc_nsec_intr > cpu->zsc_nsec_intr_prev) 1614 intr = cpu->zsc_nsec_intr - cpu->zsc_nsec_intr_prev; 1615 1616 if (cpu->zsc_nsec_kern > cpu->zsc_nsec_kern_prev) 1617 kern = cpu->zsc_nsec_kern - cpu->zsc_nsec_kern_prev; 1618 1619 if (cpu->zsc_nsec_user > cpu->zsc_nsec_user_prev) 1620 user = cpu->zsc_nsec_user - cpu->zsc_nsec_user_prev; 1621 1622 /* Update totals for cpu usage */ 1623 TIMESTRUC_ADD_NANOSEC(cpu->zsc_idle, idle); 1624 TIMESTRUC_ADD_NANOSEC(cpu->zsc_intr, intr); 1625 TIMESTRUC_ADD_NANOSEC(cpu->zsc_kern, kern); 1626 TIMESTRUC_ADD_NANOSEC(cpu->zsc_user, user); 1627 1628 /* 1629 * Add cpu's stats to its pset if it is known to be in 1630 * the pset since previous read. 1631 */ 1632 if (cpu->zsc_psetid == cpu->zsc_psetid_prev || 1633 cpu->zsc_psetid_prev == ZS_PSET_ERROR || 1634 (pset_prev = zsd_lookup_pset_byid(ctl, 1635 cpu->zsc_psetid_prev)) == NULL) { 1636 TIMESTRUC_ADD_NANOSEC(pset->zsp_idle, idle); 1637 TIMESTRUC_ADD_NANOSEC(pset->zsp_intr, intr); 1638 TIMESTRUC_ADD_NANOSEC(pset->zsp_kern, kern); 1639 TIMESTRUC_ADD_NANOSEC(pset->zsp_user, user); 1640 } else { 1641 /* 1642 * Last pset was different than current pset. 1643 * Best guess is to split usage between the two. 
1644 */ 1645 TIMESTRUC_ADD_NANOSEC(pset_prev->zsp_idle, idle / 2); 1646 TIMESTRUC_ADD_NANOSEC(pset_prev->zsp_intr, intr / 2); 1647 TIMESTRUC_ADD_NANOSEC(pset_prev->zsp_kern, kern / 2); 1648 TIMESTRUC_ADD_NANOSEC(pset_prev->zsp_user, user / 2); 1649 1650 TIMESTRUC_ADD_NANOSEC(pset->zsp_idle, 1651 (idle / 2) + (idle % 2)); 1652 TIMESTRUC_ADD_NANOSEC(pset->zsp_intr, 1653 (intr / 2) + (intr % 2)); 1654 TIMESTRUC_ADD_NANOSEC(pset->zsp_kern, 1655 (kern / 2) + (kern % 2)); 1656 TIMESTRUC_ADD_NANOSEC(pset->zsp_user, 1657 (user / 2) + (user % 2)); 1658 } 1659 TIMESTRUC_ADD_NANOSEC(sys->zss_idle, idle); 1660 TIMESTRUC_ADD_NANOSEC(sys->zss_intr, intr); 1661 TIMESTRUC_ADD_NANOSEC(sys->zss_kern, kern); 1662 TIMESTRUC_ADD_NANOSEC(sys->zss_user, user); 1663 } 1664 1665 /* Determine the details of a processor set by pset_id */ 1666 static int 1667 zsd_get_pool_pset(zsd_ctl_t *ctl, psetid_t psetid, char *psetname, 1668 size_t namelen, uint_t *cputype, uint64_t *online, uint64_t *size, 1669 uint64_t *min, uint64_t *max, int64_t *importance) 1670 { 1671 uint_t old, num; 1672 1673 pool_conf_t *conf = ctl->zsctl_pool_conf; 1674 pool_value_t **vals = ctl->zsctl_pool_vals; 1675 pool_resource_t **res_list = NULL; 1676 pool_resource_t *pset; 1677 pool_component_t **cpus = NULL; 1678 processorid_t *cache; 1679 const char *string; 1680 uint64_t uint64; 1681 int64_t int64; 1682 int i, ret, type; 1683 1684 if (ctl->zsctl_pool_status == POOL_DISABLED) { 1685 1686 /* 1687 * Inspect legacy psets 1688 */ 1689 for (;;) { 1690 old = num = ctl->zsctl_cpu_ncache; 1691 ret = pset_info(psetid, &type, &num, 1692 ctl->zsctl_cpu_cache); 1693 if (ret < 0) { 1694 /* pset is gone. 
Tell caller to retry */ 1695 errno = EINTR; 1696 return (-1); 1697 } 1698 if (num <= old) { 1699 /* Success */ 1700 break; 1701 } 1702 if ((cache = (processorid_t *)realloc( 1703 ctl->zsctl_cpu_cache, num * 1704 sizeof (processorid_t))) != NULL) { 1705 ctl->zsctl_cpu_ncache = num; 1706 ctl->zsctl_cpu_cache = cache; 1707 } else { 1708 /* 1709 * Could not allocate to get new cpu list. 1710 */ 1711 zsd_warn(gettext( 1712 "Could not allocate for cpu list")); 1713 errno = ENOMEM; 1714 return (-1); 1715 } 1716 } 1717 /* 1718 * Old school pset. Just make min and max equal 1719 * to its size 1720 */ 1721 if (psetid == ZS_PSET_DEFAULT) { 1722 *cputype = ZS_CPUTYPE_DEFAULT_PSET; 1723 (void) strlcpy(psetname, "pset_default", namelen); 1724 } else { 1725 *cputype = ZS_CPUTYPE_PSRSET_PSET; 1726 (void) snprintf(psetname, namelen, 1727 "SUNWlegacy_pset_%d", psetid); 1728 } 1729 1730 /* 1731 * Just treat legacy pset as a simple pool pset 1732 */ 1733 *online = num; 1734 *size = num; 1735 *min = num; 1736 *max = num; 1737 *importance = 1; 1738 1739 return (0); 1740 } 1741 1742 /* Look up the pool pset using the pset id */ 1743 res_list = NULL; 1744 pool_value_set_int64(vals[1], psetid); 1745 if (pool_value_set_name(vals[1], "pset.sys_id") 1746 != PO_SUCCESS) 1747 goto err; 1748 1749 if (pool_value_set_name(vals[0], "type") != PO_SUCCESS) 1750 goto err; 1751 if (pool_value_set_string(vals[0], "pset") != PO_SUCCESS) 1752 goto err; 1753 if ((res_list = pool_query_resources(conf, &num, vals)) == NULL) 1754 goto err; 1755 if (num != 1) 1756 goto err; 1757 pset = res_list[0]; 1758 free(res_list); 1759 res_list = NULL; 1760 if (pool_get_property(conf, pool_resource_to_elem(conf, pset), 1761 "pset.name", vals[0]) != POC_STRING || 1762 pool_value_get_string(vals[0], &string) != PO_SUCCESS) 1763 goto err; 1764 1765 (void) strlcpy(psetname, string, namelen); 1766 if (strncmp(psetname, "SUNWtmp", strlen("SUNWtmp")) == 0) 1767 *cputype = ZS_CPUTYPE_DEDICATED; 1768 else if (psetid == 
ZS_PSET_DEFAULT) 1769 *cputype = ZS_CPUTYPE_DEFAULT_PSET; 1770 else 1771 *cputype = ZS_CPUTYPE_POOL_PSET; 1772 1773 /* Get size, min, max, and importance */ 1774 if (pool_get_property(conf, pool_resource_to_elem(conf, 1775 pset), "pset.size", vals[0]) == POC_UINT && 1776 pool_value_get_uint64(vals[0], &uint64) == PO_SUCCESS) 1777 *size = uint64; 1778 else 1779 *size = 0; 1780 1781 /* Get size, min, max, and importance */ 1782 if (pool_get_property(conf, pool_resource_to_elem(conf, 1783 pset), "pset.min", vals[0]) == POC_UINT && 1784 pool_value_get_uint64(vals[0], &uint64) == PO_SUCCESS) 1785 *min = uint64; 1786 else 1787 *min = 0; 1788 if (*min >= ZSD_PSET_UNLIMITED) 1789 *min = ZS_LIMIT_NONE; 1790 1791 if (pool_get_property(conf, pool_resource_to_elem(conf, 1792 pset), "pset.max", vals[0]) == POC_UINT && 1793 pool_value_get_uint64(vals[0], &uint64) == PO_SUCCESS) 1794 *max = uint64; 1795 else 1796 *max = ZS_LIMIT_NONE; 1797 1798 if (*max >= ZSD_PSET_UNLIMITED) 1799 *max = ZS_LIMIT_NONE; 1800 1801 if (pool_get_property(conf, pool_resource_to_elem(conf, 1802 pset), "pset.importance", vals[0]) == POC_INT && 1803 pool_value_get_int64(vals[0], &int64) == PO_SUCCESS) 1804 *importance = int64; 1805 else 1806 *importance = (uint64_t)1; 1807 1808 *online = 0; 1809 if (*size == 0) 1810 return (0); 1811 1812 /* get cpus */ 1813 cpus = pool_query_resource_components(conf, pset, &num, NULL); 1814 if (cpus == NULL) 1815 goto err; 1816 1817 /* Make sure there is space for cpu id list */ 1818 if (num > ctl->zsctl_cpu_ncache) { 1819 if ((cache = (processorid_t *)realloc( 1820 ctl->zsctl_cpu_cache, num * 1821 sizeof (processorid_t))) != NULL) { 1822 ctl->zsctl_cpu_ncache = num; 1823 ctl->zsctl_cpu_cache = cache; 1824 } else { 1825 /* 1826 * Could not allocate to get new cpu list. 
1827 */ 1828 zsd_warn(gettext( 1829 "Could not allocate for cpu list")); 1830 goto err; 1831 } 1832 } 1833 1834 /* count the online cpus */ 1835 for (i = 0; i < num; i++) { 1836 if (pool_get_property(conf, pool_component_to_elem( 1837 conf, cpus[i]), "cpu.status", vals[0]) != POC_STRING || 1838 pool_value_get_string(vals[0], &string) != PO_SUCCESS) 1839 goto err; 1840 1841 if (strcmp(string, "on-line") != 0 && 1842 strcmp(string, "no-intr") != 0) 1843 continue; 1844 1845 if (pool_get_property(conf, pool_component_to_elem( 1846 conf, cpus[i]), "cpu.sys_id", vals[0]) != POC_INT || 1847 pool_value_get_int64(vals[0], &int64) != PO_SUCCESS) 1848 goto err; 1849 1850 (*online)++; 1851 ctl->zsctl_cpu_cache[i] = (psetid_t)int64; 1852 } 1853 free(cpus); 1854 return (0); 1855 err: 1856 if (res_list != NULL) 1857 free(res_list); 1858 if (cpus != NULL) 1859 free(cpus); 1860 1861 /* 1862 * The pools operations should succeed since the conf is a consistent 1863 * snapshot. Tell caller there is no need to retry. 1864 */ 1865 errno = EINVAL; 1866 return (-1); 1867 } 1868 1869 /* 1870 * Update the current list of processor sets. 1871 * This also updates the list of online cpus, and each cpu's pset membership. 
1872 */ 1873 static void 1874 zsd_refresh_psets(zsd_ctl_t *ctl) 1875 { 1876 int i, j, ret, state; 1877 uint_t old, num; 1878 uint_t cputype; 1879 int64_t sys_id, importance; 1880 uint64_t online, size, min, max; 1881 zsd_system_t *system; 1882 zsd_pset_t *pset; 1883 zsd_cpu_t *cpu; 1884 psetid_t *cache; 1885 char psetname[ZS_PSETNAME_MAX]; 1886 processorid_t cpuid; 1887 pool_value_t *pv_save = NULL; 1888 pool_resource_t **res_list = NULL; 1889 pool_resource_t *res; 1890 pool_value_t **vals; 1891 pool_conf_t *conf; 1892 boolean_t roll_cpus = B_TRUE; 1893 1894 /* Zero cpu counters to recount them */ 1895 system = ctl->zsctl_system; 1896 system->zss_ncpus = 0; 1897 system->zss_ncpus_online = 0; 1898 retry: 1899 ret = pool_get_status(&state); 1900 if (ret == 0 && state == POOL_ENABLED) { 1901 1902 conf = ctl->zsctl_pool_conf; 1903 vals = ctl->zsctl_pool_vals; 1904 pv_save = vals[1]; 1905 vals[1] = NULL; 1906 1907 if (ctl->zsctl_pool_status == POOL_DISABLED) { 1908 if (pool_conf_open(ctl->zsctl_pool_conf, 1909 pool_dynamic_location(), PO_RDONLY) == 0) { 1910 ctl->zsctl_pool_status = POOL_ENABLED; 1911 ctl->zsctl_pool_changed = POU_PSET; 1912 } 1913 } else { 1914 ctl->zsctl_pool_changed = 0; 1915 ret = pool_conf_update(ctl->zsctl_pool_conf, 1916 &(ctl->zsctl_pool_changed)); 1917 if (ret < 0) { 1918 /* Pools must have become disabled */ 1919 (void) pool_conf_close(ctl->zsctl_pool_conf); 1920 ctl->zsctl_pool_status = POOL_DISABLED; 1921 if (pool_error() == POE_SYSTEM && errno == 1922 ENOTACTIVE) 1923 goto retry; 1924 1925 zsd_warn(gettext( 1926 "Unable to update pool configuration")); 1927 /* Not able to get pool info. Don't update. 
*/ 1928 goto err; 1929 } 1930 } 1931 /* Get the list of psets using libpool */ 1932 if (pool_value_set_name(vals[0], "type") != PO_SUCCESS) 1933 goto err; 1934 1935 if (pool_value_set_string(vals[0], "pset") != PO_SUCCESS) 1936 goto err; 1937 if ((res_list = pool_query_resources(conf, &num, vals)) 1938 == NULL) 1939 goto err; 1940 1941 if (num > ctl->zsctl_pset_ncache) { 1942 if ((cache = (psetid_t *)realloc(ctl->zsctl_pset_cache, 1943 (num) * sizeof (psetid_t))) == NULL) { 1944 goto err; 1945 } 1946 ctl->zsctl_pset_ncache = num; 1947 ctl->zsctl_pset_cache = cache; 1948 } 1949 /* Save the pset id of each pset */ 1950 for (i = 0; i < num; i++) { 1951 res = res_list[i]; 1952 if (pool_get_property(conf, pool_resource_to_elem(conf, 1953 res), "pset.sys_id", vals[0]) != POC_INT || 1954 pool_value_get_int64(vals[0], &sys_id) 1955 != PO_SUCCESS) 1956 goto err; 1957 ctl->zsctl_pset_cache[i] = (int)sys_id; 1958 } 1959 vals[1] = pv_save; 1960 pv_save = NULL; 1961 } else { 1962 if (ctl->zsctl_pool_status == POOL_ENABLED) { 1963 (void) pool_conf_close(ctl->zsctl_pool_conf); 1964 ctl->zsctl_pool_status = POOL_DISABLED; 1965 } 1966 /* Get the pset list using legacy psets */ 1967 for (;;) { 1968 old = num = ctl->zsctl_pset_ncache; 1969 (void) pset_list(ctl->zsctl_pset_cache, &num); 1970 if ((num + 1) <= old) { 1971 break; 1972 } 1973 if ((cache = (psetid_t *)realloc(ctl->zsctl_pset_cache, 1974 (num + 1) * sizeof (psetid_t))) != NULL) { 1975 ctl->zsctl_pset_ncache = num + 1; 1976 ctl->zsctl_pset_cache = cache; 1977 } else { 1978 /* 1979 * Could not allocate to get new pset list. 
1980 * Give up 1981 */ 1982 return; 1983 } 1984 } 1985 /* Add the default pset to list */ 1986 ctl->zsctl_pset_cache[num] = ctl->zsctl_pset_cache[0]; 1987 ctl->zsctl_pset_cache[0] = ZS_PSET_DEFAULT; 1988 num++; 1989 } 1990 psets_changed: 1991 zsd_mark_cpus_start(ctl, roll_cpus); 1992 zsd_mark_psets_start(ctl); 1993 roll_cpus = B_FALSE; 1994 1995 /* Refresh cpu membership of all psets */ 1996 for (i = 0; i < num; i++) { 1997 1998 /* Get pool pset information */ 1999 sys_id = ctl->zsctl_pset_cache[i]; 2000 if (zsd_get_pool_pset(ctl, sys_id, psetname, sizeof (psetname), 2001 &cputype, &online, &size, &min, &max, &importance) 2002 != 0) { 2003 if (errno == EINTR) 2004 goto psets_changed; 2005 zsd_warn(gettext("Failed to get info for pset %d"), 2006 sys_id); 2007 continue; 2008 } 2009 2010 system->zss_ncpus += size; 2011 system->zss_ncpus_online += online; 2012 2013 pset = zsd_lookup_insert_pset(ctl, psetname, 2014 ctl->zsctl_pset_cache[i]); 2015 2016 /* update pset info */ 2017 zsd_mark_pset_found(pset, cputype, online, size, min, 2018 max, importance); 2019 2020 /* update each cpu in pset */ 2021 for (j = 0; j < pset->zsp_online; j++) { 2022 cpuid = ctl->zsctl_cpu_cache[j]; 2023 cpu = zsd_lookup_insert_cpu(ctl, cpuid); 2024 zsd_mark_cpu_found(cpu, pset, sys_id); 2025 } 2026 } 2027 err: 2028 if (res_list != NULL) 2029 free(res_list); 2030 if (pv_save != NULL) 2031 vals[1] = pv_save; 2032 } 2033 2034 2035 2036 /* 2037 * Fetch the current pool and pset name for the given zone. 
2038 */ 2039 static void 2040 zsd_get_zone_pool_pset(zsd_ctl_t *ctl, zsd_zone_t *zone, 2041 char *pool, int poollen, char *pset, int psetlen, uint_t *cputype) 2042 { 2043 poolid_t poolid; 2044 pool_t **pools = NULL; 2045 pool_resource_t **res_list = NULL; 2046 char poolname[ZS_POOLNAME_MAX]; 2047 char psetname[ZS_PSETNAME_MAX]; 2048 pool_conf_t *conf = ctl->zsctl_pool_conf; 2049 pool_value_t *pv_save = NULL; 2050 pool_value_t **vals = ctl->zsctl_pool_vals; 2051 const char *string; 2052 int ret; 2053 int64_t int64; 2054 uint_t num; 2055 2056 ret = zone_getattr(zone->zsz_id, ZONE_ATTR_POOLID, 2057 &poolid, sizeof (poolid)); 2058 if (ret < 0) 2059 goto lookup_done; 2060 2061 pv_save = vals[1]; 2062 vals[1] = NULL; 2063 pools = NULL; 2064 res_list = NULL; 2065 2066 /* Default values if lookup fails */ 2067 (void) strlcpy(poolname, "pool_default", sizeof (poolname)); 2068 (void) strlcpy(psetname, "pset_default", sizeof (poolname)); 2069 *cputype = ZS_CPUTYPE_DEFAULT_PSET; 2070 2071 /* no dedicated cpu if pools are disabled */ 2072 if (ctl->zsctl_pool_status == POOL_DISABLED) 2073 goto lookup_done; 2074 2075 /* Get the pool name using the id */ 2076 pool_value_set_int64(vals[0], poolid); 2077 if (pool_value_set_name(vals[0], "pool.sys_id") != PO_SUCCESS) 2078 goto lookup_done; 2079 2080 if ((pools = pool_query_pools(conf, &num, vals)) == NULL) 2081 goto lookup_done; 2082 2083 if (num != 1) 2084 goto lookup_done; 2085 2086 if (pool_get_property(conf, pool_to_elem(conf, pools[0]), 2087 "pool.name", vals[0]) != POC_STRING || 2088 pool_value_get_string(vals[0], &string) != PO_SUCCESS) 2089 goto lookup_done; 2090 (void) strlcpy(poolname, (char *)string, sizeof (poolname)); 2091 2092 /* Get the name of the pset for the pool */ 2093 if (pool_value_set_name(vals[0], "type") != PO_SUCCESS) 2094 goto lookup_done; 2095 2096 if (pool_value_set_string(vals[0], "pset") != PO_SUCCESS) 2097 goto lookup_done; 2098 2099 if ((res_list = pool_query_pool_resources(conf, pools[0], &num, 
vals)) 2100 == NULL) 2101 goto lookup_done; 2102 2103 if (num != 1) 2104 goto lookup_done; 2105 2106 if (pool_get_property(conf, pool_resource_to_elem(conf, 2107 res_list[0]), "pset.sys_id", vals[0]) != POC_INT || 2108 pool_value_get_int64(vals[0], &int64) != PO_SUCCESS) 2109 goto lookup_done; 2110 2111 if (int64 == ZS_PSET_DEFAULT) 2112 *cputype = ZS_CPUTYPE_DEFAULT_PSET; 2113 2114 if (pool_get_property(conf, pool_resource_to_elem(conf, 2115 res_list[0]), "pset.name", vals[0]) != POC_STRING || 2116 pool_value_get_string(vals[0], &string) != PO_SUCCESS) 2117 goto lookup_done; 2118 2119 (void) strlcpy(psetname, (char *)string, sizeof (psetname)); 2120 2121 if (strncmp(psetname, "SUNWtmp_", strlen("SUNWtmp_")) == 0) 2122 *cputype = ZS_CPUTYPE_DEDICATED; 2123 if (strncmp(psetname, "SUNW_legacy_", strlen("SUNW_legacy_")) == 0) 2124 *cputype = ZS_CPUTYPE_PSRSET_PSET; 2125 else 2126 *cputype = ZS_CPUTYPE_POOL_PSET; 2127 2128 lookup_done: 2129 2130 if (pv_save != NULL) 2131 vals[1] = pv_save; 2132 2133 if (res_list) 2134 free(res_list); 2135 if (pools) 2136 free(pools); 2137 2138 (void) strlcpy(pool, poolname, poollen); 2139 (void) strlcpy(pset, psetname, psetlen); 2140 } 2141 2142 /* Convert scheduler names to ZS_* scheduler flags */ 2143 static uint_t 2144 zsd_schedname2int(char *clname, int pri) 2145 { 2146 uint_t sched = 0; 2147 2148 if (strcmp(clname, "TS") == 0) { 2149 sched = ZS_SCHED_TS; 2150 } else if (strcmp(clname, "IA") == 0) { 2151 sched = ZS_SCHED_IA; 2152 } else if (strcmp(clname, "FX") == 0) { 2153 if (pri > 59) { 2154 sched = ZS_SCHED_FX_60; 2155 } else { 2156 sched = ZS_SCHED_FX; 2157 } 2158 } else if (strcmp(clname, "RT") == 0) { 2159 sched = ZS_SCHED_RT; 2160 2161 } else if (strcmp(clname, "FSS") == 0) { 2162 sched = ZS_SCHED_FSS; 2163 } 2164 return (sched); 2165 } 2166 2167 static uint64_t 2168 zsd_get_zone_rctl_limit(char *name) 2169 { 2170 rctlblk_t *rblk; 2171 2172 rblk = (rctlblk_t *)alloca(rctlblk_size()); 2173 if (getrctl(name, NULL, rblk, 
RCTL_FIRST) 2174 != 0) { 2175 return (ZS_LIMIT_NONE); 2176 } 2177 return (rctlblk_get_value(rblk)); 2178 } 2179 2180 static uint64_t 2181 zsd_get_zone_rctl_usage(char *name) 2182 { 2183 rctlblk_t *rblk; 2184 2185 rblk = (rctlblk_t *)alloca(rctlblk_size()); 2186 if (getrctl(name, NULL, rblk, RCTL_USAGE) 2187 != 0) { 2188 return (0); 2189 } 2190 return (rctlblk_get_value(rblk)); 2191 } 2192 2193 #define ZSD_NUM_RCTL_VALS 19 2194 2195 /* 2196 * Fetch the limit information for a zone. This uses zone_enter() as the 2197 * getrctl(2) system call only returns rctl information for the zone of 2198 * the caller. 2199 */ 2200 static int 2201 zsd_get_zone_caps(zsd_ctl_t *ctl, zsd_zone_t *zone, uint64_t *cpu_shares, 2202 uint64_t *cpu_cap, uint64_t *ram_cap, uint64_t *locked_cap, 2203 uint64_t *vm_cap, uint64_t *processes_cap, uint64_t *processes, 2204 uint64_t *lwps_cap, uint64_t *lwps, uint64_t *shm_cap, uint64_t *shm, 2205 uint64_t *shmids_cap, uint64_t *shmids, uint64_t *semids_cap, 2206 uint64_t *semids, uint64_t *msgids_cap, uint64_t *msgids, 2207 uint64_t *lofi_cap, uint64_t *lofi, uint_t *sched) 2208 { 2209 int p[2], pid, tmpl_fd, ret; 2210 ctid_t ct; 2211 char class[PC_CLNMSZ]; 2212 uint64_t vals[ZSD_NUM_RCTL_VALS]; 2213 zsd_system_t *sys = ctl->zsctl_system; 2214 int i = 0; 2215 int res = 0; 2216 2217 /* Treat all caps as no cap on error */ 2218 *cpu_shares = ZS_LIMIT_NONE; 2219 *cpu_cap = ZS_LIMIT_NONE; 2220 *ram_cap = ZS_LIMIT_NONE; 2221 *locked_cap = ZS_LIMIT_NONE; 2222 *vm_cap = ZS_LIMIT_NONE; 2223 2224 *processes_cap = ZS_LIMIT_NONE; 2225 *lwps_cap = ZS_LIMIT_NONE; 2226 *shm_cap = ZS_LIMIT_NONE; 2227 *shmids_cap = ZS_LIMIT_NONE; 2228 *semids_cap = ZS_LIMIT_NONE; 2229 *msgids_cap = ZS_LIMIT_NONE; 2230 *lofi_cap = ZS_LIMIT_NONE; 2231 2232 *processes = 0; 2233 *lwps = 0; 2234 *shm = 0; 2235 *shmids = 0; 2236 *semids = 0; 2237 *msgids = 0; 2238 *lofi = 0; 2239 2240 /* Get the ram cap first since it is a zone attr */ 2241 ret = zone_getattr(zone->zsz_id, 
ZONE_ATTR_PHYS_MCAP, 2242 ram_cap, sizeof (*ram_cap)); 2243 if (ret < 0 || *ram_cap == 0) 2244 *ram_cap = ZS_LIMIT_NONE; 2245 2246 /* Get the zone's default scheduling class */ 2247 ret = zone_getattr(zone->zsz_id, ZONE_ATTR_SCHED_CLASS, 2248 class, sizeof (class)); 2249 if (ret < 0) 2250 return (-1); 2251 2252 *sched = zsd_schedname2int(class, 0); 2253 2254 /* rctl caps must be fetched from within the zone */ 2255 if (pipe(p) != 0) 2256 return (-1); 2257 2258 if ((tmpl_fd = init_template()) == -1) { 2259 (void) close(p[0]); 2260 (void) close(p[1]); 2261 return (-1); 2262 } 2263 pid = forkx(0); 2264 if (pid < 0) { 2265 (void) ct_tmpl_clear(tmpl_fd); 2266 (void) close(p[0]); 2267 (void) close(p[1]); 2268 return (-1); 2269 } 2270 if (pid == 0) { 2271 2272 (void) ct_tmpl_clear(tmpl_fd); 2273 (void) close(tmpl_fd); 2274 (void) close(p[0]); 2275 if (zone->zsz_id != getzoneid()) { 2276 if (zone_enter(zone->zsz_id) < 0) { 2277 (void) close(p[1]); 2278 _exit(0); 2279 } 2280 } 2281 2282 /* Get caps for zone, and write them to zonestatd parent. 
*/ 2283 vals[i++] = zsd_get_zone_rctl_limit("zone.cpu-shares"); 2284 vals[i++] = zsd_get_zone_rctl_limit("zone.cpu-cap"); 2285 vals[i++] = zsd_get_zone_rctl_limit("zone.max-locked-memory"); 2286 vals[i++] = zsd_get_zone_rctl_limit("zone.max-swap"); 2287 vals[i++] = zsd_get_zone_rctl_limit("zone.max-processes"); 2288 vals[i++] = zsd_get_zone_rctl_usage("zone.max-processes"); 2289 vals[i++] = zsd_get_zone_rctl_limit("zone.max-lwps"); 2290 vals[i++] = zsd_get_zone_rctl_usage("zone.max-lwps"); 2291 vals[i++] = zsd_get_zone_rctl_limit("zone.max-shm-memory"); 2292 vals[i++] = zsd_get_zone_rctl_usage("zone.max-shm-memory"); 2293 vals[i++] = zsd_get_zone_rctl_limit("zone.max-shm-ids"); 2294 vals[i++] = zsd_get_zone_rctl_usage("zone.max-shm-ids"); 2295 vals[i++] = zsd_get_zone_rctl_limit("zone.max-sem-ids"); 2296 vals[i++] = zsd_get_zone_rctl_usage("zone.max-sem-ids"); 2297 vals[i++] = zsd_get_zone_rctl_limit("zone.max-msg-ids"); 2298 vals[i++] = zsd_get_zone_rctl_usage("zone.max-msg-ids"); 2299 vals[i++] = zsd_get_zone_rctl_limit("zone.max-lofi"); 2300 vals[i++] = zsd_get_zone_rctl_usage("zone.max-lofi"); 2301 2302 if (write(p[1], vals, ZSD_NUM_RCTL_VALS * sizeof (uint64_t)) != 2303 ZSD_NUM_RCTL_VALS * sizeof (uint64_t)) { 2304 (void) close(p[1]); 2305 _exit(1); 2306 } 2307 2308 (void) close(p[1]); 2309 _exit(0); 2310 } 2311 if (contract_latest(&ct) == -1) 2312 ct = -1; 2313 2314 (void) ct_tmpl_clear(tmpl_fd); 2315 (void) close(tmpl_fd); 2316 (void) close(p[1]); 2317 while (waitpid(pid, NULL, 0) != pid) 2318 ; 2319 2320 /* Read cap from child in zone */ 2321 if (read(p[0], vals, ZSD_NUM_RCTL_VALS * sizeof (uint64_t)) != 2322 ZSD_NUM_RCTL_VALS * sizeof (uint64_t)) { 2323 res = -1; 2324 goto cleanup; 2325 } 2326 i = 0; 2327 *cpu_shares = vals[i++]; 2328 *cpu_cap = vals[i++]; 2329 *locked_cap = vals[i++]; 2330 *vm_cap = vals[i++]; 2331 *processes_cap = vals[i++]; 2332 *processes = vals[i++]; 2333 *lwps_cap = vals[i++]; 2334 *lwps = vals[i++]; 2335 *shm_cap = vals[i++]; 2336 
*shm = vals[i++]; 2337 *shmids_cap = vals[i++]; 2338 *shmids = vals[i++]; 2339 *semids_cap = vals[i++]; 2340 *semids = vals[i++]; 2341 *msgids_cap = vals[i++]; 2342 *msgids = vals[i++]; 2343 *lofi_cap = vals[i++]; 2344 *lofi = vals[i++]; 2345 2346 /* Interpret maximum values as no cap */ 2347 if (*cpu_cap == UINT32_MAX || *cpu_cap == 0) 2348 *cpu_cap = ZS_LIMIT_NONE; 2349 if (*processes_cap == sys->zss_processes_max) 2350 *processes_cap = ZS_LIMIT_NONE; 2351 if (*lwps_cap == sys->zss_lwps_max) 2352 *lwps_cap = ZS_LIMIT_NONE; 2353 if (*shm_cap == sys->zss_shm_max) 2354 *shm_cap = ZS_LIMIT_NONE; 2355 if (*shmids_cap == sys->zss_shmids_max) 2356 *shmids_cap = ZS_LIMIT_NONE; 2357 if (*semids_cap == sys->zss_semids_max) 2358 *semids_cap = ZS_LIMIT_NONE; 2359 if (*msgids_cap == sys->zss_msgids_max) 2360 *msgids_cap = ZS_LIMIT_NONE; 2361 if (*lofi_cap == sys->zss_lofi_max) 2362 *lofi_cap = ZS_LIMIT_NONE; 2363 2364 2365 cleanup: 2366 (void) close(p[0]); 2367 (void) ct_tmpl_clear(tmpl_fd); 2368 (void) close(tmpl_fd); 2369 (void) contract_abandon_id(ct); 2370 2371 return (res); 2372 } 2373 2374 /* Update the current list of running zones */ 2375 static void 2376 zsd_refresh_zones(zsd_ctl_t *ctl) 2377 { 2378 zsd_zone_t *zone; 2379 uint_t old, num; 2380 ushort_t flags; 2381 int i, ret; 2382 zoneid_t *cache; 2383 uint64_t cpu_shares; 2384 uint64_t cpu_cap; 2385 uint64_t ram_cap; 2386 uint64_t locked_cap; 2387 uint64_t vm_cap; 2388 uint64_t processes_cap; 2389 uint64_t processes; 2390 uint64_t lwps_cap; 2391 uint64_t lwps; 2392 uint64_t shm_cap; 2393 uint64_t shm; 2394 uint64_t shmids_cap; 2395 uint64_t shmids; 2396 uint64_t semids_cap; 2397 uint64_t semids; 2398 uint64_t msgids_cap; 2399 uint64_t msgids; 2400 uint64_t lofi_cap; 2401 uint64_t lofi; 2402 2403 char zonename[ZS_ZONENAME_MAX]; 2404 char poolname[ZS_POOLNAME_MAX]; 2405 char psetname[ZS_PSETNAME_MAX]; 2406 uint_t sched; 2407 uint_t cputype; 2408 uint_t iptype; 2409 2410 /* Get the current list of running zones */ 2411 
for (;;) { 2412 old = num = ctl->zsctl_zone_ncache; 2413 (void) zone_list(ctl->zsctl_zone_cache, &num); 2414 if (num <= old) 2415 break; 2416 if ((cache = (zoneid_t *)realloc(ctl->zsctl_zone_cache, 2417 (num) * sizeof (zoneid_t))) != NULL) { 2418 ctl->zsctl_zone_ncache = num; 2419 ctl->zsctl_zone_cache = cache; 2420 } else { 2421 /* Could not allocate to get new zone list. Give up */ 2422 return; 2423 } 2424 } 2425 2426 zsd_mark_zones_start(ctl); 2427 2428 for (i = 0; i < num; i++) { 2429 2430 ret = getzonenamebyid(ctl->zsctl_zone_cache[i], 2431 zonename, sizeof (zonename)); 2432 if (ret < 0) 2433 continue; 2434 2435 zone = zsd_lookup_insert_zone(ctl, zonename, 2436 ctl->zsctl_zone_cache[i]); 2437 2438 ret = zone_getattr(ctl->zsctl_zone_cache[i], ZONE_ATTR_FLAGS, 2439 &flags, sizeof (flags)); 2440 if (ret < 0) 2441 continue; 2442 2443 if (flags & ZF_NET_EXCL) 2444 iptype = ZS_IPTYPE_EXCLUSIVE; 2445 else 2446 iptype = ZS_IPTYPE_SHARED; 2447 2448 zsd_get_zone_pool_pset(ctl, zone, poolname, sizeof (poolname), 2449 psetname, sizeof (psetname), &cputype); 2450 2451 if (zsd_get_zone_caps(ctl, zone, &cpu_shares, &cpu_cap, 2452 &ram_cap, &locked_cap, &vm_cap, &processes_cap, &processes, 2453 &lwps_cap, &lwps, &shm_cap, &shm, &shmids_cap, &shmids, 2454 &semids_cap, &semids, &msgids_cap, &msgids, &lofi_cap, 2455 &lofi, &sched) != 0) 2456 continue; 2457 2458 zsd_mark_zone_found(ctl, zone, cpu_shares, cpu_cap, ram_cap, 2459 locked_cap, vm_cap, processes_cap, processes, lwps_cap, 2460 lwps, shm_cap, shm, shmids_cap, shmids, semids_cap, 2461 semids, msgids_cap, msgids, lofi_cap, lofi, poolname, 2462 psetname, sched, cputype, iptype); 2463 } 2464 } 2465 2466 /* Fetch the details of a process from its psinfo_t */ 2467 static void 2468 zsd_get_proc_info(zsd_ctl_t *ctl, psinfo_t *psinfo, psetid_t *psetid, 2469 psetid_t *prev_psetid, zoneid_t *zoneid, zoneid_t *prev_zoneid, 2470 timestruc_t *delta, uint_t *sched) 2471 { 2472 timestruc_t d; 2473 zsd_proc_t *proc; 2474 2475 /* Get 
cached data for proc */ 2476 proc = &(ctl->zsctl_proc_array[psinfo->pr_pid]); 2477 *psetid = psinfo->pr_lwp.pr_bindpset; 2478 2479 if (proc->zspr_psetid == ZS_PSET_ERROR) 2480 *prev_psetid = *psetid; 2481 else 2482 *prev_psetid = proc->zspr_psetid; 2483 2484 *zoneid = psinfo->pr_zoneid; 2485 if (proc->zspr_zoneid == -1) 2486 *prev_zoneid = *zoneid; 2487 else 2488 *prev_zoneid = proc->zspr_zoneid; 2489 2490 TIMESTRUC_DELTA(d, psinfo->pr_time, proc->zspr_usage); 2491 *delta = d; 2492 2493 *sched = zsd_schedname2int(psinfo->pr_lwp.pr_clname, 2494 psinfo->pr_lwp.pr_pri); 2495 2496 /* Update cached data for proc */ 2497 proc->zspr_psetid = psinfo->pr_lwp.pr_bindpset; 2498 proc->zspr_zoneid = psinfo->pr_zoneid; 2499 proc->zspr_sched = *sched; 2500 proc->zspr_usage.tv_sec = psinfo->pr_time.tv_sec; 2501 proc->zspr_usage.tv_nsec = psinfo->pr_time.tv_nsec; 2502 proc->zspr_ppid = psinfo->pr_ppid; 2503 } 2504 2505 /* 2506 * Reset the known cpu usage of a process. This is done after a process 2507 * exits so that if the pid is recycled, data from its previous life is 2508 * not reused 2509 */ 2510 static void 2511 zsd_flush_proc_info(zsd_proc_t *proc) 2512 { 2513 proc->zspr_usage.tv_sec = 0; 2514 proc->zspr_usage.tv_nsec = 0; 2515 } 2516 2517 /* 2518 * Open the current extended accounting file. On initialization, open the 2519 * file as the current file to be used. Otherwise, open the file as the 2520 * next file to use of the current file reaches EOF. 2521 */ 2522 static int 2523 zsd_open_exacct(zsd_ctl_t *ctl, boolean_t init) 2524 { 2525 int ret, oret, state, trys = 0, flags; 2526 int *fd, *open; 2527 ea_file_t *eaf; 2528 struct stat64 *stat; 2529 char path[MAXPATHLEN]; 2530 2531 /* 2532 * The accounting file is first opened at the tail. Following 2533 * opens to new accounting files are opened at the head. 
2534 */ 2535 if (init == B_TRUE) { 2536 flags = EO_NO_VALID_HDR | EO_TAIL; 2537 fd = &ctl->zsctl_proc_fd; 2538 eaf = &ctl->zsctl_proc_eaf; 2539 stat = &ctl->zsctl_proc_stat; 2540 open = &ctl->zsctl_proc_open; 2541 } else { 2542 flags = EO_NO_VALID_HDR | EO_HEAD; 2543 fd = &ctl->zsctl_proc_fd_next; 2544 eaf = &ctl->zsctl_proc_eaf_next; 2545 stat = &ctl->zsctl_proc_stat_next; 2546 open = &ctl->zsctl_proc_open_next; 2547 } 2548 2549 *fd = -1; 2550 *open = 0; 2551 retry: 2552 /* open accounting files for cpu consumption */ 2553 ret = acctctl(AC_STATE_GET | AC_PROC, &state, sizeof (state)); 2554 if (ret != 0) { 2555 zsd_warn(gettext("Unable to get process accounting state")); 2556 goto err; 2557 } 2558 if (state != AC_ON) { 2559 if (trys > 0) { 2560 zsd_warn(gettext( 2561 "Unable to enable process accounting")); 2562 goto err; 2563 } 2564 (void) zsd_enable_cpu_stats(); 2565 trys++; 2566 goto retry; 2567 } 2568 2569 ret = acctctl(AC_FILE_GET | AC_PROC, path, sizeof (path)); 2570 if (ret != 0) { 2571 zsd_warn(gettext("Unable to get process accounting file")); 2572 goto err; 2573 } 2574 2575 if ((*fd = open64(path, O_RDONLY, 0)) >= 0 && 2576 (oret = ea_fdopen(eaf, *fd, NULL, flags, O_RDONLY)) == 0) 2577 ret = fstat64(*fd, stat); 2578 2579 if (*fd < 0 || oret < 0 || ret < 0) { 2580 struct timespec ts; 2581 2582 /* 2583 * It is possible the accounting file is momentarily unavailable 2584 * because it is being rolled. Try for up to half a second. 2585 * 2586 * If failure to open accounting file persists, give up. 
2587 */ 2588 if (oret == 0) 2589 (void) ea_close(eaf); 2590 else if (*fd >= 0) 2591 (void) close(*fd); 2592 if (trys > 500) { 2593 zsd_warn(gettext( 2594 "Unable to open process accounting file")); 2595 goto err; 2596 } 2597 /* wait one millisecond */ 2598 ts.tv_sec = 0; 2599 ts.tv_nsec = NANOSEC / 1000; 2600 (void) nanosleep(&ts, NULL); 2601 goto retry; 2602 } 2603 *open = 1; 2604 return (0); 2605 err: 2606 if (*fd >= 0) 2607 (void) close(*fd); 2608 *open = 0; 2609 *fd = -1; 2610 return (-1); 2611 } 2612 2613 /* 2614 * Walk /proc and charge each process to its zone and processor set. 2615 * Then read exacct data for exited processes, and charge them as well. 2616 */ 2617 static void 2618 zsd_refresh_procs(zsd_ctl_t *ctl, boolean_t init) 2619 { 2620 DIR *dir; 2621 struct dirent *dent; 2622 psinfo_t psinfo; 2623 int fd, ret; 2624 zsd_proc_t *proc, *pproc, *tmp, *next; 2625 list_t pplist, plist; 2626 zsd_zone_t *zone, *prev_zone; 2627 zsd_pset_t *pset, *prev_pset; 2628 psetid_t psetid, prev_psetid; 2629 zoneid_t zoneid, prev_zoneid; 2630 zsd_pset_usage_t *usage, *prev_usage; 2631 char path[MAXPATHLEN]; 2632 2633 ea_object_t object; 2634 ea_object_t pobject; 2635 boolean_t hrtime_expired = B_FALSE; 2636 struct timeval interval_end; 2637 2638 timestruc_t delta, d1, d2; 2639 uint_t sched = 0; 2640 2641 /* 2642 * Get the current accounting file. The current accounting file 2643 * may be different than the file in use, as the accounting file 2644 * may have been rolled, or manually changed by an admin. 2645 */ 2646 ret = zsd_open_exacct(ctl, init); 2647 if (ret != 0) { 2648 zsd_warn(gettext("Unable to track process accounting")); 2649 return; 2650 } 2651 2652 /* 2653 * Mark the current time as the interval end time. Don't track 2654 * processes that exit after this time. 
2655 */ 2656 (void) gettimeofday(&interval_end, NULL); 2657 2658 dir = opendir("/proc"); 2659 if (dir == NULL) { 2660 zsd_warn(gettext("Unable to open /proc")); 2661 return; 2662 } 2663 2664 dent = ctl->zsctl_procfs_dent; 2665 2666 (void) memset(dent, 0, ctl->zsctl_procfs_dent_size); 2667 2668 /* Walk all processes and compute each zone's usage on each pset. */ 2669 while (readdir_r(dir, dent) != 0) { 2670 2671 if (strcmp(dent->d_name, ".") == 0 || 2672 strcmp(dent->d_name, "..") == 0) 2673 continue; 2674 2675 (void) snprintf(path, sizeof (path), "/proc/%s/psinfo", 2676 dent->d_name); 2677 2678 fd = open(path, O_RDONLY); 2679 if (fd < 0) 2680 continue; 2681 2682 if (read(fd, &psinfo, sizeof (psinfo)) != sizeof (psinfo)) { 2683 (void) close(fd); 2684 continue; 2685 } 2686 (void) close(fd); 2687 2688 zsd_get_proc_info(ctl, &psinfo, &psetid, &prev_psetid, 2689 &zoneid, &prev_zoneid, &delta, &sched); 2690 2691 d1.tv_sec = delta.tv_sec / 2; 2692 d1.tv_nsec = delta.tv_nsec / 2; 2693 d2.tv_sec = (delta.tv_sec / 2) + (delta.tv_sec % 2); 2694 d2.tv_nsec = (delta.tv_nsec / 2) + (delta.tv_nsec % 2); 2695 2696 /* Get the zone and pset this process is running in */ 2697 zone = zsd_lookup_zone_byid(ctl, zoneid); 2698 if (zone == NULL) 2699 continue; 2700 pset = zsd_lookup_pset_byid(ctl, psetid); 2701 if (pset == NULL) 2702 continue; 2703 usage = zsd_lookup_insert_usage(ctl, pset, zone); 2704 if (usage == NULL) 2705 continue; 2706 2707 /* 2708 * Get the usage of the previous zone and pset if they were 2709 * different. 
2710 */ 2711 if (zoneid != prev_zoneid) 2712 prev_zone = zsd_lookup_zone_byid(ctl, prev_zoneid); 2713 else 2714 prev_zone = NULL; 2715 2716 if (psetid != prev_psetid) 2717 prev_pset = zsd_lookup_pset_byid(ctl, prev_psetid); 2718 else 2719 prev_pset = NULL; 2720 2721 prev_usage = NULL; 2722 if (prev_zone != NULL || prev_pset != NULL) { 2723 if (prev_zone == NULL) 2724 prev_zone = zone; 2725 if (prev_pset == NULL) 2726 prev_pset = pset; 2727 2728 prev_usage = zsd_lookup_insert_usage(ctl, prev_pset, 2729 prev_zone); 2730 } 2731 2732 /* Update the usage with the processes info */ 2733 if (prev_usage == NULL) { 2734 zsd_mark_pset_usage_found(usage, sched); 2735 } else { 2736 zsd_mark_pset_usage_found(usage, sched); 2737 zsd_mark_pset_usage_found(prev_usage, sched); 2738 } 2739 2740 /* 2741 * First time around is just to get a starting point. All 2742 * usages will be zero. 2743 */ 2744 if (init == B_TRUE) 2745 continue; 2746 2747 if (prev_usage == NULL) { 2748 zsd_add_usage(ctl, usage, &delta); 2749 } else { 2750 zsd_add_usage(ctl, usage, &d1); 2751 zsd_add_usage(ctl, prev_usage, &d2); 2752 } 2753 } 2754 (void) closedir(dir); 2755 2756 /* 2757 * No need to collect exited proc data on initialization. Just 2758 * caching the usage of the known processes to get a zero starting 2759 * point. 2760 */ 2761 if (init == B_TRUE) 2762 return; 2763 2764 /* 2765 * Add accounting records to account for processes which have 2766 * exited. 
2767 */ 2768 list_create(&plist, sizeof (zsd_proc_t), 2769 offsetof(zsd_proc_t, zspr_next)); 2770 list_create(&pplist, sizeof (zsd_proc_t), 2771 offsetof(zsd_proc_t, zspr_next)); 2772 2773 for (;;) { 2774 pid_t pid; 2775 pid_t ppid; 2776 timestruc_t user, sys, proc_usage; 2777 timestruc_t finish; 2778 int numfound = 0; 2779 2780 bzero(&object, sizeof (object)); 2781 proc = NULL; 2782 zone = NULL; 2783 pset = NULL; 2784 usage = NULL; 2785 ret = ea_get_object(&ctl->zsctl_proc_eaf, &object); 2786 if (ret == EO_ERROR) { 2787 if (ea_error() == EXR_EOF) { 2788 2789 struct stat64 *stat; 2790 struct stat64 *stat_next; 2791 2792 /* 2793 * See if the next accounting file is the 2794 * same as the current accounting file. 2795 */ 2796 stat = &(ctl->zsctl_proc_stat); 2797 stat_next = &(ctl->zsctl_proc_stat_next); 2798 if (stat->st_ino == stat_next->st_ino && 2799 stat->st_dev == stat_next->st_dev) { 2800 /* 2801 * End of current accounting file is 2802 * reached, so finished. Clear EOF 2803 * bit for next time around. 2804 */ 2805 ea_clear(&ctl->zsctl_proc_eaf); 2806 break; 2807 } else { 2808 /* 2809 * Accounting file has changed. Move 2810 * to current accounting file. 2811 */ 2812 (void) ea_close(&ctl->zsctl_proc_eaf); 2813 2814 ctl->zsctl_proc_fd = 2815 ctl->zsctl_proc_fd_next; 2816 ctl->zsctl_proc_eaf = 2817 ctl->zsctl_proc_eaf_next; 2818 ctl->zsctl_proc_stat = 2819 ctl->zsctl_proc_stat_next; 2820 2821 ctl->zsctl_proc_fd_next = -1; 2822 ctl->zsctl_proc_open_next = 0; 2823 continue; 2824 } 2825 } else { 2826 /* 2827 * Other accounting error. Give up on 2828 * accounting. 
2829 */ 2830 goto ea_err; 2831 } 2832 } 2833 /* Skip if not a process group */ 2834 if ((object.eo_catalog & EXT_TYPE_MASK) != EXT_GROUP || 2835 (object.eo_catalog & EXD_DATA_MASK) != EXD_GROUP_PROC) { 2836 (void) ea_free_item(&object, EUP_ALLOC); 2837 continue; 2838 } 2839 2840 /* The process group entry should be complete */ 2841 while (numfound < 9) { 2842 bzero(&pobject, sizeof (pobject)); 2843 ret = ea_get_object(&ctl->zsctl_proc_eaf, 2844 &pobject); 2845 if (ret < 0) { 2846 (void) ea_free_item(&object, EUP_ALLOC); 2847 zsd_warn( 2848 "unable to get process accounting data"); 2849 goto ea_err; 2850 } 2851 /* Next entries should be process data */ 2852 if ((pobject.eo_catalog & EXT_TYPE_MASK) == 2853 EXT_GROUP) { 2854 (void) ea_free_item(&object, EUP_ALLOC); 2855 (void) ea_free_item(&pobject, EUP_ALLOC); 2856 zsd_warn( 2857 "process data of wrong type"); 2858 goto ea_err; 2859 } 2860 switch (pobject.eo_catalog & EXD_DATA_MASK) { 2861 case EXD_PROC_PID: 2862 pid = pobject.eo_item.ei_uint32; 2863 proc = &(ctl->zsctl_proc_array[pid]); 2864 /* 2865 * This process should not be currently in 2866 * the list of processes to process. 
2867 */ 2868 assert(!list_link_active(&proc->zspr_next)); 2869 numfound++; 2870 break; 2871 case EXD_PROC_ANCPID: 2872 ppid = pobject.eo_item.ei_uint32; 2873 pproc = &(ctl->zsctl_proc_array[ppid]); 2874 numfound++; 2875 break; 2876 case EXD_PROC_ZONENAME: 2877 zone = zsd_lookup_zone(ctl, 2878 pobject.eo_item.ei_string, -1); 2879 numfound++; 2880 break; 2881 case EXD_PROC_CPU_USER_SEC: 2882 user.tv_sec = 2883 pobject.eo_item.ei_uint64; 2884 numfound++; 2885 break; 2886 case EXD_PROC_CPU_USER_NSEC: 2887 user.tv_nsec = 2888 pobject.eo_item.ei_uint64; 2889 numfound++; 2890 break; 2891 case EXD_PROC_CPU_SYS_SEC: 2892 sys.tv_sec = 2893 pobject.eo_item.ei_uint64; 2894 numfound++; 2895 break; 2896 case EXD_PROC_CPU_SYS_NSEC: 2897 sys.tv_nsec = 2898 pobject.eo_item.ei_uint64; 2899 numfound++; 2900 break; 2901 case EXD_PROC_FINISH_SEC: 2902 finish.tv_sec = 2903 pobject.eo_item.ei_uint64; 2904 numfound++; 2905 break; 2906 case EXD_PROC_FINISH_NSEC: 2907 finish.tv_nsec = 2908 pobject.eo_item.ei_uint64; 2909 numfound++; 2910 break; 2911 } 2912 (void) ea_free_item(&pobject, EUP_ALLOC); 2913 } 2914 (void) ea_free_item(&object, EUP_ALLOC); 2915 if (numfound != 9) { 2916 zsd_warn(gettext( 2917 "Malformed process accounting entry found")); 2918 goto proc_done; 2919 } 2920 2921 if (finish.tv_sec > interval_end.tv_sec || 2922 (finish.tv_sec == interval_end.tv_sec && 2923 finish.tv_nsec > (interval_end.tv_usec * 1000))) 2924 hrtime_expired = B_TRUE; 2925 2926 /* 2927 * Try to identify the zone and pset to which this 2928 * exited process belongs. 2929 */ 2930 if (zone == NULL) 2931 goto proc_done; 2932 2933 /* Save proc info */ 2934 proc->zspr_ppid = ppid; 2935 proc->zspr_zoneid = zone->zsz_id; 2936 2937 prev_psetid = ZS_PSET_ERROR; 2938 sched = 0; 2939 2940 /* 2941 * The following tries to deduce the processes pset. 2942 * 2943 * First choose pset and sched using cached value from the 2944 * most recent time the process has been seen. 
2945 * 2946 * pset and sched can change across zone_enter, so make sure 2947 * most recent sighting of this process was in the same 2948 * zone before using most recent known value. 2949 * 2950 * If there is no known value, use value of processes 2951 * parent. If parent is unknown, walk parents until a known 2952 * parent is found. 2953 * 2954 * If no parent in the zone is found, use the zone's default 2955 * pset and scheduling class. 2956 */ 2957 if (proc->zspr_psetid != ZS_PSET_ERROR) { 2958 prev_psetid = proc->zspr_psetid; 2959 pset = zsd_lookup_pset_byid(ctl, prev_psetid); 2960 sched = proc->zspr_sched; 2961 } else if (pproc->zspr_zoneid == zone->zsz_id && 2962 pproc->zspr_psetid != ZS_PSET_ERROR) { 2963 prev_psetid = pproc->zspr_psetid; 2964 pset = zsd_lookup_pset_byid(ctl, prev_psetid); 2965 sched = pproc->zspr_sched; 2966 } 2967 2968 if (pset == NULL) { 2969 /* 2970 * Process or processes parent has never been seen. 2971 * Save to deduce a known parent later. 2972 */ 2973 proc_usage = sys; 2974 TIMESTRUC_ADD_TIMESTRUC(proc_usage, user); 2975 TIMESTRUC_DELTA(delta, proc_usage, 2976 proc->zspr_usage); 2977 proc->zspr_usage = delta; 2978 list_insert_tail(&plist, proc); 2979 continue; 2980 } 2981 2982 /* Add the zone's usage to the pset */ 2983 usage = zsd_lookup_insert_usage(ctl, pset, zone); 2984 if (usage == NULL) 2985 goto proc_done; 2986 2987 zsd_mark_pset_usage_found(usage, sched); 2988 2989 /* compute the usage to add for the exited proc */ 2990 proc_usage = sys; 2991 TIMESTRUC_ADD_TIMESTRUC(proc_usage, user); 2992 TIMESTRUC_DELTA(delta, proc_usage, 2993 proc->zspr_usage); 2994 2995 zsd_add_usage(ctl, usage, &delta); 2996 proc_done: 2997 zsd_flush_proc_info(proc); 2998 2999 if (hrtime_expired == B_TRUE) 3000 break; 3001 } 3002 /* 3003 * close next accounting file. 
3004 */ 3005 if (ctl->zsctl_proc_open_next) { 3006 (void) ea_close( 3007 &ctl->zsctl_proc_eaf_next); 3008 ctl->zsctl_proc_open_next = 0; 3009 ctl->zsctl_proc_fd_next = -1; 3010 } 3011 3012 /* For the remaining processes, use pset and sched of a known parent */ 3013 proc = list_head(&plist); 3014 while (proc != NULL) { 3015 next = proc; 3016 for (;;) { 3017 if (next->zspr_ppid == 0 || next->zspr_ppid == -1) { 3018 /* 3019 * Kernel process, or parent is unknown, skip 3020 * process, remove from process list. 3021 */ 3022 tmp = proc; 3023 proc = list_next(&plist, proc); 3024 list_link_init(&tmp->zspr_next); 3025 break; 3026 } 3027 pproc = &(ctl->zsctl_proc_array[next->zspr_ppid]); 3028 if (pproc->zspr_zoneid != proc->zspr_zoneid) { 3029 /* 3030 * Parent in different zone. Save process and 3031 * use zone's default pset and sched below 3032 */ 3033 tmp = proc; 3034 proc = list_next(&plist, proc); 3035 list_remove(&plist, tmp); 3036 list_insert_tail(&pplist, tmp); 3037 break; 3038 } 3039 /* Parent has unknown pset, Search parent's parent */ 3040 if (pproc->zspr_psetid == ZS_PSET_ERROR) { 3041 next = pproc; 3042 continue; 3043 } 3044 /* Found parent with known pset. 
Use its info */ 3045 proc->zspr_psetid = pproc->zspr_psetid; 3046 proc->zspr_sched = pproc->zspr_sched; 3047 next->zspr_psetid = pproc->zspr_psetid; 3048 next->zspr_sched = pproc->zspr_sched; 3049 zone = zsd_lookup_zone_byid(ctl, 3050 proc->zspr_zoneid); 3051 if (zone == NULL) { 3052 tmp = proc; 3053 proc = list_next(&plist, proc); 3054 list_remove(&plist, tmp); 3055 list_link_init(&tmp->zspr_next); 3056 break; 3057 } 3058 pset = zsd_lookup_pset_byid(ctl, 3059 proc->zspr_psetid); 3060 if (pset == NULL) { 3061 tmp = proc; 3062 proc = list_next(&plist, proc); 3063 list_remove(&plist, tmp); 3064 list_link_init(&tmp->zspr_next); 3065 break; 3066 } 3067 /* Add the zone's usage to the pset */ 3068 usage = zsd_lookup_insert_usage(ctl, pset, zone); 3069 if (usage == NULL) { 3070 tmp = proc; 3071 proc = list_next(&plist, proc); 3072 list_remove(&plist, tmp); 3073 list_link_init(&tmp->zspr_next); 3074 break; 3075 } 3076 zsd_mark_pset_usage_found(usage, proc->zspr_sched); 3077 zsd_add_usage(ctl, usage, &proc->zspr_usage); 3078 zsd_flush_proc_info(proc); 3079 tmp = proc; 3080 proc = list_next(&plist, proc); 3081 list_remove(&plist, tmp); 3082 list_link_init(&tmp->zspr_next); 3083 break; 3084 } 3085 } 3086 /* 3087 * Process has never been seen. Using zone info to 3088 * determine pset and scheduling class. 3089 */ 3090 proc = list_head(&pplist); 3091 while (proc != NULL) { 3092 3093 zone = zsd_lookup_zone_byid(ctl, proc->zspr_zoneid); 3094 if (zone == NULL) 3095 goto next; 3096 if (zone->zsz_psetid != ZS_PSET_ERROR && 3097 zone->zsz_psetid != ZS_PSET_MULTI) { 3098 prev_psetid = zone->zsz_psetid; 3099 pset = zsd_lookup_pset_byid(ctl, prev_psetid); 3100 } else { 3101 pset = zsd_lookup_pset(ctl, zone->zsz_pset, -1); 3102 if (pset != NULL) 3103 prev_psetid = pset->zsp_id; 3104 } 3105 if (pset == NULL) 3106 goto next; 3107 3108 sched = zone->zsz_scheds; 3109 /* 3110 * Ignore FX high scheduling class if it is not the 3111 * only scheduling class in the zone. 
3112 */ 3113 if (sched != ZS_SCHED_FX_60) 3114 sched &= (~ZS_SCHED_FX_60); 3115 /* 3116 * If more than one scheduling class has been found 3117 * in the zone, use zone's default scheduling class for 3118 * this process. 3119 */ 3120 if ((sched & (sched - 1)) != 0) 3121 sched = zone->zsz_default_sched; 3122 3123 /* Add the zone's usage to the pset */ 3124 usage = zsd_lookup_insert_usage(ctl, pset, zone); 3125 if (usage == NULL) 3126 goto next; 3127 3128 zsd_mark_pset_usage_found(usage, sched); 3129 zsd_add_usage(ctl, usage, &proc->zspr_usage); 3130 next: 3131 tmp = proc; 3132 proc = list_next(&pplist, proc); 3133 zsd_flush_proc_info(tmp); 3134 list_link_init(&tmp->zspr_next); 3135 } 3136 return; 3137 ea_err: 3138 /* 3139 * Close the next accounting file if we have not transitioned to it 3140 * yet. 3141 */ 3142 if (ctl->zsctl_proc_open_next) { 3143 (void) ea_close(&ctl->zsctl_proc_eaf_next); 3144 ctl->zsctl_proc_open_next = 0; 3145 ctl->zsctl_proc_fd_next = -1; 3146 } 3147 } 3148 3149 /* 3150 * getvmusage(2) uses size_t's in the passwd data structure, which differ 3151 * in size for 32bit and 64 bit kernels. Since this is a contracted interface, 3152 * and zonestatd does not necessarily match the kernel's bitness, marshal 3153 * results appropriately. 3154 */ 3155 static int 3156 zsd_getvmusage(zsd_ctl_t *ctl, uint_t flags, time_t age, zsd_vmusage64_t *buf, 3157 uint64_t *nres) 3158 { 3159 zsd_vmusage32_t *vmu32; 3160 zsd_vmusage64_t *vmu64; 3161 uint32_t nres32; 3162 int i; 3163 int ret; 3164 3165 if (ctl->zsctl_kern_bits == 32) { 3166 nres32 = *nres; 3167 ret = syscall(SYS_rusagesys, _RUSAGESYS_GETVMUSAGE, 3168 flags, age, (uintptr_t)buf, (uintptr_t)&nres32); 3169 *nres = nres32; 3170 if (ret == 0 && buf != NULL) { 3171 /* 3172 * An array of vmusage32_t's has been returned. 3173 * Convert it to an array of vmusage64_t's. 
3174 */ 3175 vmu32 = (zsd_vmusage32_t *)buf; 3176 vmu64 = (zsd_vmusage64_t *)buf; 3177 for (i = nres32 - 1; i >= 0; i--) { 3178 3179 vmu64[i].vmu_zoneid = vmu32[i].vmu_zoneid; 3180 vmu64[i].vmu_type = vmu32[i].vmu_type; 3181 vmu64[i].vmu_type = vmu32[i].vmu_type; 3182 vmu64[i].vmu_rss_all = vmu32[i].vmu_rss_all; 3183 vmu64[i].vmu_rss_private = 3184 vmu32[i].vmu_rss_private; 3185 vmu64[i].vmu_rss_shared = 3186 vmu32[i].vmu_rss_shared; 3187 vmu64[i].vmu_swap_all = vmu32[i].vmu_swap_all; 3188 vmu64[i].vmu_swap_private = 3189 vmu32[i].vmu_swap_private; 3190 vmu64[i].vmu_swap_shared = 3191 vmu32[i].vmu_swap_shared; 3192 } 3193 } 3194 return (ret); 3195 } else { 3196 /* 3197 * kernel is 64 bit, so use 64 bit structures as zonestat 3198 * expects. 3199 */ 3200 return (syscall(SYS_rusagesys, _RUSAGESYS_GETVMUSAGE, 3201 flags, age, (uintptr_t)buf, (uintptr_t)nres)); 3202 3203 } 3204 } 3205 3206 /* 3207 * Update the current physical, virtual, and locked memory usage of the 3208 * running zones. 3209 */ 3210 static void 3211 zsd_refresh_memory(zsd_ctl_t *ctl, boolean_t init) 3212 { 3213 3214 uint64_t phys_total; 3215 uint64_t phys_used; 3216 uint64_t phys_zones; 3217 uint64_t phys_zones_overcount; 3218 uint64_t phys_zones_extra; 3219 uint64_t phys_zones_credit; 3220 3221 uint64_t vm_free; 3222 uint64_t vm_used; 3223 3224 uint64_t disk_swap_total; 3225 uint64_t disk_swap_used; /* disk swap with contents */ 3226 3227 uint64_t physmem; 3228 uint64_t pp_kernel; 3229 uint64_t arc_size = 0; 3230 struct anoninfo ani; 3231 3232 int num_swap_devices; 3233 struct swaptable *swt; 3234 struct swapent *swent; 3235 size_t swt_size; 3236 char *path; 3237 3238 zsd_vmusage64_t *vmusage; 3239 uint64_t num_vmusage; 3240 3241 int i, ret; 3242 3243 zsd_system_t *sys; 3244 zsd_zone_t *zone; 3245 int vmu_nzones; 3246 3247 kstat_t *kstat; 3248 char kstat_name[KSTAT_STRLEN]; 3249 kstat_named_t *knp; 3250 kid_t kid; 3251 3252 if (init) 3253 return; 3254 3255 sys = ctl->zsctl_system; 3256 3257 /* 
interrogate swap devices to find the amount of disk swap */ 3258 disk_swap_again: 3259 num_swap_devices = swapctl(SC_GETNSWP, NULL); 3260 3261 if (num_swap_devices == 0) { 3262 sys->zss_swap_total = disk_swap_total = 0; 3263 sys->zss_swap_used = disk_swap_used = 0; 3264 /* No disk swap */ 3265 goto disk_swap_done; 3266 } 3267 /* see if swap table needs to be larger */ 3268 if (num_swap_devices > ctl->zsctl_swap_cache_num) { 3269 swt_size = sizeof (int) + 3270 (num_swap_devices * sizeof (struct swapent)) + 3271 (num_swap_devices * MAXPATHLEN); 3272 if (ctl->zsctl_swap_cache != NULL) 3273 free(ctl->zsctl_swap_cache); 3274 3275 swt = (struct swaptable *)malloc(swt_size); 3276 if (swt == NULL) { 3277 /* 3278 * Could not allocate to get list of swap devices. 3279 * Just use data from the most recent read, which will 3280 * be zero if this is the first read. 3281 */ 3282 zsd_warn(gettext("Unable to allocate to determine " 3283 "virtual memory")); 3284 disk_swap_total = sys->zss_swap_total; 3285 disk_swap_used = sys->zss_swap_used; 3286 goto disk_swap_done; 3287 } 3288 swent = swt->swt_ent; 3289 path = (char *)swt + (sizeof (int) + 3290 num_swap_devices * sizeof (swapent_t)); 3291 for (i = 0; i < num_swap_devices; i++, swent++) { 3292 swent->ste_path = path; 3293 path += MAXPATHLEN; 3294 } 3295 swt->swt_n = num_swap_devices; 3296 ctl->zsctl_swap_cache = swt; 3297 ctl->zsctl_swap_cache_size = swt_size; 3298 ctl->zsctl_swap_cache_num = num_swap_devices; 3299 } 3300 num_swap_devices = swapctl(SC_LIST, ctl->zsctl_swap_cache); 3301 if (num_swap_devices < 0) { 3302 /* More swap devices have arrived */ 3303 if (errno == ENOMEM) 3304 goto disk_swap_again; 3305 3306 zsd_warn(gettext("Unable to determine disk swap devices")); 3307 /* Unexpected error. 
Use existing data */ 3308 disk_swap_total = sys->zss_swap_total; 3309 disk_swap_used = sys->zss_swap_used; 3310 goto disk_swap_done; 3311 } 3312 3313 /* add up the disk swap */ 3314 disk_swap_total = 0; 3315 disk_swap_used = 0; 3316 swent = ctl->zsctl_swap_cache->swt_ent; 3317 for (i = 0; i < num_swap_devices; i++, swent++) { 3318 disk_swap_total += swent->ste_pages; 3319 disk_swap_used += (swent->ste_pages - swent->ste_free); 3320 } 3321 disk_swap_total *= ctl->zsctl_pagesize; 3322 disk_swap_used *= ctl->zsctl_pagesize; 3323 3324 sys->zss_swap_total = disk_swap_total; 3325 sys->zss_swap_used = disk_swap_used; 3326 3327 disk_swap_done: 3328 3329 /* get system pages kstat */ 3330 kid = -1; 3331 kstat = kstat_lookup(ctl->zsctl_kstat_ctl, "unix", 0, "system_pages"); 3332 if (kstat == NULL) 3333 zsd_warn(gettext("Unable to lookup system pages kstat")); 3334 else 3335 kid = kstat_read(ctl->zsctl_kstat_ctl, kstat, NULL); 3336 3337 if (kid == -1) { 3338 zsd_warn(gettext("Unable to read system pages kstat")); 3339 return; 3340 } else { 3341 knp = kstat_data_lookup(kstat, "physmem"); 3342 if (knp == NULL) { 3343 zsd_warn(gettext("Unable to read physmem")); 3344 } else { 3345 if (knp->data_type == KSTAT_DATA_UINT64) 3346 physmem = knp->value.ui64; 3347 else if (knp->data_type == KSTAT_DATA_UINT32) 3348 physmem = knp->value.ui32; 3349 else 3350 return; 3351 } 3352 knp = kstat_data_lookup(kstat, "pp_kernel"); 3353 if (knp == NULL) { 3354 zsd_warn(gettext("Unable to read pp_kernel")); 3355 } else { 3356 if (knp->data_type == KSTAT_DATA_UINT64) 3357 pp_kernel = knp->value.ui64; 3358 else if (knp->data_type == KSTAT_DATA_UINT32) 3359 pp_kernel = knp->value.ui32; 3360 else 3361 return; 3362 } 3363 } 3364 physmem *= ctl->zsctl_pagesize; 3365 pp_kernel *= ctl->zsctl_pagesize; 3366 3367 /* get the zfs arc size if available */ 3368 arc_size = 0; 3369 kid = -1; 3370 kstat = kstat_lookup(ctl->zsctl_kstat_ctl, "zfs", 0, "arcstats"); 3371 if (kstat != NULL) 3372 kid = 
kstat_read(ctl->zsctl_kstat_ctl, kstat, NULL); 3373 if (kid != -1) { 3374 knp = kstat_data_lookup(kstat, "size"); 3375 if (knp != NULL) 3376 if (knp->data_type == KSTAT_DATA_UINT64) 3377 arc_size = knp->value.ui64; 3378 } 3379 3380 /* Try to get swap information */ 3381 if (swapctl(SC_AINFO, &ani) < 0) { 3382 zsd_warn(gettext("Unable to get swap info")); 3383 return; 3384 } 3385 3386 vmusage_again: 3387 /* getvmusage to get physical memory usage */ 3388 vmusage = ctl->zsctl_vmusage_cache; 3389 num_vmusage = ctl->zsctl_vmusage_cache_num; 3390 3391 ret = zsd_getvmusage(ctl, VMUSAGE_SYSTEM | VMUSAGE_ALL_ZONES, 0, 3392 vmusage, &num_vmusage); 3393 3394 if (ret != 0) { 3395 /* Unexpected error. Use existing data */ 3396 if (errno != EOVERFLOW) { 3397 zsd_warn(gettext( 3398 "Unable to read physical memory usage")); 3399 phys_zones = sys->zss_ram_zones; 3400 goto vmusage_done; 3401 } 3402 } 3403 /* vmusage results cache too small */ 3404 if (num_vmusage > ctl->zsctl_vmusage_cache_num) { 3405 3406 size_t size = sizeof (zsd_vmusage64_t) * num_vmusage; 3407 3408 if (ctl->zsctl_vmusage_cache != NULL) 3409 free(ctl->zsctl_vmusage_cache); 3410 vmusage = (zsd_vmusage64_t *)malloc(size); 3411 if (vmusage == NULL) { 3412 zsd_warn(gettext("Unable to alloc to determine " 3413 "physical memory usage")); 3414 phys_zones = sys->zss_ram_zones; 3415 goto vmusage_done; 3416 } 3417 ctl->zsctl_vmusage_cache = vmusage; 3418 ctl->zsctl_vmusage_cache_num = num_vmusage; 3419 goto vmusage_again; 3420 } 3421 3422 phys_zones_overcount = 0; 3423 vmu_nzones = 0; 3424 for (i = 0; i < num_vmusage; i++) { 3425 switch (vmusage[i].vmu_type) { 3426 case VMUSAGE_SYSTEM: 3427 /* total pages backing user process mappings */ 3428 phys_zones = sys->zss_ram_zones = 3429 vmusage[i].vmu_rss_all; 3430 break; 3431 case VMUSAGE_ZONE: 3432 vmu_nzones++; 3433 phys_zones_overcount += vmusage[i].vmu_rss_all; 3434 zone = zsd_lookup_zone_byid(ctl, vmusage[i].vmu_id); 3435 if (zone != NULL) 3436 zone->zsz_usage_ram = 
vmusage[i].vmu_rss_all; 3437 break; 3438 default: 3439 break; 3440 } 3441 } 3442 /* 3443 * Figure how much memory was double counted due to text sharing 3444 * between zones. Credit this back so that the sum of the zones 3445 * equals the total zone ram usage; 3446 */ 3447 phys_zones_extra = phys_zones_overcount - phys_zones; 3448 phys_zones_credit = phys_zones_extra / vmu_nzones; 3449 3450 vmusage_done: 3451 3452 /* walk the zones to get swap and locked kstats. Fetch ram cap. */ 3453 sys->zss_locked_zones = 0; 3454 sys->zss_vm_zones = 0; 3455 for (zone = list_head(&ctl->zsctl_zones); zone != NULL; 3456 zone = list_next(&ctl->zsctl_zones, zone)) { 3457 3458 /* If zone halted during interval, show memory usage as none */ 3459 if (zone->zsz_active == B_FALSE || 3460 zone->zsz_deleted == B_TRUE) { 3461 zone->zsz_usage_ram = 0; 3462 zone->zsz_usage_vm = 0; 3463 zone->zsz_usage_locked = 0; 3464 continue; 3465 } 3466 3467 if (phys_zones_credit > 0) { 3468 if (zone->zsz_usage_ram > phys_zones_credit) { 3469 zone->zsz_usage_ram -= phys_zones_credit; 3470 } 3471 } 3472 /* 3473 * Get zone's swap usage. Since zone could have halted, 3474 * treats as zero if cannot read 3475 */ 3476 zone->zsz_usage_vm = 0; 3477 (void) snprintf(kstat_name, sizeof (kstat_name), 3478 "swapresv_zone_%d", zone->zsz_id); 3479 kid = -1; 3480 kstat = kstat_lookup(ctl->zsctl_kstat_ctl, "caps", 3481 zone->zsz_id, kstat_name); 3482 if (kstat != NULL) 3483 kid = kstat_read(ctl->zsctl_kstat_ctl, kstat, NULL); 3484 if (kid != -1) { 3485 knp = kstat_data_lookup(kstat, "usage"); 3486 if (knp != NULL && 3487 knp->data_type == KSTAT_DATA_UINT64) { 3488 zone->zsz_usage_vm = knp->value.ui64; 3489 sys->zss_vm_zones += knp->value.ui64; 3490 } 3491 } 3492 /* 3493 * Get zone's locked usage. 
Since zone could have halted, 3494 * treats as zero if cannot read 3495 */ 3496 zone->zsz_usage_locked = 0; 3497 (void) snprintf(kstat_name, sizeof (kstat_name), 3498 "lockedmem_zone_%d", zone->zsz_id); 3499 kid = -1; 3500 kstat = kstat_lookup(ctl->zsctl_kstat_ctl, "caps", 3501 zone->zsz_id, kstat_name); 3502 if (kstat != NULL) 3503 kid = kstat_read(ctl->zsctl_kstat_ctl, kstat, NULL); 3504 if (kid != -1) { 3505 knp = kstat_data_lookup(kstat, "usage"); 3506 if (knp != NULL && 3507 knp->data_type == KSTAT_DATA_UINT64) { 3508 zone->zsz_usage_locked = knp->value.ui64; 3509 /* 3510 * Since locked memory accounting for zones 3511 * can double count ddi locked memory, cap each 3512 * zone's locked usage at its ram usage. 3513 */ 3514 if (zone->zsz_usage_locked > 3515 zone->zsz_usage_ram) 3516 zone->zsz_usage_locked = 3517 zone->zsz_usage_ram; 3518 sys->zss_locked_zones += 3519 zone->zsz_usage_locked; 3520 } 3521 } 3522 } 3523 3524 phys_total = 3525 sysconf(_SC_PHYS_PAGES) * ctl->zsctl_pagesize; 3526 3527 phys_used = (sysconf(_SC_PHYS_PAGES) - sysconf(_SC_AVPHYS_PAGES)) 3528 * ctl->zsctl_pagesize; 3529 3530 /* Compute remaining statistics */ 3531 sys->zss_ram_total = phys_total; 3532 sys->zss_ram_zones = phys_zones; 3533 sys->zss_ram_kern = phys_used - phys_zones - arc_size; 3534 3535 /* 3536 * The total for kernel locked memory should include 3537 * segkp locked pages, but oh well. The arc size is subtracted, 3538 * as that physical memory is reclaimable. 3539 */ 3540 sys->zss_locked_kern = pp_kernel - arc_size; 3541 /* Add memory used by kernel startup and obp to kernel locked */ 3542 if ((phys_total - physmem) > 0) 3543 sys->zss_locked_kern += phys_total - physmem; 3544 3545 /* 3546 * Add in the portion of (RAM+DISK) that is not available as swap, 3547 * and consider it swap used by the kernel. 
3548 */ 3549 sys->zss_vm_total = phys_total + disk_swap_total; 3550 vm_free = (ani.ani_max - ani.ani_resv) * ctl->zsctl_pagesize; 3551 vm_used = sys->zss_vm_total - vm_free; 3552 sys->zss_vm_kern = vm_used - sys->zss_vm_zones - arc_size; 3553 } 3554 3555 /* 3556 * Charge each cpu's usage to its processor sets. Also add the cpu's total 3557 * time to each zone using the processor set. This tracks the maximum 3558 * amount of cpu time that a zone could have used. 3559 */ 3560 static void 3561 zsd_refresh_cpu_stats(zsd_ctl_t *ctl, boolean_t init) 3562 { 3563 zsd_system_t *sys; 3564 zsd_zone_t *zone; 3565 zsd_pset_usage_t *usage; 3566 zsd_cpu_t *cpu; 3567 zsd_cpu_t *cpu_next; 3568 zsd_pset_t *pset; 3569 timestruc_t ts; 3570 uint64_t hrtime; 3571 timestruc_t delta; 3572 3573 /* Update the per-cpu kstat data */ 3574 cpu_next = list_head(&ctl->zsctl_cpus); 3575 while (cpu_next != NULL) { 3576 cpu = cpu_next; 3577 cpu_next = list_next(&ctl->zsctl_cpus, cpu); 3578 zsd_update_cpu_stats(ctl, cpu); 3579 } 3580 /* Update the elapsed real time */ 3581 hrtime = gethrtime(); 3582 if (init) { 3583 /* first time around, store hrtime for future comparision */ 3584 ctl->zsctl_hrtime = hrtime; 3585 ctl->zsctl_hrtime_prev = hrtime; 3586 3587 } else { 3588 /* Compute increase in hrtime since the most recent read */ 3589 ctl->zsctl_hrtime_prev = ctl->zsctl_hrtime; 3590 ctl->zsctl_hrtime = hrtime; 3591 if ((hrtime = hrtime - ctl->zsctl_hrtime_prev) > 0) 3592 TIMESTRUC_ADD_NANOSEC(ctl->zsctl_hrtime_total, hrtime); 3593 } 3594 3595 /* On initialization, all psets have zero time */ 3596 if (init) 3597 return; 3598 3599 for (pset = list_head(&ctl->zsctl_psets); pset != NULL; 3600 pset = list_next(&ctl->zsctl_psets, pset)) { 3601 3602 if (pset->zsp_active == B_FALSE) { 3603 zsd_warn(gettext("Internal error,inactive pset found")); 3604 continue; 3605 } 3606 3607 /* sum total used time for pset */ 3608 ts.tv_sec = 0; 3609 ts.tv_nsec = 0; 3610 TIMESTRUC_ADD_TIMESTRUC(ts, pset->zsp_intr); 3611 
TIMESTRUC_ADD_TIMESTRUC(ts, pset->zsp_kern); 3612 TIMESTRUC_ADD_TIMESTRUC(ts, pset->zsp_user); 3613 /* kernel time in pset is total time minus zone time */ 3614 TIMESTRUC_DELTA(pset->zsp_usage_kern, ts, 3615 pset->zsp_usage_zones); 3616 if (pset->zsp_usage_kern.tv_sec < 0 || 3617 pset->zsp_usage_kern.tv_nsec < 0) { 3618 pset->zsp_usage_kern.tv_sec = 0; 3619 pset->zsp_usage_kern.tv_nsec = 0; 3620 } 3621 /* Total pset elapsed time is used time plus idle time */ 3622 TIMESTRUC_ADD_TIMESTRUC(ts, pset->zsp_idle); 3623 3624 TIMESTRUC_DELTA(delta, ts, pset->zsp_total_time); 3625 3626 for (usage = list_head(&pset->zsp_usage_list); usage != NULL; 3627 usage = list_next(&pset->zsp_usage_list, usage)) { 3628 3629 zone = usage->zsu_zone; 3630 if (usage->zsu_cpu_shares != ZS_LIMIT_NONE && 3631 usage->zsu_cpu_shares != ZS_SHARES_UNLIMITED && 3632 usage->zsu_cpu_shares != 0) { 3633 /* 3634 * Figure out how many nanoseconds of share time 3635 * to give to the zone 3636 */ 3637 hrtime = delta.tv_sec; 3638 hrtime *= NANOSEC; 3639 hrtime += delta.tv_nsec; 3640 hrtime *= usage->zsu_cpu_shares; 3641 hrtime /= pset->zsp_cpu_shares; 3642 TIMESTRUC_ADD_NANOSEC(zone->zsz_share_time, 3643 hrtime); 3644 } 3645 /* Add pset time to each zone using pset */ 3646 TIMESTRUC_ADD_TIMESTRUC(zone->zsz_pset_time, delta); 3647 3648 zone->zsz_cpus_online += pset->zsp_online; 3649 } 3650 pset->zsp_total_time = ts; 3651 } 3652 3653 for (zone = list_head(&ctl->zsctl_zones); zone != NULL; 3654 zone = list_next(&ctl->zsctl_zones, zone)) { 3655 3656 /* update cpu cap tracking if the zone has a cpu cap */ 3657 if (zone->zsz_cpu_cap != ZS_LIMIT_NONE) { 3658 uint64_t elapsed; 3659 3660 elapsed = ctl->zsctl_hrtime - ctl->zsctl_hrtime_prev; 3661 elapsed *= zone->zsz_cpu_cap; 3662 elapsed = elapsed / 100; 3663 TIMESTRUC_ADD_NANOSEC(zone->zsz_cap_time, elapsed); 3664 } 3665 } 3666 sys = ctl->zsctl_system; 3667 ts.tv_sec = 0; 3668 ts.tv_nsec = 0; 3669 TIMESTRUC_ADD_TIMESTRUC(ts, sys->zss_intr); 3670 
TIMESTRUC_ADD_TIMESTRUC(ts, sys->zss_kern); 3671 TIMESTRUC_ADD_TIMESTRUC(ts, sys->zss_user); 3672 3673 /* kernel time in pset is total time minus zone time */ 3674 TIMESTRUC_DELTA(sys->zss_cpu_usage_kern, ts, 3675 sys->zss_cpu_usage_zones); 3676 if (sys->zss_cpu_usage_kern.tv_sec < 0 || 3677 sys->zss_cpu_usage_kern.tv_nsec < 0) { 3678 sys->zss_cpu_usage_kern.tv_sec = 0; 3679 sys->zss_cpu_usage_kern.tv_nsec = 0; 3680 } 3681 /* Total pset elapsed time is used time plus idle time */ 3682 TIMESTRUC_ADD_TIMESTRUC(ts, sys->zss_idle); 3683 sys->zss_cpu_total_time = ts; 3684 } 3685 3686 /* 3687 * Saves current usage data to a cache that is read by libzonestat when 3688 * calling zs_usage_read(). 3689 * 3690 * All pointers in the cached data structure are set to NULL. When 3691 * libzonestat reads the cached data, it will set the pointers relative to 3692 * its address space. 3693 */ 3694 static void 3695 zsd_usage_cache_update(zsd_ctl_t *ctl) 3696 { 3697 zs_usage_cache_t *cache; 3698 zs_usage_cache_t *old; 3699 zs_usage_t *usage; 3700 3701 zs_system_t *sys; 3702 zsd_system_t *dsys; 3703 zs_zone_t *zone = NULL; 3704 zsd_zone_t *dzone; 3705 zs_pset_t *pset = NULL; 3706 zsd_pset_t *dpset; 3707 zs_pset_zone_t *pusage; 3708 zsd_pset_usage_t *dpusage; 3709 3710 char *next; 3711 uint_t size, i, j; 3712 3713 size = 3714 sizeof (zs_usage_cache_t) + 3715 sizeof (zs_usage_t) + 3716 sizeof (zs_system_t) + 3717 sizeof (zs_zone_t) * ctl->zsctl_nzones + 3718 sizeof (zs_pset_t) * ctl->zsctl_npsets + 3719 sizeof (zs_pset_zone_t) * ctl->zsctl_npset_usages; 3720 3721 cache = (zs_usage_cache_t *)malloc(size); 3722 if (cache == NULL) { 3723 zsd_warn(gettext("Unable to allocate usage cache\n")); 3724 return; 3725 } 3726 3727 next = (char *)cache; 3728 cache->zsuc_size = size - sizeof (zs_usage_cache_t); 3729 next += sizeof (zs_usage_cache_t); 3730 3731 /* LINTED */ 3732 usage = cache->zsuc_usage = (zs_usage_t *)next; 3733 next += sizeof (zs_usage_t); 3734 usage->zsu_start = g_start; 3735 
usage->zsu_hrstart = g_hrstart; 3736 usage->zsu_time = g_now; 3737 usage->zsu_hrtime = g_hrnow; 3738 usage->zsu_nzones = ctl->zsctl_nzones; 3739 usage->zsu_npsets = ctl->zsctl_npsets; 3740 usage->zsu_system = NULL; 3741 3742 /* LINTED */ 3743 sys = (zs_system_t *)next; 3744 next += sizeof (zs_system_t); 3745 dsys = ctl->zsctl_system; 3746 sys->zss_ram_total = dsys->zss_ram_total; 3747 sys->zss_ram_kern = dsys->zss_ram_kern; 3748 sys->zss_ram_zones = dsys->zss_ram_zones; 3749 sys->zss_locked_kern = dsys->zss_locked_kern; 3750 sys->zss_locked_zones = dsys->zss_locked_zones; 3751 sys->zss_vm_total = dsys->zss_vm_total; 3752 sys->zss_vm_kern = dsys->zss_vm_kern; 3753 sys->zss_vm_zones = dsys->zss_vm_zones; 3754 sys->zss_swap_total = dsys->zss_swap_total; 3755 sys->zss_swap_used = dsys->zss_swap_used; 3756 sys->zss_ncpus = dsys->zss_ncpus; 3757 sys->zss_ncpus_online = dsys->zss_ncpus_online; 3758 3759 sys->zss_processes_max = dsys->zss_maxpid; 3760 sys->zss_lwps_max = dsys->zss_lwps_max; 3761 sys->zss_shm_max = dsys->zss_shm_max; 3762 sys->zss_shmids_max = dsys->zss_shmids_max; 3763 sys->zss_semids_max = dsys->zss_semids_max; 3764 sys->zss_msgids_max = dsys->zss_msgids_max; 3765 sys->zss_lofi_max = dsys->zss_lofi_max; 3766 3767 sys->zss_processes = dsys->zss_processes; 3768 sys->zss_lwps = dsys->zss_lwps; 3769 sys->zss_shm = dsys->zss_shm; 3770 sys->zss_shmids = dsys->zss_shmids; 3771 sys->zss_semids = dsys->zss_semids; 3772 sys->zss_msgids = dsys->zss_msgids; 3773 sys->zss_lofi = dsys->zss_lofi; 3774 3775 sys->zss_cpu_total_time = dsys->zss_cpu_total_time; 3776 sys->zss_cpu_usage_zones = dsys->zss_cpu_usage_zones; 3777 sys->zss_cpu_usage_kern = dsys->zss_cpu_usage_kern; 3778 3779 for (i = 0, dzone = list_head(&ctl->zsctl_zones); 3780 i < ctl->zsctl_nzones; 3781 i++, dzone = list_next(&ctl->zsctl_zones, dzone)) { 3782 /* LINTED */ 3783 zone = (zs_zone_t *)next; 3784 next += sizeof (zs_zone_t); 3785 list_link_init(&zone->zsz_next); 3786 zone->zsz_system = NULL; 3787 3788 
(void) strlcpy(zone->zsz_name, dzone->zsz_name, 3789 sizeof (zone->zsz_name)); 3790 (void) strlcpy(zone->zsz_pool, dzone->zsz_pool, 3791 sizeof (zone->zsz_pool)); 3792 (void) strlcpy(zone->zsz_pset, dzone->zsz_pset, 3793 sizeof (zone->zsz_pset)); 3794 zone->zsz_id = dzone->zsz_id; 3795 zone->zsz_cputype = dzone->zsz_cputype; 3796 zone->zsz_iptype = dzone->zsz_iptype; 3797 zone->zsz_start = dzone->zsz_start; 3798 zone->zsz_hrstart = dzone->zsz_hrstart; 3799 zone->zsz_scheds = dzone->zsz_scheds; 3800 zone->zsz_cpu_shares = dzone->zsz_cpu_shares; 3801 zone->zsz_cpu_cap = dzone->zsz_cpu_cap; 3802 zone->zsz_ram_cap = dzone->zsz_ram_cap; 3803 zone->zsz_vm_cap = dzone->zsz_vm_cap; 3804 zone->zsz_locked_cap = dzone->zsz_locked_cap; 3805 zone->zsz_cpu_usage = dzone->zsz_cpu_usage; 3806 zone->zsz_cpus_online = dzone->zsz_cpus_online; 3807 zone->zsz_pset_time = dzone->zsz_pset_time; 3808 zone->zsz_cap_time = dzone->zsz_cap_time; 3809 zone->zsz_share_time = dzone->zsz_share_time; 3810 zone->zsz_usage_ram = dzone->zsz_usage_ram; 3811 zone->zsz_usage_locked = dzone->zsz_usage_locked; 3812 zone->zsz_usage_vm = dzone->zsz_usage_vm; 3813 3814 zone->zsz_processes_cap = dzone->zsz_processes_cap; 3815 zone->zsz_lwps_cap = dzone->zsz_lwps_cap; 3816 zone->zsz_shm_cap = dzone->zsz_shm_cap; 3817 zone->zsz_shmids_cap = dzone->zsz_shmids_cap; 3818 zone->zsz_semids_cap = dzone->zsz_semids_cap; 3819 zone->zsz_msgids_cap = dzone->zsz_msgids_cap; 3820 zone->zsz_lofi_cap = dzone->zsz_lofi_cap; 3821 3822 zone->zsz_processes = dzone->zsz_processes; 3823 zone->zsz_lwps = dzone->zsz_lwps; 3824 zone->zsz_shm = dzone->zsz_shm; 3825 zone->zsz_shmids = dzone->zsz_shmids; 3826 zone->zsz_semids = dzone->zsz_semids; 3827 zone->zsz_msgids = dzone->zsz_msgids; 3828 zone->zsz_lofi = dzone->zsz_lofi; 3829 } 3830 3831 for (i = 0, dpset = list_head(&ctl->zsctl_psets); 3832 i < ctl->zsctl_npsets; 3833 i++, dpset = list_next(&ctl->zsctl_psets, dpset)) { 3834 /* LINTED */ 3835 pset = (zs_pset_t *)next; 3836 next += 
sizeof (zs_pset_t); 3837 list_link_init(&pset->zsp_next); 3838 (void) strlcpy(pset->zsp_name, dpset->zsp_name, 3839 sizeof (pset->zsp_name)); 3840 pset->zsp_id = dpset->zsp_id; 3841 pset->zsp_cputype = dpset->zsp_cputype; 3842 pset->zsp_start = dpset->zsp_start; 3843 pset->zsp_hrstart = dpset->zsp_hrstart; 3844 pset->zsp_online = dpset->zsp_online; 3845 pset->zsp_size = dpset->zsp_size; 3846 pset->zsp_min = dpset->zsp_min; 3847 pset->zsp_max = dpset->zsp_max; 3848 pset->zsp_importance = dpset->zsp_importance; 3849 pset->zsp_scheds = dpset->zsp_scheds; 3850 pset->zsp_cpu_shares = dpset->zsp_cpu_shares; 3851 pset->zsp_total_time = dpset->zsp_total_time; 3852 pset->zsp_usage_kern = dpset->zsp_usage_kern; 3853 pset->zsp_usage_zones = dpset->zsp_usage_zones; 3854 pset->zsp_nusage = dpset->zsp_nusage; 3855 /* Add pset usages for pset */ 3856 for (j = 0, dpusage = list_head(&dpset->zsp_usage_list); 3857 j < dpset->zsp_nusage; 3858 j++, dpusage = list_next(&dpset->zsp_usage_list, dpusage)) { 3859 /* LINTED */ 3860 pusage = (zs_pset_zone_t *)next; 3861 next += sizeof (zs_pset_zone_t); 3862 /* pointers are computed by client */ 3863 pusage->zspz_pset = NULL; 3864 pusage->zspz_zone = NULL; 3865 list_link_init(&pusage->zspz_next); 3866 pusage->zspz_zoneid = dpusage->zsu_zone->zsz_id; 3867 pusage->zspz_start = dpusage->zsu_start; 3868 pusage->zspz_hrstart = dpusage->zsu_hrstart; 3869 pusage->zspz_hrstart = dpusage->zsu_hrstart; 3870 pusage->zspz_cpu_shares = dpusage->zsu_cpu_shares; 3871 pusage->zspz_scheds = dpusage->zsu_scheds; 3872 pusage->zspz_cpu_usage = dpusage->zsu_cpu_usage; 3873 } 3874 } 3875 3876 /* Update the current cache pointer */ 3877 (void) mutex_lock(&g_usage_cache_lock); 3878 old = g_usage_cache; 3879 cache->zsuc_ref = 1; 3880 cache->zsuc_gen = g_gen_next; 3881 usage->zsu_gen = g_gen_next; 3882 usage->zsu_size = size; 3883 g_usage_cache = cache; 3884 if (old != NULL) { 3885 old->zsuc_ref--; 3886 if (old->zsuc_ref == 0) 3887 free(old); 3888 } 3889 g_gen_next++; 
3890 /* Wake up any clients that are waiting for this calculation */ 3891 if (g_usage_cache_kickers > 0) { 3892 (void) cond_broadcast(&g_usage_cache_wait); 3893 } 3894 (void) mutex_unlock(&g_usage_cache_lock); 3895 } 3896 3897 static zs_usage_cache_t * 3898 zsd_usage_cache_hold_locked() 3899 { 3900 zs_usage_cache_t *ret; 3901 3902 ret = g_usage_cache; 3903 ret->zsuc_ref++; 3904 return (ret); 3905 } 3906 3907 void 3908 zsd_usage_cache_rele(zs_usage_cache_t *cache) 3909 { 3910 (void) mutex_lock(&g_usage_cache_lock); 3911 cache->zsuc_ref--; 3912 if (cache->zsuc_ref == 0) 3913 free(cache); 3914 (void) mutex_unlock(&g_usage_cache_lock); 3915 } 3916 3917 /* Close the handles held by zsd_open() */ 3918 void 3919 zsd_close(zsd_ctl_t *ctl) 3920 { 3921 zsd_zone_t *zone; 3922 zsd_pset_t *pset; 3923 zsd_pset_usage_t *usage; 3924 zsd_cpu_t *cpu; 3925 int id; 3926 3927 if (ctl->zsctl_kstat_ctl) { 3928 (void) kstat_close(ctl->zsctl_kstat_ctl); 3929 ctl->zsctl_kstat_ctl = NULL; 3930 } 3931 if (ctl->zsctl_proc_open) { 3932 (void) ea_close(&ctl->zsctl_proc_eaf); 3933 ctl->zsctl_proc_open = 0; 3934 ctl->zsctl_proc_fd = -1; 3935 } 3936 if (ctl->zsctl_pool_conf) { 3937 if (ctl->zsctl_pool_status == POOL_ENABLED) 3938 (void) pool_conf_close(ctl->zsctl_pool_conf); 3939 ctl->zsctl_pool_status = POOL_DISABLED; 3940 } 3941 3942 while ((zone = list_head(&ctl->zsctl_zones)) != NULL) { 3943 list_remove(&ctl->zsctl_zones, zone); 3944 free(zone); 3945 ctl->zsctl_nzones--; 3946 } 3947 3948 while ((pset = list_head(&ctl->zsctl_psets)) != NULL) { 3949 while ((usage = list_head(&pset->zsp_usage_list)) 3950 != NULL) { 3951 list_remove(&pset->zsp_usage_list, usage); 3952 ctl->zsctl_npset_usages--; 3953 free(usage); 3954 } 3955 list_remove(&ctl->zsctl_psets, pset); 3956 free(pset); 3957 ctl->zsctl_npsets--; 3958 } 3959 3960 /* Release all cpus being tracked */ 3961 while (cpu = list_head(&ctl->zsctl_cpus)) { 3962 list_remove(&ctl->zsctl_cpus, cpu); 3963 id = cpu->zsc_id; 3964 bzero(cpu, sizeof 
(zsd_cpu_t)); 3965 cpu->zsc_id = id; 3966 cpu->zsc_allocated = B_FALSE; 3967 cpu->zsc_psetid = ZS_PSET_ERROR; 3968 cpu->zsc_psetid_prev = ZS_PSET_ERROR; 3969 } 3970 3971 assert(ctl->zsctl_npset_usages == 0); 3972 assert(ctl->zsctl_npsets == 0); 3973 assert(ctl->zsctl_nzones == 0); 3974 (void) zsd_disable_cpu_stats(); 3975 } 3976 3977 3978 /* 3979 * Update the utilization data for all zones and processor sets. 3980 */ 3981 static int 3982 zsd_read(zsd_ctl_t *ctl, boolean_t init, boolean_t do_memory) 3983 { 3984 (void) kstat_chain_update(ctl->zsctl_kstat_ctl); 3985 (void) gettimeofday(&(ctl->zsctl_timeofday), NULL); 3986 3987 zsd_refresh_system(ctl); 3988 3989 /* 3990 * Memory calculation is expensive. Only update it on sample 3991 * intervals. 3992 */ 3993 if (do_memory == B_TRUE) 3994 zsd_refresh_memory(ctl, init); 3995 zsd_refresh_zones(ctl); 3996 zsd_refresh_psets(ctl); 3997 zsd_refresh_procs(ctl, init); 3998 zsd_refresh_cpu_stats(ctl, init); 3999 4000 /* 4001 * Delete objects that no longer exist. 4002 * Pset usages must be deleted first as they point to zone and 4003 * pset objects. 4004 */ 4005 zsd_mark_pset_usages_end(ctl); 4006 zsd_mark_psets_end(ctl); 4007 zsd_mark_cpus_end(ctl); 4008 zsd_mark_zones_end(ctl); 4009 4010 /* 4011 * Save results for clients. 4012 */ 4013 zsd_usage_cache_update(ctl); 4014 4015 /* 4016 * Roll process accounting file. 
4017 */ 4018 (void) zsd_roll_exacct(); 4019 return (0); 4020 } 4021 4022 /* 4023 * Get the system rctl, which is the upper most limit 4024 */ 4025 static uint64_t 4026 zsd_get_system_rctl(char *name) 4027 { 4028 rctlblk_t *rblk, *rblk_last; 4029 4030 rblk = (rctlblk_t *)alloca(rctlblk_size()); 4031 rblk_last = (rctlblk_t *)alloca(rctlblk_size()); 4032 4033 if (getrctl(name, NULL, rblk_last, RCTL_FIRST) != 0) 4034 return (ZS_LIMIT_NONE); 4035 4036 while (getrctl(name, rblk_last, rblk, RCTL_NEXT) == 0) 4037 (void) bcopy(rblk, rblk_last, rctlblk_size()); 4038 4039 return (rctlblk_get_value(rblk_last)); 4040 } 4041 4042 /* 4043 * Open any necessary subsystems for collecting utilization data, 4044 * allocate and initialize data structures, and get initial utilization. 4045 * 4046 * Errors: 4047 * ENOMEM out of memory 4048 * EINVAL other error 4049 */ 4050 static zsd_ctl_t * 4051 zsd_open(zsd_ctl_t *ctl) 4052 { 4053 zsd_system_t *system; 4054 4055 char path[MAXPATHLEN]; 4056 long pathmax; 4057 struct statvfs svfs; 4058 int ret; 4059 int i; 4060 size_t size; 4061 int err; 4062 4063 if (ctl == NULL && (ctl = (zsd_ctl_t *)calloc(1, 4064 sizeof (zsd_ctl_t))) == NULL) { 4065 zsd_warn(gettext("Out of Memory")); 4066 errno = ENOMEM; 4067 goto err; 4068 } 4069 ctl->zsctl_proc_fd = -1; 4070 4071 /* open kstats */ 4072 if (ctl->zsctl_kstat_ctl == NULL && 4073 (ctl->zsctl_kstat_ctl = kstat_open()) == NULL) { 4074 err = errno; 4075 zsd_warn(gettext("Unable to open kstats")); 4076 errno = err; 4077 if (errno != ENOMEM) 4078 errno = EAGAIN; 4079 goto err; 4080 } 4081 4082 /* 4083 * These are set when the accounting file is opened by 4084 * zsd_update_procs() 4085 */ 4086 ctl->zsctl_proc_fd = -1; 4087 ctl->zsctl_proc_fd_next = -1; 4088 ctl->zsctl_proc_open = 0; 4089 ctl->zsctl_proc_open_next = 0; 4090 4091 check_exacct: 4092 (void) zsd_enable_cpu_stats(); 4093 4094 /* Create structures to track usage */ 4095 if (ctl->zsctl_system == NULL && (ctl->zsctl_system = (zsd_system_t *) 4096 
calloc(1, sizeof (zsd_system_t))) == NULL) { 4097 ret = -1; 4098 zsd_warn(gettext("Out of Memory")); 4099 errno = ENOMEM; 4100 goto err; 4101 } 4102 system = ctl->zsctl_system; 4103 /* get the kernel bitness to know structure layout for getvmusage */ 4104 ret = sysinfo(SI_ARCHITECTURE_64, path, sizeof (path)); 4105 if (ret < 0) 4106 ctl->zsctl_kern_bits = 32; 4107 else 4108 ctl->zsctl_kern_bits = 64; 4109 ctl->zsctl_pagesize = sysconf(_SC_PAGESIZE); 4110 4111 size = sysconf(_SC_CPUID_MAX); 4112 ctl->zsctl_maxcpuid = size; 4113 if (ctl->zsctl_cpu_array == NULL && (ctl->zsctl_cpu_array = 4114 (zsd_cpu_t *)calloc(size + 1, sizeof (zsd_cpu_t))) == NULL) { 4115 zsd_warn(gettext("Out of Memory")); 4116 errno = ENOMEM; 4117 goto err; 4118 } 4119 for (i = 0; i <= ctl->zsctl_maxcpuid; i++) { 4120 ctl->zsctl_cpu_array[i].zsc_id = i; 4121 ctl->zsctl_cpu_array[i].zsc_allocated = B_FALSE; 4122 ctl->zsctl_cpu_array[i].zsc_psetid = ZS_PSET_ERROR; 4123 ctl->zsctl_cpu_array[i].zsc_psetid_prev = ZS_PSET_ERROR; 4124 } 4125 if (statvfs("/proc", &svfs) != 0 || 4126 strcmp("/proc", svfs.f_fstr) != 0) { 4127 zsd_warn(gettext("/proc not a procfs filesystem")); 4128 errno = EINVAL; 4129 goto err; 4130 } 4131 4132 size = sysconf(_SC_MAXPID) + 1; 4133 ctl->zsctl_maxproc = size; 4134 if (ctl->zsctl_proc_array == NULL && 4135 (ctl->zsctl_proc_array = (zsd_proc_t *)calloc(size, 4136 sizeof (zsd_proc_t))) == NULL) { 4137 zsd_warn(gettext("Out of Memory")); 4138 errno = ENOMEM; 4139 goto err; 4140 } 4141 for (i = 0; i <= ctl->zsctl_maxproc; i++) { 4142 list_link_init(&(ctl->zsctl_proc_array[i].zspr_next)); 4143 ctl->zsctl_proc_array[i].zspr_psetid = ZS_PSET_ERROR; 4144 ctl->zsctl_proc_array[i].zspr_zoneid = -1; 4145 ctl->zsctl_proc_array[i].zspr_usage.tv_sec = 0; 4146 ctl->zsctl_proc_array[i].zspr_usage.tv_nsec = 0; 4147 ctl->zsctl_proc_array[i].zspr_ppid = -1; 4148 } 4149 4150 list_create(&ctl->zsctl_zones, sizeof (zsd_zone_t), 4151 offsetof(zsd_zone_t, zsz_next)); 4152 4153 
list_create(&ctl->zsctl_psets, sizeof (zsd_pset_t), 4154 offsetof(zsd_pset_t, zsp_next)); 4155 4156 list_create(&ctl->zsctl_cpus, sizeof (zsd_cpu_t), 4157 offsetof(zsd_cpu_t, zsc_next)); 4158 4159 pathmax = pathconf("/proc", _PC_NAME_MAX); 4160 if (pathmax < 0) { 4161 zsd_warn(gettext("Unable to determine max path of /proc")); 4162 errno = EINVAL; 4163 goto err; 4164 } 4165 size = sizeof (struct dirent) + pathmax + 1; 4166 4167 ctl->zsctl_procfs_dent_size = size; 4168 if (ctl->zsctl_procfs_dent == NULL && 4169 (ctl->zsctl_procfs_dent = (struct dirent *)calloc(1, size)) 4170 == NULL) { 4171 zsd_warn(gettext("Out of Memory")); 4172 errno = ENOMEM; 4173 goto err; 4174 } 4175 4176 if (ctl->zsctl_pool_conf == NULL && 4177 (ctl->zsctl_pool_conf = pool_conf_alloc()) == NULL) { 4178 zsd_warn(gettext("Out of Memory")); 4179 errno = ENOMEM; 4180 goto err; 4181 } 4182 ctl->zsctl_pool_status = POOL_DISABLED; 4183 ctl->zsctl_pool_changed = 0; 4184 4185 if (ctl->zsctl_pool_vals[0] == NULL && 4186 (ctl->zsctl_pool_vals[0] = pool_value_alloc()) == NULL) { 4187 zsd_warn(gettext("Out of Memory")); 4188 errno = ENOMEM; 4189 goto err; 4190 } 4191 if (ctl->zsctl_pool_vals[1] == NULL && 4192 (ctl->zsctl_pool_vals[1] = pool_value_alloc()) == NULL) { 4193 zsd_warn(gettext("Out of Memory")); 4194 errno = ENOMEM; 4195 goto err; 4196 } 4197 ctl->zsctl_pool_vals[2] = NULL; 4198 4199 /* 4200 * get system limits 4201 */ 4202 system->zss_maxpid = size = sysconf(_SC_MAXPID); 4203 system->zss_processes_max = zsd_get_system_rctl("zone.max-processes"); 4204 system->zss_lwps_max = zsd_get_system_rctl("zone.max-lwps"); 4205 system->zss_shm_max = zsd_get_system_rctl("zone.max-shm-memory"); 4206 system->zss_shmids_max = zsd_get_system_rctl("zone.max-shm-ids"); 4207 system->zss_semids_max = zsd_get_system_rctl("zone.max-sem-ids"); 4208 system->zss_msgids_max = zsd_get_system_rctl("zone.max-msg-ids"); 4209 system->zss_lofi_max = zsd_get_system_rctl("zone.max-lofi"); 4210 4211 g_gen_next = 1; 4212 4213 if 
(zsd_read(ctl, B_TRUE, B_FALSE) != 0) 4214 zsd_warn(gettext("Reading zone statistics failed")); 4215 4216 return (ctl); 4217 err: 4218 if (ctl) 4219 zsd_close(ctl); 4220 4221 return (NULL); 4222 } 4223 4224 /* Copy utilization data to buffer, filtering data if non-global zone. */ 4225 static void 4226 zsd_usage_filter(zoneid_t zid, zs_usage_cache_t *cache, zs_usage_t *usage, 4227 boolean_t is_gz) 4228 { 4229 zs_usage_t *cusage; 4230 zs_system_t *sys, *csys; 4231 zs_zone_t *zone, *czone; 4232 zs_pset_t *pset, *cpset; 4233 zs_pset_zone_t *pz, *cpz, *foundpz; 4234 size_t size = 0, csize = 0; 4235 char *start, *cstart; 4236 int i, j; 4237 timestruc_t delta; 4238 4239 /* Privileged users in the global zone get everything */ 4240 if (is_gz) { 4241 cusage = cache->zsuc_usage; 4242 (void) bcopy(cusage, usage, cusage->zsu_size); 4243 return; 4244 } 4245 4246 /* Zones just get their own usage */ 4247 cusage = cache->zsuc_usage; 4248 4249 start = (char *)usage; 4250 cstart = (char *)cusage; 4251 size += sizeof (zs_usage_t); 4252 csize += sizeof (zs_usage_t); 4253 4254 usage->zsu_start = cusage->zsu_start; 4255 usage->zsu_hrstart = cusage->zsu_hrstart; 4256 usage->zsu_time = cusage->zsu_time; 4257 usage->zsu_hrtime = cusage->zsu_hrtime; 4258 usage->zsu_gen = cusage->zsu_gen; 4259 usage->zsu_nzones = 1; 4260 usage->zsu_npsets = 0; 4261 4262 /* LINTED */ 4263 sys = (zs_system_t *)(start + size); 4264 /* LINTED */ 4265 csys = (zs_system_t *)(cstart + csize); 4266 size += sizeof (zs_system_t); 4267 csize += sizeof (zs_system_t); 4268 4269 /* Save system limits but not usage */ 4270 *sys = *csys; 4271 sys->zss_ncpus = 0; 4272 sys->zss_ncpus_online = 0; 4273 4274 /* LINTED */ 4275 zone = (zs_zone_t *)(start + size); 4276 /* LINTED */ 4277 czone = (zs_zone_t *)(cstart + csize); 4278 /* Find the matching zone */ 4279 for (i = 0; i < cusage->zsu_nzones; i++) { 4280 if (czone->zsz_id == zid) { 4281 *zone = *czone; 4282 size += sizeof (zs_zone_t); 4283 } 4284 csize += sizeof (zs_zone_t); 
4285 /* LINTED */ 4286 czone = (zs_zone_t *)(cstart + csize); 4287 } 4288 sys->zss_ram_kern += (sys->zss_ram_zones - zone->zsz_usage_ram); 4289 sys->zss_ram_zones = zone->zsz_usage_ram; 4290 4291 sys->zss_vm_kern += (sys->zss_vm_zones - zone->zsz_usage_vm); 4292 sys->zss_vm_zones = zone->zsz_usage_vm; 4293 4294 sys->zss_locked_kern += (sys->zss_locked_zones - 4295 zone->zsz_usage_locked); 4296 sys->zss_locked_zones = zone->zsz_usage_locked; 4297 4298 TIMESTRUC_DELTA(delta, sys->zss_cpu_usage_zones, zone->zsz_cpu_usage); 4299 TIMESTRUC_ADD_TIMESTRUC(sys->zss_cpu_usage_kern, delta); 4300 sys->zss_cpu_usage_zones = zone->zsz_cpu_usage; 4301 4302 /* LINTED */ 4303 pset = (zs_pset_t *)(start + size); 4304 /* LINTED */ 4305 cpset = (zs_pset_t *)(cstart + csize); 4306 for (i = 0; i < cusage->zsu_npsets; i++) { 4307 csize += sizeof (zs_pset_t); 4308 /* LINTED */ 4309 cpz = (zs_pset_zone_t *)(csize + cstart); 4310 foundpz = NULL; 4311 for (j = 0; j < cpset->zsp_nusage; j++) { 4312 if (cpz->zspz_zoneid == zid) 4313 foundpz = cpz; 4314 4315 csize += sizeof (zs_pset_zone_t); 4316 /* LINTED */ 4317 cpz = (zs_pset_zone_t *)(csize + cstart); 4318 } 4319 if (foundpz != NULL) { 4320 size += sizeof (zs_pset_t); 4321 /* LINTED */ 4322 pz = (zs_pset_zone_t *)(start + size); 4323 size += sizeof (zs_pset_zone_t); 4324 4325 *pset = *cpset; 4326 *pz = *foundpz; 4327 4328 TIMESTRUC_DELTA(delta, pset->zsp_usage_zones, 4329 pz->zspz_cpu_usage); 4330 TIMESTRUC_ADD_TIMESTRUC(pset->zsp_usage_kern, delta); 4331 pset->zsp_usage_zones = pz->zspz_cpu_usage; 4332 pset->zsp_nusage = 1; 4333 usage->zsu_npsets++; 4334 sys->zss_ncpus += pset->zsp_size; 4335 sys->zss_ncpus_online += pset->zsp_online; 4336 } 4337 /* LINTED */ 4338 cpset = (zs_pset_t *)(cstart + csize); 4339 } 4340 usage->zsu_size = size; 4341 } 4342 4343 /* 4344 * Respond to new connections from libzonestat.so. Also respond to zoneadmd, 4345 * which reports new zones. 
4346 */ 4347 /* ARGSUSED */ 4348 static void 4349 zsd_server(void *cookie, char *argp, size_t arg_size, 4350 door_desc_t *dp, uint_t n_desc) 4351 { 4352 int *args, cmd; 4353 door_desc_t door; 4354 ucred_t *ucred; 4355 const priv_set_t *eset; 4356 4357 if (argp == DOOR_UNREF_DATA) { 4358 (void) door_return(NULL, 0, NULL, 0); 4359 thr_exit(NULL); 4360 } 4361 4362 if (arg_size != sizeof (cmd) * 2) { 4363 (void) door_return(NULL, 0, NULL, 0); 4364 thr_exit(NULL); 4365 } 4366 4367 /* LINTED */ 4368 args = (int *)argp; 4369 cmd = args[0]; 4370 4371 /* If connection, return door to stat server */ 4372 if (cmd == ZSD_CMD_CONNECT) { 4373 4374 /* Verify client compilation version */ 4375 if (args[1] != ZS_VERSION) { 4376 args[1] = ZSD_STATUS_VERSION_MISMATCH; 4377 (void) door_return(argp, sizeof (cmd) * 2, NULL, 0); 4378 thr_exit(NULL); 4379 } 4380 ucred = alloca(ucred_size()); 4381 /* Verify client permission */ 4382 if (door_ucred(&ucred) != 0) { 4383 args[1] = ZSD_STATUS_INTERNAL_ERROR; 4384 (void) door_return(argp, sizeof (cmd) * 2, NULL, 0); 4385 thr_exit(NULL); 4386 } 4387 4388 eset = ucred_getprivset(ucred, PRIV_EFFECTIVE); 4389 if (eset == NULL) { 4390 args[1] = ZSD_STATUS_INTERNAL_ERROR; 4391 (void) door_return(argp, sizeof (cmd) * 2, NULL, 0); 4392 thr_exit(NULL); 4393 } 4394 if (!priv_ismember(eset, PRIV_PROC_INFO)) { 4395 args[1] = ZSD_STATUS_PERMISSION; 4396 (void) door_return(argp, sizeof (cmd) * 2, NULL, 0); 4397 thr_exit(NULL); 4398 } 4399 4400 /* Return stat server door */ 4401 args[1] = ZSD_STATUS_OK; 4402 door.d_attributes = DOOR_DESCRIPTOR; 4403 door.d_data.d_desc.d_descriptor = g_stat_door; 4404 (void) door_return(argp, sizeof (cmd) * 2, &door, 1); 4405 thr_exit(NULL); 4406 } 4407 4408 /* Respond to zoneadmd informing zonestatd of a new zone */ 4409 if (cmd == ZSD_CMD_NEW_ZONE) { 4410 zsd_fattach_zone(args[1], g_server_door, B_FALSE); 4411 (void) door_return(NULL, 0, NULL, 0); 4412 thr_exit(NULL); 4413 } 4414 4415 args[1] = ZSD_STATUS_INTERNAL_ERROR; 
4416 (void) door_return(argp, sizeof (cmd) * 2, NULL, 0); 4417 thr_exit(NULL); 4418 } 4419 4420 /* 4421 * Respond to libzonestat.so clients with the current utlilzation data. 4422 */ 4423 /* ARGSUSED */ 4424 static void 4425 zsd_stat_server(void *cookie, char *argp, size_t arg_size, 4426 door_desc_t *dp, uint_t n_desc) 4427 { 4428 uint64_t *args, cmd; 4429 zs_usage_cache_t *cache; 4430 int ret; 4431 char *rvalp; 4432 size_t rvals; 4433 zs_usage_t *usage; 4434 ucred_t *ucred; 4435 zoneid_t zoneid; 4436 const priv_set_t *eset; 4437 boolean_t is_gz = B_FALSE; 4438 4439 /* Tell stat thread there are no more clients */ 4440 if (argp == DOOR_UNREF_DATA) { 4441 (void) mutex_lock(&g_usage_cache_lock); 4442 g_hasclient = B_FALSE; 4443 (void) cond_signal(&g_usage_cache_kick); 4444 (void) mutex_unlock(&g_usage_cache_lock); 4445 (void) door_return(NULL, 0, NULL, 0); 4446 thr_exit(NULL); 4447 } 4448 if (arg_size != sizeof (cmd) * 2) { 4449 (void) door_return(NULL, 0, NULL, 0); 4450 thr_exit(NULL); 4451 } 4452 /* LINTED */ 4453 args = (uint64_t *)argp; 4454 cmd = args[0]; 4455 if (cmd != ZSD_CMD_READ) { 4456 (void) door_return(NULL, 0, NULL, 0); 4457 thr_exit(NULL); 4458 } 4459 ucred = alloca(ucred_size()); 4460 if (door_ucred(&ucred) != 0) { 4461 (void) door_return(NULL, 0, NULL, 0); 4462 thr_exit(NULL); 4463 } 4464 zoneid = ucred_getzoneid(ucred); 4465 4466 if (zoneid == GLOBAL_ZONEID) 4467 is_gz = B_TRUE; 4468 4469 eset = ucred_getprivset(ucred, PRIV_EFFECTIVE); 4470 if (eset == NULL) { 4471 (void) door_return(NULL, 0, NULL, 0); 4472 thr_exit(NULL); 4473 } 4474 if (!priv_ismember(eset, PRIV_PROC_INFO)) { 4475 (void) door_return(NULL, 0, NULL, 0); 4476 thr_exit(NULL); 4477 } 4478 (void) mutex_lock(&g_usage_cache_lock); 4479 g_hasclient = B_TRUE; 4480 4481 /* 4482 * Force a new cpu calculation for client. This will force a 4483 * new memory calculation if the memory data is older than the 4484 * sample period. 
4485 */ 4486 g_usage_cache_kickers++; 4487 (void) cond_signal(&g_usage_cache_kick); 4488 ret = cond_wait(&g_usage_cache_wait, &g_usage_cache_lock); 4489 g_usage_cache_kickers--; 4490 if (ret != 0 && errno == EINTR) { 4491 (void) mutex_unlock(&g_usage_cache_lock); 4492 zsd_warn(gettext( 4493 "Interrupted before writing usage size to client\n")); 4494 (void) door_return(NULL, 0, NULL, 0); 4495 thr_exit(NULL); 4496 } 4497 cache = zsd_usage_cache_hold_locked(); 4498 if (cache == NULL) { 4499 zsd_warn(gettext("Usage cache empty.\n")); 4500 (void) door_return(NULL, 0, NULL, 0); 4501 thr_exit(NULL); 4502 } 4503 (void) mutex_unlock(&g_usage_cache_lock); 4504 4505 /* Copy current usage data to stack to send to client */ 4506 usage = (zs_usage_t *)alloca(cache->zsuc_size); 4507 4508 /* Filter out results if caller is non-global zone */ 4509 zsd_usage_filter(zoneid, cache, usage, is_gz); 4510 4511 rvalp = (void *)usage; 4512 rvals = usage->zsu_size; 4513 zsd_usage_cache_rele(cache); 4514 4515 (void) door_return(rvalp, rvals, 0, NULL); 4516 thr_exit(NULL); 4517 } 4518 4519 static volatile boolean_t g_quit; 4520 4521 /* ARGSUSED */ 4522 static void 4523 zonestat_quithandler(int sig) 4524 { 4525 g_quit = B_TRUE; 4526 } 4527 4528 /* 4529 * The stat thread generates new utilization data when clients request 4530 * it. It also manages opening and closing the subsystems used to gather 4531 * data depending on if clients exist. 
4532 */ 4533 /* ARGSUSED */ 4534 void * 4535 stat_thread(void *arg) 4536 { 4537 time_t start; 4538 time_t now; 4539 time_t next_memory; 4540 boolean_t do_memory; 4541 boolean_t do_read; 4542 boolean_t do_close; 4543 4544 start = time(NULL); 4545 if (start < 0) { 4546 if (g_quit == B_TRUE) 4547 goto quit; 4548 zsd_warn(gettext("Unable to fetch current time")); 4549 g_quit = B_TRUE; 4550 goto quit; 4551 } 4552 4553 next_memory = start; 4554 while (g_quit == B_FALSE) { 4555 for (;;) { 4556 /* 4557 * These are used to decide if the most recent memory 4558 * calculation was within a sample interval, 4559 * and weather or not the usage collection needs to 4560 * be opened or closed. 4561 */ 4562 do_memory = B_FALSE; 4563 do_read = B_FALSE; 4564 do_close = B_FALSE; 4565 4566 /* 4567 * If all clients have gone, close usage collecting 4568 */ 4569 (void) mutex_lock(&g_usage_cache_lock); 4570 if (!g_hasclient && g_open == B_TRUE) { 4571 do_close = B_TRUE; 4572 (void) mutex_unlock(&g_usage_cache_lock); 4573 break; 4574 } 4575 if (g_quit == B_TRUE) { 4576 (void) mutex_unlock( 4577 &g_usage_cache_lock); 4578 break; 4579 } 4580 /* 4581 * Wait for a usage data request 4582 */ 4583 if (g_usage_cache_kickers == 0) { 4584 (void) cond_wait(&g_usage_cache_kick, 4585 &g_usage_cache_lock); 4586 } 4587 now = time(NULL); 4588 if (now < 0) { 4589 if (g_quit == B_TRUE) { 4590 (void) mutex_unlock( 4591 &g_usage_cache_lock); 4592 goto quit; 4593 } 4594 g_quit = B_TRUE; 4595 (void) mutex_unlock(&g_usage_cache_lock); 4596 zsd_warn(gettext( 4597 "Unable to fetch current time")); 4598 goto quit; 4599 } 4600 if (g_hasclient) { 4601 do_read = B_TRUE; 4602 if (now >= next_memory) { 4603 do_memory = B_TRUE; 4604 next_memory = now + g_interval; 4605 } 4606 } else { 4607 do_close = B_TRUE; 4608 } 4609 (void) mutex_unlock(&g_usage_cache_lock); 4610 if (do_read || do_close) 4611 break; 4612 } 4613 g_now = now; 4614 g_hrnow = gethrtime(); 4615 if (g_hasclient && g_open == B_FALSE) { 4616 g_start = g_now; 
4617 g_hrstart = g_hrnow; 4618 g_ctl = zsd_open(g_ctl); 4619 if (g_ctl == NULL) 4620 zsd_warn(gettext( 4621 "Unable to open zone statistics")); 4622 else 4623 g_open = B_TRUE; 4624 } 4625 if (do_read && g_ctl) { 4626 if (zsd_read(g_ctl, B_FALSE, do_memory) != 0) { 4627 zsd_warn(gettext( 4628 "Unable to read zone statistics")); 4629 g_quit = B_TRUE; 4630 return (NULL); 4631 } 4632 } 4633 (void) mutex_lock(&g_usage_cache_lock); 4634 if (!g_hasclient && g_open == B_TRUE && g_ctl) { 4635 (void) mutex_unlock(&g_usage_cache_lock); 4636 zsd_close(g_ctl); 4637 g_open = B_FALSE; 4638 } else { 4639 (void) mutex_unlock(&g_usage_cache_lock); 4640 } 4641 } 4642 quit: 4643 if (g_open) 4644 zsd_close(g_ctl); 4645 4646 (void) thr_kill(g_main, SIGINT); 4647 thr_exit(NULL); 4648 return (NULL); 4649 } 4650 4651 void 4652 zsd_set_fx() 4653 { 4654 pcinfo_t pcinfo; 4655 pcparms_t pcparms; 4656 4657 (void) strlcpy(pcinfo.pc_clname, "FX", sizeof (pcinfo.pc_clname)); 4658 if (priocntl(0, 0, PC_GETCID, (caddr_t)&pcinfo) == -1) { 4659 zsd_warn(gettext("cannot get FX class parameters")); 4660 return; 4661 } 4662 pcparms.pc_cid = pcinfo.pc_cid; 4663 ((fxparms_t *)pcparms.pc_clparms)->fx_upri = 60; 4664 ((fxparms_t *)pcparms.pc_clparms)->fx_uprilim = 60; 4665 ((fxparms_t *)pcparms.pc_clparms)->fx_tqsecs = 0; 4666 ((fxparms_t *)pcparms.pc_clparms)->fx_tqnsecs = FX_NOCHANGE; 4667 if (priocntl(P_PID, getpid(), PC_SETPARMS, (caddr_t)&pcparms) == -1) 4668 zsd_warn(gettext("cannot enter the FX class")); 4669 } 4670 4671 static int pipe_fd; 4672 4673 static void 4674 daemonize_ready(char status) 4675 { 4676 /* 4677 * wake the parent with a clue 4678 */ 4679 (void) write(pipe_fd, &status, 1); 4680 (void) close(pipe_fd); 4681 } 4682 4683 static int 4684 daemonize_start(void) 4685 { 4686 char data; 4687 int status; 4688 4689 int filedes[2]; 4690 pid_t pid; 4691 4692 (void) close(0); 4693 (void) dup2(2, 1); 4694 4695 if (pipe(filedes) < 0) 4696 return (-1); 4697 4698 (void) fflush(NULL); 4699 4700 if 
((pid = fork1()) < 0) 4701 return (-1); 4702 4703 if (pid != 0) { 4704 /* 4705 * parent 4706 */ 4707 struct sigaction act; 4708 4709 act.sa_sigaction = SIG_DFL; 4710 (void) sigemptyset(&act.sa_mask); 4711 act.sa_flags = 0; 4712 4713 (void) sigaction(SIGPIPE, &act, NULL); /* ignore SIGPIPE */ 4714 4715 (void) close(filedes[1]); 4716 if (read(filedes[0], &data, 1) == 1) { 4717 /* forward ready code via exit status */ 4718 exit(data); 4719 } 4720 status = -1; 4721 (void) wait4(pid, &status, 0, NULL); 4722 /* daemon process exited before becoming ready */ 4723 if (WIFEXITED(status)) { 4724 /* assume daemon process printed useful message */ 4725 exit(WEXITSTATUS(status)); 4726 } else { 4727 zsd_warn(gettext("daemon process killed or died")); 4728 exit(1); 4729 } 4730 } 4731 4732 /* 4733 * child 4734 */ 4735 pipe_fd = filedes[1]; 4736 (void) close(filedes[0]); 4737 4738 /* 4739 * generic Unix setup 4740 */ 4741 (void) setsid(); 4742 (void) umask(0000); 4743 4744 return (0); 4745 } 4746 4747 static void 4748 fattach_all_zones(boolean_t detach_only) 4749 { 4750 zoneid_t *zids; 4751 uint_t nzids, nzids_last; 4752 int i; 4753 4754 again: 4755 (void) zone_list(NULL, &nzids); 4756 nzids_last = nzids; 4757 zids = (zoneid_t *)malloc(sizeof (zoneid_t) * nzids_last); 4758 if (zids == NULL) 4759 zsd_error(gettext("Out of memory")); 4760 4761 (void) zone_list(zids, &nzids); 4762 if (nzids > nzids_last) { 4763 free(zids); 4764 goto again; 4765 } 4766 for (i = 0; i < nzids; i++) 4767 zsd_fattach_zone(zids[i], g_server_door, detach_only); 4768 4769 free(zids); 4770 } 4771 4772 int 4773 main(int argc, char *argv[]) 4774 { 4775 4776 int arg; 4777 thread_t tid; 4778 scf_simple_prop_t *prop; 4779 uint64_t *intervalp; 4780 boolean_t opt_cleanup = B_FALSE; 4781 4782 g_main = thr_self(); 4783 g_quit = B_FALSE; 4784 (void) signal(SIGINT, zonestat_quithandler); 4785 (void) signal(SIGTERM, zonestat_quithandler); 4786 (void) signal(SIGHUP, zonestat_quithandler); 4787 /* (void) sigignore(SIGCHLD); 
*/ 4788 (void) sigignore(SIGPIPE); 4789 4790 if (getzoneid() != GLOBAL_ZONEID) 4791 zsd_error(gettext("Must be run from global zone only")); 4792 4793 while ((arg = getopt(argc, argv, "c")) 4794 != EOF) { 4795 switch (arg) { 4796 case 'c': 4797 opt_cleanup = B_TRUE; 4798 break; 4799 default: 4800 zsd_error(gettext("Invalid option")); 4801 } 4802 } 4803 4804 if (opt_cleanup) { 4805 if (zsd_disable_cpu_stats() != 0) 4806 exit(1); 4807 else 4808 exit(0); 4809 } 4810 4811 /* Get the configured sample interval */ 4812 prop = scf_simple_prop_get(NULL, "svc:/system/zones-monitoring:default", 4813 "config", "sample_interval"); 4814 if (prop == NULL) 4815 zsd_error(gettext("Unable to fetch SMF property " 4816 "\"config/sample_interval\"")); 4817 4818 if (scf_simple_prop_type(prop) != SCF_TYPE_COUNT) 4819 zsd_error(gettext("Malformed SMF property " 4820 "\"config/sample_interval\". Must be of type \"count\"")); 4821 4822 intervalp = scf_simple_prop_next_count(prop); 4823 g_interval = *intervalp; 4824 if (g_interval == 0) 4825 zsd_error(gettext("Malformed SMF property " 4826 "\"config/sample_interval\". 
Must be greater than zero")); 4827 4828 scf_simple_prop_free(prop); 4829 4830 if (daemonize_start() < 0) 4831 zsd_error(gettext("Unable to start daemon\n")); 4832 4833 /* Run at high priority */ 4834 zsd_set_fx(); 4835 4836 (void) mutex_init(&g_usage_cache_lock, USYNC_THREAD, NULL); 4837 (void) cond_init(&g_usage_cache_kick, USYNC_THREAD, NULL); 4838 (void) cond_init(&g_usage_cache_wait, USYNC_THREAD, NULL); 4839 4840 g_server_door = door_create(zsd_server, NULL, 4841 DOOR_REFUSE_DESC | DOOR_NO_CANCEL); 4842 if (g_server_door < 0) 4843 zsd_error(gettext("Unable to create server door\n")); 4844 4845 4846 g_stat_door = door_create(zsd_stat_server, NULL, DOOR_UNREF_MULTI | 4847 DOOR_REFUSE_DESC | DOOR_NO_CANCEL); 4848 if (g_stat_door < 0) 4849 zsd_error(gettext("Unable to create statistics door\n")); 4850 4851 fattach_all_zones(B_FALSE); 4852 4853 if (thr_create(NULL, 0, stat_thread, NULL, 0, &tid) != 0) 4854 zsd_error(gettext("Unable to create statistics thread\n")); 4855 4856 daemonize_ready(0); 4857 4858 /* Wait for signal to quit */ 4859 while (g_quit == B_FALSE) 4860 (void) pause(); 4861 4862 /* detach doors */ 4863 fattach_all_zones(B_TRUE); 4864 4865 (void) door_revoke(g_server_door); 4866 (void) door_revoke(g_stat_door); 4867 4868 /* kick stat thread and wait for it to close the statistics */ 4869 (void) mutex_lock(&g_usage_cache_lock); 4870 g_quit = B_TRUE; 4871 (void) cond_signal(&g_usage_cache_kick); 4872 (void) mutex_unlock(&g_usage_cache_lock); 4873 end: 4874 (void) thr_join(tid, NULL, NULL); 4875 return (0); 4876 } 4877