1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. 23 */ 24 25 #include <sys/exacct.h> 26 #include <sys/exacct_catalog.h> 27 #include <sys/disp.h> 28 #include <sys/task.h> 29 #include <sys/proc.h> 30 #include <sys/cmn_err.h> 31 #include <sys/kmem.h> 32 #include <sys/project.h> 33 #include <sys/systm.h> 34 #include <sys/vnode.h> 35 #include <sys/file.h> 36 #include <sys/acctctl.h> 37 #include <sys/time.h> 38 #include <sys/utsname.h> 39 #include <sys/session.h> 40 #include <sys/sysmacros.h> 41 #include <sys/bitmap.h> 42 #include <sys/msacct.h> 43 44 /* 45 * exacct usage and recording routines 46 * 47 * wracct(2), getacct(2), and the records written at process or task 48 * termination are constructed using the exacct_assemble_[task,proc]_usage() 49 * functions, which take a callback that takes the appropriate action on 50 * the packed exacct record for the task or process. For the process-related 51 * actions, we partition the routines such that the data collecting component 52 * can be performed while holding p_lock, and all sleeping or blocking 53 * operations can be performed without acquiring p_lock. 54 * 55 * putacct(2), which allows an application to construct a customized record 56 * associated with an existing process or task, has its own entry points: 57 * exacct_tag_task() and exacct_tag_proc(). 58 */ 59 60 taskq_t *exacct_queue; 61 kmem_cache_t *exacct_object_cache; 62 63 zone_key_t exacct_zone_key = ZONE_KEY_UNINITIALIZED; 64 65 static const uint32_t exacct_version = EXACCT_VERSION; 66 static const char exacct_header[] = "exacct"; 67 static const char exacct_creator[] = "SunOS"; 68 69 ea_object_t * 70 ea_alloc_item(ea_catalog_t catalog, void *buf, size_t bufsz) 71 { 72 ea_object_t *item; 73 74 item = kmem_cache_alloc(exacct_object_cache, KM_SLEEP); 75 bzero(item, sizeof (ea_object_t)); 76 (void) ea_set_item(item, catalog, buf, bufsz); 77 return (item); 78 } 79 80 ea_object_t * 81 ea_alloc_group(ea_catalog_t catalog) 82 { 83 ea_object_t *group; 84 85 group = kmem_cache_alloc(exacct_object_cache, KM_SLEEP); 86 bzero(group, sizeof (ea_object_t)); 87 (void) ea_set_group(group, catalog); 88 return (group); 89 } 90 91 ea_object_t * 92 ea_attach_item(ea_object_t *grp, void *buf, size_t bufsz, ea_catalog_t catalog) 93 { 94 ea_object_t *item; 95 96 item = ea_alloc_item(catalog, buf, bufsz); 97 (void) ea_attach_to_group(grp, item); 98 return (item); 99 } 100 101 /* 102 * exacct_add_task_mstate() and exacct_sub_task_mstate() add and subtract 103 * microstate accounting data and resource usage counters from one task_usage_t 104 * from those supplied in another. These functions do not operate on *all* 105 * members of a task_usage_t: for some (e.g. tu_anctaskid) it would not make 106 * sense. 107 */ 108 static void 109 exacct_add_task_mstate(task_usage_t *tu, task_usage_t *delta) 110 { 111 tu->tu_utime += delta->tu_utime; 112 tu->tu_stime += delta->tu_stime; 113 tu->tu_minflt += delta->tu_minflt; 114 tu->tu_majflt += delta->tu_majflt; 115 tu->tu_sndmsg += delta->tu_sndmsg; 116 tu->tu_rcvmsg += delta->tu_rcvmsg; 117 tu->tu_ioch += delta->tu_ioch; 118 tu->tu_iblk += delta->tu_iblk; 119 tu->tu_oblk += delta->tu_oblk; 120 tu->tu_vcsw += delta->tu_vcsw; 121 tu->tu_icsw += delta->tu_icsw; 122 tu->tu_nsig += delta->tu_nsig; 123 tu->tu_nswp += delta->tu_nswp; 124 tu->tu_nscl += delta->tu_nscl; 125 } 126 127 /* 128 * See the comments for exacct_add_task_mstate(), above. 129 */ 130 static void 131 exacct_sub_task_mstate(task_usage_t *tu, task_usage_t *delta) 132 { 133 tu->tu_utime -= delta->tu_utime; 134 tu->tu_stime -= delta->tu_stime; 135 tu->tu_minflt -= delta->tu_minflt; 136 tu->tu_majflt -= delta->tu_majflt; 137 tu->tu_sndmsg -= delta->tu_sndmsg; 138 tu->tu_rcvmsg -= delta->tu_rcvmsg; 139 tu->tu_ioch -= delta->tu_ioch; 140 tu->tu_iblk -= delta->tu_iblk; 141 tu->tu_oblk -= delta->tu_oblk; 142 tu->tu_vcsw -= delta->tu_vcsw; 143 tu->tu_icsw -= delta->tu_icsw; 144 tu->tu_nsig -= delta->tu_nsig; 145 tu->tu_nswp -= delta->tu_nswp; 146 tu->tu_nscl -= delta->tu_nscl; 147 } 148 149 /* 150 * Wrapper for vn_rdwr() used by exacct_vn_write() and exacct_write_header() 151 * to write to the accounting file without corrupting it in case of an I/O or 152 * filesystem error. 153 */ 154 static int 155 exacct_vn_write_impl(ac_info_t *info, void *buf, ssize_t bufsize) 156 { 157 int error; 158 ssize_t resid; 159 struct vattr va; 160 161 ASSERT(info != NULL); 162 ASSERT(info->ac_vnode != NULL); 163 ASSERT(MUTEX_HELD(&info->ac_lock)); 164 165 /* 166 * Save the size. If vn_rdwr fails, reset the size to avoid corrupting 167 * the present accounting file. 168 */ 169 va.va_mask = AT_SIZE; 170 error = VOP_GETATTR(info->ac_vnode, &va, 0, kcred, NULL); 171 if (error == 0) { 172 error = vn_rdwr(UIO_WRITE, info->ac_vnode, (caddr_t)buf, 173 bufsize, 0LL, UIO_SYSSPACE, FAPPEND, (rlim64_t)MAXOFFSET_T, 174 kcred, &resid); 175 if (error) { 176 (void) VOP_SETATTR(info->ac_vnode, &va, 0, kcred, NULL); 177 } else if (resid != 0) { 178 (void) VOP_SETATTR(info->ac_vnode, &va, 0, kcred, NULL); 179 error = ENOSPC; 180 } 181 } 182 return (error); 183 } 184 185 /* 186 * exacct_vn_write() safely writes to an accounting file. acctctl() prevents 187 * the two accounting vnodes from being equal, and the appropriate ac_lock is 188 * held across the call, so we're single threaded through this code for each 189 * file. 190 */ 191 static int 192 exacct_vn_write(ac_info_t *info, void *buf, ssize_t bufsize) 193 { 194 int error; 195 196 if (info == NULL) 197 return (0); 198 199 mutex_enter(&info->ac_lock); 200 201 /* 202 * Don't do anything unless accounting file is set. 203 */ 204 if (info->ac_vnode == NULL) { 205 mutex_exit(&info->ac_lock); 206 return (0); 207 } 208 error = exacct_vn_write_impl(info, buf, bufsize); 209 mutex_exit(&info->ac_lock); 210 211 return (error); 212 } 213 214 /* 215 * void *exacct_create_header(size_t *) 216 * 217 * Overview 218 * exacct_create_header() constructs an exacct file header identifying the 219 * accounting file as the output of the kernel. exacct_create_header() and 220 * the static write_header() and verify_header() routines in libexacct must 221 * remain synchronized. 222 * 223 * Return values 224 * A pointer to a packed exacct buffer containing the appropriate header is 225 * returned; the size of the buffer is placed in the location indicated by 226 * sizep. 227 * 228 * Caller's context 229 * Suitable for KM_SLEEP allocations. 230 */ 231 void * 232 exacct_create_header(size_t *sizep) 233 { 234 ea_object_t *hdr_grp; 235 uint32_t bskip; 236 void *buf; 237 size_t bufsize; 238 239 hdr_grp = ea_alloc_group(EXT_GROUP | EXC_DEFAULT | EXD_GROUP_HEADER); 240 (void) ea_attach_item(hdr_grp, (void *)&exacct_version, 0, 241 EXT_UINT32 | EXC_DEFAULT | EXD_VERSION); 242 (void) ea_attach_item(hdr_grp, (void *)exacct_header, 0, 243 EXT_STRING | EXC_DEFAULT | EXD_FILETYPE); 244 (void) ea_attach_item(hdr_grp, (void *)exacct_creator, 0, 245 EXT_STRING | EXC_DEFAULT | EXD_CREATOR); 246 (void) ea_attach_item(hdr_grp, uts_nodename(), 0, 247 EXT_STRING | EXC_DEFAULT | EXD_HOSTNAME); 248 249 bufsize = ea_pack_object(hdr_grp, NULL, 0); 250 buf = kmem_alloc(bufsize, KM_SLEEP); 251 (void) ea_pack_object(hdr_grp, buf, bufsize); 252 ea_free_object(hdr_grp, EUP_ALLOC); 253 254 /* 255 * To prevent reading the header when reading the file backwards, 256 * set the large backskip of the header group to 0 (last 4 bytes). 257 */ 258 bskip = 0; 259 exacct_order32(&bskip); 260 bcopy(&bskip, (char *)buf + bufsize - sizeof (bskip), 261 sizeof (bskip)); 262 263 *sizep = bufsize; 264 return (buf); 265 } 266 267 /* 268 * int exacct_write_header(ac_info_t *, void *, size_t) 269 * 270 * Overview 271 * exacct_write_header() writes the given header buffer to the indicated 272 * vnode. 273 * 274 * Return values 275 * The result of the write operation is returned. 276 * 277 * Caller's context 278 * Caller must hold the ac_lock of the appropriate accounting file 279 * information block (ac_info_t). 280 */ 281 int 282 exacct_write_header(ac_info_t *info, void *hdr, size_t hdrsize) 283 { 284 if (info != NULL && info->ac_vnode != NULL) 285 return (exacct_vn_write_impl(info, hdr, hdrsize)); 286 287 return (0); 288 } 289 290 static void 291 exacct_get_interval_task_usage(task_t *tk, task_usage_t *tu, 292 task_usage_t **tu_buf) 293 { 294 task_usage_t *oldtu, *newtu; 295 task_usage_t **prevusage; 296 297 ASSERT(MUTEX_HELD(&tk->tk_usage_lock)); 298 if (getzoneid() != GLOBAL_ZONEID) { 299 prevusage = &tk->tk_zoneusage; 300 } else { 301 prevusage = &tk->tk_prevusage; 302 } 303 if ((oldtu = *prevusage) != NULL) { 304 /* 305 * In case we have any accounting information 306 * saved from the previous interval record. 307 */ 308 newtu = *tu_buf; 309 bcopy(tu, newtu, sizeof (task_usage_t)); 310 tu->tu_minflt -= oldtu->tu_minflt; 311 tu->tu_majflt -= oldtu->tu_majflt; 312 tu->tu_sndmsg -= oldtu->tu_sndmsg; 313 tu->tu_rcvmsg -= oldtu->tu_rcvmsg; 314 tu->tu_ioch -= oldtu->tu_ioch; 315 tu->tu_iblk -= oldtu->tu_iblk; 316 tu->tu_oblk -= oldtu->tu_oblk; 317 tu->tu_vcsw -= oldtu->tu_vcsw; 318 tu->tu_icsw -= oldtu->tu_icsw; 319 tu->tu_nsig -= oldtu->tu_nsig; 320 tu->tu_nswp -= oldtu->tu_nswp; 321 tu->tu_nscl -= oldtu->tu_nscl; 322 tu->tu_utime -= oldtu->tu_utime; 323 tu->tu_stime -= oldtu->tu_stime; 324 325 tu->tu_startsec = oldtu->tu_finishsec; 326 tu->tu_startnsec = oldtu->tu_finishnsec; 327 /* 328 * Copy the data from our temporary storage to the task's 329 * previous interval usage structure for future reference. 330 */ 331 bcopy(newtu, oldtu, sizeof (task_usage_t)); 332 } else { 333 /* 334 * Store current statistics in the task's previous interval 335 * usage structure for future references. 336 */ 337 *prevusage = *tu_buf; 338 bcopy(tu, *prevusage, sizeof (task_usage_t)); 339 *tu_buf = NULL; 340 } 341 } 342 343 static void 344 exacct_snapshot_task_usage(task_t *tk, task_usage_t *tu) 345 { 346 timestruc_t ts; 347 proc_t *p; 348 349 ASSERT(MUTEX_HELD(&pidlock)); 350 351 if ((p = tk->tk_memb_list) == NULL) 352 return; 353 354 /* 355 * exacct_snapshot_task_usage() provides an approximate snapshot of the 356 * usage of the potentially many members of the task. Since we don't 357 * guarantee exactness, we don't acquire the p_lock of any of the member 358 * processes. 359 */ 360 do { 361 mutex_enter(&p->p_lock); 362 tu->tu_utime += mstate_aggr_state(p, LMS_USER); 363 tu->tu_stime += mstate_aggr_state(p, LMS_SYSTEM); 364 mutex_exit(&p->p_lock); 365 tu->tu_minflt += p->p_ru.minflt; 366 tu->tu_majflt += p->p_ru.majflt; 367 tu->tu_sndmsg += p->p_ru.msgsnd; 368 tu->tu_rcvmsg += p->p_ru.msgrcv; 369 tu->tu_ioch += p->p_ru.ioch; 370 tu->tu_iblk += p->p_ru.inblock; 371 tu->tu_oblk += p->p_ru.oublock; 372 tu->tu_vcsw += p->p_ru.nvcsw; 373 tu->tu_icsw += p->p_ru.nivcsw; 374 tu->tu_nsig += p->p_ru.nsignals; 375 tu->tu_nswp += p->p_ru.nswap; 376 tu->tu_nscl += p->p_ru.sysc; 377 } while ((p = p->p_tasknext) != tk->tk_memb_list); 378 379 /* 380 * The resource usage accounted for so far will include that 381 * contributed by the task's first process. If this process 382 * came from another task, then its accumulated resource usage 383 * will include a contribution from work performed there. 384 * We must therefore subtract any resource usage that was 385 * inherited with the first process. 386 */ 387 exacct_sub_task_mstate(tu, tk->tk_inherited); 388 389 gethrestime(&ts); 390 tu->tu_finishsec = (uint64_t)(ulong_t)ts.tv_sec; 391 tu->tu_finishnsec = (uint64_t)(ulong_t)ts.tv_nsec; 392 } 393 394 /* 395 * void exacct_update_task_mstate(proc_t *) 396 * 397 * Overview 398 * exacct_update_task_mstate() updates the task usage; it is intended 399 * to be called from proc_exit(). 400 * 401 * Return values 402 * None. 403 * 404 * Caller's context 405 * p_lock must be held at entry. 406 */ 407 void 408 exacct_update_task_mstate(proc_t *p) 409 { 410 task_usage_t *tu; 411 412 mutex_enter(&p->p_task->tk_usage_lock); 413 tu = p->p_task->tk_usage; 414 tu->tu_utime += mstate_aggr_state(p, LMS_USER); 415 tu->tu_stime += mstate_aggr_state(p, LMS_SYSTEM); 416 tu->tu_minflt += p->p_ru.minflt; 417 tu->tu_majflt += p->p_ru.majflt; 418 tu->tu_sndmsg += p->p_ru.msgsnd; 419 tu->tu_rcvmsg += p->p_ru.msgrcv; 420 tu->tu_ioch += p->p_ru.ioch; 421 tu->tu_iblk += p->p_ru.inblock; 422 tu->tu_oblk += p->p_ru.oublock; 423 tu->tu_vcsw += p->p_ru.nvcsw; 424 tu->tu_icsw += p->p_ru.nivcsw; 425 tu->tu_nsig += p->p_ru.nsignals; 426 tu->tu_nswp += p->p_ru.nswap; 427 tu->tu_nscl += p->p_ru.sysc; 428 mutex_exit(&p->p_task->tk_usage_lock); 429 } 430 431 static void 432 exacct_calculate_task_usage(task_t *tk, task_usage_t *tu, int flag) 433 { 434 timestruc_t ts; 435 task_usage_t *tu_buf; 436 437 switch (flag) { 438 case EW_PARTIAL: 439 /* 440 * For partial records we must report the sum of current 441 * accounting statistics with previously accumulated 442 * statistics. 443 */ 444 mutex_enter(&pidlock); 445 mutex_enter(&tk->tk_usage_lock); 446 447 (void) bcopy(tk->tk_usage, tu, sizeof (task_usage_t)); 448 exacct_snapshot_task_usage(tk, tu); 449 450 mutex_exit(&tk->tk_usage_lock); 451 mutex_exit(&pidlock); 452 break; 453 case EW_INTERVAL: 454 /* 455 * We need to allocate spare task_usage_t buffer before 456 * grabbing pidlock because we might need it later in 457 * exacct_get_interval_task_usage(). 458 */ 459 tu_buf = kmem_zalloc(sizeof (task_usage_t), KM_SLEEP); 460 mutex_enter(&pidlock); 461 mutex_enter(&tk->tk_usage_lock); 462 463 /* 464 * For interval records, we deduct the previous microstate 465 * accounting data and cpu usage times from previously saved 466 * results and update the previous task usage structure. 467 */ 468 (void) bcopy(tk->tk_usage, tu, sizeof (task_usage_t)); 469 exacct_snapshot_task_usage(tk, tu); 470 exacct_get_interval_task_usage(tk, tu, &tu_buf); 471 472 mutex_exit(&tk->tk_usage_lock); 473 mutex_exit(&pidlock); 474 475 if (tu_buf != NULL) 476 kmem_free(tu_buf, sizeof (task_usage_t)); 477 break; 478 case EW_FINAL: 479 /* 480 * For final records, we deduct, from the task's current 481 * usage, any usage that was inherited with the arrival 482 * of a process from a previous task. We then record 483 * the task's finish time. 484 */ 485 mutex_enter(&tk->tk_usage_lock); 486 (void) bcopy(tk->tk_usage, tu, sizeof (task_usage_t)); 487 exacct_sub_task_mstate(tu, tk->tk_inherited); 488 mutex_exit(&tk->tk_usage_lock); 489 490 gethrestime(&ts); 491 tu->tu_finishsec = (uint64_t)(ulong_t)ts.tv_sec; 492 tu->tu_finishnsec = (uint64_t)(ulong_t)ts.tv_nsec; 493 494 break; 495 } 496 } 497 498 static int 499 exacct_attach_task_item(task_t *tk, task_usage_t *tu, ea_object_t *record, 500 int res) 501 { 502 int attached = 1; 503 504 switch (res) { 505 case AC_TASK_TASKID: 506 (void) ea_attach_item(record, &tk->tk_tkid, 507 sizeof (uint32_t), EXT_UINT32 | EXD_TASK_TASKID); 508 break; 509 case AC_TASK_PROJID: 510 (void) ea_attach_item(record, &tk->tk_proj->kpj_id, 511 sizeof (uint32_t), EXT_UINT32 | EXD_TASK_PROJID); 512 break; 513 case AC_TASK_CPU: { 514 timestruc_t ts; 515 uint64_t ui; 516 517 hrt2ts(tu->tu_stime, &ts); 518 ui = ts.tv_sec; 519 (void) ea_attach_item(record, &ui, sizeof (uint64_t), 520 EXT_UINT64 | EXD_TASK_CPU_SYS_SEC); 521 ui = ts.tv_nsec; 522 (void) ea_attach_item(record, &ui, sizeof (uint64_t), 523 EXT_UINT64 | EXD_TASK_CPU_SYS_NSEC); 524 525 hrt2ts(tu->tu_utime, &ts); 526 ui = ts.tv_sec; 527 (void) ea_attach_item(record, &ui, sizeof (uint64_t), 528 EXT_UINT64 | EXD_TASK_CPU_USER_SEC); 529 ui = ts.tv_nsec; 530 (void) ea_attach_item(record, &ui, sizeof (uint64_t), 531 EXT_UINT64 | EXD_TASK_CPU_USER_NSEC); 532 } 533 break; 534 case AC_TASK_TIME: 535 (void) ea_attach_item(record, &tu->tu_startsec, 536 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_START_SEC); 537 (void) ea_attach_item(record, &tu->tu_startnsec, 538 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_START_NSEC); 539 (void) ea_attach_item(record, &tu->tu_finishsec, 540 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_FINISH_SEC); 541 (void) ea_attach_item(record, &tu->tu_finishnsec, 542 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_FINISH_NSEC); 543 break; 544 case AC_TASK_HOSTNAME: 545 (void) ea_attach_item(record, tk->tk_zone->zone_nodename, 546 strlen(tk->tk_zone->zone_nodename) + 1, 547 EXT_STRING | EXD_TASK_HOSTNAME); 548 break; 549 case AC_TASK_MICROSTATE: 550 (void) ea_attach_item(record, &tu->tu_majflt, 551 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_FAULTS_MAJOR); 552 (void) ea_attach_item(record, &tu->tu_minflt, 553 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_FAULTS_MINOR); 554 (void) ea_attach_item(record, &tu->tu_sndmsg, 555 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_MESSAGES_SND); 556 (void) ea_attach_item(record, &tu->tu_rcvmsg, 557 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_MESSAGES_RCV); 558 (void) ea_attach_item(record, &tu->tu_iblk, 559 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_BLOCKS_IN); 560 (void) ea_attach_item(record, &tu->tu_oblk, 561 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_BLOCKS_OUT); 562 (void) ea_attach_item(record, &tu->tu_ioch, 563 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_CHARS_RDWR); 564 (void) ea_attach_item(record, &tu->tu_vcsw, 565 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_CONTEXT_VOL); 566 (void) ea_attach_item(record, &tu->tu_icsw, 567 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_CONTEXT_INV); 568 (void) ea_attach_item(record, &tu->tu_nsig, 569 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_SIGNALS); 570 (void) ea_attach_item(record, &tu->tu_nswp, 571 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_SWAPS); 572 (void) ea_attach_item(record, &tu->tu_nscl, 573 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_SYSCALLS); 574 break; 575 case AC_TASK_ANCTASKID: 576 (void) ea_attach_item(record, &tu->tu_anctaskid, 577 sizeof (uint32_t), EXT_UINT32 | EXD_TASK_ANCTASKID); 578 break; 579 case AC_TASK_ZONENAME: 580 (void) ea_attach_item(record, tk->tk_zone->zone_name, 581 strlen(tk->tk_zone->zone_name) + 1, 582 EXT_STRING | EXD_TASK_ZONENAME); 583 break; 584 default: 585 attached = 0; 586 } 587 return (attached); 588 } 589 590 static ea_object_t * 591 exacct_assemble_task_record(task_t *tk, task_usage_t *tu, ulong_t *mask, 592 ea_catalog_t record_type) 593 { 594 int res, count; 595 ea_object_t *record; 596 597 /* 598 * Assemble usage values into group. 599 */ 600 record = ea_alloc_group(EXT_GROUP | EXC_DEFAULT | record_type); 601 for (res = 1, count = 0; res <= AC_TASK_MAX_RES; res++) 602 if (BT_TEST(mask, res)) 603 count += exacct_attach_task_item(tk, tu, record, res); 604 if (count == 0) { 605 ea_free_object(record, EUP_ALLOC); 606 record = NULL; 607 } 608 return (record); 609 } 610 611 /* 612 * int exacct_assemble_task_usage(task_t *, int (*)(void *, size_t, void *, 613 * size_t, size_t *), void *, size_t, size_t *, int) 614 * 615 * Overview 616 * exacct_assemble_task_usage() builds the packed exacct buffer for the 617 * indicated task, executes the given callback function, and free the packed 618 * buffer. 619 * 620 * Return values 621 * Returns 0 on success; otherwise the appropriate error code is returned. 622 * 623 * Caller's context 624 * Suitable for KM_SLEEP allocations. 625 */ 626 int 627 exacct_assemble_task_usage(ac_info_t *ac_task, task_t *tk, 628 int (*callback)(ac_info_t *, void *, size_t, void *, size_t, size_t *), 629 void *ubuf, size_t ubufsize, size_t *actual, int flag) 630 { 631 ulong_t mask[AC_MASK_SZ]; 632 ea_object_t *task_record; 633 ea_catalog_t record_type; 634 task_usage_t *tu; 635 void *buf; 636 size_t bufsize; 637 int ret; 638 639 ASSERT(flag == EW_FINAL || flag == EW_PARTIAL || flag == EW_INTERVAL); 640 641 mutex_enter(&ac_task->ac_lock); 642 if (ac_task->ac_state == AC_OFF) { 643 mutex_exit(&ac_task->ac_lock); 644 return (ENOTACTIVE); 645 } 646 bt_copy(ac_task->ac_mask, mask, AC_MASK_SZ); 647 mutex_exit(&ac_task->ac_lock); 648 649 switch (flag) { 650 case EW_FINAL: 651 record_type = EXD_GROUP_TASK; 652 break; 653 case EW_PARTIAL: 654 record_type = EXD_GROUP_TASK_PARTIAL; 655 break; 656 case EW_INTERVAL: 657 record_type = EXD_GROUP_TASK_INTERVAL; 658 break; 659 default: 660 return (0); 661 } 662 663 /* 664 * Calculate task usage and assemble it into the task record. 665 */ 666 tu = kmem_zalloc(sizeof (task_usage_t), KM_SLEEP); 667 exacct_calculate_task_usage(tk, tu, flag); 668 task_record = exacct_assemble_task_record(tk, tu, mask, record_type); 669 if (task_record == NULL) { 670 /* 671 * The current configuration of the accounting system has 672 * resulted in records with no data; accordingly, we don't write 673 * these, but we return success. 674 */ 675 kmem_free(tu, sizeof (task_usage_t)); 676 return (0); 677 } 678 679 /* 680 * Pack object into buffer and run callback on it. 681 */ 682 bufsize = ea_pack_object(task_record, NULL, 0); 683 buf = kmem_alloc(bufsize, KM_SLEEP); 684 (void) ea_pack_object(task_record, buf, bufsize); 685 ret = callback(ac_task, ubuf, ubufsize, buf, bufsize, actual); 686 687 /* 688 * Free all previously allocated structures. 689 */ 690 kmem_free(buf, bufsize); 691 ea_free_object(task_record, EUP_ALLOC); 692 kmem_free(tu, sizeof (task_usage_t)); 693 return (ret); 694 } 695 696 /* 697 * void exacct_commit_task(void *) 698 * 699 * Overview 700 * exacct_commit_task() calculates the final usage for a task, updating the 701 * task usage if task accounting is active, and writing a task record if task 702 * accounting is active. exacct_commit_task() is intended for being called 703 * from a task queue (taskq_t). 704 * 705 * Return values 706 * None. 707 * 708 * Caller's context 709 * Suitable for KM_SLEEP allocations. 710 */ 711 712 void 713 exacct_commit_task(void *arg) 714 { 715 task_t *tk = (task_t *)arg; 716 size_t size; 717 zone_t *zone = tk->tk_zone; 718 struct exacct_globals *acg; 719 720 ASSERT(tk != task0p); 721 ASSERT(tk->tk_memb_list == NULL); 722 723 /* 724 * Don't do any extra work if the acctctl module isn't loaded. 725 * If acctctl module is loaded when zone is in down state then 726 * zone_getspecific can return NULL for that zone. 727 */ 728 if (exacct_zone_key != ZONE_KEY_UNINITIALIZED) { 729 acg = zone_getspecific(exacct_zone_key, zone); 730 if (acg == NULL) 731 goto err; 732 (void) exacct_assemble_task_usage(&acg->ac_task, tk, 733 exacct_commit_callback, NULL, 0, &size, EW_FINAL); 734 if (tk->tk_zone != global_zone) { 735 acg = zone_getspecific(exacct_zone_key, global_zone); 736 (void) exacct_assemble_task_usage(&acg->ac_task, tk, 737 exacct_commit_callback, NULL, 0, &size, EW_FINAL); 738 } 739 } 740 /* 741 * Release associated project and finalize task. 742 */ 743 err: 744 task_end(tk); 745 } 746 747 static int 748 exacct_attach_proc_item(proc_usage_t *pu, ea_object_t *record, int res) 749 { 750 int attached = 1; 751 752 switch (res) { 753 case AC_PROC_PID: 754 (void) ea_attach_item(record, &pu->pu_pid, 755 sizeof (uint32_t), EXT_UINT32 | EXD_PROC_PID); 756 break; 757 case AC_PROC_UID: 758 (void) ea_attach_item(record, &pu->pu_ruid, 759 sizeof (uint32_t), EXT_UINT32 | EXD_PROC_UID); 760 break; 761 case AC_PROC_FLAG: 762 (void) ea_attach_item(record, &pu->pu_acflag, 763 sizeof (uint32_t), EXT_UINT32 | EXD_PROC_ACCT_FLAGS); 764 break; 765 case AC_PROC_GID: 766 (void) ea_attach_item(record, &pu->pu_rgid, 767 sizeof (uint32_t), EXT_UINT32 | EXD_PROC_GID); 768 break; 769 case AC_PROC_PROJID: 770 (void) ea_attach_item(record, &pu->pu_projid, 771 sizeof (uint32_t), EXT_UINT32 | EXD_PROC_PROJID); 772 break; 773 case AC_PROC_TASKID: 774 (void) ea_attach_item(record, &pu->pu_taskid, 775 sizeof (uint32_t), EXT_UINT32 | EXD_PROC_TASKID); 776 break; 777 case AC_PROC_CPU: 778 (void) ea_attach_item(record, &pu->pu_utimesec, 779 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_CPU_USER_SEC); 780 (void) ea_attach_item(record, &pu->pu_utimensec, 781 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_CPU_USER_NSEC); 782 (void) ea_attach_item(record, &pu->pu_stimesec, 783 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_CPU_SYS_SEC); 784 (void) ea_attach_item(record, &pu->pu_stimensec, 785 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_CPU_SYS_NSEC); 786 break; 787 case AC_PROC_TIME: 788 (void) ea_attach_item(record, &pu->pu_startsec, 789 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_START_SEC); 790 (void) ea_attach_item(record, &pu->pu_startnsec, 791 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_START_NSEC); 792 (void) ea_attach_item(record, &pu->pu_finishsec, 793 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_FINISH_SEC); 794 (void) ea_attach_item(record, &pu->pu_finishnsec, 795 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_FINISH_NSEC); 796 break; 797 case AC_PROC_COMMAND: 798 (void) ea_attach_item(record, pu->pu_command, 799 strlen(pu->pu_command) + 1, EXT_STRING | EXD_PROC_COMMAND); 800 break; 801 case AC_PROC_HOSTNAME: 802 (void) ea_attach_item(record, pu->pu_nodename, 803 strlen(pu->pu_nodename) + 1, 804 EXT_STRING | EXD_PROC_HOSTNAME); 805 break; 806 case AC_PROC_TTY: 807 (void) ea_attach_item(record, &pu->pu_major, 808 sizeof (uint32_t), EXT_UINT32 | EXD_PROC_TTY_MAJOR); 809 (void) ea_attach_item(record, &pu->pu_minor, 810 sizeof (uint32_t), EXT_UINT32 | EXD_PROC_TTY_MINOR); 811 break; 812 case AC_PROC_MICROSTATE: 813 (void) ea_attach_item(record, &pu->pu_majflt, 814 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_FAULTS_MAJOR); 815 (void) ea_attach_item(record, &pu->pu_minflt, 816 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_FAULTS_MINOR); 817 (void) ea_attach_item(record, &pu->pu_sndmsg, 818 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_MESSAGES_SND); 819 (void) ea_attach_item(record, &pu->pu_rcvmsg, 820 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_MESSAGES_RCV); 821 (void) ea_attach_item(record, &pu->pu_iblk, 822 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_BLOCKS_IN); 823 (void) ea_attach_item(record, &pu->pu_oblk, 824 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_BLOCKS_OUT); 825 (void) ea_attach_item(record, &pu->pu_ioch, 826 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_CHARS_RDWR); 827 (void) ea_attach_item(record, &pu->pu_vcsw, 828 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_CONTEXT_VOL); 829 (void) ea_attach_item(record, &pu->pu_icsw, 830 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_CONTEXT_INV); 831 (void) ea_attach_item(record, &pu->pu_nsig, 832 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_SIGNALS); 833 (void) ea_attach_item(record, &pu->pu_nswp, 834 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_SWAPS); 835 (void) ea_attach_item(record, &pu->pu_nscl, 836 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_SYSCALLS); 837 break; 838 case AC_PROC_ANCPID: 839 (void) ea_attach_item(record, &pu->pu_ancpid, 840 sizeof (uint32_t), EXT_UINT32 | EXD_PROC_ANCPID); 841 break; 842 case AC_PROC_WAIT_STATUS: 843 (void) ea_attach_item(record, &pu->pu_wstat, 844 sizeof (uint32_t), EXT_UINT32 | EXD_PROC_WAIT_STATUS); 845 break; 846 case AC_PROC_ZONENAME: 847 (void) ea_attach_item(record, pu->pu_zonename, 848 strlen(pu->pu_zonename) + 1, 849 EXT_STRING | EXD_PROC_ZONENAME); 850 break; 851 case AC_PROC_MEM: 852 (void) ea_attach_item(record, &pu->pu_mem_rss_avg, 853 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_MEM_RSS_AVG_K); 854 (void) ea_attach_item(record, &pu->pu_mem_rss_max, 855 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_MEM_RSS_MAX_K); 856 break; 857 default: 858 attached = 0; 859 } 860 return (attached); 861 } 862 863 static ea_object_t * 864 exacct_assemble_proc_record(proc_usage_t *pu, ulong_t *mask, 865 ea_catalog_t record_type) 866 { 867 int res, count; 868 ea_object_t *record; 869 870 /* 871 * Assemble usage values into group. 872 */ 873 record = ea_alloc_group(EXT_GROUP | EXC_DEFAULT | record_type); 874 for (res = 1, count = 0; res <= AC_PROC_MAX_RES; res++) 875 if (BT_TEST(mask, res)) 876 count += exacct_attach_proc_item(pu, record, res); 877 if (count == 0) { 878 ea_free_object(record, EUP_ALLOC); 879 record = NULL; 880 } 881 return (record); 882 } 883 884 /* 885 * The following two routines assume that process's p_lock is held or 886 * exacct_commit_proc has been called from exit() when all lwps are stopped. 887 */ 888 static void 889 exacct_calculate_proc_mstate(proc_t *p, proc_usage_t *pu) 890 { 891 kthread_t *t; 892 893 ASSERT(MUTEX_HELD(&p->p_lock)); 894 if ((t = p->p_tlist) == NULL) 895 return; 896 897 do { 898 pu->pu_minflt += t->t_lwp->lwp_ru.minflt; 899 pu->pu_majflt += t->t_lwp->lwp_ru.majflt; 900 pu->pu_sndmsg += t->t_lwp->lwp_ru.msgsnd; 901 pu->pu_rcvmsg += t->t_lwp->lwp_ru.msgrcv; 902 pu->pu_ioch += t->t_lwp->lwp_ru.ioch; 903 pu->pu_iblk += t->t_lwp->lwp_ru.inblock; 904 pu->pu_oblk += t->t_lwp->lwp_ru.oublock; 905 pu->pu_vcsw += t->t_lwp->lwp_ru.nvcsw; 906 pu->pu_icsw += t->t_lwp->lwp_ru.nivcsw; 907 pu->pu_nsig += t->t_lwp->lwp_ru.nsignals; 908 pu->pu_nswp += t->t_lwp->lwp_ru.nswap; 909 pu->pu_nscl += t->t_lwp->lwp_ru.sysc; 910 } while ((t = t->t_forw) != p->p_tlist); 911 } 912 913 static void 914 exacct_copy_proc_mstate(proc_t *p, proc_usage_t *pu) 915 { 916 pu->pu_minflt = p->p_ru.minflt; 917 pu->pu_majflt = p->p_ru.majflt; 918 pu->pu_sndmsg = p->p_ru.msgsnd; 919 pu->pu_rcvmsg = p->p_ru.msgrcv; 920 pu->pu_ioch = p->p_ru.ioch; 921 pu->pu_iblk = p->p_ru.inblock; 922 pu->pu_oblk = p->p_ru.oublock; 923 pu->pu_vcsw = p->p_ru.nvcsw; 924 pu->pu_icsw = p->p_ru.nivcsw; 925 pu->pu_nsig = p->p_ru.nsignals; 926 pu->pu_nswp = p->p_ru.nswap; 927 pu->pu_nscl = p->p_ru.sysc; 928 } 929 930 void 931 exacct_calculate_proc_usage(proc_t *p, proc_usage_t *pu, ulong_t *mask, 932 int flag, int wstat) 933 { 934 timestruc_t ts, ts_run; 935 936 ASSERT(MUTEX_HELD(&p->p_lock)); 937 938 /* 939 * Convert CPU and execution times to sec/nsec format. 940 */ 941 if (BT_TEST(mask, AC_PROC_CPU)) { 942 hrt2ts(mstate_aggr_state(p, LMS_USER), &ts); 943 pu->pu_utimesec = (uint64_t)(ulong_t)ts.tv_sec; 944 pu->pu_utimensec = (uint64_t)(ulong_t)ts.tv_nsec; 945 hrt2ts(mstate_aggr_state(p, LMS_SYSTEM), &ts); 946 pu->pu_stimesec = (uint64_t)(ulong_t)ts.tv_sec; 947 pu->pu_stimensec = (uint64_t)(ulong_t)ts.tv_nsec; 948 } 949 if (BT_TEST(mask, AC_PROC_TIME)) { 950 gethrestime(&ts); 951 pu->pu_finishsec = (uint64_t)(ulong_t)ts.tv_sec; 952 pu->pu_finishnsec = (uint64_t)(ulong_t)ts.tv_nsec; 953 hrt2ts(gethrtime() - p->p_mstart, &ts_run); 954 ts.tv_sec -= ts_run.tv_sec; 955 ts.tv_nsec -= ts_run.tv_nsec; 956 if (ts.tv_nsec < 0) { 957 ts.tv_sec--; 958 if ((ts.tv_nsec = ts.tv_nsec + NANOSEC) >= NANOSEC) { 959 ts.tv_sec++; 960 ts.tv_nsec -= NANOSEC; 961 } 962 } 963 pu->pu_startsec = (uint64_t)(ulong_t)ts.tv_sec; 964 pu->pu_startnsec = (uint64_t)(ulong_t)ts.tv_nsec; 965 } 966 967 pu->pu_pid = p->p_pidp->pid_id; 968 pu->pu_acflag = p->p_user.u_acflag; 969 pu->pu_projid = p->p_task->tk_proj->kpj_id; 970 pu->pu_taskid = p->p_task->tk_tkid; 971 pu->pu_major = getmajor(p->p_sessp->s_dev); 972 pu->pu_minor = getminor(p->p_sessp->s_dev); 973 pu->pu_ancpid = p->p_ancpid; 974 pu->pu_wstat = wstat; 975 /* 976 * Compute average RSS in K. The denominator is the number of 977 * samples: the number of clock ticks plus the initial value. 978 */ 979 pu->pu_mem_rss_avg = (PTOU(p)->u_mem / (p->p_stime + p->p_utime + 1)) * 980 (PAGESIZE / 1024); 981 pu->pu_mem_rss_max = PTOU(p)->u_mem_max * (PAGESIZE / 1024); 982 983 mutex_enter(&p->p_crlock); 984 pu->pu_ruid = crgetruid(p->p_cred); 985 pu->pu_rgid = crgetrgid(p->p_cred); 986 mutex_exit(&p->p_crlock); 987 988 bcopy(p->p_user.u_comm, pu->pu_command, strlen(p->p_user.u_comm) + 1); 989 bcopy(p->p_zone->zone_name, pu->pu_zonename, 990 strlen(p->p_zone->zone_name) + 1); 991 bcopy(p->p_zone->zone_nodename, pu->pu_nodename, 992 strlen(p->p_zone->zone_nodename) + 1); 993 994 /* 995 * Calculate microstate accounting data for a process that is still 996 * running. Presently, we explicitly collect all of the LWP usage into 997 * the proc usage structure here. 998 */ 999 if (flag & EW_PARTIAL) 1000 exacct_calculate_proc_mstate(p, pu); 1001 if (flag & EW_FINAL) 1002 exacct_copy_proc_mstate(p, pu); 1003 } 1004 1005 /* 1006 * int exacct_assemble_proc_usage(proc_usage_t *, int (*)(void *, size_t, void 1007 * *, size_t, size_t *), void *, size_t, size_t *) 1008 * 1009 * Overview 1010 * Assemble record with miscellaneous accounting information about the process 1011 * and execute the callback on it. It is the callback's job to set "actual" to 1012 * the size of record. 1013 * 1014 * Return values 1015 * The result of the callback function, unless the extended process accounting 1016 * feature is not active, in which case ENOTACTIVE is returned. 1017 * 1018 * Caller's context 1019 * Suitable for KM_SLEEP allocations. 1020 */ 1021 int 1022 exacct_assemble_proc_usage(ac_info_t *ac_proc, proc_usage_t *pu, 1023 int (*callback)(ac_info_t *, void *, size_t, void *, size_t, size_t *), 1024 void *ubuf, size_t ubufsize, size_t *actual, int flag) 1025 { 1026 ulong_t mask[AC_MASK_SZ]; 1027 ea_object_t *proc_record; 1028 ea_catalog_t record_type; 1029 void *buf; 1030 size_t bufsize; 1031 int ret; 1032 1033 ASSERT(flag == EW_FINAL || flag == EW_PARTIAL); 1034 1035 mutex_enter(&ac_proc->ac_lock); 1036 if (ac_proc->ac_state == AC_OFF) { 1037 mutex_exit(&ac_proc->ac_lock); 1038 return (ENOTACTIVE); 1039 } 1040 bt_copy(&ac_proc->ac_mask[0], mask, AC_MASK_SZ); 1041 mutex_exit(&ac_proc->ac_lock); 1042 1043 switch (flag) { 1044 case EW_FINAL: 1045 record_type = EXD_GROUP_PROC; 1046 break; 1047 case EW_PARTIAL: 1048 record_type = EXD_GROUP_PROC_PARTIAL; 1049 break; 1050 default: 1051 record_type = EXD_NONE; 1052 break; 1053 } 1054 1055 proc_record = exacct_assemble_proc_record(pu, mask, record_type); 1056 if (proc_record == NULL) 1057 return (0); 1058 1059 /* 1060 * Pack object into buffer and pass to callback. 1061 */ 1062 bufsize = ea_pack_object(proc_record, NULL, 0); 1063 buf = kmem_alloc(bufsize, KM_SLEEP); 1064 (void) ea_pack_object(proc_record, buf, bufsize); 1065 1066 ret = callback(ac_proc, ubuf, ubufsize, buf, bufsize, actual); 1067 1068 /* 1069 * Free all previously allocations. 1070 */ 1071 kmem_free(buf, bufsize); 1072 ea_free_object(proc_record, EUP_ALLOC); 1073 return (ret); 1074 } 1075 1076 /* 1077 * int exacct_commit_callback(ac_info_t *, void *, size_t, void *, size_t, 1078 * size_t *) 1079 * 1080 * Overview 1081 * exacct_commit_callback() writes the indicated buffer to the indicated 1082 * extended accounting file. 1083 * 1084 * Return values 1085 * The result of the write operation is returned. "actual" is updated to 1086 * contain the number of bytes actually written. 1087 * 1088 * Caller's context 1089 * Suitable for a vn_rdwr() operation. 1090 */ 1091 /*ARGSUSED*/ 1092 int 1093 exacct_commit_callback(ac_info_t *info, void *ubuf, size_t ubufsize, 1094 void *buf, size_t bufsize, size_t *actual) 1095 { 1096 int error = 0; 1097 1098 *actual = 0; 1099 if ((error = exacct_vn_write(info, buf, bufsize)) == 0) 1100 *actual = bufsize; 1101 return (error); 1102 } 1103 1104 static void 1105 exacct_do_commit_proc(ac_info_t *ac_proc, proc_t *p, int wstat) 1106 { 1107 size_t size; 1108 proc_usage_t *pu; 1109 ulong_t mask[AC_MASK_SZ]; 1110 1111 mutex_enter(&ac_proc->ac_lock); 1112 if (ac_proc->ac_state == AC_ON) { 1113 bt_copy(&ac_proc->ac_mask[0], mask, AC_MASK_SZ); 1114 mutex_exit(&ac_proc->ac_lock); 1115 } else { 1116 mutex_exit(&ac_proc->ac_lock); 1117 return; 1118 } 1119 1120 mutex_enter(&p->p_lock); 1121 size = strlen(p->p_user.u_comm) + 1; 1122 mutex_exit(&p->p_lock); 1123 1124 pu = kmem_alloc(sizeof (proc_usage_t), KM_SLEEP); 1125 pu->pu_command = kmem_alloc(size, KM_SLEEP); 1126 mutex_enter(&p->p_lock); 1127 exacct_calculate_proc_usage(p, pu, mask, EW_FINAL, wstat); 1128 mutex_exit(&p->p_lock); 1129 1130 (void) exacct_assemble_proc_usage(ac_proc, pu, 1131 exacct_commit_callback, NULL, 0, &size, EW_FINAL); 1132 1133 kmem_free(pu->pu_command, strlen(pu->pu_command) + 1); 1134 kmem_free(pu, sizeof (proc_usage_t)); 1135 } 1136 1137 /* 1138 * void exacct_commit_proc(proc_t *, int) 1139 * 1140 * Overview 1141 * exacct_commit_proc() calculates the final usage for a process, updating the 1142 * task usage if task accounting is active, and writing a process record if 1143 * process accounting is active. exacct_commit_proc() is intended for being 1144 * called from proc_exit(). 1145 * 1146 * Return values 1147 * None. 1148 * 1149 * Caller's context 1150 * Suitable for KM_SLEEP allocations. p_lock must not be held at entry. 1151 */ 1152 void 1153 exacct_commit_proc(proc_t *p, int wstat) 1154 { 1155 zone_t *zone = p->p_zone; 1156 struct exacct_globals *acg, *gacg = NULL; 1157 1158 if (exacct_zone_key == ZONE_KEY_UNINITIALIZED) { 1159 /* 1160 * acctctl module not loaded. Nothing to do. 1161 */ 1162 return; 1163 } 1164 1165 /* 1166 * If acctctl module is loaded when zone is in down state then 1167 * zone_getspecific can return NULL for that zone. 1168 */ 1169 acg = zone_getspecific(exacct_zone_key, zone); 1170 if (acg == NULL) 1171 return; 1172 exacct_do_commit_proc(&acg->ac_proc, p, wstat); 1173 if (zone != global_zone) { 1174 gacg = zone_getspecific(exacct_zone_key, global_zone); 1175 exacct_do_commit_proc(&gacg->ac_proc, p, wstat); 1176 } 1177 } 1178 1179 static int 1180 exacct_attach_netstat_item(net_stat_t *ns, ea_object_t *record, int res) 1181 { 1182 int attached = 1; 1183 1184 switch (res) { 1185 case AC_NET_NAME: 1186 (void) ea_attach_item(record, ns->ns_name, 1187 strlen(ns->ns_name) + 1, EXT_STRING | EXD_NET_STATS_NAME); 1188 break; 1189 case AC_NET_CURTIME: 1190 { 1191 uint64_t now; 1192 timestruc_t ts; 1193 1194 gethrestime(&ts); 1195 now = (uint64_t)(ulong_t)ts.tv_sec; 1196 (void) ea_attach_item(record, &now, sizeof (uint64_t), 1197 EXT_UINT64 | EXD_NET_STATS_CURTIME); 1198 } 1199 break; 1200 case AC_NET_IBYTES: 1201 (void) ea_attach_item(record, &ns->ns_ibytes, 1202 sizeof (uint64_t), EXT_UINT64 | EXD_NET_STATS_IBYTES); 1203 break; 1204 case AC_NET_OBYTES: 1205 (void) ea_attach_item(record, &ns->ns_obytes, 1206 sizeof (uint64_t), EXT_UINT64 | EXD_NET_STATS_OBYTES); 1207 break; 1208 case AC_NET_IPKTS: 1209 (void) ea_attach_item(record, &ns->ns_ipackets, 1210 sizeof (uint64_t), EXT_UINT64 | EXD_NET_STATS_IPKTS); 1211 break; 1212 case AC_NET_OPKTS: 1213 (void) ea_attach_item(record, &ns->ns_opackets, 1214 sizeof (uint64_t), EXT_UINT64 | EXD_NET_STATS_OPKTS); 1215 break; 1216 case AC_NET_IERRPKTS: 1217 (void) ea_attach_item(record, &ns->ns_ierrors, 1218 sizeof (uint64_t), EXT_UINT64 | EXD_NET_STATS_IERRPKTS); 1219 break; 1220 case AC_NET_OERRPKTS: 1221 (void) ea_attach_item(record, &ns->ns_oerrors, 1222 sizeof (uint64_t), EXT_UINT64 | EXD_NET_STATS_OERRPKTS); 1223 break; 1224 default: 1225 attached = 0; 1226 } 1227 return (attached); 1228 } 1229 1230 static int 1231 exacct_attach_netdesc_item(net_desc_t *nd, ea_object_t *record, int res) 1232 { 1233 int attached = 1; 1234 1235 switch (res) { 1236 case AC_NET_NAME: 1237 (void) ea_attach_item(record, nd->nd_name, 1238 strlen(nd->nd_name) + 1, EXT_STRING | EXD_NET_DESC_NAME); 1239 break; 1240 case AC_NET_DEVNAME: 1241 (void) ea_attach_item(record, nd->nd_devname, 1242 strlen(nd->nd_devname) + 1, EXT_STRING | 1243 EXD_NET_DESC_DEVNAME); 1244 break; 1245 case AC_NET_EHOST: 1246 (void) ea_attach_item(record, &nd->nd_ehost, 1247 sizeof (nd->nd_ehost), EXT_RAW | EXD_NET_DESC_EHOST); 1248 break; 1249 case AC_NET_EDEST: 1250 (void) ea_attach_item(record, &nd->nd_edest, 1251 sizeof (nd->nd_edest), EXT_RAW | EXD_NET_DESC_EDEST); 1252 break; 1253 case AC_NET_VLAN_TPID: 1254 (void) ea_attach_item(record, &nd->nd_vlan_tpid, 1255 sizeof (ushort_t), EXT_UINT16 | EXD_NET_DESC_VLAN_TPID); 1256 break; 1257 case AC_NET_VLAN_TCI: 1258 (void) ea_attach_item(record, &nd->nd_vlan_tci, 1259 sizeof (ushort_t), EXT_UINT16 | EXD_NET_DESC_VLAN_TCI); 1260 break; 1261 case AC_NET_SAP: 1262 (void) ea_attach_item(record, &nd->nd_sap, 1263 sizeof (ushort_t), EXT_UINT16 | EXD_NET_DESC_SAP); 1264 break; 1265 case AC_NET_PRIORITY: 1266 (void) ea_attach_item(record, &nd->nd_priority, 1267 sizeof (ushort_t), EXT_UINT16 | EXD_NET_DESC_PRIORITY); 1268 break; 1269 case AC_NET_BWLIMIT: 1270 (void) ea_attach_item(record, &nd->nd_bw_limit, 1271 sizeof (uint64_t), EXT_UINT64 | EXD_NET_DESC_BWLIMIT); 1272 break; 1273 case AC_NET_SADDR: 1274 if (nd->nd_isv4) { 1275 (void) ea_attach_item(record, &nd->nd_saddr[3], 1276 sizeof (uint32_t), EXT_UINT32 | 1277 EXD_NET_DESC_V4SADDR); 1278 } else { 1279 (void) ea_attach_item(record, &nd->nd_saddr, 1280 sizeof (nd->nd_saddr), EXT_RAW | 1281 EXD_NET_DESC_V6SADDR); 1282 } 1283 break; 1284 case AC_NET_DADDR: 1285 if (nd->nd_isv4) { 1286 (void) ea_attach_item(record, &nd->nd_daddr[3], 1287 sizeof (uint32_t), EXT_UINT32 | 1288 EXD_NET_DESC_V4DADDR); 1289 } else { 1290 (void) ea_attach_item(record, &nd->nd_daddr, 1291 sizeof (nd->nd_daddr), EXT_RAW | 1292 EXD_NET_DESC_V6DADDR); 1293 } 1294 break; 1295 case AC_NET_SPORT: 1296 (void) ea_attach_item(record, &nd->nd_sport, 1297 sizeof (uint16_t), EXT_UINT16 | EXD_NET_DESC_SPORT); 1298 break; 1299 case AC_NET_DPORT: 1300 (void) ea_attach_item(record, &nd->nd_dport, 1301 sizeof (uint16_t), EXT_UINT16 | EXD_NET_DESC_DPORT); 1302 break; 1303 case AC_NET_PROTOCOL: 1304 (void) ea_attach_item(record, &nd->nd_protocol, 1305 sizeof (uint8_t), EXT_UINT8 | EXD_NET_DESC_PROTOCOL); 1306 break; 1307 case AC_NET_DSFIELD: 1308 (void) ea_attach_item(record, &nd->nd_dsfield, 1309 sizeof (uint8_t), EXT_UINT8 | EXD_NET_DESC_DSFIELD); 1310 break; 1311 default: 1312 attached = 0; 1313 } 1314 return (attached); 1315 } 1316 1317 static ea_object_t * 1318 exacct_assemble_net_record(void *ninfo, ulong_t *mask, ea_catalog_t record_type, 1319 int what) 1320 { 1321 int res; 1322 int count; 1323 ea_object_t *record; 1324 1325 /* 1326 * Assemble usage values into group. 1327 */ 1328 record = ea_alloc_group(EXT_GROUP | EXC_DEFAULT | record_type); 1329 for (res = 1, count = 0; res <= AC_NET_MAX_RES; res++) 1330 if (BT_TEST(mask, res)) { 1331 if (what == EX_NET_LNDESC_REC || 1332 what == EX_NET_FLDESC_REC) { 1333 count += exacct_attach_netdesc_item( 1334 (net_desc_t *)ninfo, record, res); 1335 } else { 1336 count += exacct_attach_netstat_item( 1337 (net_stat_t *)ninfo, record, res); 1338 } 1339 } 1340 if (count == 0) { 1341 ea_free_object(record, EUP_ALLOC); 1342 record = NULL; 1343 } 1344 return (record); 1345 } 1346 1347 int 1348 exacct_assemble_net_usage(ac_info_t *ac_net, void *ninfo, 1349 int (*callback)(ac_info_t *, void *, size_t, void *, size_t, size_t *), 1350 void *ubuf, size_t ubufsize, size_t *actual, int what) 1351 { 1352 ulong_t mask[AC_MASK_SZ]; 1353 ea_object_t *net_desc; 1354 ea_catalog_t record_type; 1355 void *buf; 1356 size_t bufsize; 1357 int ret; 1358 1359 mutex_enter(&ac_net->ac_lock); 1360 if (ac_net->ac_state == AC_OFF) { 1361 mutex_exit(&ac_net->ac_lock); 1362 return (ENOTACTIVE); 1363 } 1364 bt_copy(&ac_net->ac_mask[0], mask, AC_MASK_SZ); 1365 mutex_exit(&ac_net->ac_lock); 1366 1367 switch (what) { 1368 case EX_NET_LNDESC_REC: 1369 record_type = EXD_GROUP_NET_LINK_DESC; 1370 break; 1371 case EX_NET_LNSTAT_REC: 1372 record_type = EXD_GROUP_NET_LINK_STATS; 1373 break; 1374 case EX_NET_FLDESC_REC: 1375 record_type = EXD_GROUP_NET_FLOW_DESC; 1376 break; 1377 case EX_NET_FLSTAT_REC: 1378 record_type = EXD_GROUP_NET_FLOW_STATS; 1379 break; 1380 default: 1381 return (0); 1382 } 1383 1384 net_desc = exacct_assemble_net_record(ninfo, mask, record_type, what); 1385 if (net_desc == NULL) 1386 return (0); 1387 1388 /* 1389 * Pack object into buffer and pass to callback. 1390 */ 1391 bufsize = ea_pack_object(net_desc, NULL, 0); 1392 buf = kmem_alloc(bufsize, KM_NOSLEEP); 1393 if (buf == NULL) 1394 return (ENOMEM); 1395 1396 (void) ea_pack_object(net_desc, buf, bufsize); 1397 1398 ret = callback(ac_net, ubuf, ubufsize, buf, bufsize, actual); 1399 1400 /* 1401 * Free all previously allocations. 1402 */ 1403 kmem_free(buf, bufsize); 1404 ea_free_object(net_desc, EUP_ALLOC); 1405 return (ret); 1406 } 1407 1408 int 1409 exacct_commit_netinfo(void *arg, int what) 1410 { 1411 size_t size; 1412 ulong_t mask[AC_MASK_SZ]; 1413 struct exacct_globals *acg; 1414 ac_info_t *ac_net; 1415 1416 if (exacct_zone_key == ZONE_KEY_UNINITIALIZED) { 1417 /* 1418 * acctctl module not loaded. Nothing to do. 1419 */ 1420 return (ENOTACTIVE); 1421 } 1422 1423 /* 1424 * Even though each zone nominally has its own flow accounting settings 1425 * (ac_flow), these are only maintained by and for the global zone. 1426 * 1427 * If this were to change in the future, this function should grow a 1428 * second zoneid (or zone) argument, and use the corresponding zone's 1429 * settings rather than always using those of the global zone. 1430 */ 1431 acg = zone_getspecific(exacct_zone_key, global_zone); 1432 ac_net = &acg->ac_net; 1433 1434 mutex_enter(&ac_net->ac_lock); 1435 if (ac_net->ac_state == AC_OFF) { 1436 mutex_exit(&ac_net->ac_lock); 1437 return (ENOTACTIVE); 1438 } 1439 bt_copy(&ac_net->ac_mask[0], mask, AC_MASK_SZ); 1440 mutex_exit(&ac_net->ac_lock); 1441 1442 return (exacct_assemble_net_usage(ac_net, arg, exacct_commit_callback, 1443 NULL, 0, &size, what)); 1444 } 1445 1446 static int 1447 exacct_attach_flow_item(flow_usage_t *fu, ea_object_t *record, int res) 1448 { 1449 int attached = 1; 1450 1451 switch (res) { 1452 case AC_FLOW_SADDR: 1453 if (fu->fu_isv4) { 1454 (void) ea_attach_item(record, &fu->fu_saddr[3], 1455 sizeof (uint32_t), EXT_UINT32 | EXD_FLOW_V4SADDR); 1456 } else { 1457 (void) ea_attach_item(record, &fu->fu_saddr, 1458 sizeof (fu->fu_saddr), EXT_RAW | 1459 EXD_FLOW_V6SADDR); 1460 } 1461 break; 1462 case AC_FLOW_DADDR: 1463 if (fu->fu_isv4) { 1464 (void) ea_attach_item(record, &fu->fu_daddr[3], 1465 sizeof (uint32_t), EXT_UINT32 | EXD_FLOW_V4DADDR); 1466 } else { 1467 (void) ea_attach_item(record, &fu->fu_daddr, 1468 sizeof (fu->fu_daddr), EXT_RAW | 1469 EXD_FLOW_V6DADDR); 1470 } 1471 break; 1472 case AC_FLOW_SPORT: 1473 (void) ea_attach_item(record, &fu->fu_sport, 1474 sizeof (uint16_t), EXT_UINT16 | EXD_FLOW_SPORT); 1475 break; 1476 case AC_FLOW_DPORT: 1477 (void) ea_attach_item(record, &fu->fu_dport, 1478 sizeof (uint16_t), EXT_UINT16 | EXD_FLOW_DPORT); 1479 break; 1480 case AC_FLOW_PROTOCOL: 1481 (void) ea_attach_item(record, &fu->fu_protocol, 1482 sizeof (uint8_t), EXT_UINT8 | EXD_FLOW_PROTOCOL); 1483 break; 1484 case AC_FLOW_DSFIELD: 1485 (void) ea_attach_item(record, &fu->fu_dsfield, 1486 sizeof (uint8_t), EXT_UINT8 | EXD_FLOW_DSFIELD); 1487 break; 1488 case AC_FLOW_CTIME: 1489 (void) ea_attach_item(record, &fu->fu_ctime, 1490 sizeof (uint64_t), EXT_UINT64 | EXD_FLOW_CTIME); 1491 break; 1492 case AC_FLOW_LSEEN: 1493 (void) ea_attach_item(record, &fu->fu_lseen, 1494 sizeof (uint64_t), EXT_UINT64 | EXD_FLOW_LSEEN); 1495 break; 1496 case AC_FLOW_NBYTES: 1497 (void) ea_attach_item(record, &fu->fu_nbytes, 1498 sizeof (uint64_t), EXT_UINT32 | EXD_FLOW_NBYTES); 1499 break; 1500 case AC_FLOW_NPKTS: 1501 (void) ea_attach_item(record, &fu->fu_npackets, 1502 sizeof (uint64_t), EXT_UINT32 | EXD_FLOW_NPKTS); 1503 break; 1504 case AC_FLOW_PROJID: 1505 if (fu->fu_projid >= 0) { 1506 (void) ea_attach_item(record, &fu->fu_projid, 1507 sizeof (uint32_t), EXT_UINT32 | EXD_FLOW_PROJID); 1508 } 1509 break; 1510 case AC_FLOW_UID: 1511 (void) ea_attach_item(record, &fu->fu_userid, 1512 sizeof (uint32_t), EXT_UINT32 | EXD_FLOW_UID); 1513 break; 1514 case AC_FLOW_ANAME: 1515 (void) ea_attach_item(record, fu->fu_aname, 1516 strlen(fu->fu_aname) + 1, EXT_STRING | EXD_FLOW_ANAME); 1517 break; 1518 default: 1519 attached = 0; 1520 } 1521 return (attached); 1522 } 1523 1524 static ea_object_t * 1525 exacct_assemble_flow_record(flow_usage_t *fu, ulong_t *mask, 1526 ea_catalog_t record_type) 1527 { 1528 int res, count; 1529 ea_object_t *record; 1530 1531 /* 1532 * Assemble usage values into group. 1533 */ 1534 record = ea_alloc_group(EXT_GROUP | EXC_DEFAULT | record_type); 1535 for (res = 1, count = 0; res <= AC_FLOW_MAX_RES; res++) 1536 if (BT_TEST(mask, res)) 1537 count += exacct_attach_flow_item(fu, record, res); 1538 if (count == 0) { 1539 ea_free_object(record, EUP_ALLOC); 1540 record = NULL; 1541 } 1542 return (record); 1543 } 1544 1545 int 1546 exacct_assemble_flow_usage(ac_info_t *ac_flow, flow_usage_t *fu, 1547 int (*callback)(ac_info_t *, void *, size_t, void *, size_t, size_t *), 1548 void *ubuf, size_t ubufsize, size_t *actual) 1549 { 1550 ulong_t mask[AC_MASK_SZ]; 1551 ea_object_t *flow_usage; 1552 ea_catalog_t record_type; 1553 void *buf; 1554 size_t bufsize; 1555 int ret; 1556 1557 mutex_enter(&ac_flow->ac_lock); 1558 if (ac_flow->ac_state == AC_OFF) { 1559 mutex_exit(&ac_flow->ac_lock); 1560 return (ENOTACTIVE); 1561 } 1562 bt_copy(&ac_flow->ac_mask[0], mask, AC_MASK_SZ); 1563 mutex_exit(&ac_flow->ac_lock); 1564 1565 record_type = EXD_GROUP_FLOW; 1566 1567 flow_usage = exacct_assemble_flow_record(fu, mask, record_type); 1568 if (flow_usage == NULL) { 1569 return (0); 1570 } 1571 1572 /* 1573 * Pack object into buffer and pass to callback. 1574 */ 1575 bufsize = ea_pack_object(flow_usage, NULL, 0); 1576 buf = kmem_alloc(bufsize, KM_NOSLEEP); 1577 if (buf == NULL) { 1578 return (ENOMEM); 1579 } 1580 1581 (void) ea_pack_object(flow_usage, buf, bufsize); 1582 1583 ret = callback(ac_flow, ubuf, ubufsize, buf, bufsize, actual); 1584 1585 /* 1586 * Free all previously allocations. 1587 */ 1588 kmem_free(buf, bufsize); 1589 ea_free_object(flow_usage, EUP_ALLOC); 1590 return (ret); 1591 } 1592 1593 void 1594 exacct_commit_flow(void *arg) 1595 { 1596 flow_usage_t *f = (flow_usage_t *)arg; 1597 size_t size; 1598 ulong_t mask[AC_MASK_SZ]; 1599 struct exacct_globals *acg; 1600 ac_info_t *ac_flow; 1601 1602 if (exacct_zone_key == ZONE_KEY_UNINITIALIZED) { 1603 /* 1604 * acctctl module not loaded. Nothing to do. 1605 */ 1606 return; 1607 } 1608 1609 /* 1610 * Even though each zone nominally has its own flow accounting settings 1611 * (ac_flow), these are only maintained by and for the global zone. 1612 * 1613 * If this were to change in the future, this function should grow a 1614 * second zoneid (or zone) argument, and use the corresponding zone's 1615 * settings rather than always using those of the global zone. 1616 */ 1617 acg = zone_getspecific(exacct_zone_key, global_zone); 1618 ac_flow = &acg->ac_flow; 1619 1620 mutex_enter(&ac_flow->ac_lock); 1621 if (ac_flow->ac_state == AC_OFF) { 1622 mutex_exit(&ac_flow->ac_lock); 1623 return; 1624 } 1625 bt_copy(&ac_flow->ac_mask[0], mask, AC_MASK_SZ); 1626 mutex_exit(&ac_flow->ac_lock); 1627 1628 (void) exacct_assemble_flow_usage(ac_flow, f, exacct_commit_callback, 1629 NULL, 0, &size); 1630 } 1631 1632 /* 1633 * int exacct_tag_task(task_t *, void *, size_t, int) 1634 * 1635 * Overview 1636 * exacct_tag_task() provides the exacct record construction and writing 1637 * support required by putacct(2) for task entities. 1638 * 1639 * Return values 1640 * The result of the write operation is returned, unless the extended 1641 * accounting facility is not active, in which case ENOTACTIVE is returned. 1642 * 1643 * Caller's context 1644 * Suitable for KM_SLEEP allocations. 1645 */ 1646 int 1647 exacct_tag_task(ac_info_t *ac_task, task_t *tk, void *ubuf, size_t ubufsz, 1648 int flags) 1649 { 1650 int error = 0; 1651 void *buf; 1652 size_t bufsize; 1653 ea_catalog_t cat; 1654 ea_object_t *tag; 1655 1656 mutex_enter(&ac_task->ac_lock); 1657 if (ac_task->ac_state == AC_OFF || ac_task->ac_vnode == NULL) { 1658 mutex_exit(&ac_task->ac_lock); 1659 return (ENOTACTIVE); 1660 } 1661 mutex_exit(&ac_task->ac_lock); 1662 1663 tag = ea_alloc_group(EXT_GROUP | EXC_DEFAULT | EXD_GROUP_TASK_TAG); 1664 (void) ea_attach_item(tag, &tk->tk_tkid, 0, 1665 EXT_UINT32 | EXC_DEFAULT | EXD_TASK_TASKID); 1666 (void) ea_attach_item(tag, tk->tk_zone->zone_nodename, 0, 1667 EXT_STRING | EXC_DEFAULT | EXD_TASK_HOSTNAME); 1668 if (flags == EP_RAW) 1669 cat = EXT_RAW | EXC_DEFAULT | EXD_TASK_TAG; 1670 else 1671 cat = EXT_EXACCT_OBJECT | EXC_DEFAULT | EXD_TASK_TAG; 1672 (void) ea_attach_item(tag, ubuf, ubufsz, cat); 1673 1674 bufsize = ea_pack_object(tag, NULL, 0); 1675 buf = kmem_alloc(bufsize, KM_SLEEP); 1676 (void) ea_pack_object(tag, buf, bufsize); 1677 error = exacct_vn_write(ac_task, buf, bufsize); 1678 kmem_free(buf, bufsize); 1679 ea_free_object(tag, EUP_ALLOC); 1680 return (error); 1681 } 1682 1683 /* 1684 * exacct_tag_proc(pid_t, taskid_t, void *, size_t, int, char *) 1685 * 1686 * Overview 1687 * exacct_tag_proc() provides the exacct record construction and writing 1688 * support required by putacct(2) for processes. 1689 * 1690 * Return values 1691 * The result of the write operation is returned, unless the extended 1692 * accounting facility is not active, in which case ENOTACTIVE is returned. 1693 * 1694 * Caller's context 1695 * Suitable for KM_SLEEP allocations. 1696 */ 1697 int 1698 exacct_tag_proc(ac_info_t *ac_proc, pid_t pid, taskid_t tkid, void *ubuf, 1699 size_t ubufsz, int flags, const char *hostname) 1700 { 1701 int error = 0; 1702 void *buf; 1703 size_t bufsize; 1704 ea_catalog_t cat; 1705 ea_object_t *tag; 1706 1707 mutex_enter(&ac_proc->ac_lock); 1708 if (ac_proc->ac_state == AC_OFF || ac_proc->ac_vnode == NULL) { 1709 mutex_exit(&ac_proc->ac_lock); 1710 return (ENOTACTIVE); 1711 } 1712 mutex_exit(&ac_proc->ac_lock); 1713 1714 tag = ea_alloc_group(EXT_GROUP | EXC_DEFAULT | EXD_GROUP_PROC_TAG); 1715 (void) ea_attach_item(tag, &pid, sizeof (uint32_t), 1716 EXT_UINT32 | EXC_DEFAULT | EXD_PROC_PID); 1717 (void) ea_attach_item(tag, &tkid, 0, 1718 EXT_UINT32 | EXC_DEFAULT | EXD_TASK_TASKID); 1719 (void) ea_attach_item(tag, (void *)hostname, 0, 1720 EXT_STRING | EXC_DEFAULT | EXD_TASK_HOSTNAME); 1721 if (flags == EP_RAW) 1722 cat = EXT_RAW | EXC_DEFAULT | EXD_PROC_TAG; 1723 else 1724 cat = EXT_EXACCT_OBJECT | EXC_DEFAULT | EXD_PROC_TAG; 1725 (void) ea_attach_item(tag, ubuf, ubufsz, cat); 1726 1727 bufsize = ea_pack_object(tag, NULL, 0); 1728 buf = kmem_alloc(bufsize, KM_SLEEP); 1729 (void) ea_pack_object(tag, buf, bufsize); 1730 error = exacct_vn_write(ac_proc, buf, bufsize); 1731 kmem_free(buf, bufsize); 1732 ea_free_object(tag, EUP_ALLOC); 1733 return (error); 1734 } 1735 1736 /* 1737 * void exacct_init(void) 1738 * 1739 * Overview 1740 * Initialized the extended accounting subsystem. 1741 * 1742 * Return values 1743 * None. 1744 * 1745 * Caller's context 1746 * Suitable for KM_SLEEP allocations. 1747 */ 1748 void 1749 exacct_init() 1750 { 1751 exacct_queue = system_taskq; 1752 exacct_object_cache = kmem_cache_create("exacct_object_cache", 1753 sizeof (ea_object_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 1754 task_commit_thread_init(); 1755 } 1756 1757 /* 1758 * exacct_snapshot_proc_mstate() copies a process's microstate accounting data 1759 * and resource usage counters into a given task_usage_t. It differs from 1760 * exacct_copy_proc_mstate() in that here a) we are copying to a task_usage_t, 1761 * b) p_lock will have been acquired earlier in the call path and c) we 1762 * are here including the process's user and system times. 1763 */ 1764 static void 1765 exacct_snapshot_proc_mstate(proc_t *p, task_usage_t *tu) 1766 { 1767 tu->tu_utime = mstate_aggr_state(p, LMS_USER); 1768 tu->tu_stime = mstate_aggr_state(p, LMS_SYSTEM); 1769 tu->tu_minflt = p->p_ru.minflt; 1770 tu->tu_majflt = p->p_ru.majflt; 1771 tu->tu_sndmsg = p->p_ru.msgsnd; 1772 tu->tu_rcvmsg = p->p_ru.msgrcv; 1773 tu->tu_ioch = p->p_ru.ioch; 1774 tu->tu_iblk = p->p_ru.inblock; 1775 tu->tu_oblk = p->p_ru.oublock; 1776 tu->tu_vcsw = p->p_ru.nvcsw; 1777 tu->tu_icsw = p->p_ru.nivcsw; 1778 tu->tu_nsig = p->p_ru.nsignals; 1779 tu->tu_nswp = p->p_ru.nswap; 1780 tu->tu_nscl = p->p_ru.sysc; 1781 } 1782 1783 /* 1784 * void exacct_move_mstate(proc_t *, task_t *, task_t *) 1785 * 1786 * Overview 1787 * exacct_move_mstate() is called by task_change() and accounts for 1788 * a process's resource usage when it is moved from one task to another. 1789 * 1790 * The process's usage at this point is recorded in the new task so 1791 * that it can be excluded from the calculation of resources consumed 1792 * by that task. 1793 * 1794 * The resource usage inherited by the new task is also added to the 1795 * aggregate maintained by the old task for processes that have exited. 1796 * 1797 * Return values 1798 * None. 1799 * 1800 * Caller's context 1801 * pidlock and p_lock held across exacct_move_mstate(). 1802 */ 1803 void 1804 exacct_move_mstate(proc_t *p, task_t *oldtk, task_t *newtk) 1805 { 1806 task_usage_t tu; 1807 1808 /* Take a snapshot of this process's mstate and RU counters */ 1809 exacct_snapshot_proc_mstate(p, &tu); 1810 1811 /* 1812 * Use the snapshot to increment the aggregate usage of the old 1813 * task, and the inherited usage of the new one. 1814 */ 1815 mutex_enter(&oldtk->tk_usage_lock); 1816 exacct_add_task_mstate(oldtk->tk_usage, &tu); 1817 mutex_exit(&oldtk->tk_usage_lock); 1818 mutex_enter(&newtk->tk_usage_lock); 1819 exacct_add_task_mstate(newtk->tk_inherited, &tu); 1820 mutex_exit(&newtk->tk_usage_lock); 1821 } 1822