1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. 23 */ 24 25 #include <sys/exacct.h> 26 #include <sys/exacct_catalog.h> 27 #include <sys/disp.h> 28 #include <sys/task.h> 29 #include <sys/proc.h> 30 #include <sys/cmn_err.h> 31 #include <sys/kmem.h> 32 #include <sys/project.h> 33 #include <sys/systm.h> 34 #include <sys/vnode.h> 35 #include <sys/file.h> 36 #include <sys/acctctl.h> 37 #include <sys/time.h> 38 #include <sys/utsname.h> 39 #include <sys/session.h> 40 #include <sys/sysmacros.h> 41 #include <sys/bitmap.h> 42 #include <sys/msacct.h> 43 44 /* 45 * exacct usage and recording routines 46 * 47 * wracct(2), getacct(2), and the records written at process or task 48 * termination are constructed using the exacct_assemble_[task,proc]_usage() 49 * functions, which take a callback that takes the appropriate action on 50 * the packed exacct record for the task or process. For the process-related 51 * actions, we partition the routines such that the data collecting component 52 * can be performed while holding p_lock, and all sleeping or blocking 53 * operations can be performed without acquiring p_lock. 54 * 55 * putacct(2), which allows an application to construct a customized record 56 * associated with an existing process or task, has its own entry points: 57 * exacct_tag_task() and exacct_tag_proc(). 58 */ 59 60 taskq_t *exacct_queue; 61 kmem_cache_t *exacct_object_cache; 62 63 zone_key_t exacct_zone_key = ZONE_KEY_UNINITIALIZED; 64 65 static const uint32_t exacct_version = EXACCT_VERSION; 66 static const char exacct_header[] = "exacct"; 67 static const char exacct_creator[] = "SunOS"; 68 69 ea_object_t * 70 ea_alloc_item(ea_catalog_t catalog, void *buf, size_t bufsz) 71 { 72 ea_object_t *item; 73 74 item = kmem_cache_alloc(exacct_object_cache, KM_SLEEP); 75 bzero(item, sizeof (ea_object_t)); 76 (void) ea_set_item(item, catalog, buf, bufsz); 77 return (item); 78 } 79 80 ea_object_t * 81 ea_alloc_group(ea_catalog_t catalog) 82 { 83 ea_object_t *group; 84 85 group = kmem_cache_alloc(exacct_object_cache, KM_SLEEP); 86 bzero(group, sizeof (ea_object_t)); 87 (void) ea_set_group(group, catalog); 88 return (group); 89 } 90 91 ea_object_t * 92 ea_attach_item(ea_object_t *grp, void *buf, size_t bufsz, ea_catalog_t catalog) 93 { 94 ea_object_t *item; 95 96 item = ea_alloc_item(catalog, buf, bufsz); 97 (void) ea_attach_to_group(grp, item); 98 return (item); 99 } 100 101 /* 102 * exacct_add_task_mstate() and exacct_sub_task_mstate() add and subtract 103 * microstate accounting data and resource usage counters from one task_usage_t 104 * from those supplied in another. These functions do not operate on *all* 105 * members of a task_usage_t: for some (e.g. tu_anctaskid) it would not make 106 * sense. 107 */ 108 static void 109 exacct_add_task_mstate(task_usage_t *tu, task_usage_t *delta) 110 { 111 tu->tu_utime += delta->tu_utime; 112 tu->tu_stime += delta->tu_stime; 113 tu->tu_minflt += delta->tu_minflt; 114 tu->tu_majflt += delta->tu_majflt; 115 tu->tu_sndmsg += delta->tu_sndmsg; 116 tu->tu_rcvmsg += delta->tu_rcvmsg; 117 tu->tu_ioch += delta->tu_ioch; 118 tu->tu_iblk += delta->tu_iblk; 119 tu->tu_oblk += delta->tu_oblk; 120 tu->tu_vcsw += delta->tu_vcsw; 121 tu->tu_icsw += delta->tu_icsw; 122 tu->tu_nsig += delta->tu_nsig; 123 tu->tu_nswp += delta->tu_nswp; 124 tu->tu_nscl += delta->tu_nscl; 125 } 126 127 /* 128 * See the comments for exacct_add_task_mstate(), above. 129 */ 130 static void 131 exacct_sub_task_mstate(task_usage_t *tu, task_usage_t *delta) 132 { 133 tu->tu_utime -= delta->tu_utime; 134 tu->tu_stime -= delta->tu_stime; 135 tu->tu_minflt -= delta->tu_minflt; 136 tu->tu_majflt -= delta->tu_majflt; 137 tu->tu_sndmsg -= delta->tu_sndmsg; 138 tu->tu_rcvmsg -= delta->tu_rcvmsg; 139 tu->tu_ioch -= delta->tu_ioch; 140 tu->tu_iblk -= delta->tu_iblk; 141 tu->tu_oblk -= delta->tu_oblk; 142 tu->tu_vcsw -= delta->tu_vcsw; 143 tu->tu_icsw -= delta->tu_icsw; 144 tu->tu_nsig -= delta->tu_nsig; 145 tu->tu_nswp -= delta->tu_nswp; 146 tu->tu_nscl -= delta->tu_nscl; 147 } 148 149 /* 150 * Wrapper for vn_rdwr() used by exacct_vn_write() and exacct_write_header() 151 * to write to the accounting file without corrupting it in case of an I/O or 152 * filesystem error. 153 */ 154 static int 155 exacct_vn_write_impl(ac_info_t *info, void *buf, ssize_t bufsize) 156 { 157 int error; 158 ssize_t resid; 159 struct vattr va; 160 161 ASSERT(info != NULL); 162 ASSERT(info->ac_vnode != NULL); 163 ASSERT(MUTEX_HELD(&info->ac_lock)); 164 165 /* 166 * Save the size. If vn_rdwr fails, reset the size to avoid corrupting 167 * the present accounting file. 168 */ 169 va.va_mask = AT_SIZE; 170 error = VOP_GETATTR(info->ac_vnode, &va, 0, kcred, NULL); 171 if (error == 0) { 172 error = vn_rdwr(UIO_WRITE, info->ac_vnode, (caddr_t)buf, 173 bufsize, 0LL, UIO_SYSSPACE, FAPPEND, (rlim64_t)MAXOFFSET_T, 174 kcred, &resid); 175 if (error) { 176 (void) VOP_SETATTR(info->ac_vnode, &va, 0, kcred, NULL); 177 } else if (resid != 0) { 178 (void) VOP_SETATTR(info->ac_vnode, &va, 0, kcred, NULL); 179 error = ENOSPC; 180 } 181 } 182 return (error); 183 } 184 185 /* 186 * exacct_vn_write() safely writes to an accounting file. acctctl() prevents 187 * the two accounting vnodes from being equal, and the appropriate ac_lock is 188 * held across the call, so we're single threaded through this code for each 189 * file. 190 */ 191 static int 192 exacct_vn_write(ac_info_t *info, void *buf, ssize_t bufsize) 193 { 194 int error; 195 196 if (info == NULL) 197 return (0); 198 199 mutex_enter(&info->ac_lock); 200 201 /* 202 * Don't do anything unless accounting file is set. 203 */ 204 if (info->ac_vnode == NULL) { 205 mutex_exit(&info->ac_lock); 206 return (0); 207 } 208 error = exacct_vn_write_impl(info, buf, bufsize); 209 mutex_exit(&info->ac_lock); 210 211 return (error); 212 } 213 214 /* 215 * void *exacct_create_header(size_t *) 216 * 217 * Overview 218 * exacct_create_header() constructs an exacct file header identifying the 219 * accounting file as the output of the kernel. exacct_create_header() and 220 * the static write_header() and verify_header() routines in libexacct must 221 * remain synchronized. 222 * 223 * Return values 224 * A pointer to a packed exacct buffer containing the appropriate header is 225 * returned; the size of the buffer is placed in the location indicated by 226 * sizep. 227 * 228 * Caller's context 229 * Suitable for KM_SLEEP allocations. 230 */ 231 void * 232 exacct_create_header(size_t *sizep) 233 { 234 ea_object_t *hdr_grp; 235 uint32_t bskip; 236 void *buf; 237 size_t bufsize; 238 239 hdr_grp = ea_alloc_group(EXT_GROUP | EXC_DEFAULT | EXD_GROUP_HEADER); 240 (void) ea_attach_item(hdr_grp, (void *)&exacct_version, 0, 241 EXT_UINT32 | EXC_DEFAULT | EXD_VERSION); 242 (void) ea_attach_item(hdr_grp, (void *)exacct_header, 0, 243 EXT_STRING | EXC_DEFAULT | EXD_FILETYPE); 244 (void) ea_attach_item(hdr_grp, (void *)exacct_creator, 0, 245 EXT_STRING | EXC_DEFAULT | EXD_CREATOR); 246 (void) ea_attach_item(hdr_grp, uts_nodename(), 0, 247 EXT_STRING | EXC_DEFAULT | EXD_HOSTNAME); 248 249 bufsize = ea_pack_object(hdr_grp, NULL, 0); 250 buf = kmem_alloc(bufsize, KM_SLEEP); 251 (void) ea_pack_object(hdr_grp, buf, bufsize); 252 ea_free_object(hdr_grp, EUP_ALLOC); 253 254 /* 255 * To prevent reading the header when reading the file backwards, 256 * set the large backskip of the header group to 0 (last 4 bytes). 257 */ 258 bskip = 0; 259 exacct_order32(&bskip); 260 bcopy(&bskip, (char *)buf + bufsize - sizeof (bskip), 261 sizeof (bskip)); 262 263 *sizep = bufsize; 264 return (buf); 265 } 266 267 /* 268 * int exacct_write_header(ac_info_t *, void *, size_t) 269 * 270 * Overview 271 * exacct_write_header() writes the given header buffer to the indicated 272 * vnode. 273 * 274 * Return values 275 * The result of the write operation is returned. 276 * 277 * Caller's context 278 * Caller must hold the ac_lock of the appropriate accounting file 279 * information block (ac_info_t). 280 */ 281 int 282 exacct_write_header(ac_info_t *info, void *hdr, size_t hdrsize) 283 { 284 if (info != NULL && info->ac_vnode != NULL) 285 return (exacct_vn_write_impl(info, hdr, hdrsize)); 286 287 return (0); 288 } 289 290 static void 291 exacct_get_interval_task_usage(task_t *tk, task_usage_t *tu, 292 task_usage_t **tu_buf) 293 { 294 task_usage_t *oldtu, *newtu; 295 task_usage_t **prevusage; 296 297 ASSERT(MUTEX_HELD(&tk->tk_usage_lock)); 298 if (getzoneid() != GLOBAL_ZONEID) { 299 prevusage = &tk->tk_zoneusage; 300 } else { 301 prevusage = &tk->tk_prevusage; 302 } 303 if ((oldtu = *prevusage) != NULL) { 304 /* 305 * In case we have any accounting information 306 * saved from the previous interval record. 307 */ 308 newtu = *tu_buf; 309 bcopy(tu, newtu, sizeof (task_usage_t)); 310 tu->tu_minflt -= oldtu->tu_minflt; 311 tu->tu_majflt -= oldtu->tu_majflt; 312 tu->tu_sndmsg -= oldtu->tu_sndmsg; 313 tu->tu_rcvmsg -= oldtu->tu_rcvmsg; 314 tu->tu_ioch -= oldtu->tu_ioch; 315 tu->tu_iblk -= oldtu->tu_iblk; 316 tu->tu_oblk -= oldtu->tu_oblk; 317 tu->tu_vcsw -= oldtu->tu_vcsw; 318 tu->tu_icsw -= oldtu->tu_icsw; 319 tu->tu_nsig -= oldtu->tu_nsig; 320 tu->tu_nswp -= oldtu->tu_nswp; 321 tu->tu_nscl -= oldtu->tu_nscl; 322 tu->tu_utime -= oldtu->tu_utime; 323 tu->tu_stime -= oldtu->tu_stime; 324 325 tu->tu_startsec = oldtu->tu_finishsec; 326 tu->tu_startnsec = oldtu->tu_finishnsec; 327 /* 328 * Copy the data from our temporary storage to the task's 329 * previous interval usage structure for future reference. 330 */ 331 bcopy(newtu, oldtu, sizeof (task_usage_t)); 332 } else { 333 /* 334 * Store current statistics in the task's previous interval 335 * usage structure for future references. 336 */ 337 *prevusage = *tu_buf; 338 bcopy(tu, *prevusage, sizeof (task_usage_t)); 339 *tu_buf = NULL; 340 } 341 } 342 343 static void 344 exacct_snapshot_task_usage(task_t *tk, task_usage_t *tu) 345 { 346 timestruc_t ts; 347 proc_t *p; 348 349 ASSERT(MUTEX_HELD(&pidlock)); 350 351 if ((p = tk->tk_memb_list) == NULL) 352 return; 353 354 /* 355 * exacct_snapshot_task_usage() provides an approximate snapshot of the 356 * usage of the potentially many members of the task. Since we don't 357 * guarantee exactness, we don't acquire the p_lock of any of the member 358 * processes. 359 */ 360 do { 361 mutex_enter(&p->p_lock); 362 tu->tu_utime += mstate_aggr_state(p, LMS_USER); 363 tu->tu_stime += mstate_aggr_state(p, LMS_SYSTEM); 364 mutex_exit(&p->p_lock); 365 tu->tu_minflt += p->p_ru.minflt; 366 tu->tu_majflt += p->p_ru.majflt; 367 tu->tu_sndmsg += p->p_ru.msgsnd; 368 tu->tu_rcvmsg += p->p_ru.msgrcv; 369 tu->tu_ioch += p->p_ru.ioch; 370 tu->tu_iblk += p->p_ru.inblock; 371 tu->tu_oblk += p->p_ru.oublock; 372 tu->tu_vcsw += p->p_ru.nvcsw; 373 tu->tu_icsw += p->p_ru.nivcsw; 374 tu->tu_nsig += p->p_ru.nsignals; 375 tu->tu_nswp += p->p_ru.nswap; 376 tu->tu_nscl += p->p_ru.sysc; 377 } while ((p = p->p_tasknext) != tk->tk_memb_list); 378 379 /* 380 * The resource usage accounted for so far will include that 381 * contributed by the task's first process. If this process 382 * came from another task, then its accumulated resource usage 383 * will include a contribution from work performed there. 384 * We must therefore subtract any resource usage that was 385 * inherited with the first process. 386 */ 387 exacct_sub_task_mstate(tu, tk->tk_inherited); 388 389 gethrestime(&ts); 390 tu->tu_finishsec = (uint64_t)(ulong_t)ts.tv_sec; 391 tu->tu_finishnsec = (uint64_t)(ulong_t)ts.tv_nsec; 392 } 393 394 /* 395 * void exacct_update_task_mstate(proc_t *) 396 * 397 * Overview 398 * exacct_update_task_mstate() updates the task usage; it is intended 399 * to be called from proc_exit(). 400 * 401 * Return values 402 * None. 403 * 404 * Caller's context 405 * p_lock must be held at entry. 406 */ 407 void 408 exacct_update_task_mstate(proc_t *p) 409 { 410 task_usage_t *tu; 411 412 mutex_enter(&p->p_task->tk_usage_lock); 413 tu = p->p_task->tk_usage; 414 tu->tu_utime += mstate_aggr_state(p, LMS_USER); 415 tu->tu_stime += mstate_aggr_state(p, LMS_SYSTEM); 416 tu->tu_minflt += p->p_ru.minflt; 417 tu->tu_majflt += p->p_ru.majflt; 418 tu->tu_sndmsg += p->p_ru.msgsnd; 419 tu->tu_rcvmsg += p->p_ru.msgrcv; 420 tu->tu_ioch += p->p_ru.ioch; 421 tu->tu_iblk += p->p_ru.inblock; 422 tu->tu_oblk += p->p_ru.oublock; 423 tu->tu_vcsw += p->p_ru.nvcsw; 424 tu->tu_icsw += p->p_ru.nivcsw; 425 tu->tu_nsig += p->p_ru.nsignals; 426 tu->tu_nswp += p->p_ru.nswap; 427 tu->tu_nscl += p->p_ru.sysc; 428 mutex_exit(&p->p_task->tk_usage_lock); 429 } 430 431 static void 432 exacct_calculate_task_usage(task_t *tk, task_usage_t *tu, int flag) 433 { 434 timestruc_t ts; 435 task_usage_t *tu_buf; 436 437 switch (flag) { 438 case EW_PARTIAL: 439 /* 440 * For partial records we must report the sum of current 441 * accounting statistics with previously accumulated 442 * statistics. 443 */ 444 mutex_enter(&pidlock); 445 mutex_enter(&tk->tk_usage_lock); 446 447 (void) bcopy(tk->tk_usage, tu, sizeof (task_usage_t)); 448 exacct_snapshot_task_usage(tk, tu); 449 450 mutex_exit(&tk->tk_usage_lock); 451 mutex_exit(&pidlock); 452 break; 453 case EW_INTERVAL: 454 /* 455 * We need to allocate spare task_usage_t buffer before 456 * grabbing pidlock because we might need it later in 457 * exacct_get_interval_task_usage(). 458 */ 459 tu_buf = kmem_zalloc(sizeof (task_usage_t), KM_SLEEP); 460 mutex_enter(&pidlock); 461 mutex_enter(&tk->tk_usage_lock); 462 463 /* 464 * For interval records, we deduct the previous microstate 465 * accounting data and cpu usage times from previously saved 466 * results and update the previous task usage structure. 467 */ 468 (void) bcopy(tk->tk_usage, tu, sizeof (task_usage_t)); 469 exacct_snapshot_task_usage(tk, tu); 470 exacct_get_interval_task_usage(tk, tu, &tu_buf); 471 472 mutex_exit(&tk->tk_usage_lock); 473 mutex_exit(&pidlock); 474 475 if (tu_buf != NULL) 476 kmem_free(tu_buf, sizeof (task_usage_t)); 477 break; 478 case EW_FINAL: 479 /* 480 * For final records, we deduct, from the task's current 481 * usage, any usage that was inherited with the arrival 482 * of a process from a previous task. We then record 483 * the task's finish time. 484 */ 485 mutex_enter(&tk->tk_usage_lock); 486 (void) bcopy(tk->tk_usage, tu, sizeof (task_usage_t)); 487 exacct_sub_task_mstate(tu, tk->tk_inherited); 488 mutex_exit(&tk->tk_usage_lock); 489 490 gethrestime(&ts); 491 tu->tu_finishsec = (uint64_t)(ulong_t)ts.tv_sec; 492 tu->tu_finishnsec = (uint64_t)(ulong_t)ts.tv_nsec; 493 494 break; 495 } 496 } 497 498 static int 499 exacct_attach_task_item(task_t *tk, task_usage_t *tu, ea_object_t *record, 500 int res) 501 { 502 int attached = 1; 503 504 switch (res) { 505 case AC_TASK_TASKID: 506 (void) ea_attach_item(record, &tk->tk_tkid, 507 sizeof (uint32_t), EXT_UINT32 | EXD_TASK_TASKID); 508 break; 509 case AC_TASK_PROJID: 510 (void) ea_attach_item(record, &tk->tk_proj->kpj_id, 511 sizeof (uint32_t), EXT_UINT32 | EXD_TASK_PROJID); 512 break; 513 case AC_TASK_CPU: { 514 timestruc_t ts; 515 uint64_t ui; 516 517 hrt2ts(tu->tu_stime, &ts); 518 ui = ts.tv_sec; 519 (void) ea_attach_item(record, &ui, sizeof (uint64_t), 520 EXT_UINT64 | EXD_TASK_CPU_SYS_SEC); 521 ui = ts.tv_nsec; 522 (void) ea_attach_item(record, &ui, sizeof (uint64_t), 523 EXT_UINT64 | EXD_TASK_CPU_SYS_NSEC); 524 525 hrt2ts(tu->tu_utime, &ts); 526 ui = ts.tv_sec; 527 (void) ea_attach_item(record, &ui, sizeof (uint64_t), 528 EXT_UINT64 | EXD_TASK_CPU_USER_SEC); 529 ui = ts.tv_nsec; 530 (void) ea_attach_item(record, &ui, sizeof (uint64_t), 531 EXT_UINT64 | EXD_TASK_CPU_USER_NSEC); 532 } 533 break; 534 case AC_TASK_TIME: 535 (void) ea_attach_item(record, &tu->tu_startsec, 536 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_START_SEC); 537 (void) ea_attach_item(record, &tu->tu_startnsec, 538 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_START_NSEC); 539 (void) ea_attach_item(record, &tu->tu_finishsec, 540 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_FINISH_SEC); 541 (void) ea_attach_item(record, &tu->tu_finishnsec, 542 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_FINISH_NSEC); 543 break; 544 case AC_TASK_HOSTNAME: 545 (void) ea_attach_item(record, tk->tk_zone->zone_nodename, 546 strlen(tk->tk_zone->zone_nodename) + 1, 547 EXT_STRING | EXD_TASK_HOSTNAME); 548 break; 549 case AC_TASK_MICROSTATE: 550 (void) ea_attach_item(record, &tu->tu_majflt, 551 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_FAULTS_MAJOR); 552 (void) ea_attach_item(record, &tu->tu_minflt, 553 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_FAULTS_MINOR); 554 (void) ea_attach_item(record, &tu->tu_sndmsg, 555 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_MESSAGES_SND); 556 (void) ea_attach_item(record, &tu->tu_rcvmsg, 557 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_MESSAGES_RCV); 558 (void) ea_attach_item(record, &tu->tu_iblk, 559 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_BLOCKS_IN); 560 (void) ea_attach_item(record, &tu->tu_oblk, 561 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_BLOCKS_OUT); 562 (void) ea_attach_item(record, &tu->tu_ioch, 563 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_CHARS_RDWR); 564 (void) ea_attach_item(record, &tu->tu_vcsw, 565 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_CONTEXT_VOL); 566 (void) ea_attach_item(record, &tu->tu_icsw, 567 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_CONTEXT_INV); 568 (void) ea_attach_item(record, &tu->tu_nsig, 569 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_SIGNALS); 570 (void) ea_attach_item(record, &tu->tu_nswp, 571 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_SWAPS); 572 (void) ea_attach_item(record, &tu->tu_nscl, 573 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_SYSCALLS); 574 break; 575 case AC_TASK_ANCTASKID: 576 (void) ea_attach_item(record, &tu->tu_anctaskid, 577 sizeof (uint32_t), EXT_UINT32 | EXD_TASK_ANCTASKID); 578 break; 579 case AC_TASK_ZONENAME: 580 (void) ea_attach_item(record, tk->tk_zone->zone_name, 581 strlen(tk->tk_zone->zone_name) + 1, 582 EXT_STRING | EXD_TASK_ZONENAME); 583 break; 584 default: 585 attached = 0; 586 } 587 return (attached); 588 } 589 590 static ea_object_t * 591 exacct_assemble_task_record(task_t *tk, task_usage_t *tu, ulong_t *mask, 592 ea_catalog_t record_type) 593 { 594 int res, count; 595 ea_object_t *record; 596 597 /* 598 * Assemble usage values into group. 599 */ 600 record = ea_alloc_group(EXT_GROUP | EXC_DEFAULT | record_type); 601 for (res = 1, count = 0; res <= AC_TASK_MAX_RES; res++) 602 if (BT_TEST(mask, res)) 603 count += exacct_attach_task_item(tk, tu, record, res); 604 if (count == 0) { 605 ea_free_object(record, EUP_ALLOC); 606 record = NULL; 607 } 608 return (record); 609 } 610 611 /* 612 * int exacct_assemble_task_usage(task_t *, int (*)(void *, size_t, void *, 613 * size_t, size_t *), void *, size_t, size_t *, int) 614 * 615 * Overview 616 * exacct_assemble_task_usage() builds the packed exacct buffer for the 617 * indicated task, executes the given callback function, and free the packed 618 * buffer. 619 * 620 * Return values 621 * Returns 0 on success; otherwise the appropriate error code is returned. 622 * 623 * Caller's context 624 * Suitable for KM_SLEEP allocations. 625 */ 626 int 627 exacct_assemble_task_usage(ac_info_t *ac_task, task_t *tk, 628 int (*callback)(ac_info_t *, void *, size_t, void *, size_t, size_t *), 629 void *ubuf, size_t ubufsize, size_t *actual, int flag) 630 { 631 ulong_t mask[AC_MASK_SZ]; 632 ea_object_t *task_record; 633 ea_catalog_t record_type; 634 task_usage_t *tu; 635 void *buf; 636 size_t bufsize; 637 int ret; 638 639 ASSERT(flag == EW_FINAL || flag == EW_PARTIAL || flag == EW_INTERVAL); 640 641 mutex_enter(&ac_task->ac_lock); 642 if (ac_task->ac_state == AC_OFF) { 643 mutex_exit(&ac_task->ac_lock); 644 return (ENOTACTIVE); 645 } 646 bt_copy(ac_task->ac_mask, mask, AC_MASK_SZ); 647 mutex_exit(&ac_task->ac_lock); 648 649 switch (flag) { 650 case EW_FINAL: 651 record_type = EXD_GROUP_TASK; 652 break; 653 case EW_PARTIAL: 654 record_type = EXD_GROUP_TASK_PARTIAL; 655 break; 656 case EW_INTERVAL: 657 record_type = EXD_GROUP_TASK_INTERVAL; 658 break; 659 } 660 661 /* 662 * Calculate task usage and assemble it into the task record. 663 */ 664 tu = kmem_zalloc(sizeof (task_usage_t), KM_SLEEP); 665 exacct_calculate_task_usage(tk, tu, flag); 666 task_record = exacct_assemble_task_record(tk, tu, mask, record_type); 667 if (task_record == NULL) { 668 /* 669 * The current configuration of the accounting system has 670 * resulted in records with no data; accordingly, we don't write 671 * these, but we return success. 672 */ 673 kmem_free(tu, sizeof (task_usage_t)); 674 return (0); 675 } 676 677 /* 678 * Pack object into buffer and run callback on it. 679 */ 680 bufsize = ea_pack_object(task_record, NULL, 0); 681 buf = kmem_alloc(bufsize, KM_SLEEP); 682 (void) ea_pack_object(task_record, buf, bufsize); 683 ret = callback(ac_task, ubuf, ubufsize, buf, bufsize, actual); 684 685 /* 686 * Free all previously allocated structures. 687 */ 688 kmem_free(buf, bufsize); 689 ea_free_object(task_record, EUP_ALLOC); 690 kmem_free(tu, sizeof (task_usage_t)); 691 return (ret); 692 } 693 694 /* 695 * void exacct_commit_task(void *) 696 * 697 * Overview 698 * exacct_commit_task() calculates the final usage for a task, updating the 699 * task usage if task accounting is active, and writing a task record if task 700 * accounting is active. exacct_commit_task() is intended for being called 701 * from a task queue (taskq_t). 702 * 703 * Return values 704 * None. 705 * 706 * Caller's context 707 * Suitable for KM_SLEEP allocations. 708 */ 709 710 void 711 exacct_commit_task(void *arg) 712 { 713 task_t *tk = (task_t *)arg; 714 size_t size; 715 zone_t *zone = tk->tk_zone; 716 struct exacct_globals *acg; 717 718 ASSERT(tk != task0p); 719 ASSERT(tk->tk_memb_list == NULL); 720 721 /* 722 * Don't do any extra work if the acctctl module isn't loaded. 723 * If acctctl module is loaded when zone is in down state then 724 * zone_getspecific can return NULL for that zone. 725 */ 726 if (exacct_zone_key != ZONE_KEY_UNINITIALIZED) { 727 acg = zone_getspecific(exacct_zone_key, zone); 728 if (acg == NULL) 729 goto err; 730 (void) exacct_assemble_task_usage(&acg->ac_task, tk, 731 exacct_commit_callback, NULL, 0, &size, EW_FINAL); 732 if (tk->tk_zone != global_zone) { 733 acg = zone_getspecific(exacct_zone_key, global_zone); 734 (void) exacct_assemble_task_usage(&acg->ac_task, tk, 735 exacct_commit_callback, NULL, 0, &size, EW_FINAL); 736 } 737 } 738 /* 739 * Release associated project and finalize task. 740 */ 741 err: 742 task_end(tk); 743 } 744 745 static int 746 exacct_attach_proc_item(proc_usage_t *pu, ea_object_t *record, int res) 747 { 748 int attached = 1; 749 750 switch (res) { 751 case AC_PROC_PID: 752 (void) ea_attach_item(record, &pu->pu_pid, 753 sizeof (uint32_t), EXT_UINT32 | EXD_PROC_PID); 754 break; 755 case AC_PROC_UID: 756 (void) ea_attach_item(record, &pu->pu_ruid, 757 sizeof (uint32_t), EXT_UINT32 | EXD_PROC_UID); 758 break; 759 case AC_PROC_FLAG: 760 (void) ea_attach_item(record, &pu->pu_acflag, 761 sizeof (uint32_t), EXT_UINT32 | EXD_PROC_ACCT_FLAGS); 762 break; 763 case AC_PROC_GID: 764 (void) ea_attach_item(record, &pu->pu_rgid, 765 sizeof (uint32_t), EXT_UINT32 | EXD_PROC_GID); 766 break; 767 case AC_PROC_PROJID: 768 (void) ea_attach_item(record, &pu->pu_projid, 769 sizeof (uint32_t), EXT_UINT32 | EXD_PROC_PROJID); 770 break; 771 case AC_PROC_TASKID: 772 (void) ea_attach_item(record, &pu->pu_taskid, 773 sizeof (uint32_t), EXT_UINT32 | EXD_PROC_TASKID); 774 break; 775 case AC_PROC_CPU: 776 (void) ea_attach_item(record, &pu->pu_utimesec, 777 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_CPU_USER_SEC); 778 (void) ea_attach_item(record, &pu->pu_utimensec, 779 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_CPU_USER_NSEC); 780 (void) ea_attach_item(record, &pu->pu_stimesec, 781 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_CPU_SYS_SEC); 782 (void) ea_attach_item(record, &pu->pu_stimensec, 783 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_CPU_SYS_NSEC); 784 break; 785 case AC_PROC_TIME: 786 (void) ea_attach_item(record, &pu->pu_startsec, 787 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_START_SEC); 788 (void) ea_attach_item(record, &pu->pu_startnsec, 789 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_START_NSEC); 790 (void) ea_attach_item(record, &pu->pu_finishsec, 791 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_FINISH_SEC); 792 (void) ea_attach_item(record, &pu->pu_finishnsec, 793 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_FINISH_NSEC); 794 break; 795 case AC_PROC_COMMAND: 796 (void) ea_attach_item(record, pu->pu_command, 797 strlen(pu->pu_command) + 1, EXT_STRING | EXD_PROC_COMMAND); 798 break; 799 case AC_PROC_HOSTNAME: 800 (void) ea_attach_item(record, pu->pu_nodename, 801 strlen(pu->pu_nodename) + 1, 802 EXT_STRING | EXD_PROC_HOSTNAME); 803 break; 804 case AC_PROC_TTY: 805 (void) ea_attach_item(record, &pu->pu_major, 806 sizeof (uint32_t), EXT_UINT32 | EXD_PROC_TTY_MAJOR); 807 (void) ea_attach_item(record, &pu->pu_minor, 808 sizeof (uint32_t), EXT_UINT32 | EXD_PROC_TTY_MINOR); 809 break; 810 case AC_PROC_MICROSTATE: 811 (void) ea_attach_item(record, &pu->pu_majflt, 812 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_FAULTS_MAJOR); 813 (void) ea_attach_item(record, &pu->pu_minflt, 814 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_FAULTS_MINOR); 815 (void) ea_attach_item(record, &pu->pu_sndmsg, 816 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_MESSAGES_SND); 817 (void) ea_attach_item(record, &pu->pu_rcvmsg, 818 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_MESSAGES_RCV); 819 (void) ea_attach_item(record, &pu->pu_iblk, 820 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_BLOCKS_IN); 821 (void) ea_attach_item(record, &pu->pu_oblk, 822 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_BLOCKS_OUT); 823 (void) ea_attach_item(record, &pu->pu_ioch, 824 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_CHARS_RDWR); 825 (void) ea_attach_item(record, &pu->pu_vcsw, 826 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_CONTEXT_VOL); 827 (void) ea_attach_item(record, &pu->pu_icsw, 828 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_CONTEXT_INV); 829 (void) ea_attach_item(record, &pu->pu_nsig, 830 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_SIGNALS); 831 (void) ea_attach_item(record, &pu->pu_nswp, 832 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_SWAPS); 833 (void) ea_attach_item(record, &pu->pu_nscl, 834 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_SYSCALLS); 835 break; 836 case AC_PROC_ANCPID: 837 (void) ea_attach_item(record, &pu->pu_ancpid, 838 sizeof (uint32_t), EXT_UINT32 | EXD_PROC_ANCPID); 839 break; 840 case AC_PROC_WAIT_STATUS: 841 (void) ea_attach_item(record, &pu->pu_wstat, 842 sizeof (uint32_t), EXT_UINT32 | EXD_PROC_WAIT_STATUS); 843 break; 844 case AC_PROC_ZONENAME: 845 (void) ea_attach_item(record, pu->pu_zonename, 846 strlen(pu->pu_zonename) + 1, 847 EXT_STRING | EXD_PROC_ZONENAME); 848 break; 849 case AC_PROC_MEM: 850 (void) ea_attach_item(record, &pu->pu_mem_rss_avg, 851 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_MEM_RSS_AVG_K); 852 (void) ea_attach_item(record, &pu->pu_mem_rss_max, 853 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_MEM_RSS_MAX_K); 854 break; 855 default: 856 attached = 0; 857 } 858 return (attached); 859 } 860 861 static ea_object_t * 862 exacct_assemble_proc_record(proc_usage_t *pu, ulong_t *mask, 863 ea_catalog_t record_type) 864 { 865 int res, count; 866 ea_object_t *record; 867 868 /* 869 * Assemble usage values into group. 870 */ 871 record = ea_alloc_group(EXT_GROUP | EXC_DEFAULT | record_type); 872 for (res = 1, count = 0; res <= AC_PROC_MAX_RES; res++) 873 if (BT_TEST(mask, res)) 874 count += exacct_attach_proc_item(pu, record, res); 875 if (count == 0) { 876 ea_free_object(record, EUP_ALLOC); 877 record = NULL; 878 } 879 return (record); 880 } 881 882 /* 883 * The following two routines assume that process's p_lock is held or 884 * exacct_commit_proc has been called from exit() when all lwps are stopped. 885 */ 886 static void 887 exacct_calculate_proc_mstate(proc_t *p, proc_usage_t *pu) 888 { 889 kthread_t *t; 890 891 ASSERT(MUTEX_HELD(&p->p_lock)); 892 if ((t = p->p_tlist) == NULL) 893 return; 894 895 do { 896 pu->pu_minflt += t->t_lwp->lwp_ru.minflt; 897 pu->pu_majflt += t->t_lwp->lwp_ru.majflt; 898 pu->pu_sndmsg += t->t_lwp->lwp_ru.msgsnd; 899 pu->pu_rcvmsg += t->t_lwp->lwp_ru.msgrcv; 900 pu->pu_ioch += t->t_lwp->lwp_ru.ioch; 901 pu->pu_iblk += t->t_lwp->lwp_ru.inblock; 902 pu->pu_oblk += t->t_lwp->lwp_ru.oublock; 903 pu->pu_vcsw += t->t_lwp->lwp_ru.nvcsw; 904 pu->pu_icsw += t->t_lwp->lwp_ru.nivcsw; 905 pu->pu_nsig += t->t_lwp->lwp_ru.nsignals; 906 pu->pu_nswp += t->t_lwp->lwp_ru.nswap; 907 pu->pu_nscl += t->t_lwp->lwp_ru.sysc; 908 } while ((t = t->t_forw) != p->p_tlist); 909 } 910 911 static void 912 exacct_copy_proc_mstate(proc_t *p, proc_usage_t *pu) 913 { 914 pu->pu_minflt = p->p_ru.minflt; 915 pu->pu_majflt = p->p_ru.majflt; 916 pu->pu_sndmsg = p->p_ru.msgsnd; 917 pu->pu_rcvmsg = p->p_ru.msgrcv; 918 pu->pu_ioch = p->p_ru.ioch; 919 pu->pu_iblk = p->p_ru.inblock; 920 pu->pu_oblk = p->p_ru.oublock; 921 pu->pu_vcsw = p->p_ru.nvcsw; 922 pu->pu_icsw = p->p_ru.nivcsw; 923 pu->pu_nsig = p->p_ru.nsignals; 924 pu->pu_nswp = p->p_ru.nswap; 925 pu->pu_nscl = p->p_ru.sysc; 926 } 927 928 void 929 exacct_calculate_proc_usage(proc_t *p, proc_usage_t *pu, ulong_t *mask, 930 int flag, int wstat) 931 { 932 timestruc_t ts, ts_run; 933 934 ASSERT(MUTEX_HELD(&p->p_lock)); 935 936 /* 937 * Convert CPU and execution times to sec/nsec format. 938 */ 939 if (BT_TEST(mask, AC_PROC_CPU)) { 940 hrt2ts(mstate_aggr_state(p, LMS_USER), &ts); 941 pu->pu_utimesec = (uint64_t)(ulong_t)ts.tv_sec; 942 pu->pu_utimensec = (uint64_t)(ulong_t)ts.tv_nsec; 943 hrt2ts(mstate_aggr_state(p, LMS_SYSTEM), &ts); 944 pu->pu_stimesec = (uint64_t)(ulong_t)ts.tv_sec; 945 pu->pu_stimensec = (uint64_t)(ulong_t)ts.tv_nsec; 946 } 947 if (BT_TEST(mask, AC_PROC_TIME)) { 948 gethrestime(&ts); 949 pu->pu_finishsec = (uint64_t)(ulong_t)ts.tv_sec; 950 pu->pu_finishnsec = (uint64_t)(ulong_t)ts.tv_nsec; 951 hrt2ts(gethrtime() - p->p_mstart, &ts_run); 952 ts.tv_sec -= ts_run.tv_sec; 953 ts.tv_nsec -= ts_run.tv_nsec; 954 if (ts.tv_nsec < 0) { 955 ts.tv_sec--; 956 if ((ts.tv_nsec = ts.tv_nsec + NANOSEC) >= NANOSEC) { 957 ts.tv_sec++; 958 ts.tv_nsec -= NANOSEC; 959 } 960 } 961 pu->pu_startsec = (uint64_t)(ulong_t)ts.tv_sec; 962 pu->pu_startnsec = (uint64_t)(ulong_t)ts.tv_nsec; 963 } 964 965 pu->pu_pid = p->p_pidp->pid_id; 966 pu->pu_acflag = p->p_user.u_acflag; 967 pu->pu_projid = p->p_task->tk_proj->kpj_id; 968 pu->pu_taskid = p->p_task->tk_tkid; 969 pu->pu_major = getmajor(p->p_sessp->s_dev); 970 pu->pu_minor = getminor(p->p_sessp->s_dev); 971 pu->pu_ancpid = p->p_ancpid; 972 pu->pu_wstat = wstat; 973 /* 974 * Compute average RSS in K. The denominator is the number of 975 * samples: the number of clock ticks plus the initial value. 976 */ 977 pu->pu_mem_rss_avg = (PTOU(p)->u_mem / (p->p_stime + p->p_utime + 1)) * 978 (PAGESIZE / 1024); 979 pu->pu_mem_rss_max = PTOU(p)->u_mem_max * (PAGESIZE / 1024); 980 981 mutex_enter(&p->p_crlock); 982 pu->pu_ruid = crgetruid(p->p_cred); 983 pu->pu_rgid = crgetrgid(p->p_cred); 984 mutex_exit(&p->p_crlock); 985 986 bcopy(p->p_user.u_comm, pu->pu_command, strlen(p->p_user.u_comm) + 1); 987 bcopy(p->p_zone->zone_name, pu->pu_zonename, 988 strlen(p->p_zone->zone_name) + 1); 989 bcopy(p->p_zone->zone_nodename, pu->pu_nodename, 990 strlen(p->p_zone->zone_nodename) + 1); 991 992 /* 993 * Calculate microstate accounting data for a process that is still 994 * running. Presently, we explicitly collect all of the LWP usage into 995 * the proc usage structure here. 996 */ 997 if (flag & EW_PARTIAL) 998 exacct_calculate_proc_mstate(p, pu); 999 if (flag & EW_FINAL) 1000 exacct_copy_proc_mstate(p, pu); 1001 } 1002 1003 /* 1004 * int exacct_assemble_proc_usage(proc_usage_t *, int (*)(void *, size_t, void 1005 * *, size_t, size_t *), void *, size_t, size_t *) 1006 * 1007 * Overview 1008 * Assemble record with miscellaneous accounting information about the process 1009 * and execute the callback on it. It is the callback's job to set "actual" to 1010 * the size of record. 1011 * 1012 * Return values 1013 * The result of the callback function, unless the extended process accounting 1014 * feature is not active, in which case ENOTACTIVE is returned. 1015 * 1016 * Caller's context 1017 * Suitable for KM_SLEEP allocations. 1018 */ 1019 int 1020 exacct_assemble_proc_usage(ac_info_t *ac_proc, proc_usage_t *pu, 1021 int (*callback)(ac_info_t *, void *, size_t, void *, size_t, size_t *), 1022 void *ubuf, size_t ubufsize, size_t *actual, int flag) 1023 { 1024 ulong_t mask[AC_MASK_SZ]; 1025 ea_object_t *proc_record; 1026 ea_catalog_t record_type; 1027 void *buf; 1028 size_t bufsize; 1029 int ret; 1030 1031 ASSERT(flag == EW_FINAL || flag == EW_PARTIAL); 1032 1033 mutex_enter(&ac_proc->ac_lock); 1034 if (ac_proc->ac_state == AC_OFF) { 1035 mutex_exit(&ac_proc->ac_lock); 1036 return (ENOTACTIVE); 1037 } 1038 bt_copy(&ac_proc->ac_mask[0], mask, AC_MASK_SZ); 1039 mutex_exit(&ac_proc->ac_lock); 1040 1041 switch (flag) { 1042 case EW_FINAL: 1043 record_type = EXD_GROUP_PROC; 1044 break; 1045 case EW_PARTIAL: 1046 record_type = EXD_GROUP_PROC_PARTIAL; 1047 break; 1048 } 1049 1050 proc_record = exacct_assemble_proc_record(pu, mask, record_type); 1051 if (proc_record == NULL) 1052 return (0); 1053 1054 /* 1055 * Pack object into buffer and pass to callback. 1056 */ 1057 bufsize = ea_pack_object(proc_record, NULL, 0); 1058 buf = kmem_alloc(bufsize, KM_SLEEP); 1059 (void) ea_pack_object(proc_record, buf, bufsize); 1060 1061 ret = callback(ac_proc, ubuf, ubufsize, buf, bufsize, actual); 1062 1063 /* 1064 * Free all previously allocations. 1065 */ 1066 kmem_free(buf, bufsize); 1067 ea_free_object(proc_record, EUP_ALLOC); 1068 return (ret); 1069 } 1070 1071 /* 1072 * int exacct_commit_callback(ac_info_t *, void *, size_t, void *, size_t, 1073 * size_t *) 1074 * 1075 * Overview 1076 * exacct_commit_callback() writes the indicated buffer to the indicated 1077 * extended accounting file. 1078 * 1079 * Return values 1080 * The result of the write operation is returned. "actual" is updated to 1081 * contain the number of bytes actually written. 1082 * 1083 * Caller's context 1084 * Suitable for a vn_rdwr() operation. 1085 */ 1086 /*ARGSUSED*/ 1087 int 1088 exacct_commit_callback(ac_info_t *info, void *ubuf, size_t ubufsize, 1089 void *buf, size_t bufsize, size_t *actual) 1090 { 1091 int error = 0; 1092 1093 *actual = 0; 1094 if ((error = exacct_vn_write(info, buf, bufsize)) == 0) 1095 *actual = bufsize; 1096 return (error); 1097 } 1098 1099 static void 1100 exacct_do_commit_proc(ac_info_t *ac_proc, proc_t *p, int wstat) 1101 { 1102 size_t size; 1103 proc_usage_t *pu; 1104 ulong_t mask[AC_MASK_SZ]; 1105 1106 mutex_enter(&ac_proc->ac_lock); 1107 if (ac_proc->ac_state == AC_ON) { 1108 bt_copy(&ac_proc->ac_mask[0], mask, AC_MASK_SZ); 1109 mutex_exit(&ac_proc->ac_lock); 1110 } else { 1111 mutex_exit(&ac_proc->ac_lock); 1112 return; 1113 } 1114 1115 mutex_enter(&p->p_lock); 1116 size = strlen(p->p_user.u_comm) + 1; 1117 mutex_exit(&p->p_lock); 1118 1119 pu = kmem_alloc(sizeof (proc_usage_t), KM_SLEEP); 1120 pu->pu_command = kmem_alloc(size, KM_SLEEP); 1121 mutex_enter(&p->p_lock); 1122 exacct_calculate_proc_usage(p, pu, mask, EW_FINAL, wstat); 1123 mutex_exit(&p->p_lock); 1124 1125 (void) exacct_assemble_proc_usage(ac_proc, pu, 1126 exacct_commit_callback, NULL, 0, &size, EW_FINAL); 1127 1128 kmem_free(pu->pu_command, strlen(pu->pu_command) + 1); 1129 kmem_free(pu, sizeof (proc_usage_t)); 1130 } 1131 1132 /* 1133 * void exacct_commit_proc(proc_t *, int) 1134 * 1135 * Overview 1136 * exacct_commit_proc() calculates the final usage for a process, updating the 1137 * task usage if task accounting is active, and writing a process record if 1138 * process accounting is active. exacct_commit_proc() is intended for being 1139 * called from proc_exit(). 1140 * 1141 * Return values 1142 * None. 1143 * 1144 * Caller's context 1145 * Suitable for KM_SLEEP allocations. p_lock must not be held at entry. 1146 */ 1147 void 1148 exacct_commit_proc(proc_t *p, int wstat) 1149 { 1150 zone_t *zone = p->p_zone; 1151 struct exacct_globals *acg, *gacg = NULL; 1152 1153 if (exacct_zone_key == ZONE_KEY_UNINITIALIZED) { 1154 /* 1155 * acctctl module not loaded. Nothing to do. 1156 */ 1157 return; 1158 } 1159 1160 /* 1161 * If acctctl module is loaded when zone is in down state then 1162 * zone_getspecific can return NULL for that zone. 1163 */ 1164 acg = zone_getspecific(exacct_zone_key, zone); 1165 if (acg == NULL) 1166 return; 1167 exacct_do_commit_proc(&acg->ac_proc, p, wstat); 1168 if (zone != global_zone) { 1169 gacg = zone_getspecific(exacct_zone_key, global_zone); 1170 exacct_do_commit_proc(&gacg->ac_proc, p, wstat); 1171 } 1172 } 1173 1174 static int 1175 exacct_attach_netstat_item(net_stat_t *ns, ea_object_t *record, int res) 1176 { 1177 int attached = 1; 1178 1179 switch (res) { 1180 case AC_NET_NAME: 1181 (void) ea_attach_item(record, ns->ns_name, 1182 strlen(ns->ns_name) + 1, EXT_STRING | EXD_NET_STATS_NAME); 1183 break; 1184 case AC_NET_CURTIME: 1185 { 1186 uint64_t now; 1187 timestruc_t ts; 1188 1189 gethrestime(&ts); 1190 now = (uint64_t)(ulong_t)ts.tv_sec; 1191 (void) ea_attach_item(record, &now, sizeof (uint64_t), 1192 EXT_UINT64 | EXD_NET_STATS_CURTIME); 1193 } 1194 break; 1195 case AC_NET_IBYTES: 1196 (void) ea_attach_item(record, &ns->ns_ibytes, 1197 sizeof (uint64_t), EXT_UINT64 | EXD_NET_STATS_IBYTES); 1198 break; 1199 case AC_NET_OBYTES: 1200 (void) ea_attach_item(record, &ns->ns_obytes, 1201 sizeof (uint64_t), EXT_UINT64 | EXD_NET_STATS_OBYTES); 1202 break; 1203 case AC_NET_IPKTS: 1204 (void) ea_attach_item(record, &ns->ns_ipackets, 1205 sizeof (uint64_t), EXT_UINT64 | EXD_NET_STATS_IPKTS); 1206 break; 1207 case AC_NET_OPKTS: 1208 (void) ea_attach_item(record, &ns->ns_opackets, 1209 sizeof (uint64_t), EXT_UINT64 | EXD_NET_STATS_OPKTS); 1210 break; 1211 case AC_NET_IERRPKTS: 1212 (void) ea_attach_item(record, &ns->ns_ierrors, 1213 sizeof (uint64_t), EXT_UINT64 | EXD_NET_STATS_IERRPKTS); 1214 break; 1215 case AC_NET_OERRPKTS: 1216 (void) ea_attach_item(record, &ns->ns_oerrors, 1217 sizeof (uint64_t), EXT_UINT64 | EXD_NET_STATS_OERRPKTS); 1218 break; 1219 default: 1220 attached = 0; 1221 } 1222 return (attached); 1223 } 1224 1225 static int 1226 exacct_attach_netdesc_item(net_desc_t *nd, ea_object_t *record, int res) 1227 { 1228 int attached = 1; 1229 1230 switch (res) { 1231 case AC_NET_NAME: 1232 (void) ea_attach_item(record, nd->nd_name, 1233 strlen(nd->nd_name) + 1, EXT_STRING | EXD_NET_DESC_NAME); 1234 break; 1235 case AC_NET_DEVNAME: 1236 (void) ea_attach_item(record, nd->nd_devname, 1237 strlen(nd->nd_devname) + 1, EXT_STRING | 1238 EXD_NET_DESC_DEVNAME); 1239 break; 1240 case AC_NET_EHOST: 1241 (void) ea_attach_item(record, &nd->nd_ehost, 1242 sizeof (nd->nd_ehost), EXT_RAW | EXD_NET_DESC_EHOST); 1243 break; 1244 case AC_NET_EDEST: 1245 (void) ea_attach_item(record, &nd->nd_edest, 1246 sizeof (nd->nd_edest), EXT_RAW | EXD_NET_DESC_EDEST); 1247 break; 1248 case AC_NET_VLAN_TPID: 1249 (void) ea_attach_item(record, &nd->nd_vlan_tpid, 1250 sizeof (ushort_t), EXT_UINT16 | EXD_NET_DESC_VLAN_TPID); 1251 break; 1252 case AC_NET_VLAN_TCI: 1253 (void) ea_attach_item(record, &nd->nd_vlan_tci, 1254 sizeof (ushort_t), EXT_UINT16 | EXD_NET_DESC_VLAN_TCI); 1255 break; 1256 case AC_NET_SAP: 1257 (void) ea_attach_item(record, &nd->nd_sap, 1258 sizeof (ushort_t), EXT_UINT16 | EXD_NET_DESC_SAP); 1259 break; 1260 case AC_NET_PRIORITY: 1261 (void) ea_attach_item(record, &nd->nd_priority, 1262 sizeof (ushort_t), EXT_UINT16 | EXD_NET_DESC_PRIORITY); 1263 break; 1264 case AC_NET_BWLIMIT: 1265 (void) ea_attach_item(record, &nd->nd_bw_limit, 1266 sizeof (uint64_t), EXT_UINT64 | EXD_NET_DESC_BWLIMIT); 1267 break; 1268 case AC_NET_SADDR: 1269 if (nd->nd_isv4) { 1270 (void) ea_attach_item(record, &nd->nd_saddr[3], 1271 sizeof (uint32_t), EXT_UINT32 | 1272 EXD_NET_DESC_V4SADDR); 1273 } else { 1274 (void) ea_attach_item(record, &nd->nd_saddr, 1275 sizeof (nd->nd_saddr), EXT_RAW | 1276 EXD_NET_DESC_V6SADDR); 1277 } 1278 break; 1279 case AC_NET_DADDR: 1280 if (nd->nd_isv4) { 1281 (void) ea_attach_item(record, &nd->nd_daddr[3], 1282 sizeof (uint32_t), EXT_UINT32 | 1283 EXD_NET_DESC_V4DADDR); 1284 } else { 1285 (void) ea_attach_item(record, &nd->nd_daddr, 1286 sizeof (nd->nd_daddr), EXT_RAW | 1287 EXD_NET_DESC_V6DADDR); 1288 } 1289 break; 1290 case AC_NET_SPORT: 1291 (void) ea_attach_item(record, &nd->nd_sport, 1292 sizeof (uint16_t), EXT_UINT16 | EXD_NET_DESC_SPORT); 1293 break; 1294 case AC_NET_DPORT: 1295 (void) ea_attach_item(record, &nd->nd_dport, 1296 sizeof (uint16_t), EXT_UINT16 | EXD_NET_DESC_DPORT); 1297 break; 1298 case AC_NET_PROTOCOL: 1299 (void) ea_attach_item(record, &nd->nd_protocol, 1300 sizeof (uint8_t), EXT_UINT8 | EXD_NET_DESC_PROTOCOL); 1301 break; 1302 case AC_NET_DSFIELD: 1303 (void) ea_attach_item(record, &nd->nd_dsfield, 1304 sizeof (uint8_t), EXT_UINT8 | EXD_NET_DESC_DSFIELD); 1305 break; 1306 default: 1307 attached = 0; 1308 } 1309 return (attached); 1310 } 1311 1312 static ea_object_t * 1313 exacct_assemble_net_record(void *ninfo, ulong_t *mask, ea_catalog_t record_type, 1314 int what) 1315 { 1316 int res; 1317 int count; 1318 ea_object_t *record; 1319 1320 /* 1321 * Assemble usage values into group. 1322 */ 1323 record = ea_alloc_group(EXT_GROUP | EXC_DEFAULT | record_type); 1324 for (res = 1, count = 0; res <= AC_NET_MAX_RES; res++) 1325 if (BT_TEST(mask, res)) { 1326 if (what == EX_NET_LNDESC_REC || 1327 what == EX_NET_FLDESC_REC) { 1328 count += exacct_attach_netdesc_item( 1329 (net_desc_t *)ninfo, record, res); 1330 } else { 1331 count += exacct_attach_netstat_item( 1332 (net_stat_t *)ninfo, record, res); 1333 } 1334 } 1335 if (count == 0) { 1336 ea_free_object(record, EUP_ALLOC); 1337 record = NULL; 1338 } 1339 return (record); 1340 } 1341 1342 int 1343 exacct_assemble_net_usage(ac_info_t *ac_net, void *ninfo, 1344 int (*callback)(ac_info_t *, void *, size_t, void *, size_t, size_t *), 1345 void *ubuf, size_t ubufsize, size_t *actual, int what) 1346 { 1347 ulong_t mask[AC_MASK_SZ]; 1348 ea_object_t *net_desc; 1349 ea_catalog_t record_type; 1350 void *buf; 1351 size_t bufsize; 1352 int ret; 1353 1354 mutex_enter(&ac_net->ac_lock); 1355 if (ac_net->ac_state == AC_OFF) { 1356 mutex_exit(&ac_net->ac_lock); 1357 return (ENOTACTIVE); 1358 } 1359 bt_copy(&ac_net->ac_mask[0], mask, AC_MASK_SZ); 1360 mutex_exit(&ac_net->ac_lock); 1361 1362 switch (what) { 1363 case EX_NET_LNDESC_REC: 1364 record_type = EXD_GROUP_NET_LINK_DESC; 1365 break; 1366 case EX_NET_LNSTAT_REC: 1367 record_type = EXD_GROUP_NET_LINK_STATS; 1368 break; 1369 case EX_NET_FLDESC_REC: 1370 record_type = EXD_GROUP_NET_FLOW_DESC; 1371 break; 1372 case EX_NET_FLSTAT_REC: 1373 record_type = EXD_GROUP_NET_FLOW_STATS; 1374 break; 1375 } 1376 1377 net_desc = exacct_assemble_net_record(ninfo, mask, record_type, what); 1378 if (net_desc == NULL) 1379 return (0); 1380 1381 /* 1382 * Pack object into buffer and pass to callback. 1383 */ 1384 bufsize = ea_pack_object(net_desc, NULL, 0); 1385 buf = kmem_alloc(bufsize, KM_NOSLEEP); 1386 if (buf == NULL) 1387 return (ENOMEM); 1388 1389 (void) ea_pack_object(net_desc, buf, bufsize); 1390 1391 ret = callback(ac_net, ubuf, ubufsize, buf, bufsize, actual); 1392 1393 /* 1394 * Free all previously allocations. 1395 */ 1396 kmem_free(buf, bufsize); 1397 ea_free_object(net_desc, EUP_ALLOC); 1398 return (ret); 1399 } 1400 1401 int 1402 exacct_commit_netinfo(void *arg, int what) 1403 { 1404 size_t size; 1405 ulong_t mask[AC_MASK_SZ]; 1406 struct exacct_globals *acg; 1407 ac_info_t *ac_net; 1408 1409 if (exacct_zone_key == ZONE_KEY_UNINITIALIZED) { 1410 /* 1411 * acctctl module not loaded. Nothing to do. 1412 */ 1413 return (ENOTACTIVE); 1414 } 1415 1416 /* 1417 * Even though each zone nominally has its own flow accounting settings 1418 * (ac_flow), these are only maintained by and for the global zone. 1419 * 1420 * If this were to change in the future, this function should grow a 1421 * second zoneid (or zone) argument, and use the corresponding zone's 1422 * settings rather than always using those of the global zone. 1423 */ 1424 acg = zone_getspecific(exacct_zone_key, global_zone); 1425 ac_net = &acg->ac_net; 1426 1427 mutex_enter(&ac_net->ac_lock); 1428 if (ac_net->ac_state == AC_OFF) { 1429 mutex_exit(&ac_net->ac_lock); 1430 return (ENOTACTIVE); 1431 } 1432 bt_copy(&ac_net->ac_mask[0], mask, AC_MASK_SZ); 1433 mutex_exit(&ac_net->ac_lock); 1434 1435 return (exacct_assemble_net_usage(ac_net, arg, exacct_commit_callback, 1436 NULL, 0, &size, what)); 1437 } 1438 1439 static int 1440 exacct_attach_flow_item(flow_usage_t *fu, ea_object_t *record, int res) 1441 { 1442 int attached = 1; 1443 1444 switch (res) { 1445 case AC_FLOW_SADDR: 1446 if (fu->fu_isv4) { 1447 (void) ea_attach_item(record, &fu->fu_saddr[3], 1448 sizeof (uint32_t), EXT_UINT32 | EXD_FLOW_V4SADDR); 1449 } else { 1450 (void) ea_attach_item(record, &fu->fu_saddr, 1451 sizeof (fu->fu_saddr), EXT_RAW | 1452 EXD_FLOW_V6SADDR); 1453 } 1454 break; 1455 case AC_FLOW_DADDR: 1456 if (fu->fu_isv4) { 1457 (void) ea_attach_item(record, &fu->fu_daddr[3], 1458 sizeof (uint32_t), EXT_UINT32 | EXD_FLOW_V4DADDR); 1459 } else { 1460 (void) ea_attach_item(record, &fu->fu_daddr, 1461 sizeof (fu->fu_daddr), EXT_RAW | 1462 EXD_FLOW_V6DADDR); 1463 } 1464 break; 1465 case AC_FLOW_SPORT: 1466 (void) ea_attach_item(record, &fu->fu_sport, 1467 sizeof (uint16_t), EXT_UINT16 | EXD_FLOW_SPORT); 1468 break; 1469 case AC_FLOW_DPORT: 1470 (void) ea_attach_item(record, &fu->fu_dport, 1471 sizeof (uint16_t), EXT_UINT16 | EXD_FLOW_DPORT); 1472 break; 1473 case AC_FLOW_PROTOCOL: 1474 (void) ea_attach_item(record, &fu->fu_protocol, 1475 sizeof (uint8_t), EXT_UINT8 | EXD_FLOW_PROTOCOL); 1476 break; 1477 case AC_FLOW_DSFIELD: 1478 (void) ea_attach_item(record, &fu->fu_dsfield, 1479 sizeof (uint8_t), EXT_UINT8 | EXD_FLOW_DSFIELD); 1480 break; 1481 case AC_FLOW_CTIME: 1482 (void) ea_attach_item(record, &fu->fu_ctime, 1483 sizeof (uint64_t), EXT_UINT64 | EXD_FLOW_CTIME); 1484 break; 1485 case AC_FLOW_LSEEN: 1486 (void) ea_attach_item(record, &fu->fu_lseen, 1487 sizeof (uint64_t), EXT_UINT64 | EXD_FLOW_LSEEN); 1488 break; 1489 case AC_FLOW_NBYTES: 1490 (void) ea_attach_item(record, &fu->fu_nbytes, 1491 sizeof (uint64_t), EXT_UINT32 | EXD_FLOW_NBYTES); 1492 break; 1493 case AC_FLOW_NPKTS: 1494 (void) ea_attach_item(record, &fu->fu_npackets, 1495 sizeof (uint64_t), EXT_UINT32 | EXD_FLOW_NPKTS); 1496 break; 1497 case AC_FLOW_PROJID: 1498 if (fu->fu_projid >= 0) { 1499 (void) ea_attach_item(record, &fu->fu_projid, 1500 sizeof (uint32_t), EXT_UINT32 | EXD_FLOW_PROJID); 1501 } 1502 break; 1503 case AC_FLOW_UID: 1504 if (fu->fu_userid >= 0) { 1505 (void) ea_attach_item(record, &fu->fu_userid, 1506 sizeof (uint32_t), EXT_UINT32 | EXD_FLOW_UID); 1507 } 1508 break; 1509 case AC_FLOW_ANAME: 1510 (void) ea_attach_item(record, fu->fu_aname, 1511 strlen(fu->fu_aname) + 1, EXT_STRING | EXD_FLOW_ANAME); 1512 break; 1513 default: 1514 attached = 0; 1515 } 1516 return (attached); 1517 } 1518 1519 static ea_object_t * 1520 exacct_assemble_flow_record(flow_usage_t *fu, ulong_t *mask, 1521 ea_catalog_t record_type) 1522 { 1523 int res, count; 1524 ea_object_t *record; 1525 1526 /* 1527 * Assemble usage values into group. 1528 */ 1529 record = ea_alloc_group(EXT_GROUP | EXC_DEFAULT | record_type); 1530 for (res = 1, count = 0; res <= AC_FLOW_MAX_RES; res++) 1531 if (BT_TEST(mask, res)) 1532 count += exacct_attach_flow_item(fu, record, res); 1533 if (count == 0) { 1534 ea_free_object(record, EUP_ALLOC); 1535 record = NULL; 1536 } 1537 return (record); 1538 } 1539 1540 int 1541 exacct_assemble_flow_usage(ac_info_t *ac_flow, flow_usage_t *fu, 1542 int (*callback)(ac_info_t *, void *, size_t, void *, size_t, size_t *), 1543 void *ubuf, size_t ubufsize, size_t *actual) 1544 { 1545 ulong_t mask[AC_MASK_SZ]; 1546 ea_object_t *flow_usage; 1547 ea_catalog_t record_type; 1548 void *buf; 1549 size_t bufsize; 1550 int ret; 1551 1552 mutex_enter(&ac_flow->ac_lock); 1553 if (ac_flow->ac_state == AC_OFF) { 1554 mutex_exit(&ac_flow->ac_lock); 1555 return (ENOTACTIVE); 1556 } 1557 bt_copy(&ac_flow->ac_mask[0], mask, AC_MASK_SZ); 1558 mutex_exit(&ac_flow->ac_lock); 1559 1560 record_type = EXD_GROUP_FLOW; 1561 1562 flow_usage = exacct_assemble_flow_record(fu, mask, record_type); 1563 if (flow_usage == NULL) { 1564 return (0); 1565 } 1566 1567 /* 1568 * Pack object into buffer and pass to callback. 1569 */ 1570 bufsize = ea_pack_object(flow_usage, NULL, 0); 1571 buf = kmem_alloc(bufsize, KM_NOSLEEP); 1572 if (buf == NULL) { 1573 return (ENOMEM); 1574 } 1575 1576 (void) ea_pack_object(flow_usage, buf, bufsize); 1577 1578 ret = callback(ac_flow, ubuf, ubufsize, buf, bufsize, actual); 1579 1580 /* 1581 * Free all previously allocations. 1582 */ 1583 kmem_free(buf, bufsize); 1584 ea_free_object(flow_usage, EUP_ALLOC); 1585 return (ret); 1586 } 1587 1588 void 1589 exacct_commit_flow(void *arg) 1590 { 1591 flow_usage_t *f = (flow_usage_t *)arg; 1592 size_t size; 1593 ulong_t mask[AC_MASK_SZ]; 1594 struct exacct_globals *acg; 1595 ac_info_t *ac_flow; 1596 1597 if (exacct_zone_key == ZONE_KEY_UNINITIALIZED) { 1598 /* 1599 * acctctl module not loaded. Nothing to do. 1600 */ 1601 return; 1602 } 1603 1604 /* 1605 * Even though each zone nominally has its own flow accounting settings 1606 * (ac_flow), these are only maintained by and for the global zone. 1607 * 1608 * If this were to change in the future, this function should grow a 1609 * second zoneid (or zone) argument, and use the corresponding zone's 1610 * settings rather than always using those of the global zone. 1611 */ 1612 acg = zone_getspecific(exacct_zone_key, global_zone); 1613 ac_flow = &acg->ac_flow; 1614 1615 mutex_enter(&ac_flow->ac_lock); 1616 if (ac_flow->ac_state == AC_OFF) { 1617 mutex_exit(&ac_flow->ac_lock); 1618 return; 1619 } 1620 bt_copy(&ac_flow->ac_mask[0], mask, AC_MASK_SZ); 1621 mutex_exit(&ac_flow->ac_lock); 1622 1623 (void) exacct_assemble_flow_usage(ac_flow, f, exacct_commit_callback, 1624 NULL, 0, &size); 1625 } 1626 1627 /* 1628 * int exacct_tag_task(task_t *, void *, size_t, int) 1629 * 1630 * Overview 1631 * exacct_tag_task() provides the exacct record construction and writing 1632 * support required by putacct(2) for task entities. 1633 * 1634 * Return values 1635 * The result of the write operation is returned, unless the extended 1636 * accounting facility is not active, in which case ENOTACTIVE is returned. 1637 * 1638 * Caller's context 1639 * Suitable for KM_SLEEP allocations. 1640 */ 1641 int 1642 exacct_tag_task(ac_info_t *ac_task, task_t *tk, void *ubuf, size_t ubufsz, 1643 int flags) 1644 { 1645 int error = 0; 1646 void *buf; 1647 size_t bufsize; 1648 ea_catalog_t cat; 1649 ea_object_t *tag; 1650 1651 mutex_enter(&ac_task->ac_lock); 1652 if (ac_task->ac_state == AC_OFF || ac_task->ac_vnode == NULL) { 1653 mutex_exit(&ac_task->ac_lock); 1654 return (ENOTACTIVE); 1655 } 1656 mutex_exit(&ac_task->ac_lock); 1657 1658 tag = ea_alloc_group(EXT_GROUP | EXC_DEFAULT | EXD_GROUP_TASK_TAG); 1659 (void) ea_attach_item(tag, &tk->tk_tkid, 0, 1660 EXT_UINT32 | EXC_DEFAULT | EXD_TASK_TASKID); 1661 (void) ea_attach_item(tag, tk->tk_zone->zone_nodename, 0, 1662 EXT_STRING | EXC_DEFAULT | EXD_TASK_HOSTNAME); 1663 if (flags == EP_RAW) 1664 cat = EXT_RAW | EXC_DEFAULT | EXD_TASK_TAG; 1665 else 1666 cat = EXT_EXACCT_OBJECT | EXC_DEFAULT | EXD_TASK_TAG; 1667 (void) ea_attach_item(tag, ubuf, ubufsz, cat); 1668 1669 bufsize = ea_pack_object(tag, NULL, 0); 1670 buf = kmem_alloc(bufsize, KM_SLEEP); 1671 (void) ea_pack_object(tag, buf, bufsize); 1672 error = exacct_vn_write(ac_task, buf, bufsize); 1673 kmem_free(buf, bufsize); 1674 ea_free_object(tag, EUP_ALLOC); 1675 return (error); 1676 } 1677 1678 /* 1679 * exacct_tag_proc(pid_t, taskid_t, void *, size_t, int, char *) 1680 * 1681 * Overview 1682 * exacct_tag_proc() provides the exacct record construction and writing 1683 * support required by putacct(2) for processes. 1684 * 1685 * Return values 1686 * The result of the write operation is returned, unless the extended 1687 * accounting facility is not active, in which case ENOTACTIVE is returned. 1688 * 1689 * Caller's context 1690 * Suitable for KM_SLEEP allocations. 1691 */ 1692 int 1693 exacct_tag_proc(ac_info_t *ac_proc, pid_t pid, taskid_t tkid, void *ubuf, 1694 size_t ubufsz, int flags, const char *hostname) 1695 { 1696 int error = 0; 1697 void *buf; 1698 size_t bufsize; 1699 ea_catalog_t cat; 1700 ea_object_t *tag; 1701 1702 mutex_enter(&ac_proc->ac_lock); 1703 if (ac_proc->ac_state == AC_OFF || ac_proc->ac_vnode == NULL) { 1704 mutex_exit(&ac_proc->ac_lock); 1705 return (ENOTACTIVE); 1706 } 1707 mutex_exit(&ac_proc->ac_lock); 1708 1709 tag = ea_alloc_group(EXT_GROUP | EXC_DEFAULT | EXD_GROUP_PROC_TAG); 1710 (void) ea_attach_item(tag, &pid, sizeof (uint32_t), 1711 EXT_UINT32 | EXC_DEFAULT | EXD_PROC_PID); 1712 (void) ea_attach_item(tag, &tkid, 0, 1713 EXT_UINT32 | EXC_DEFAULT | EXD_TASK_TASKID); 1714 (void) ea_attach_item(tag, (void *)hostname, 0, 1715 EXT_STRING | EXC_DEFAULT | EXD_TASK_HOSTNAME); 1716 if (flags == EP_RAW) 1717 cat = EXT_RAW | EXC_DEFAULT | EXD_PROC_TAG; 1718 else 1719 cat = EXT_EXACCT_OBJECT | EXC_DEFAULT | EXD_PROC_TAG; 1720 (void) ea_attach_item(tag, ubuf, ubufsz, cat); 1721 1722 bufsize = ea_pack_object(tag, NULL, 0); 1723 buf = kmem_alloc(bufsize, KM_SLEEP); 1724 (void) ea_pack_object(tag, buf, bufsize); 1725 error = exacct_vn_write(ac_proc, buf, bufsize); 1726 kmem_free(buf, bufsize); 1727 ea_free_object(tag, EUP_ALLOC); 1728 return (error); 1729 } 1730 1731 /* 1732 * void exacct_init(void) 1733 * 1734 * Overview 1735 * Initialized the extended accounting subsystem. 1736 * 1737 * Return values 1738 * None. 1739 * 1740 * Caller's context 1741 * Suitable for KM_SLEEP allocations. 1742 */ 1743 void 1744 exacct_init() 1745 { 1746 exacct_queue = system_taskq; 1747 exacct_object_cache = kmem_cache_create("exacct_object_cache", 1748 sizeof (ea_object_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 1749 task_commit_thread_init(); 1750 } 1751 1752 /* 1753 * exacct_snapshot_proc_mstate() copies a process's microstate accounting data 1754 * and resource usage counters into a given task_usage_t. It differs from 1755 * exacct_copy_proc_mstate() in that here a) we are copying to a task_usage_t, 1756 * b) p_lock will have been acquired earlier in the call path and c) we 1757 * are here including the process's user and system times. 1758 */ 1759 static void 1760 exacct_snapshot_proc_mstate(proc_t *p, task_usage_t *tu) 1761 { 1762 tu->tu_utime = mstate_aggr_state(p, LMS_USER); 1763 tu->tu_stime = mstate_aggr_state(p, LMS_SYSTEM); 1764 tu->tu_minflt = p->p_ru.minflt; 1765 tu->tu_majflt = p->p_ru.majflt; 1766 tu->tu_sndmsg = p->p_ru.msgsnd; 1767 tu->tu_rcvmsg = p->p_ru.msgrcv; 1768 tu->tu_ioch = p->p_ru.ioch; 1769 tu->tu_iblk = p->p_ru.inblock; 1770 tu->tu_oblk = p->p_ru.oublock; 1771 tu->tu_vcsw = p->p_ru.nvcsw; 1772 tu->tu_icsw = p->p_ru.nivcsw; 1773 tu->tu_nsig = p->p_ru.nsignals; 1774 tu->tu_nswp = p->p_ru.nswap; 1775 tu->tu_nscl = p->p_ru.sysc; 1776 } 1777 1778 /* 1779 * void exacct_move_mstate(proc_t *, task_t *, task_t *) 1780 * 1781 * Overview 1782 * exacct_move_mstate() is called by task_change() and accounts for 1783 * a process's resource usage when it is moved from one task to another. 1784 * 1785 * The process's usage at this point is recorded in the new task so 1786 * that it can be excluded from the calculation of resources consumed 1787 * by that task. 1788 * 1789 * The resource usage inherited by the new task is also added to the 1790 * aggregate maintained by the old task for processes that have exited. 1791 * 1792 * Return values 1793 * None. 1794 * 1795 * Caller's context 1796 * pidlock and p_lock held across exacct_move_mstate(). 1797 */ 1798 void 1799 exacct_move_mstate(proc_t *p, task_t *oldtk, task_t *newtk) 1800 { 1801 task_usage_t tu; 1802 1803 /* Take a snapshot of this process's mstate and RU counters */ 1804 exacct_snapshot_proc_mstate(p, &tu); 1805 1806 /* 1807 * Use the snapshot to increment the aggregate usage of the old 1808 * task, and the inherited usage of the new one. 1809 */ 1810 mutex_enter(&oldtk->tk_usage_lock); 1811 exacct_add_task_mstate(oldtk->tk_usage, &tu); 1812 mutex_exit(&oldtk->tk_usage_lock); 1813 mutex_enter(&newtk->tk_usage_lock); 1814 exacct_add_task_mstate(newtk->tk_inherited, &tu); 1815 mutex_exit(&newtk->tk_usage_lock); 1816 } 1817