/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include <sys/exacct.h>
#include <sys/exacct_catalog.h>
#include <sys/disp.h>
#include <sys/task.h>
#include <sys/proc.h>
#include <sys/cmn_err.h>
#include <sys/kmem.h>
#include <sys/project.h>
#include <sys/systm.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/acctctl.h>
#include <sys/time.h>
#include <sys/utsname.h>
#include <sys/session.h>
#include <sys/sysmacros.h>
#include <sys/bitmap.h>
#include <sys/msacct.h>

/*
 * exacct usage and recording routines
 *
 * wracct(2), getacct(2), and the records written at process or task
 * termination are constructed using the exacct_assemble_[task,proc]_usage()
 * functions, which take a callback that takes the appropriate action on
 * the packed exacct record for the task or process.  For the process-related
 * actions, we partition the routines such that the data collecting component
 * can be performed while holding p_lock, and all sleeping or blocking
 * operations can be performed without acquiring p_lock.
 *
 * putacct(2), which allows an application to construct a customized record
 * associated with an existing process or task, has its own entry points:
 * exacct_tag_task() and exacct_tag_proc().
 */

taskq_t *exacct_queue;
kmem_cache_t *exacct_object_cache;

zone_key_t exacct_zone_key = ZONE_KEY_UNINITIALIZED;

static const uint32_t exacct_version = EXACCT_VERSION;
static const char exacct_header[] = "exacct";
static const char exacct_creator[] = "SunOS";

ea_object_t *
ea_alloc_item(ea_catalog_t catalog, void *buf, size_t bufsz)
{
	ea_object_t *item;

	item = kmem_cache_alloc(exacct_object_cache, KM_SLEEP);
	bzero(item, sizeof (ea_object_t));
	(void) ea_set_item(item, catalog, buf, bufsz);
	return (item);
}

ea_object_t *
ea_alloc_group(ea_catalog_t catalog)
{
	ea_object_t *group;

	group = kmem_cache_alloc(exacct_object_cache, KM_SLEEP);
	bzero(group, sizeof (ea_object_t));
	(void) ea_set_group(group, catalog);
	return (group);
}

ea_object_t *
ea_attach_item(ea_object_t *grp, void *buf, size_t bufsz, ea_catalog_t catalog)
{
	ea_object_t *item;

	item = ea_alloc_item(catalog, buf, bufsz);
	(void) ea_attach_to_group(grp, item);
	return (item);
}
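
/*
 * For illustration, a rough sketch of the build/pack/free pattern that the
 * record-assembly routines below follow (variable names are arbitrary):
 *
 *	ea_object_t *grp = ea_alloc_group(EXT_GROUP | EXC_DEFAULT |
 *	    EXD_GROUP_PROC);
 *	(void) ea_attach_item(grp, &pid, sizeof (uint32_t),
 *	    EXT_UINT32 | EXD_PROC_PID);
 *	bufsize = ea_pack_object(grp, NULL, 0);		(sizing pass)
 *	buf = kmem_alloc(bufsize, KM_SLEEP);
 *	(void) ea_pack_object(grp, buf, bufsize);	(fill pass)
 *	... write or copy out buf ...
 *	kmem_free(buf, bufsize);
 *	ea_free_object(grp, EUP_ALLOC);
 */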

/*
 * exacct_add_task_mstate() and exacct_sub_task_mstate() add to and subtract
 * from one task_usage_t the microstate accounting data and resource usage
 * counters supplied in another.  These functions do not operate on *all*
 * members of a task_usage_t: for some (e.g. tu_anctaskid) it would not make
 * sense.
 */
static void
exacct_add_task_mstate(task_usage_t *tu, task_usage_t *delta)
{
	tu->tu_utime += delta->tu_utime;
	tu->tu_stime += delta->tu_stime;
	tu->tu_minflt += delta->tu_minflt;
	tu->tu_majflt += delta->tu_majflt;
	tu->tu_sndmsg += delta->tu_sndmsg;
	tu->tu_rcvmsg += delta->tu_rcvmsg;
	tu->tu_ioch += delta->tu_ioch;
	tu->tu_iblk += delta->tu_iblk;
	tu->tu_oblk += delta->tu_oblk;
	tu->tu_vcsw += delta->tu_vcsw;
	tu->tu_icsw += delta->tu_icsw;
	tu->tu_nsig += delta->tu_nsig;
	tu->tu_nswp += delta->tu_nswp;
	tu->tu_nscl += delta->tu_nscl;
}

/*
 * See the comments for exacct_add_task_mstate(), above.
 */
static void
exacct_sub_task_mstate(task_usage_t *tu, task_usage_t *delta)
{
	tu->tu_utime -= delta->tu_utime;
	tu->tu_stime -= delta->tu_stime;
	tu->tu_minflt -= delta->tu_minflt;
	tu->tu_majflt -= delta->tu_majflt;
	tu->tu_sndmsg -= delta->tu_sndmsg;
	tu->tu_rcvmsg -= delta->tu_rcvmsg;
	tu->tu_ioch -= delta->tu_ioch;
	tu->tu_iblk -= delta->tu_iblk;
	tu->tu_oblk -= delta->tu_oblk;
	tu->tu_vcsw -= delta->tu_vcsw;
	tu->tu_icsw -= delta->tu_icsw;
	tu->tu_nsig -= delta->tu_nsig;
	tu->tu_nswp -= delta->tu_nswp;
	tu->tu_nscl -= delta->tu_nscl;
}

/*
 * Wrapper for vn_rdwr() used by exacct_vn_write() and exacct_write_header()
 * to write to the accounting file without corrupting it in case of an I/O or
 * filesystem error.
 */
static int
exacct_vn_write_impl(ac_info_t *info, void *buf, ssize_t bufsize)
{
	int error;
	ssize_t resid;
	struct vattr va;

	ASSERT(info != NULL);
	ASSERT(info->ac_vnode != NULL);
	ASSERT(MUTEX_HELD(&info->ac_lock));

	/*
	 * Save the size.  If vn_rdwr fails, reset the size to avoid corrupting
	 * the present accounting file.
	 */
	va.va_mask = AT_SIZE;
	error = VOP_GETATTR(info->ac_vnode, &va, 0, kcred, NULL);
	if (error == 0) {
		error = vn_rdwr(UIO_WRITE, info->ac_vnode, (caddr_t)buf,
		    bufsize, 0LL, UIO_SYSSPACE, FAPPEND, (rlim64_t)MAXOFFSET_T,
		    kcred, &resid);
		if (error) {
			(void) VOP_SETATTR(info->ac_vnode, &va, 0, kcred, NULL);
		} else if (resid != 0) {
			(void) VOP_SETATTR(info->ac_vnode, &va, 0, kcred, NULL);
			error = ENOSPC;
		}
	}
	return (error);
}

/*
 * exacct_vn_write() safely writes to an accounting file.  acctctl() prevents
 * the two accounting vnodes from being equal, and the appropriate ac_lock is
 * held across the call, so we're single threaded through this code for each
 * file.
 */
static int
exacct_vn_write(ac_info_t *info, void *buf, ssize_t bufsize)
{
	int error;

	if (info == NULL)
		return (0);

	mutex_enter(&info->ac_lock);

	/*
	 * Don't do anything unless accounting file is set.
	 */
	if (info->ac_vnode == NULL) {
		mutex_exit(&info->ac_lock);
		return (0);
	}
	error = exacct_vn_write_impl(info, buf, bufsize);
	mutex_exit(&info->ac_lock);

	return (error);
}
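
/*
 * Worked example of the recovery behaviour above (sizes are illustrative):
 * if the accounting file is 4096 bytes long and only part of a 200-byte
 * record can be appended, the short write is detected via resid, the file
 * size is reset to 4096 with VOP_SETATTR(), and ENOSPC is returned, so a
 * reader of the file never sees a torn record.
 */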

/*
 * void *exacct_create_header(size_t *)
 *
 * Overview
 *   exacct_create_header() constructs an exacct file header identifying the
 *   accounting file as the output of the kernel.  exacct_create_header() and
 *   the static write_header() and verify_header() routines in libexacct must
 *   remain synchronized.
 *
 * Return values
 *   A pointer to a packed exacct buffer containing the appropriate header is
 *   returned; the size of the buffer is placed in the location indicated by
 *   sizep.
 *
 * Caller's context
 *   Suitable for KM_SLEEP allocations.
 */
void *
exacct_create_header(size_t *sizep)
{
	ea_object_t *hdr_grp;
	uint32_t bskip;
	void *buf;
	size_t bufsize;

	hdr_grp = ea_alloc_group(EXT_GROUP | EXC_DEFAULT | EXD_GROUP_HEADER);
	(void) ea_attach_item(hdr_grp, (void *)&exacct_version, 0,
	    EXT_UINT32 | EXC_DEFAULT | EXD_VERSION);
	(void) ea_attach_item(hdr_grp, (void *)exacct_header, 0,
	    EXT_STRING | EXC_DEFAULT | EXD_FILETYPE);
	(void) ea_attach_item(hdr_grp, (void *)exacct_creator, 0,
	    EXT_STRING | EXC_DEFAULT | EXD_CREATOR);
	(void) ea_attach_item(hdr_grp, uts_nodename(), 0,
	    EXT_STRING | EXC_DEFAULT | EXD_HOSTNAME);

	bufsize = ea_pack_object(hdr_grp, NULL, 0);
	buf = kmem_alloc(bufsize, KM_SLEEP);
	(void) ea_pack_object(hdr_grp, buf, bufsize);
	ea_free_object(hdr_grp, EUP_ALLOC);

	/*
	 * To prevent reading the header when reading the file backwards,
	 * set the large backskip of the header group to 0 (last 4 bytes).
	 */
	bskip = 0;
	exacct_order32(&bskip);
	bcopy(&bskip, (char *)buf + bufsize - sizeof (bskip),
	    sizeof (bskip));

	*sizep = bufsize;
	return (buf);
}

/*
 * int exacct_write_header(ac_info_t *, void *, size_t)
 *
 * Overview
 *   exacct_write_header() writes the given header buffer to the indicated
 *   vnode.
 *
 * Return values
 *   The result of the write operation is returned.
 *
 * Caller's context
 *   Caller must hold the ac_lock of the appropriate accounting file
 *   information block (ac_info_t).
 */
int
exacct_write_header(ac_info_t *info, void *hdr, size_t hdrsize)
{
	if (info != NULL && info->ac_vnode != NULL)
		return (exacct_vn_write_impl(info, hdr, hdrsize));

	return (0);
}

static void
exacct_get_interval_task_usage(task_t *tk, task_usage_t *tu,
    task_usage_t **tu_buf)
{
	task_usage_t *oldtu, *newtu;
	task_usage_t **prevusage;

	ASSERT(MUTEX_HELD(&tk->tk_usage_lock));
	if (getzoneid() != GLOBAL_ZONEID) {
		prevusage = &tk->tk_zoneusage;
	} else {
		prevusage = &tk->tk_prevusage;
	}
	if ((oldtu = *prevusage) != NULL) {
		/*
		 * In case we have any accounting information
		 * saved from the previous interval record.
		 */
		newtu = *tu_buf;
		bcopy(tu, newtu, sizeof (task_usage_t));
		tu->tu_minflt -= oldtu->tu_minflt;
		tu->tu_majflt -= oldtu->tu_majflt;
		tu->tu_sndmsg -= oldtu->tu_sndmsg;
		tu->tu_rcvmsg -= oldtu->tu_rcvmsg;
		tu->tu_ioch -= oldtu->tu_ioch;
		tu->tu_iblk -= oldtu->tu_iblk;
		tu->tu_oblk -= oldtu->tu_oblk;
		tu->tu_vcsw -= oldtu->tu_vcsw;
		tu->tu_icsw -= oldtu->tu_icsw;
		tu->tu_nsig -= oldtu->tu_nsig;
		tu->tu_nswp -= oldtu->tu_nswp;
		tu->tu_nscl -= oldtu->tu_nscl;
		tu->tu_utime -= oldtu->tu_utime;
		tu->tu_stime -= oldtu->tu_stime;

		tu->tu_startsec = oldtu->tu_finishsec;
		tu->tu_startnsec = oldtu->tu_finishnsec;
		/*
		 * Copy the data from our temporary storage to the task's
		 * previous interval usage structure for future reference.
		 */
		bcopy(newtu, oldtu, sizeof (task_usage_t));
	} else {
		/*
		 * Store current statistics in the task's previous interval
		 * usage structure for future reference.
		 */
		*prevusage = *tu_buf;
		bcopy(tu, *prevusage, sizeof (task_usage_t));
		*tu_buf = NULL;
	}
}

static void
exacct_snapshot_task_usage(task_t *tk, task_usage_t *tu)
{
	timestruc_t ts;
	proc_t *p;

	ASSERT(MUTEX_HELD(&pidlock));

	if ((p = tk->tk_memb_list) == NULL)
		return;

	/*
	 * exacct_snapshot_task_usage() provides an approximate snapshot of the
	 * usage of the potentially many members of the task.  Since we don't
	 * guarantee exactness, we only hold each member's p_lock long enough
	 * to aggregate its microstate times, and read the remaining counters
	 * without locking.
	 */
	do {
		mutex_enter(&p->p_lock);
		tu->tu_utime += mstate_aggr_state(p, LMS_USER);
		tu->tu_stime += mstate_aggr_state(p, LMS_SYSTEM);
		mutex_exit(&p->p_lock);
		tu->tu_minflt += p->p_ru.minflt;
		tu->tu_majflt += p->p_ru.majflt;
		tu->tu_sndmsg += p->p_ru.msgsnd;
		tu->tu_rcvmsg += p->p_ru.msgrcv;
		tu->tu_ioch += p->p_ru.ioch;
		tu->tu_iblk += p->p_ru.inblock;
		tu->tu_oblk += p->p_ru.oublock;
		tu->tu_vcsw += p->p_ru.nvcsw;
		tu->tu_icsw += p->p_ru.nivcsw;
		tu->tu_nsig += p->p_ru.nsignals;
		tu->tu_nswp += p->p_ru.nswap;
		tu->tu_nscl += p->p_ru.sysc;
	} while ((p = p->p_tasknext) != tk->tk_memb_list);

	/*
	 * The resource usage accounted for so far will include that
	 * contributed by the task's first process.  If this process
	 * came from another task, then its accumulated resource usage
	 * will include a contribution from work performed there.
	 * We must therefore subtract any resource usage that was
	 * inherited with the first process.
	 */
	exacct_sub_task_mstate(tu, tk->tk_inherited);

	gethrestime(&ts);
	tu->tu_finishsec = (uint64_t)(ulong_t)ts.tv_sec;
	tu->tu_finishnsec = (uint64_t)(ulong_t)ts.tv_nsec;
}

/*
 * void exacct_update_task_mstate(proc_t *)
 *
 * Overview
 *   exacct_update_task_mstate() updates the task usage; it is intended
 *   to be called from proc_exit().
 *
 * Return values
 *   None.
 *
 * Caller's context
 *   p_lock must be held at entry.
 */
void
exacct_update_task_mstate(proc_t *p)
{
	task_usage_t *tu;

	mutex_enter(&p->p_task->tk_usage_lock);
	tu = p->p_task->tk_usage;
	tu->tu_utime += mstate_aggr_state(p, LMS_USER);
	tu->tu_stime += mstate_aggr_state(p, LMS_SYSTEM);
	tu->tu_minflt += p->p_ru.minflt;
	tu->tu_majflt += p->p_ru.majflt;
	tu->tu_sndmsg += p->p_ru.msgsnd;
	tu->tu_rcvmsg += p->p_ru.msgrcv;
	tu->tu_ioch += p->p_ru.ioch;
	tu->tu_iblk += p->p_ru.inblock;
	tu->tu_oblk += p->p_ru.oublock;
	tu->tu_vcsw += p->p_ru.nvcsw;
	tu->tu_icsw += p->p_ru.nivcsw;
	tu->tu_nsig += p->p_ru.nsignals;
	tu->tu_nswp += p->p_ru.nswap;
	tu->tu_nscl += p->p_ru.sysc;
	mutex_exit(&p->p_task->tk_usage_lock);
}

static void
exacct_calculate_task_usage(task_t *tk, task_usage_t *tu, int flag)
{
	timestruc_t ts;
	task_usage_t *tu_buf;

	switch (flag) {
	case EW_PARTIAL:
		/*
		 * For partial records we must report the sum of current
		 * accounting statistics with previously accumulated
		 * statistics.
		 */
		mutex_enter(&pidlock);
		mutex_enter(&tk->tk_usage_lock);

		(void) bcopy(tk->tk_usage, tu, sizeof (task_usage_t));
		exacct_snapshot_task_usage(tk, tu);

		mutex_exit(&tk->tk_usage_lock);
		mutex_exit(&pidlock);
		break;
	case EW_INTERVAL:
		/*
		 * We need to allocate a spare task_usage_t buffer before
		 * grabbing pidlock because we might need it later in
		 * exacct_get_interval_task_usage().
		 */
		tu_buf = kmem_zalloc(sizeof (task_usage_t), KM_SLEEP);
		mutex_enter(&pidlock);
		mutex_enter(&tk->tk_usage_lock);

		/*
		 * For interval records, we deduct the previously saved
		 * microstate accounting data and CPU usage times from the
		 * current totals and update the previous task usage structure.
		 */
		(void) bcopy(tk->tk_usage, tu, sizeof (task_usage_t));
		exacct_snapshot_task_usage(tk, tu);
		exacct_get_interval_task_usage(tk, tu, &tu_buf);

		mutex_exit(&tk->tk_usage_lock);
		mutex_exit(&pidlock);

		if (tu_buf != NULL)
			kmem_free(tu_buf, sizeof (task_usage_t));
		break;
	case EW_FINAL:
		/*
		 * For final records, we deduct, from the task's current
		 * usage, any usage that was inherited with the arrival
		 * of a process from a previous task.  We then record
		 * the task's finish time.
		 */
		mutex_enter(&tk->tk_usage_lock);
		(void) bcopy(tk->tk_usage, tu, sizeof (task_usage_t));
		exacct_sub_task_mstate(tu, tk->tk_inherited);
		mutex_exit(&tk->tk_usage_lock);

		gethrestime(&ts);
		tu->tu_finishsec = (uint64_t)(ulong_t)ts.tv_sec;
		tu->tu_finishnsec = (uint64_t)(ulong_t)ts.tv_nsec;

		break;
	}
}

static int
exacct_attach_task_item(task_t *tk, task_usage_t *tu, ea_object_t *record,
    int res)
{
	int attached = 1;

	switch (res) {
	case AC_TASK_TASKID:
		(void) ea_attach_item(record, &tk->tk_tkid,
		    sizeof (uint32_t), EXT_UINT32 | EXD_TASK_TASKID);
		break;
	case AC_TASK_PROJID:
		(void) ea_attach_item(record, &tk->tk_proj->kpj_id,
		    sizeof (uint32_t), EXT_UINT32 | EXD_TASK_PROJID);
		break;
	case AC_TASK_CPU: {
		timestruc_t ts;
		uint64_t ui;

		hrt2ts(tu->tu_stime, &ts);
		ui = ts.tv_sec;
		(void) ea_attach_item(record, &ui, sizeof (uint64_t),
		    EXT_UINT64 | EXD_TASK_CPU_SYS_SEC);
		ui = ts.tv_nsec;
		(void) ea_attach_item(record, &ui, sizeof (uint64_t),
		    EXT_UINT64 | EXD_TASK_CPU_SYS_NSEC);

		hrt2ts(tu->tu_utime, &ts);
		ui = ts.tv_sec;
		(void) ea_attach_item(record, &ui, sizeof (uint64_t),
		    EXT_UINT64 | EXD_TASK_CPU_USER_SEC);
		ui = ts.tv_nsec;
		(void) ea_attach_item(record, &ui, sizeof (uint64_t),
		    EXT_UINT64 | EXD_TASK_CPU_USER_NSEC);
	}
		break;
	case AC_TASK_TIME:
		(void) ea_attach_item(record, &tu->tu_startsec,
		    sizeof (uint64_t), EXT_UINT64 | EXD_TASK_START_SEC);
		(void) ea_attach_item(record, &tu->tu_startnsec,
		    sizeof (uint64_t), EXT_UINT64 | EXD_TASK_START_NSEC);
		(void) ea_attach_item(record, &tu->tu_finishsec,
		    sizeof (uint64_t), EXT_UINT64 | EXD_TASK_FINISH_SEC);
		(void) ea_attach_item(record, &tu->tu_finishnsec,
		    sizeof (uint64_t), EXT_UINT64 | EXD_TASK_FINISH_NSEC);
		break;
	case AC_TASK_HOSTNAME:
		(void) ea_attach_item(record, tk->tk_zone->zone_nodename,
		    strlen(tk->tk_zone->zone_nodename) + 1,
		    EXT_STRING | EXD_TASK_HOSTNAME);
		break;
	case AC_TASK_MICROSTATE:
		(void) ea_attach_item(record, &tu->tu_majflt,
		    sizeof (uint64_t), EXT_UINT64 | EXD_TASK_FAULTS_MAJOR);
		(void) ea_attach_item(record, &tu->tu_minflt,
		    sizeof (uint64_t), EXT_UINT64 | EXD_TASK_FAULTS_MINOR);
		(void) ea_attach_item(record, &tu->tu_sndmsg,
		    sizeof (uint64_t), EXT_UINT64 | EXD_TASK_MESSAGES_SND);
		(void) ea_attach_item(record, &tu->tu_rcvmsg,
		    sizeof (uint64_t), EXT_UINT64 | EXD_TASK_MESSAGES_RCV);
		(void) ea_attach_item(record, &tu->tu_iblk,
		    sizeof (uint64_t), EXT_UINT64 | EXD_TASK_BLOCKS_IN);
		(void) ea_attach_item(record, &tu->tu_oblk,
		    sizeof (uint64_t), EXT_UINT64 | EXD_TASK_BLOCKS_OUT);
		(void) ea_attach_item(record, &tu->tu_ioch,
		    sizeof (uint64_t), EXT_UINT64 | EXD_TASK_CHARS_RDWR);
		(void) ea_attach_item(record, &tu->tu_vcsw,
		    sizeof (uint64_t), EXT_UINT64 | EXD_TASK_CONTEXT_VOL);
		(void) ea_attach_item(record, &tu->tu_icsw,
		    sizeof (uint64_t), EXT_UINT64 | EXD_TASK_CONTEXT_INV);
		(void) ea_attach_item(record, &tu->tu_nsig,
		    sizeof (uint64_t), EXT_UINT64 | EXD_TASK_SIGNALS);
		(void) ea_attach_item(record, &tu->tu_nswp,
		    sizeof (uint64_t), EXT_UINT64 | EXD_TASK_SWAPS);
		(void) ea_attach_item(record, &tu->tu_nscl,
		    sizeof (uint64_t), EXT_UINT64 | EXD_TASK_SYSCALLS);
		break;
	case AC_TASK_ANCTASKID:
		(void) ea_attach_item(record, &tu->tu_anctaskid,
		    sizeof (uint32_t), EXT_UINT32 | EXD_TASK_ANCTASKID);
		break;
	case AC_TASK_ZONENAME:
		(void) ea_attach_item(record, tk->tk_zone->zone_name,
		    strlen(tk->tk_zone->zone_name) + 1,
		    EXT_STRING | EXD_TASK_ZONENAME);
		break;
	default:
		attached = 0;
	}
	return (attached);
}

static ea_object_t *
exacct_assemble_task_record(task_t *tk, task_usage_t *tu, ulong_t *mask,
    ea_catalog_t record_type)
{
	int res, count;
	ea_object_t *record;

	/*
	 * Assemble usage values into group.
	 */
	record = ea_alloc_group(EXT_GROUP | EXC_DEFAULT | record_type);
	for (res = 1, count = 0; res <= AC_TASK_MAX_RES; res++)
		if (BT_TEST(mask, res))
			count += exacct_attach_task_item(tk, tu, record, res);
	if (count == 0) {
		ea_free_object(record, EUP_ALLOC);
		record = NULL;
	}
	return (record);
}
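
/*
 * For illustration (the values below are arbitrary): a caller that only
 * wanted the task ID and CPU time in its records would pass a mask with
 * just those resource bits set, e.g.
 *
 *	ulong_t mask[AC_MASK_SZ];
 *
 *	bzero(mask, sizeof (mask));
 *	BT_SET(mask, AC_TASK_TASKID);
 *	BT_SET(mask, AC_TASK_CPU);
 *	record = exacct_assemble_task_record(tk, tu, mask, EXD_GROUP_TASK);
 *
 * exacct_attach_task_item() is then invoked once per set bit, and NULL is
 * returned if nothing was attached.
 */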

/*
 * int exacct_assemble_task_usage(ac_info_t *, task_t *,
 *	int (*)(ac_info_t *, void *, size_t, void *, size_t, size_t *),
 *	void *, size_t, size_t *, int)
 *
 * Overview
 *   exacct_assemble_task_usage() builds the packed exacct buffer for the
 *   indicated task, executes the given callback function, and frees the
 *   packed buffer.
 *
 * Return values
 *   Returns 0 on success; otherwise the appropriate error code is returned.
 *
 * Caller's context
 *   Suitable for KM_SLEEP allocations.
 */
int
exacct_assemble_task_usage(ac_info_t *ac_task, task_t *tk,
    int (*callback)(ac_info_t *, void *, size_t, void *, size_t, size_t *),
    void *ubuf, size_t ubufsize, size_t *actual, int flag)
{
	ulong_t mask[AC_MASK_SZ];
	ea_object_t *task_record;
	ea_catalog_t record_type;
	task_usage_t *tu;
	void *buf;
	size_t bufsize;
	int ret;

	ASSERT(flag == EW_FINAL || flag == EW_PARTIAL || flag == EW_INTERVAL);

	mutex_enter(&ac_task->ac_lock);
	if (ac_task->ac_state == AC_OFF) {
		mutex_exit(&ac_task->ac_lock);
		return (ENOTACTIVE);
	}
	bt_copy(ac_task->ac_mask, mask, AC_MASK_SZ);
	mutex_exit(&ac_task->ac_lock);

	switch (flag) {
	case EW_FINAL:
		record_type = EXD_GROUP_TASK;
		break;
	case EW_PARTIAL:
		record_type = EXD_GROUP_TASK_PARTIAL;
		break;
	case EW_INTERVAL:
		record_type = EXD_GROUP_TASK_INTERVAL;
		break;
	}

	/*
	 * Calculate task usage and assemble it into the task record.
	 */
	tu = kmem_zalloc(sizeof (task_usage_t), KM_SLEEP);
	exacct_calculate_task_usage(tk, tu, flag);
	task_record = exacct_assemble_task_record(tk, tu, mask, record_type);
	if (task_record == NULL) {
		/*
		 * The current configuration of the accounting system has
		 * resulted in records with no data; accordingly, we don't
		 * write these, but we return success.
		 */
		kmem_free(tu, sizeof (task_usage_t));
		return (0);
	}

	/*
	 * Pack object into buffer and run callback on it.
	 */
	bufsize = ea_pack_object(task_record, NULL, 0);
	buf = kmem_alloc(bufsize, KM_SLEEP);
	(void) ea_pack_object(task_record, buf, bufsize);
	ret = callback(ac_task, ubuf, ubufsize, buf, bufsize, actual);

	/*
	 * Free all previously allocated structures.
	 */
	kmem_free(buf, bufsize);
	ea_free_object(task_record, EUP_ALLOC);
	kmem_free(tu, sizeof (task_usage_t));
	return (ret);
}

/*
 * void exacct_commit_task(void *)
 *
 * Overview
 *   exacct_commit_task() calculates the final usage for a task and writes a
 *   task record if task accounting is active.  exacct_commit_task() is
 *   intended to be called from a task queue (taskq_t).
 *
 * Return values
 *   None.
 *
 * Caller's context
 *   Suitable for KM_SLEEP allocations.
 */

void
exacct_commit_task(void *arg)
{
	task_t *tk = (task_t *)arg;
	size_t size;
	zone_t *zone = tk->tk_zone;
	struct exacct_globals *acg;

	ASSERT(tk != task0p);
	ASSERT(tk->tk_memb_list == NULL);

	/*
	 * Don't do any extra work if the acctctl module isn't loaded.
	 */
	if (exacct_zone_key != ZONE_KEY_UNINITIALIZED) {
		acg = zone_getspecific(exacct_zone_key, zone);
		(void) exacct_assemble_task_usage(&acg->ac_task, tk,
		    exacct_commit_callback, NULL, 0, &size, EW_FINAL);
		if (tk->tk_zone != global_zone) {
			acg = zone_getspecific(exacct_zone_key, global_zone);
			(void) exacct_assemble_task_usage(&acg->ac_task, tk,
			    exacct_commit_callback, NULL, 0, &size, EW_FINAL);
		}
	}
	/*
	 * Release associated project and finalize task.
	 */
	task_end(tk);
}

static int
exacct_attach_proc_item(proc_usage_t *pu, ea_object_t *record, int res)
{
	int attached = 1;

	switch (res) {
	case AC_PROC_PID:
		(void) ea_attach_item(record, &pu->pu_pid,
		    sizeof (uint32_t), EXT_UINT32 | EXD_PROC_PID);
		break;
	case AC_PROC_UID:
		(void) ea_attach_item(record, &pu->pu_ruid,
		    sizeof (uint32_t), EXT_UINT32 | EXD_PROC_UID);
		break;
	case AC_PROC_FLAG:
		(void) ea_attach_item(record, &pu->pu_acflag,
		    sizeof (uint32_t), EXT_UINT32 | EXD_PROC_ACCT_FLAGS);
		break;
	case AC_PROC_GID:
		(void) ea_attach_item(record, &pu->pu_rgid,
		    sizeof (uint32_t), EXT_UINT32 | EXD_PROC_GID);
		break;
	case AC_PROC_PROJID:
		(void) ea_attach_item(record, &pu->pu_projid,
		    sizeof (uint32_t), EXT_UINT32 | EXD_PROC_PROJID);
		break;
	case AC_PROC_TASKID:
		(void) ea_attach_item(record, &pu->pu_taskid,
		    sizeof (uint32_t), EXT_UINT32 | EXD_PROC_TASKID);
		break;
	case AC_PROC_CPU:
		(void) ea_attach_item(record, &pu->pu_utimesec,
		    sizeof (uint64_t), EXT_UINT64 | EXD_PROC_CPU_USER_SEC);
		(void) ea_attach_item(record, &pu->pu_utimensec,
		    sizeof (uint64_t), EXT_UINT64 | EXD_PROC_CPU_USER_NSEC);
		(void) ea_attach_item(record, &pu->pu_stimesec,
		    sizeof (uint64_t), EXT_UINT64 | EXD_PROC_CPU_SYS_SEC);
		(void) ea_attach_item(record, &pu->pu_stimensec,
		    sizeof (uint64_t), EXT_UINT64 | EXD_PROC_CPU_SYS_NSEC);
		break;
	case AC_PROC_TIME:
		(void) ea_attach_item(record, &pu->pu_startsec,
		    sizeof (uint64_t), EXT_UINT64 | EXD_PROC_START_SEC);
		(void) ea_attach_item(record, &pu->pu_startnsec,
		    sizeof (uint64_t), EXT_UINT64 | EXD_PROC_START_NSEC);
		(void) ea_attach_item(record, &pu->pu_finishsec,
		    sizeof (uint64_t), EXT_UINT64 | EXD_PROC_FINISH_SEC);
		(void) ea_attach_item(record, &pu->pu_finishnsec,
		    sizeof (uint64_t), EXT_UINT64 | EXD_PROC_FINISH_NSEC);
		break;
	case AC_PROC_COMMAND:
		(void) ea_attach_item(record, pu->pu_command,
		    strlen(pu->pu_command) + 1, EXT_STRING | EXD_PROC_COMMAND);
		break;
	case AC_PROC_HOSTNAME:
		(void) ea_attach_item(record, pu->pu_nodename,
		    strlen(pu->pu_nodename) + 1,
		    EXT_STRING | EXD_PROC_HOSTNAME);
		break;
	case AC_PROC_TTY:
		(void) ea_attach_item(record, &pu->pu_major,
		    sizeof (uint32_t), EXT_UINT32 | EXD_PROC_TTY_MAJOR);
		(void) ea_attach_item(record, &pu->pu_minor,
		    sizeof (uint32_t), EXT_UINT32 | EXD_PROC_TTY_MINOR);
		break;
	case AC_PROC_MICROSTATE:
		(void) ea_attach_item(record, &pu->pu_majflt,
		    sizeof (uint64_t), EXT_UINT64 | EXD_PROC_FAULTS_MAJOR);
		(void) ea_attach_item(record, &pu->pu_minflt,
		    sizeof (uint64_t), EXT_UINT64 | EXD_PROC_FAULTS_MINOR);
		(void) ea_attach_item(record, &pu->pu_sndmsg,
		    sizeof (uint64_t), EXT_UINT64 | EXD_PROC_MESSAGES_SND);
		(void) ea_attach_item(record, &pu->pu_rcvmsg,
		    sizeof (uint64_t), EXT_UINT64 | EXD_PROC_MESSAGES_RCV);
		(void) ea_attach_item(record, &pu->pu_iblk,
		    sizeof (uint64_t), EXT_UINT64 | EXD_PROC_BLOCKS_IN);
		(void) ea_attach_item(record, &pu->pu_oblk,
		    sizeof (uint64_t), EXT_UINT64 | EXD_PROC_BLOCKS_OUT);
		(void) ea_attach_item(record, &pu->pu_ioch,
		    sizeof (uint64_t), EXT_UINT64 | EXD_PROC_CHARS_RDWR);
		(void) ea_attach_item(record, &pu->pu_vcsw,
		    sizeof (uint64_t), EXT_UINT64 | EXD_PROC_CONTEXT_VOL);
		(void) ea_attach_item(record, &pu->pu_icsw,
		    sizeof (uint64_t), EXT_UINT64 | EXD_PROC_CONTEXT_INV);
		(void) ea_attach_item(record, &pu->pu_nsig,
		    sizeof (uint64_t), EXT_UINT64 | EXD_PROC_SIGNALS);
		(void) ea_attach_item(record, &pu->pu_nswp,
		    sizeof (uint64_t), EXT_UINT64 | EXD_PROC_SWAPS);
		(void) ea_attach_item(record, &pu->pu_nscl,
		    sizeof (uint64_t), EXT_UINT64 | EXD_PROC_SYSCALLS);
		break;
	case AC_PROC_ANCPID:
		(void) ea_attach_item(record, &pu->pu_ancpid,
		    sizeof (uint32_t), EXT_UINT32 | EXD_PROC_ANCPID);
		break;
	case AC_PROC_WAIT_STATUS:
		(void) ea_attach_item(record, &pu->pu_wstat,
		    sizeof (uint32_t), EXT_UINT32 | EXD_PROC_WAIT_STATUS);
		break;
	case AC_PROC_ZONENAME:
		(void) ea_attach_item(record, pu->pu_zonename,
		    strlen(pu->pu_zonename) + 1,
		    EXT_STRING | EXD_PROC_ZONENAME);
		break;
	case AC_PROC_MEM:
		(void) ea_attach_item(record, &pu->pu_mem_rss_avg,
		    sizeof (uint64_t), EXT_UINT64 | EXD_PROC_MEM_RSS_AVG_K);
		(void) ea_attach_item(record, &pu->pu_mem_rss_max,
		    sizeof (uint64_t), EXT_UINT64 | EXD_PROC_MEM_RSS_MAX_K);
		break;
	default:
		attached = 0;
	}
	return (attached);
}

static ea_object_t *
exacct_assemble_proc_record(proc_usage_t *pu, ulong_t *mask,
    ea_catalog_t record_type)
{
	int res, count;
	ea_object_t *record;

	/*
	 * Assemble usage values into group.
	 */
	record = ea_alloc_group(EXT_GROUP | EXC_DEFAULT | record_type);
	for (res = 1, count = 0; res <= AC_PROC_MAX_RES; res++)
		if (BT_TEST(mask, res))
			count += exacct_attach_proc_item(pu, record, res);
	if (count == 0) {
		ea_free_object(record, EUP_ALLOC);
		record = NULL;
	}
	return (record);
}

/*
 * The following two routines assume that the process's p_lock is held or
 * that exacct_commit_proc() has been called from exit() when all lwps are
 * stopped.
 */
static void
exacct_calculate_proc_mstate(proc_t *p, proc_usage_t *pu)
{
	kthread_t *t;

	ASSERT(MUTEX_HELD(&p->p_lock));
	if ((t = p->p_tlist) == NULL)
		return;

	do {
		pu->pu_minflt += t->t_lwp->lwp_ru.minflt;
		pu->pu_majflt += t->t_lwp->lwp_ru.majflt;
		pu->pu_sndmsg += t->t_lwp->lwp_ru.msgsnd;
		pu->pu_rcvmsg += t->t_lwp->lwp_ru.msgrcv;
		pu->pu_ioch += t->t_lwp->lwp_ru.ioch;
		pu->pu_iblk += t->t_lwp->lwp_ru.inblock;
		pu->pu_oblk += t->t_lwp->lwp_ru.oublock;
		pu->pu_vcsw += t->t_lwp->lwp_ru.nvcsw;
		pu->pu_icsw += t->t_lwp->lwp_ru.nivcsw;
		pu->pu_nsig += t->t_lwp->lwp_ru.nsignals;
		pu->pu_nswp += t->t_lwp->lwp_ru.nswap;
		pu->pu_nscl += t->t_lwp->lwp_ru.sysc;
	} while ((t = t->t_forw) != p->p_tlist);
}

static void
exacct_copy_proc_mstate(proc_t *p, proc_usage_t *pu)
{
	pu->pu_minflt = p->p_ru.minflt;
	pu->pu_majflt = p->p_ru.majflt;
	pu->pu_sndmsg = p->p_ru.msgsnd;
	pu->pu_rcvmsg = p->p_ru.msgrcv;
	pu->pu_ioch = p->p_ru.ioch;
	pu->pu_iblk = p->p_ru.inblock;
	pu->pu_oblk = p->p_ru.oublock;
	pu->pu_vcsw = p->p_ru.nvcsw;
	pu->pu_icsw = p->p_ru.nivcsw;
	pu->pu_nsig = p->p_ru.nsignals;
	pu->pu_nswp = p->p_ru.nswap;
	pu->pu_nscl = p->p_ru.sysc;
}

void
exacct_calculate_proc_usage(proc_t *p, proc_usage_t *pu, ulong_t *mask,
    int flag, int wstat)
{
	timestruc_t ts, ts_run;

	ASSERT(MUTEX_HELD(&p->p_lock));

	/*
	 * Convert CPU and execution times to sec/nsec format.
	 */
	if (BT_TEST(mask, AC_PROC_CPU)) {
		hrt2ts(mstate_aggr_state(p, LMS_USER), &ts);
		pu->pu_utimesec = (uint64_t)(ulong_t)ts.tv_sec;
		pu->pu_utimensec = (uint64_t)(ulong_t)ts.tv_nsec;
		hrt2ts(mstate_aggr_state(p, LMS_SYSTEM), &ts);
		pu->pu_stimesec = (uint64_t)(ulong_t)ts.tv_sec;
		pu->pu_stimensec = (uint64_t)(ulong_t)ts.tv_nsec;
	}
	if (BT_TEST(mask, AC_PROC_TIME)) {
		gethrestime(&ts);
		pu->pu_finishsec = (uint64_t)(ulong_t)ts.tv_sec;
		pu->pu_finishnsec = (uint64_t)(ulong_t)ts.tv_nsec;
		hrt2ts(gethrtime() - p->p_mstart, &ts_run);
		ts.tv_sec -= ts_run.tv_sec;
		ts.tv_nsec -= ts_run.tv_nsec;
		if (ts.tv_nsec < 0) {
			ts.tv_sec--;
			if ((ts.tv_nsec = ts.tv_nsec + NANOSEC) >= NANOSEC) {
				ts.tv_sec++;
				ts.tv_nsec -= NANOSEC;
			}
		}
		pu->pu_startsec = (uint64_t)(ulong_t)ts.tv_sec;
		pu->pu_startnsec = (uint64_t)(ulong_t)ts.tv_nsec;
	}

	pu->pu_pid = p->p_pidp->pid_id;
	pu->pu_acflag = p->p_user.u_acflag;
	pu->pu_projid = p->p_task->tk_proj->kpj_id;
	pu->pu_taskid = p->p_task->tk_tkid;
	pu->pu_major = getmajor(p->p_sessp->s_dev);
	pu->pu_minor = getminor(p->p_sessp->s_dev);
	pu->pu_ancpid = p->p_ancpid;
	pu->pu_wstat = wstat;
	/*
	 * Compute average RSS in K.  The denominator is the number of
	 * samples: the number of clock ticks plus the initial value.
	 */
	pu->pu_mem_rss_avg = (PTOU(p)->u_mem / (p->p_stime + p->p_utime + 1)) *
	    (PAGESIZE / 1024);
	pu->pu_mem_rss_max = PTOU(p)->u_mem_max * (PAGESIZE / 1024);

	mutex_enter(&p->p_crlock);
	pu->pu_ruid = crgetruid(p->p_cred);
	pu->pu_rgid = crgetrgid(p->p_cred);
	mutex_exit(&p->p_crlock);

	bcopy(p->p_user.u_comm, pu->pu_command, strlen(p->p_user.u_comm) + 1);
	bcopy(p->p_zone->zone_name, pu->pu_zonename,
	    strlen(p->p_zone->zone_name) + 1);
	bcopy(p->p_zone->zone_nodename, pu->pu_nodename,
	    strlen(p->p_zone->zone_nodename) + 1);

	/*
	 * Calculate microstate accounting data for a process that is still
	 * running.  Presently, we explicitly collect all of the LWP usage into
	 * the proc usage structure here.
	 */
	if (flag & EW_PARTIAL)
		exacct_calculate_proc_mstate(p, pu);
	if (flag & EW_FINAL)
		exacct_copy_proc_mstate(p, pu);
}
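
/*
 * Worked example for the RSS average computed above (numbers are
 * illustrative): with u_mem = 51200 pages sampled over p_utime + p_stime =
 * 99 ticks, the average is 51200 / (99 + 1) = 512 pages per sample; with an
 * 8K page size that is reported as 512 * (8192 / 1024) = 4096K.
 */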

/*
 * int exacct_assemble_proc_usage(ac_info_t *, proc_usage_t *,
 *	int (*)(ac_info_t *, void *, size_t, void *, size_t, size_t *),
 *	void *, size_t, size_t *, int)
 *
 * Overview
 *   Assembles a record with miscellaneous accounting information about the
 *   process and executes the callback on it.  It is the callback's job to
 *   set "actual" to the size of the record.
 *
 * Return values
 *   The result of the callback function, unless the extended process
 *   accounting feature is not active, in which case ENOTACTIVE is returned.
 *
 * Caller's context
 *   Suitable for KM_SLEEP allocations.
 */
int
exacct_assemble_proc_usage(ac_info_t *ac_proc, proc_usage_t *pu,
    int (*callback)(ac_info_t *, void *, size_t, void *, size_t, size_t *),
    void *ubuf, size_t ubufsize, size_t *actual, int flag)
{
	ulong_t mask[AC_MASK_SZ];
	ea_object_t *proc_record;
	ea_catalog_t record_type;
	void *buf;
	size_t bufsize;
	int ret;

	ASSERT(flag == EW_FINAL || flag == EW_PARTIAL);

	mutex_enter(&ac_proc->ac_lock);
	if (ac_proc->ac_state == AC_OFF) {
		mutex_exit(&ac_proc->ac_lock);
		return (ENOTACTIVE);
	}
	bt_copy(&ac_proc->ac_mask[0], mask, AC_MASK_SZ);
	mutex_exit(&ac_proc->ac_lock);

	switch (flag) {
	case EW_FINAL:
		record_type = EXD_GROUP_PROC;
		break;
	case EW_PARTIAL:
		record_type = EXD_GROUP_PROC_PARTIAL;
		break;
	}

	proc_record = exacct_assemble_proc_record(pu, mask, record_type);
	if (proc_record == NULL)
		return (0);

	/*
	 * Pack object into buffer and pass to callback.
	 */
	bufsize = ea_pack_object(proc_record, NULL, 0);
	buf = kmem_alloc(bufsize, KM_SLEEP);
	(void) ea_pack_object(proc_record, buf, bufsize);

	ret = callback(ac_proc, ubuf, ubufsize, buf, bufsize, actual);

	/*
	 * Free all previous allocations.
	 */
	kmem_free(buf, bufsize);
	ea_free_object(proc_record, EUP_ALLOC);
	return (ret);
}

/*
 * int exacct_commit_callback(ac_info_t *, void *, size_t, void *, size_t,
 *	size_t *)
 *
 * Overview
 *   exacct_commit_callback() writes the indicated buffer to the indicated
 *   extended accounting file.
 *
 * Return values
 *   The result of the write operation is returned.  "actual" is updated to
 *   contain the number of bytes actually written.
 *
 * Caller's context
 *   Suitable for a vn_rdwr() operation.
 */
/*ARGSUSED*/
int
exacct_commit_callback(ac_info_t *info, void *ubuf, size_t ubufsize,
    void *buf, size_t bufsize, size_t *actual)
{
	int error = 0;

	*actual = 0;
	if ((error = exacct_vn_write(info, buf, bufsize)) == 0)
		*actual = bufsize;
	return (error);
}

static void
exacct_do_commit_proc(ac_info_t *ac_proc, proc_t *p, int wstat)
{
	size_t size;
	proc_usage_t *pu;
	ulong_t mask[AC_MASK_SZ];

	mutex_enter(&ac_proc->ac_lock);
	if (ac_proc->ac_state == AC_ON) {
		bt_copy(&ac_proc->ac_mask[0], mask, AC_MASK_SZ);
		mutex_exit(&ac_proc->ac_lock);
	} else {
		mutex_exit(&ac_proc->ac_lock);
		return;
	}

	mutex_enter(&p->p_lock);
	size = strlen(p->p_user.u_comm) + 1;
	mutex_exit(&p->p_lock);

	pu = kmem_alloc(sizeof (proc_usage_t), KM_SLEEP);
	pu->pu_command = kmem_alloc(size, KM_SLEEP);
	mutex_enter(&p->p_lock);
	exacct_calculate_proc_usage(p, pu, mask, EW_FINAL, wstat);
	mutex_exit(&p->p_lock);

	(void) exacct_assemble_proc_usage(ac_proc, pu,
	    exacct_commit_callback, NULL, 0, &size, EW_FINAL);

	kmem_free(pu->pu_command, strlen(pu->pu_command) + 1);
	kmem_free(pu, sizeof (proc_usage_t));
}

/*
 * void exacct_commit_proc(proc_t *, int)
 *
 * Overview
 *   exacct_commit_proc() calculates the final usage for a process, updating
 *   the task usage if task accounting is active, and writing a process
 *   record if process accounting is active.
 *   exacct_commit_proc() is intended to be called from proc_exit().
 *
 * Return values
 *   None.
 *
 * Caller's context
 *   Suitable for KM_SLEEP allocations.  p_lock must not be held at entry.
 */
void
exacct_commit_proc(proc_t *p, int wstat)
{
	zone_t *zone = p->p_zone;
	struct exacct_globals *acg, *gacg = NULL;

	if (exacct_zone_key == ZONE_KEY_UNINITIALIZED) {
		/*
		 * acctctl module not loaded.  Nothing to do.
		 */
		return;
	}
	acg = zone_getspecific(exacct_zone_key, zone);
	exacct_do_commit_proc(&acg->ac_proc, p, wstat);
	if (zone != global_zone) {
		gacg = zone_getspecific(exacct_zone_key, global_zone);
		exacct_do_commit_proc(&gacg->ac_proc, p, wstat);
	}
}

static int
exacct_attach_netstat_item(net_stat_t *ns, ea_object_t *record, int res)
{
	int attached = 1;

	switch (res) {
	case AC_NET_NAME:
		(void) ea_attach_item(record, ns->ns_name,
		    strlen(ns->ns_name) + 1, EXT_STRING | EXD_NET_STATS_NAME);
		break;
	case AC_NET_CURTIME:
		{
			uint64_t now;
			timestruc_t ts;

			gethrestime(&ts);
			now = (uint64_t)(ulong_t)ts.tv_sec;
			(void) ea_attach_item(record, &now, sizeof (uint64_t),
			    EXT_UINT64 | EXD_NET_STATS_CURTIME);
		}
		break;
	case AC_NET_IBYTES:
		(void) ea_attach_item(record, &ns->ns_ibytes,
		    sizeof (uint64_t), EXT_UINT64 | EXD_NET_STATS_IBYTES);
		break;
	case AC_NET_OBYTES:
		(void) ea_attach_item(record, &ns->ns_obytes,
		    sizeof (uint64_t), EXT_UINT64 | EXD_NET_STATS_OBYTES);
		break;
	case AC_NET_IPKTS:
		(void) ea_attach_item(record, &ns->ns_ipackets,
		    sizeof (uint64_t), EXT_UINT64 | EXD_NET_STATS_IPKTS);
		break;
	case AC_NET_OPKTS:
		(void) ea_attach_item(record, &ns->ns_opackets,
		    sizeof (uint64_t), EXT_UINT64 | EXD_NET_STATS_OPKTS);
		break;
	case AC_NET_IERRPKTS:
		(void) ea_attach_item(record, &ns->ns_ierrors,
		    sizeof (uint64_t), EXT_UINT64 | EXD_NET_STATS_IERRPKTS);
		break;
	case AC_NET_OERRPKTS:
		(void) ea_attach_item(record, &ns->ns_oerrors,
		    sizeof (uint64_t), EXT_UINT64 | EXD_NET_STATS_OERRPKTS);
		break;
	default:
		attached = 0;
	}
	return (attached);
}

static int
exacct_attach_netdesc_item(net_desc_t *nd, ea_object_t *record, int res)
{
	int attached = 1;

	switch (res) {
	case AC_NET_NAME:
		(void) ea_attach_item(record, nd->nd_name,
		    strlen(nd->nd_name) + 1, EXT_STRING | EXD_NET_DESC_NAME);
		break;
	case AC_NET_DEVNAME:
		(void) ea_attach_item(record, nd->nd_devname,
		    strlen(nd->nd_devname) + 1, EXT_STRING |
		    EXD_NET_DESC_DEVNAME);
		break;
	case AC_NET_EHOST:
		(void) ea_attach_item(record, &nd->nd_ehost,
		    sizeof (nd->nd_ehost), EXT_RAW | EXD_NET_DESC_EHOST);
		break;
	case AC_NET_EDEST:
		(void) ea_attach_item(record, &nd->nd_edest,
		    sizeof (nd->nd_edest), EXT_RAW | EXD_NET_DESC_EDEST);
		break;
	case AC_NET_VLAN_TPID:
		(void) ea_attach_item(record, &nd->nd_vlan_tpid,
		    sizeof (ushort_t), EXT_UINT16 | EXD_NET_DESC_VLAN_TPID);
		break;
	case AC_NET_VLAN_TCI:
		(void) ea_attach_item(record, &nd->nd_vlan_tci,
		    sizeof (ushort_t), EXT_UINT16 | EXD_NET_DESC_VLAN_TCI);
		break;
	case AC_NET_SAP:
		(void) ea_attach_item(record, &nd->nd_sap,
		    sizeof (ushort_t), EXT_UINT16 | EXD_NET_DESC_SAP);
		break;
	case AC_NET_PRIORITY:
		(void) ea_attach_item(record, &nd->nd_priority,
		    sizeof (ushort_t), EXT_UINT16 | EXD_NET_DESC_PRIORITY);
		break;
	case AC_NET_BWLIMIT:
		(void) ea_attach_item(record, &nd->nd_bw_limit,
		    sizeof (uint64_t), EXT_UINT64 | EXD_NET_DESC_BWLIMIT);
		break;
	case AC_NET_SADDR:
		if (nd->nd_isv4) {
			(void) ea_attach_item(record, &nd->nd_saddr[3],
			    sizeof (uint32_t), EXT_UINT32 |
			    EXD_NET_DESC_V4SADDR);
		} else {
			(void) ea_attach_item(record, &nd->nd_saddr,
			    sizeof (nd->nd_saddr), EXT_RAW |
			    EXD_NET_DESC_V6SADDR);
		}
		break;
	case AC_NET_DADDR:
		if (nd->nd_isv4) {
			(void) ea_attach_item(record, &nd->nd_daddr[3],
			    sizeof (uint32_t), EXT_UINT32 |
			    EXD_NET_DESC_V4DADDR);
		} else {
			(void) ea_attach_item(record, &nd->nd_daddr,
			    sizeof (nd->nd_daddr), EXT_RAW |
			    EXD_NET_DESC_V6DADDR);
		}
		break;
	case AC_NET_SPORT:
		(void) ea_attach_item(record, &nd->nd_sport,
		    sizeof (uint16_t), EXT_UINT16 | EXD_NET_DESC_SPORT);
		break;
	case AC_NET_DPORT:
		(void) ea_attach_item(record, &nd->nd_dport,
		    sizeof (uint16_t), EXT_UINT16 | EXD_NET_DESC_DPORT);
		break;
	case AC_NET_PROTOCOL:
		(void) ea_attach_item(record, &nd->nd_protocol,
		    sizeof (uint8_t), EXT_UINT8 | EXD_NET_DESC_PROTOCOL);
		break;
	case AC_NET_DSFIELD:
		(void) ea_attach_item(record, &nd->nd_dsfield,
		    sizeof (uint8_t), EXT_UINT8 | EXD_NET_DESC_DSFIELD);
		break;
	default:
		attached = 0;
	}
	return (attached);
}

static ea_object_t *
exacct_assemble_net_record(void *ninfo, ulong_t *mask,
    ea_catalog_t record_type, int what)
{
	int res;
	int count;
	ea_object_t *record;

	/*
	 * Assemble usage values into group.
	 */
	record = ea_alloc_group(EXT_GROUP | EXC_DEFAULT | record_type);
	for (res = 1, count = 0; res <= AC_NET_MAX_RES; res++)
		if (BT_TEST(mask, res)) {
			if (what == EX_NET_LNDESC_REC ||
			    what == EX_NET_FLDESC_REC) {
				count += exacct_attach_netdesc_item(
				    (net_desc_t *)ninfo, record, res);
			} else {
				count += exacct_attach_netstat_item(
				    (net_stat_t *)ninfo, record, res);
			}
		}
	if (count == 0) {
		ea_free_object(record, EUP_ALLOC);
		record = NULL;
	}
	return (record);
}

int
exacct_assemble_net_usage(ac_info_t *ac_net, void *ninfo,
    int (*callback)(ac_info_t *, void *, size_t, void *, size_t, size_t *),
    void *ubuf, size_t ubufsize, size_t *actual, int what)
{
	ulong_t mask[AC_MASK_SZ];
	ea_object_t *net_desc;
	ea_catalog_t record_type;
	void *buf;
	size_t bufsize;
	int ret;

	mutex_enter(&ac_net->ac_lock);
	if (ac_net->ac_state == AC_OFF) {
		mutex_exit(&ac_net->ac_lock);
		return (ENOTACTIVE);
	}
	bt_copy(&ac_net->ac_mask[0], mask, AC_MASK_SZ);
	mutex_exit(&ac_net->ac_lock);

	switch (what) {
	case EX_NET_LNDESC_REC:
		record_type = EXD_GROUP_NET_LINK_DESC;
		break;
	case EX_NET_LNSTAT_REC:
		record_type = EXD_GROUP_NET_LINK_STATS;
		break;
	case EX_NET_FLDESC_REC:
		record_type = EXD_GROUP_NET_FLOW_DESC;
		break;
	case EX_NET_FLSTAT_REC:
		record_type = EXD_GROUP_NET_FLOW_STATS;
		break;
	}

	net_desc = exacct_assemble_net_record(ninfo, mask, record_type, what);
	if (net_desc == NULL)
		return (0);

	/*
	 * Pack object into buffer and pass to callback.
	 */
	bufsize = ea_pack_object(net_desc, NULL, 0);
	buf = kmem_alloc(bufsize, KM_NOSLEEP);
	if (buf == NULL)
		return (ENOMEM);

	(void) ea_pack_object(net_desc, buf, bufsize);

	ret = callback(ac_net, ubuf, ubufsize, buf, bufsize, actual);

	/*
	 * Free all previous allocations.
	 */
	kmem_free(buf, bufsize);
	ea_free_object(net_desc, EUP_ALLOC);
	return (ret);
}

int
exacct_commit_netinfo(void *arg, int what)
{
	size_t size;
	ulong_t mask[AC_MASK_SZ];
	struct exacct_globals *acg;
	ac_info_t *ac_net;

	if (exacct_zone_key == ZONE_KEY_UNINITIALIZED) {
		/*
		 * acctctl module not loaded.  Nothing to do.
		 */
		return (ENOTACTIVE);
	}

	/*
	 * Even though each zone nominally has its own flow accounting settings
	 * (ac_flow), these are only maintained by and for the global zone.
	 *
	 * If this were to change in the future, this function should grow a
	 * second zoneid (or zone) argument, and use the corresponding zone's
	 * settings rather than always using those of the global zone.
	 */
	acg = zone_getspecific(exacct_zone_key, global_zone);
	ac_net = &acg->ac_net;

	mutex_enter(&ac_net->ac_lock);
	if (ac_net->ac_state == AC_OFF) {
		mutex_exit(&ac_net->ac_lock);
		return (ENOTACTIVE);
	}
	bt_copy(&ac_net->ac_mask[0], mask, AC_MASK_SZ);
	mutex_exit(&ac_net->ac_lock);

	return (exacct_assemble_net_usage(ac_net, arg, exacct_commit_callback,
	    NULL, 0, &size, what));
}

static int
exacct_attach_flow_item(flow_usage_t *fu, ea_object_t *record, int res)
{
	int attached = 1;

	switch (res) {
	case AC_FLOW_SADDR:
		if (fu->fu_isv4) {
			(void) ea_attach_item(record, &fu->fu_saddr[3],
			    sizeof (uint32_t), EXT_UINT32 | EXD_FLOW_V4SADDR);
		} else {
			(void) ea_attach_item(record, &fu->fu_saddr,
			    sizeof (fu->fu_saddr), EXT_RAW |
			    EXD_FLOW_V6SADDR);
		}
		break;
	case AC_FLOW_DADDR:
		if (fu->fu_isv4) {
			(void) ea_attach_item(record, &fu->fu_daddr[3],
			    sizeof (uint32_t), EXT_UINT32 | EXD_FLOW_V4DADDR);
		} else {
			(void) ea_attach_item(record, &fu->fu_daddr,
			    sizeof (fu->fu_daddr), EXT_RAW |
			    EXD_FLOW_V6DADDR);
		}
		break;
	case AC_FLOW_SPORT:
		(void) ea_attach_item(record, &fu->fu_sport,
		    sizeof (uint16_t), EXT_UINT16 | EXD_FLOW_SPORT);
		break;
	case AC_FLOW_DPORT:
		(void) ea_attach_item(record, &fu->fu_dport,
		    sizeof (uint16_t), EXT_UINT16 | EXD_FLOW_DPORT);
		break;
	case AC_FLOW_PROTOCOL:
		(void) ea_attach_item(record, &fu->fu_protocol,
		    sizeof (uint8_t), EXT_UINT8 | EXD_FLOW_PROTOCOL);
		break;
	case AC_FLOW_DSFIELD:
		(void) ea_attach_item(record, &fu->fu_dsfield,
		    sizeof (uint8_t), EXT_UINT8 | EXD_FLOW_DSFIELD);
		break;
	case AC_FLOW_CTIME:
		(void) ea_attach_item(record, &fu->fu_ctime,
		    sizeof (uint64_t), EXT_UINT64 | EXD_FLOW_CTIME);
		break;
	case AC_FLOW_LSEEN:
		(void) ea_attach_item(record, &fu->fu_lseen,
		    sizeof (uint64_t), EXT_UINT64 | EXD_FLOW_LSEEN);
		break;
	case AC_FLOW_NBYTES:
		(void) ea_attach_item(record, &fu->fu_nbytes,
		    sizeof (uint64_t), EXT_UINT32 | EXD_FLOW_NBYTES);
		break;
	case AC_FLOW_NPKTS:
		(void) ea_attach_item(record, &fu->fu_npackets,
		    sizeof (uint64_t), EXT_UINT32 | EXD_FLOW_NPKTS);
		break;
	case AC_FLOW_PROJID:
		if (fu->fu_projid >= 0) {
			(void) ea_attach_item(record, &fu->fu_projid,
			    sizeof (uint32_t), EXT_UINT32 | EXD_FLOW_PROJID);
		}
		break;
	case AC_FLOW_UID:
		if (fu->fu_userid >= 0) {
			(void) ea_attach_item(record, &fu->fu_userid,
			    sizeof (uint32_t), EXT_UINT32 | EXD_FLOW_UID);
		}
		break;
	case AC_FLOW_ANAME:
		(void) ea_attach_item(record, fu->fu_aname,
		    strlen(fu->fu_aname) + 1, EXT_STRING | EXD_FLOW_ANAME);
		break;
	default:
		attached = 0;
	}
	return (attached);
}

static ea_object_t *
exacct_assemble_flow_record(flow_usage_t *fu, ulong_t *mask,
    ea_catalog_t record_type)
{
	int res, count;
	ea_object_t *record;

	/*
	 * Assemble usage values into group.
	 */
	record = ea_alloc_group(EXT_GROUP | EXC_DEFAULT | record_type);
	for (res = 1, count = 0; res <= AC_FLOW_MAX_RES; res++)
		if (BT_TEST(mask, res))
			count += exacct_attach_flow_item(fu, record, res);
	if (count == 0) {
		ea_free_object(record, EUP_ALLOC);
		record = NULL;
	}
	return (record);
}

int
exacct_assemble_flow_usage(ac_info_t *ac_flow, flow_usage_t *fu,
    int (*callback)(ac_info_t *, void *, size_t, void *, size_t, size_t *),
    void *ubuf, size_t ubufsize, size_t *actual)
{
	ulong_t mask[AC_MASK_SZ];
	ea_object_t *flow_usage;
	ea_catalog_t record_type;
	void *buf;
	size_t bufsize;
	int ret;

	mutex_enter(&ac_flow->ac_lock);
	if (ac_flow->ac_state == AC_OFF) {
		mutex_exit(&ac_flow->ac_lock);
		return (ENOTACTIVE);
	}
	bt_copy(&ac_flow->ac_mask[0], mask, AC_MASK_SZ);
	mutex_exit(&ac_flow->ac_lock);

	record_type = EXD_GROUP_FLOW;

	flow_usage = exacct_assemble_flow_record(fu, mask, record_type);
	if (flow_usage == NULL) {
		return (0);
	}

	/*
	 * Pack object into buffer and pass to callback.
	 */
	bufsize = ea_pack_object(flow_usage, NULL, 0);
	buf = kmem_alloc(bufsize, KM_NOSLEEP);
	if (buf == NULL) {
		return (ENOMEM);
	}

	(void) ea_pack_object(flow_usage, buf, bufsize);

	ret = callback(ac_flow, ubuf, ubufsize, buf, bufsize, actual);

	/*
	 * Free all previous allocations.
	 */
	kmem_free(buf, bufsize);
	ea_free_object(flow_usage, EUP_ALLOC);
	return (ret);
}

void
exacct_commit_flow(void *arg)
{
	flow_usage_t *f = (flow_usage_t *)arg;
	size_t size;
	ulong_t mask[AC_MASK_SZ];
	struct exacct_globals *acg;
	ac_info_t *ac_flow;

	if (exacct_zone_key == ZONE_KEY_UNINITIALIZED) {
		/*
		 * acctctl module not loaded.  Nothing to do.
		 */
		return;
	}

	/*
	 * Even though each zone nominally has its own flow accounting settings
	 * (ac_flow), these are only maintained by and for the global zone.
	 *
	 * If this were to change in the future, this function should grow a
	 * second zoneid (or zone) argument, and use the corresponding zone's
	 * settings rather than always using those of the global zone.
	 */
	acg = zone_getspecific(exacct_zone_key, global_zone);
	ac_flow = &acg->ac_flow;

	mutex_enter(&ac_flow->ac_lock);
	if (ac_flow->ac_state == AC_OFF) {
		mutex_exit(&ac_flow->ac_lock);
		return;
	}
	bt_copy(&ac_flow->ac_mask[0], mask, AC_MASK_SZ);
	mutex_exit(&ac_flow->ac_lock);

	(void) exacct_assemble_flow_usage(ac_flow, f, exacct_commit_callback,
	    NULL, 0, &size);
}

/*
 * int exacct_tag_task(ac_info_t *, task_t *, void *, size_t, int)
 *
 * Overview
 *   exacct_tag_task() provides the exacct record construction and writing
 *   support required by putacct(2) for task entities.
 *
 * Return values
 *   The result of the write operation is returned, unless the extended
 *   accounting facility is not active, in which case ENOTACTIVE is returned.
 *
 * Caller's context
 *   Suitable for KM_SLEEP allocations.
 */
int
exacct_tag_task(ac_info_t *ac_task, task_t *tk, void *ubuf, size_t ubufsz,
    int flags)
{
	int error = 0;
	void *buf;
	size_t bufsize;
	ea_catalog_t cat;
	ea_object_t *tag;

	mutex_enter(&ac_task->ac_lock);
	if (ac_task->ac_state == AC_OFF || ac_task->ac_vnode == NULL) {
		mutex_exit(&ac_task->ac_lock);
		return (ENOTACTIVE);
	}
	mutex_exit(&ac_task->ac_lock);

	tag = ea_alloc_group(EXT_GROUP | EXC_DEFAULT | EXD_GROUP_TASK_TAG);
	(void) ea_attach_item(tag, &tk->tk_tkid, 0,
	    EXT_UINT32 | EXC_DEFAULT | EXD_TASK_TASKID);
	(void) ea_attach_item(tag, tk->tk_zone->zone_nodename, 0,
	    EXT_STRING | EXC_DEFAULT | EXD_TASK_HOSTNAME);
	if (flags == EP_RAW)
		cat = EXT_RAW | EXC_DEFAULT | EXD_TASK_TAG;
	else
		cat = EXT_EXACCT_OBJECT | EXC_DEFAULT | EXD_TASK_TAG;
	(void) ea_attach_item(tag, ubuf, ubufsz, cat);

	bufsize = ea_pack_object(tag, NULL, 0);
	buf = kmem_alloc(bufsize, KM_SLEEP);
	(void) ea_pack_object(tag, buf, bufsize);
	error = exacct_vn_write(ac_task, buf, bufsize);
	kmem_free(buf, bufsize);
	ea_free_object(tag, EUP_ALLOC);
	return (error);
}
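
/*
 * For illustration: putacct(2) against a task ID ultimately reaches
 * exacct_tag_task() with the user's buffer already copied in.  A call such
 * as
 *
 *	error = exacct_tag_task(ac_task, tk, buf, bufsz, EP_RAW);
 *
 * stores the payload as an EXT_RAW item, while EP_EXACCT_OBJECT marks it as
 * an embedded packed exacct object that libexacct can unpack again.
 */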

/*
 * int exacct_tag_proc(ac_info_t *, pid_t, taskid_t, void *, size_t, int,
 *	const char *)
 *
 * Overview
 *   exacct_tag_proc() provides the exacct record construction and writing
 *   support required by putacct(2) for processes.
 *
 * Return values
 *   The result of the write operation is returned, unless the extended
 *   accounting facility is not active, in which case ENOTACTIVE is returned.
 *
 * Caller's context
 *   Suitable for KM_SLEEP allocations.
 */
int
exacct_tag_proc(ac_info_t *ac_proc, pid_t pid, taskid_t tkid, void *ubuf,
    size_t ubufsz, int flags, const char *hostname)
{
	int error = 0;
	void *buf;
	size_t bufsize;
	ea_catalog_t cat;
	ea_object_t *tag;

	mutex_enter(&ac_proc->ac_lock);
	if (ac_proc->ac_state == AC_OFF || ac_proc->ac_vnode == NULL) {
		mutex_exit(&ac_proc->ac_lock);
		return (ENOTACTIVE);
	}
	mutex_exit(&ac_proc->ac_lock);

	tag = ea_alloc_group(EXT_GROUP | EXC_DEFAULT | EXD_GROUP_PROC_TAG);
	(void) ea_attach_item(tag, &pid, sizeof (uint32_t),
	    EXT_UINT32 | EXC_DEFAULT | EXD_PROC_PID);
	(void) ea_attach_item(tag, &tkid, 0,
	    EXT_UINT32 | EXC_DEFAULT | EXD_TASK_TASKID);
	(void) ea_attach_item(tag, (void *)hostname, 0,
	    EXT_STRING | EXC_DEFAULT | EXD_TASK_HOSTNAME);
	if (flags == EP_RAW)
		cat = EXT_RAW | EXC_DEFAULT | EXD_PROC_TAG;
	else
		cat = EXT_EXACCT_OBJECT | EXC_DEFAULT | EXD_PROC_TAG;
	(void) ea_attach_item(tag, ubuf, ubufsz, cat);

	bufsize = ea_pack_object(tag, NULL, 0);
	buf = kmem_alloc(bufsize, KM_SLEEP);
	(void) ea_pack_object(tag, buf, bufsize);
	error = exacct_vn_write(ac_proc, buf, bufsize);
	kmem_free(buf, bufsize);
	ea_free_object(tag, EUP_ALLOC);
	return (error);
}

/*
 * void exacct_init(void)
 *
 * Overview
 *   Initializes the extended accounting subsystem.
 *
 * Return values
 *   None.
 *
 * Caller's context
 *   Suitable for KM_SLEEP allocations.
 */
void
exacct_init()
{
	exacct_queue = system_taskq;
	exacct_object_cache = kmem_cache_create("exacct_object_cache",
	    sizeof (ea_object_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
	task_commit_thread_init();
}

/*
 * exacct_snapshot_proc_mstate() copies a process's microstate accounting data
 * and resource usage counters into a given task_usage_t.  It differs from
 * exacct_copy_proc_mstate() in that here a) we are copying to a task_usage_t,
 * b) p_lock will have been acquired earlier in the call path and c) we
 * are here including the process's user and system times.
 */
static void
exacct_snapshot_proc_mstate(proc_t *p, task_usage_t *tu)
{
	tu->tu_utime = mstate_aggr_state(p, LMS_USER);
	tu->tu_stime = mstate_aggr_state(p, LMS_SYSTEM);
	tu->tu_minflt = p->p_ru.minflt;
	tu->tu_majflt = p->p_ru.majflt;
	tu->tu_sndmsg = p->p_ru.msgsnd;
	tu->tu_rcvmsg = p->p_ru.msgrcv;
	tu->tu_ioch = p->p_ru.ioch;
	tu->tu_iblk = p->p_ru.inblock;
	tu->tu_oblk = p->p_ru.oublock;
	tu->tu_vcsw = p->p_ru.nvcsw;
	tu->tu_icsw = p->p_ru.nivcsw;
	tu->tu_nsig = p->p_ru.nsignals;
	tu->tu_nswp = p->p_ru.nswap;
	tu->tu_nscl = p->p_ru.sysc;
}
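
/*
 * Worked example of the inherited-usage bookkeeping (numbers are
 * illustrative): if a process has already made 40 system calls when it is
 * moved into a new task, exacct_move_mstate() below adds 40 to the old
 * task's tk_usage->tu_nscl and to the new task's tk_inherited->tu_nscl.
 * When the new task's record is eventually written, exacct_sub_task_mstate()
 * removes the inherited 40, so the new task is charged only for calls made
 * while the process was one of its members.
 */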

/*
 * void exacct_move_mstate(proc_t *, task_t *, task_t *)
 *
 * Overview
 *   exacct_move_mstate() is called by task_change() and accounts for
 *   a process's resource usage when it is moved from one task to another.
 *
 *   The process's usage at this point is recorded in the new task so
 *   that it can be excluded from the calculation of resources consumed
 *   by that task.
 *
 *   The resource usage inherited by the new task is also added to the
 *   aggregate maintained by the old task for processes that have exited.
 *
 * Return values
 *   None.
 *
 * Caller's context
 *   pidlock and p_lock held across exacct_move_mstate().
 */
void
exacct_move_mstate(proc_t *p, task_t *oldtk, task_t *newtk)
{
	task_usage_t tu;

	/* Take a snapshot of this process's mstate and RU counters */
	exacct_snapshot_proc_mstate(p, &tu);

	/*
	 * Use the snapshot to increment the aggregate usage of the old
	 * task, and the inherited usage of the new one.
	 */
	mutex_enter(&oldtk->tk_usage_lock);
	exacct_add_task_mstate(oldtk->tk_usage, &tu);
	mutex_exit(&oldtk->tk_usage_lock);
	mutex_enter(&newtk->tk_usage_lock);
	exacct_add_task_mstate(newtk->tk_inherited, &tu);
	mutex_exit(&newtk->tk_usage_lock);
}