/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/exacct.h>
#include <sys/exacct_catalog.h>
#include <sys/disp.h>
#include <sys/task.h>
#include <sys/proc.h>
#include <sys/cmn_err.h>
#include <sys/kmem.h>
#include <sys/project.h>
#include <sys/systm.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/acctctl.h>
#include <sys/time.h>
#include <sys/utsname.h>
#include <sys/session.h>
#include <sys/sysmacros.h>
#include <sys/bitmap.h>
#include <sys/msacct.h>
#include <sys/mac.h>

/*
 * exacct usage and recording routines
 *
 * wracct(2), getacct(2), and the records written at process or task
 * termination are constructed using the exacct_assemble_[task,proc]_usage()
 * functions, which take a callback that performs the appropriate action on
 * the packed exacct record for the task or process.  For the process-related
 * actions, we partition the routines such that the data collecting component
 * can be performed while holding p_lock, and all sleeping or blocking
 * operations can be performed without acquiring p_lock.
 *
 * putacct(2), which allows an application to construct a customized record
 * associated with an existing process or task, has its own entry points:
 * exacct_tag_task() and exacct_tag_proc().
 */

taskq_t *exacct_queue;
kmem_cache_t *exacct_object_cache;

zone_key_t exacct_zone_key = ZONE_KEY_UNINITIALIZED;

static const uint32_t exacct_version = EXACCT_VERSION;
static const char exacct_header[] = "exacct";
static const char exacct_creator[] = "SunOS";

ea_object_t *
ea_alloc_item(ea_catalog_t catalog, void *buf, size_t bufsz)
{
	ea_object_t *item;

	item = kmem_cache_alloc(exacct_object_cache, KM_SLEEP);
	bzero(item, sizeof (ea_object_t));
	(void) ea_set_item(item, catalog, buf, bufsz);
	return (item);
}

ea_object_t *
ea_alloc_group(ea_catalog_t catalog)
{
	ea_object_t *group;

	group = kmem_cache_alloc(exacct_object_cache, KM_SLEEP);
	bzero(group, sizeof (ea_object_t));
	(void) ea_set_group(group, catalog);
	return (group);
}

ea_object_t *
ea_attach_item(ea_object_t *grp, void *buf, size_t bufsz, ea_catalog_t catalog)
{
	ea_object_t *item;

	item = ea_alloc_item(catalog, buf, bufsz);
	(void) ea_attach_to_group(grp, item);
	return (item);
}
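
/*
 * The three helpers above capture the pattern used throughout this file to
 * build a record: allocate a group, attach items to it, pack it into a
 * buffer, and free the unpacked object.  An illustrative sketch only
 * (exacct_create_header() below is the real, minimal instance of this
 * pattern):
 *
 *	ea_object_t *grp;
 *	uint32_t val = 0;
 *	size_t bufsize;
 *	void *buf;
 *
 *	grp = ea_alloc_group(EXT_GROUP | EXC_DEFAULT | EXD_GROUP_HEADER);
 *	(void) ea_attach_item(grp, &val, sizeof (val),
 *	    EXT_UINT32 | EXC_DEFAULT | EXD_VERSION);
 *	bufsize = ea_pack_object(grp, NULL, 0);
 *	buf = kmem_alloc(bufsize, KM_SLEEP);
 *	(void) ea_pack_object(grp, buf, bufsize);
 *	...	(use or write the packed buffer)
 *	kmem_free(buf, bufsize);
 *	ea_free_object(grp, EUP_ALLOC);
 */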

/*
 * exacct_add_task_mstate() and exacct_sub_task_mstate() add to and subtract
 * from one task_usage_t the microstate accounting data and resource usage
 * counters supplied in another.  These functions do not operate on *all*
 * members of a task_usage_t: for some (e.g. tu_anctaskid) it would not make
 * sense.
 */
static void
exacct_add_task_mstate(task_usage_t *tu, task_usage_t *delta)
{
	tu->tu_utime += delta->tu_utime;
	tu->tu_stime += delta->tu_stime;
	tu->tu_minflt += delta->tu_minflt;
	tu->tu_majflt += delta->tu_majflt;
	tu->tu_sndmsg += delta->tu_sndmsg;
	tu->tu_rcvmsg += delta->tu_rcvmsg;
	tu->tu_ioch += delta->tu_ioch;
	tu->tu_iblk += delta->tu_iblk;
	tu->tu_oblk += delta->tu_oblk;
	tu->tu_vcsw += delta->tu_vcsw;
	tu->tu_icsw += delta->tu_icsw;
	tu->tu_nsig += delta->tu_nsig;
	tu->tu_nswp += delta->tu_nswp;
	tu->tu_nscl += delta->tu_nscl;
}

/*
 * See the comments for exacct_add_task_mstate(), above.
 */
static void
exacct_sub_task_mstate(task_usage_t *tu, task_usage_t *delta)
{
	tu->tu_utime -= delta->tu_utime;
	tu->tu_stime -= delta->tu_stime;
	tu->tu_minflt -= delta->tu_minflt;
	tu->tu_majflt -= delta->tu_majflt;
	tu->tu_sndmsg -= delta->tu_sndmsg;
	tu->tu_rcvmsg -= delta->tu_rcvmsg;
	tu->tu_ioch -= delta->tu_ioch;
	tu->tu_iblk -= delta->tu_iblk;
	tu->tu_oblk -= delta->tu_oblk;
	tu->tu_vcsw -= delta->tu_vcsw;
	tu->tu_icsw -= delta->tu_icsw;
	tu->tu_nsig -= delta->tu_nsig;
	tu->tu_nswp -= delta->tu_nswp;
	tu->tu_nscl -= delta->tu_nscl;
}

/*
 * Wrapper for vn_rdwr() used by exacct_vn_write() and exacct_write_header()
 * to write to the accounting file without corrupting it in case of an I/O or
 * filesystem error.
 */
static int
exacct_vn_write_impl(ac_info_t *info, void *buf, ssize_t bufsize)
{
	int error;
	ssize_t resid;
	struct vattr va;

	ASSERT(info != NULL);
	ASSERT(info->ac_vnode != NULL);
	ASSERT(MUTEX_HELD(&info->ac_lock));

	/*
	 * Save the size.  If vn_rdwr fails, reset the size to avoid corrupting
	 * the present accounting file.
	 */
	va.va_mask = AT_SIZE;
	error = VOP_GETATTR(info->ac_vnode, &va, 0, kcred, NULL);
	if (error == 0) {
		error = vn_rdwr(UIO_WRITE, info->ac_vnode, (caddr_t)buf,
		    bufsize, 0LL, UIO_SYSSPACE, FAPPEND, (rlim64_t)MAXOFFSET_T,
		    kcred, &resid);
		if (error) {
			(void) VOP_SETATTR(info->ac_vnode, &va, 0, kcred, NULL);
		} else if (resid != 0) {
			(void) VOP_SETATTR(info->ac_vnode, &va, 0, kcred, NULL);
			error = ENOSPC;
		}
	}
	return (error);
}

/*
 * exacct_vn_write() safely writes to an accounting file.  acctctl() prevents
 * the two accounting vnodes from being equal, and the appropriate ac_lock is
 * held across the call, so we're single threaded through this code for each
 * file.
 */
static int
exacct_vn_write(ac_info_t *info, void *buf, ssize_t bufsize)
{
	int error;

	if (info == NULL)
		return (0);

	mutex_enter(&info->ac_lock);

	/*
	 * Don't do anything unless accounting file is set.
	 */
	if (info->ac_vnode == NULL) {
		mutex_exit(&info->ac_lock);
		return (0);
	}
	error = exacct_vn_write_impl(info, buf, bufsize);
	mutex_exit(&info->ac_lock);

	return (error);
}

/*
 * void *exacct_create_header(size_t *)
 *
 * Overview
 *   exacct_create_header() constructs an exacct file header identifying the
 *   accounting file as the output of the kernel.  exacct_create_header() and
 *   the static write_header() and verify_header() routines in libexacct must
 *   remain synchronized.
 *
 * Return values
 *   A pointer to a packed exacct buffer containing the appropriate header is
 *   returned; the size of the buffer is placed in the location indicated by
 *   sizep.
 *
 * Caller's context
 *   Suitable for KM_SLEEP allocations.
 */
void *
exacct_create_header(size_t *sizep)
{
	ea_object_t *hdr_grp;
	uint32_t bskip;
	void *buf;
	size_t bufsize;

	hdr_grp = ea_alloc_group(EXT_GROUP | EXC_DEFAULT | EXD_GROUP_HEADER);
	(void) ea_attach_item(hdr_grp, (void *)&exacct_version, 0,
	    EXT_UINT32 | EXC_DEFAULT | EXD_VERSION);
	(void) ea_attach_item(hdr_grp, (void *)exacct_header, 0,
	    EXT_STRING | EXC_DEFAULT | EXD_FILETYPE);
	(void) ea_attach_item(hdr_grp, (void *)exacct_creator, 0,
	    EXT_STRING | EXC_DEFAULT | EXD_CREATOR);
	(void) ea_attach_item(hdr_grp, uts_nodename(), 0,
	    EXT_STRING | EXC_DEFAULT | EXD_HOSTNAME);

	bufsize = ea_pack_object(hdr_grp, NULL, 0);
	buf = kmem_alloc(bufsize, KM_SLEEP);
	(void) ea_pack_object(hdr_grp, buf, bufsize);
	ea_free_object(hdr_grp, EUP_ALLOC);

	/*
	 * To prevent reading the header when reading the file backwards,
	 * set the large backskip of the header group to 0 (last 4 bytes).
	 */
	bskip = 0;
	exacct_order32(&bskip);
	bcopy(&bskip, (char *)buf + bufsize - sizeof (bskip),
	    sizeof (bskip));

	*sizep = bufsize;
	return (buf);
}

/*
 * int exacct_write_header(ac_info_t *, void *, size_t)
 *
 * Overview
 *   exacct_write_header() writes the given header buffer to the indicated
 *   vnode.
 *
 * Return values
 *   The result of the write operation is returned.
 *
 * Caller's context
 *   Caller must hold the ac_lock of the appropriate accounting file
 *   information block (ac_info_t).
 */
int
exacct_write_header(ac_info_t *info, void *hdr, size_t hdrsize)
{
	if (info != NULL && info->ac_vnode != NULL)
		return (exacct_vn_write_impl(info, hdr, hdrsize));

	return (0);
}
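
/*
 * An illustrative sketch (not a verbatim copy of the acctctl code) of how a
 * consumer is expected to combine the two routines above when a new
 * accounting file is configured: build the header once, write it with the
 * file's ac_lock held, and free the packed buffer.
 *
 *	void *hdr;
 *	size_t hdrsize;
 *	int error;
 *
 *	hdr = exacct_create_header(&hdrsize);
 *	mutex_enter(&info->ac_lock);
 *	error = exacct_write_header(info, hdr, hdrsize);
 *	mutex_exit(&info->ac_lock);
 *	kmem_free(hdr, hdrsize);
 */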

static void
exacct_get_interval_task_usage(task_t *tk, task_usage_t *tu,
    task_usage_t **tu_buf)
{
	task_usage_t *oldtu, *newtu;
	task_usage_t **prevusage;

	ASSERT(MUTEX_HELD(&tk->tk_usage_lock));
	if (getzoneid() != GLOBAL_ZONEID) {
		prevusage = &tk->tk_zoneusage;
	} else {
		prevusage = &tk->tk_prevusage;
	}
	if ((oldtu = *prevusage) != NULL) {
		/*
		 * We have accounting information saved from the previous
		 * interval record, so compute this interval's deltas
		 * against it.
		 */
		newtu = *tu_buf;
		bcopy(tu, newtu, sizeof (task_usage_t));
		tu->tu_minflt -= oldtu->tu_minflt;
		tu->tu_majflt -= oldtu->tu_majflt;
		tu->tu_sndmsg -= oldtu->tu_sndmsg;
		tu->tu_rcvmsg -= oldtu->tu_rcvmsg;
		tu->tu_ioch -= oldtu->tu_ioch;
		tu->tu_iblk -= oldtu->tu_iblk;
		tu->tu_oblk -= oldtu->tu_oblk;
		tu->tu_vcsw -= oldtu->tu_vcsw;
		tu->tu_icsw -= oldtu->tu_icsw;
		tu->tu_nsig -= oldtu->tu_nsig;
		tu->tu_nswp -= oldtu->tu_nswp;
		tu->tu_nscl -= oldtu->tu_nscl;
		tu->tu_utime -= oldtu->tu_utime;
		tu->tu_stime -= oldtu->tu_stime;

		tu->tu_startsec = oldtu->tu_finishsec;
		tu->tu_startnsec = oldtu->tu_finishnsec;
		/*
		 * Copy the data from our temporary storage to the task's
		 * previous interval usage structure for future reference.
		 */
		bcopy(newtu, oldtu, sizeof (task_usage_t));
	} else {
		/*
		 * Store current statistics in the task's previous interval
		 * usage structure for future reference.
		 */
		*prevusage = *tu_buf;
		bcopy(tu, *prevusage, sizeof (task_usage_t));
		*tu_buf = NULL;
	}
}

static void
exacct_snapshot_task_usage(task_t *tk, task_usage_t *tu)
{
	timestruc_t ts;
	proc_t *p;

	ASSERT(MUTEX_HELD(&pidlock));

	if ((p = tk->tk_memb_list) == NULL)
		return;

	/*
	 * exacct_snapshot_task_usage() provides an approximate snapshot of the
	 * usage of the potentially many members of the task.  Since we don't
	 * guarantee exactness, each member's p_lock is held only while its
	 * microstate times are aggregated; the remaining counters are read
	 * without it.
	 */
	do {
		mutex_enter(&p->p_lock);
		tu->tu_utime += mstate_aggr_state(p, LMS_USER);
		tu->tu_stime += mstate_aggr_state(p, LMS_SYSTEM);
		mutex_exit(&p->p_lock);
		tu->tu_minflt += p->p_ru.minflt;
		tu->tu_majflt += p->p_ru.majflt;
		tu->tu_sndmsg += p->p_ru.msgsnd;
		tu->tu_rcvmsg += p->p_ru.msgrcv;
		tu->tu_ioch += p->p_ru.ioch;
		tu->tu_iblk += p->p_ru.inblock;
		tu->tu_oblk += p->p_ru.oublock;
		tu->tu_vcsw += p->p_ru.nvcsw;
		tu->tu_icsw += p->p_ru.nivcsw;
		tu->tu_nsig += p->p_ru.nsignals;
		tu->tu_nswp += p->p_ru.nswap;
		tu->tu_nscl += p->p_ru.sysc;
	} while ((p = p->p_tasknext) != tk->tk_memb_list);

	/*
	 * The resource usage accounted for so far will include that
	 * contributed by the task's first process.  If this process
	 * came from another task, then its accumulated resource usage
	 * will include a contribution from work performed there.
	 * We must therefore subtract any resource usage that was
	 * inherited with the first process.
	 */
	exacct_sub_task_mstate(tu, tk->tk_inherited);

	gethrestime(&ts);
	tu->tu_finishsec = (uint64_t)(ulong_t)ts.tv_sec;
	tu->tu_finishnsec = (uint64_t)(ulong_t)ts.tv_nsec;
}

/*
 * void exacct_update_task_mstate(proc_t *)
 *
 * Overview
 *   exacct_update_task_mstate() updates the task usage; it is intended
 *   to be called from proc_exit().
 *
 * Return values
 *   None.
 *
 * Caller's context
 *   p_lock must be held at entry.
 */
void
exacct_update_task_mstate(proc_t *p)
{
	task_usage_t *tu;

	mutex_enter(&p->p_task->tk_usage_lock);
	tu = p->p_task->tk_usage;
	tu->tu_utime += mstate_aggr_state(p, LMS_USER);
	tu->tu_stime += mstate_aggr_state(p, LMS_SYSTEM);
	tu->tu_minflt += p->p_ru.minflt;
	tu->tu_majflt += p->p_ru.majflt;
	tu->tu_sndmsg += p->p_ru.msgsnd;
	tu->tu_rcvmsg += p->p_ru.msgrcv;
	tu->tu_ioch += p->p_ru.ioch;
	tu->tu_iblk += p->p_ru.inblock;
	tu->tu_oblk += p->p_ru.oublock;
	tu->tu_vcsw += p->p_ru.nvcsw;
	tu->tu_icsw += p->p_ru.nivcsw;
	tu->tu_nsig += p->p_ru.nsignals;
	tu->tu_nswp += p->p_ru.nswap;
	tu->tu_nscl += p->p_ru.sysc;
	mutex_exit(&p->p_task->tk_usage_lock);
}

static void
exacct_calculate_task_usage(task_t *tk, task_usage_t *tu, int flag)
{
	timestruc_t ts;
	task_usage_t *tu_buf;

	switch (flag) {
	case EW_PARTIAL:
		/*
		 * For partial records we must report the sum of the current
		 * accounting statistics and those previously accumulated.
		 */
		mutex_enter(&pidlock);
		mutex_enter(&tk->tk_usage_lock);

		(void) bcopy(tk->tk_usage, tu, sizeof (task_usage_t));
		exacct_snapshot_task_usage(tk, tu);

		mutex_exit(&tk->tk_usage_lock);
		mutex_exit(&pidlock);
		break;
	case EW_INTERVAL:
		/*
		 * We need to allocate a spare task_usage_t buffer before
		 * grabbing pidlock because we might need it later in
		 * exacct_get_interval_task_usage().
		 */
		tu_buf = kmem_zalloc(sizeof (task_usage_t), KM_SLEEP);
		mutex_enter(&pidlock);
		mutex_enter(&tk->tk_usage_lock);

		/*
		 * For interval records, we deduct the previous microstate
		 * accounting data and cpu usage times from previously saved
		 * results and update the previous task usage structure.
		 */
		(void) bcopy(tk->tk_usage, tu, sizeof (task_usage_t));
		exacct_snapshot_task_usage(tk, tu);
		exacct_get_interval_task_usage(tk, tu, &tu_buf);

		mutex_exit(&tk->tk_usage_lock);
		mutex_exit(&pidlock);

		if (tu_buf != NULL)
			kmem_free(tu_buf, sizeof (task_usage_t));
		break;
	case EW_FINAL:
		/*
		 * For final records, we deduct, from the task's current
		 * usage, any usage that was inherited with the arrival
		 * of a process from a previous task.  We then record
		 * the task's finish time.
		 */
		mutex_enter(&tk->tk_usage_lock);
		(void) bcopy(tk->tk_usage, tu, sizeof (task_usage_t));
		exacct_sub_task_mstate(tu, tk->tk_inherited);
		mutex_exit(&tk->tk_usage_lock);

		gethrestime(&ts);
		tu->tu_finishsec = (uint64_t)(ulong_t)ts.tv_sec;
		tu->tu_finishnsec = (uint64_t)(ulong_t)ts.tv_nsec;

		break;
	}
}

static int
exacct_attach_task_item(task_t *tk, task_usage_t *tu, ea_object_t *record,
    int res)
{
	int attached = 1;

	switch (res) {
	case AC_TASK_TASKID:
		(void) ea_attach_item(record, &tk->tk_tkid,
		    sizeof (uint32_t), EXT_UINT32 | EXD_TASK_TASKID);
		break;
	case AC_TASK_PROJID:
		(void) ea_attach_item(record, &tk->tk_proj->kpj_id,
		    sizeof (uint32_t), EXT_UINT32 | EXD_TASK_PROJID);
		break;
	case AC_TASK_CPU: {
		timestruc_t ts;
		uint64_t ui;

		hrt2ts(tu->tu_stime, &ts);
		ui = ts.tv_sec;
		(void) ea_attach_item(record, &ui, sizeof (uint64_t),
		    EXT_UINT64 | EXD_TASK_CPU_SYS_SEC);
		ui = ts.tv_nsec;
		(void) ea_attach_item(record, &ui, sizeof (uint64_t),
		    EXT_UINT64 | EXD_TASK_CPU_SYS_NSEC);

		hrt2ts(tu->tu_utime, &ts);
		ui = ts.tv_sec;
		(void) ea_attach_item(record, &ui, sizeof (uint64_t),
		    EXT_UINT64 | EXD_TASK_CPU_USER_SEC);
		ui = ts.tv_nsec;
		(void) ea_attach_item(record, &ui, sizeof (uint64_t),
		    EXT_UINT64 | EXD_TASK_CPU_USER_NSEC);
	}
		break;
	case AC_TASK_TIME:
		(void) ea_attach_item(record, &tu->tu_startsec,
		    sizeof (uint64_t), EXT_UINT64 | EXD_TASK_START_SEC);
		(void) ea_attach_item(record, &tu->tu_startnsec,
		    sizeof (uint64_t), EXT_UINT64 | EXD_TASK_START_NSEC);
		(void) ea_attach_item(record, &tu->tu_finishsec,
		    sizeof (uint64_t), EXT_UINT64 | EXD_TASK_FINISH_SEC);
		(void) ea_attach_item(record, &tu->tu_finishnsec,
		    sizeof (uint64_t), EXT_UINT64 | EXD_TASK_FINISH_NSEC);
		break;
	case AC_TASK_HOSTNAME:
		(void) ea_attach_item(record, tk->tk_zone->zone_nodename,
		    strlen(tk->tk_zone->zone_nodename) + 1,
		    EXT_STRING | EXD_TASK_HOSTNAME);
		break;
	case AC_TASK_MICROSTATE:
		(void) ea_attach_item(record, &tu->tu_majflt,
		    sizeof (uint64_t), EXT_UINT64 | EXD_TASK_FAULTS_MAJOR);
		(void) ea_attach_item(record, &tu->tu_minflt,
		    sizeof (uint64_t), EXT_UINT64 | EXD_TASK_FAULTS_MINOR);
		(void) ea_attach_item(record, &tu->tu_sndmsg,
		    sizeof (uint64_t), EXT_UINT64 | EXD_TASK_MESSAGES_SND);
		(void) ea_attach_item(record, &tu->tu_rcvmsg,
		    sizeof (uint64_t), EXT_UINT64 | EXD_TASK_MESSAGES_RCV);
		(void) ea_attach_item(record, &tu->tu_iblk,
		    sizeof (uint64_t), EXT_UINT64 | EXD_TASK_BLOCKS_IN);
		(void) ea_attach_item(record, &tu->tu_oblk,
		    sizeof (uint64_t), EXT_UINT64 | EXD_TASK_BLOCKS_OUT);
		(void) ea_attach_item(record, &tu->tu_ioch,
		    sizeof (uint64_t), EXT_UINT64 | EXD_TASK_CHARS_RDWR);
		(void) ea_attach_item(record, &tu->tu_vcsw,
		    sizeof (uint64_t), EXT_UINT64 | EXD_TASK_CONTEXT_VOL);
		(void) ea_attach_item(record, &tu->tu_icsw,
		    sizeof (uint64_t), EXT_UINT64 | EXD_TASK_CONTEXT_INV);
		(void) ea_attach_item(record, &tu->tu_nsig,
		    sizeof (uint64_t), EXT_UINT64 | EXD_TASK_SIGNALS);
		(void) ea_attach_item(record, &tu->tu_nswp,
		    sizeof (uint64_t), EXT_UINT64 | EXD_TASK_SWAPS);
		(void) ea_attach_item(record, &tu->tu_nscl,
		    sizeof (uint64_t), EXT_UINT64 | EXD_TASK_SYSCALLS);
		break;
	case AC_TASK_ANCTASKID:
		(void) ea_attach_item(record, &tu->tu_anctaskid,
		    sizeof (uint32_t), EXT_UINT32 | EXD_TASK_ANCTASKID);
		break;
	case AC_TASK_ZONENAME:
		(void) ea_attach_item(record, tk->tk_zone->zone_name,
		    strlen(tk->tk_zone->zone_name) + 1,
		    EXT_STRING | EXD_TASK_ZONENAME);
		break;
	default:
		attached = 0;
	}
	return (attached);
}

static ea_object_t *
exacct_assemble_task_record(task_t *tk, task_usage_t *tu, ulong_t *mask,
    ea_catalog_t record_type)
{
	int res, count;
	ea_object_t *record;

	/*
	 * Assemble usage values into group.
	 */
	record = ea_alloc_group(EXT_GROUP | EXC_DEFAULT | record_type);
	for (res = 1, count = 0; res <= AC_TASK_MAX_RES; res++)
		if (BT_TEST(mask, res))
			count += exacct_attach_task_item(tk, tu, record, res);
	if (count == 0) {
		ea_free_object(record, EUP_ALLOC);
		record = NULL;
	}
	return (record);
}

/*
 * int exacct_assemble_task_usage(ac_info_t *, task_t *,
 *	int (*)(ac_info_t *, void *, size_t, void *, size_t, size_t *),
 *	void *, size_t, size_t *, int)
 *
 * Overview
 *   exacct_assemble_task_usage() builds the packed exacct buffer for the
 *   indicated task, executes the given callback function, and frees the
 *   packed buffer.
 *
 * Return values
 *   Returns 0 on success; otherwise the appropriate error code is returned.
 *
 * Caller's context
 *   Suitable for KM_SLEEP allocations.
 */
int
exacct_assemble_task_usage(ac_info_t *ac_task, task_t *tk,
    int (*callback)(ac_info_t *, void *, size_t, void *, size_t, size_t *),
    void *ubuf, size_t ubufsize, size_t *actual, int flag)
{
	ulong_t mask[AC_MASK_SZ];
	ea_object_t *task_record;
	ea_catalog_t record_type;
	task_usage_t *tu;
	void *buf;
	size_t bufsize;
	int ret;

	ASSERT(flag == EW_FINAL || flag == EW_PARTIAL || flag == EW_INTERVAL);

	mutex_enter(&ac_task->ac_lock);
	if (ac_task->ac_state == AC_OFF) {
		mutex_exit(&ac_task->ac_lock);
		return (ENOTACTIVE);
	}
	bt_copy(ac_task->ac_mask, mask, AC_MASK_SZ);
	mutex_exit(&ac_task->ac_lock);

	switch (flag) {
	case EW_FINAL:
		record_type = EXD_GROUP_TASK;
		break;
	case EW_PARTIAL:
		record_type = EXD_GROUP_TASK_PARTIAL;
		break;
	case EW_INTERVAL:
		record_type = EXD_GROUP_TASK_INTERVAL;
		break;
	}

	/*
	 * Calculate task usage and assemble it into the task record.
	 */
	tu = kmem_zalloc(sizeof (task_usage_t), KM_SLEEP);
	exacct_calculate_task_usage(tk, tu, flag);
	task_record = exacct_assemble_task_record(tk, tu, mask, record_type);
	if (task_record == NULL) {
		/*
		 * The current configuration of the accounting system has
		 * resulted in records with no data; accordingly, we don't
		 * write these, but we return success.
		 */
		kmem_free(tu, sizeof (task_usage_t));
		return (0);
	}

	/*
	 * Pack object into buffer and run callback on it.
	 */
	bufsize = ea_pack_object(task_record, NULL, 0);
	buf = kmem_alloc(bufsize, KM_SLEEP);
	(void) ea_pack_object(task_record, buf, bufsize);
	ret = callback(ac_task, ubuf, ubufsize, buf, bufsize, actual);

	/*
	 * Free all previously allocated structures.
	 */
	kmem_free(buf, bufsize);
	ea_free_object(task_record, EUP_ALLOC);
	kmem_free(tu, sizeof (task_usage_t));
	return (ret);
}
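
/*
 * exacct_commit_task() below shows the write-side use of
 * exacct_assemble_task_usage().  The getacct(2)/wracct(2) paths (outside this
 * file) supply different callbacks; a hypothetical copyout-style callback,
 * shown here only as a sketch of the callback contract, might look like:
 *
 *	static int
 *	example_getacct_callback(ac_info_t *info, void *ubuf, size_t ubufsize,
 *	    void *buf, size_t bufsize, size_t *sizep)
 *	{
 *		if (ubuf != NULL &&
 *		    copyout(buf, ubuf, MIN(bufsize, ubufsize)) != 0)
 *			return (EFAULT);
 *		*sizep = bufsize;
 *		return (0);
 *	}
 *
 *	error = exacct_assemble_task_usage(ac_task, tk,
 *	    example_getacct_callback, ubuf, ubufsize, &actual, EW_PARTIAL);
 */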

/*
 * void exacct_commit_task(void *)
 *
 * Overview
 *   exacct_commit_task() calculates the final usage for a task and writes a
 *   task record if task accounting is active.  exacct_commit_task() is
 *   intended to be called from a task queue (taskq_t).
 *
 * Return values
 *   None.
 *
 * Caller's context
 *   Suitable for KM_SLEEP allocations.
 */

void
exacct_commit_task(void *arg)
{
	task_t *tk = (task_t *)arg;
	size_t size;
	zone_t *zone = tk->tk_zone;
	struct exacct_globals *acg;

	ASSERT(tk != task0p);
	ASSERT(tk->tk_memb_list == NULL);

	/*
	 * Don't do any extra work if the acctctl module isn't loaded.
	 */
	if (exacct_zone_key != ZONE_KEY_UNINITIALIZED) {
		acg = zone_getspecific(exacct_zone_key, zone);
		(void) exacct_assemble_task_usage(&acg->ac_task, tk,
		    exacct_commit_callback, NULL, 0, &size, EW_FINAL);
		if (tk->tk_zone != global_zone) {
			acg = zone_getspecific(exacct_zone_key, global_zone);
			(void) exacct_assemble_task_usage(&acg->ac_task, tk,
			    exacct_commit_callback, NULL, 0, &size, EW_FINAL);
		}
	}
	/*
	 * Release associated project and finalize task.
	 */
	task_end(tk);
}

static int
exacct_attach_proc_item(proc_usage_t *pu, ea_object_t *record, int res)
{
	int attached = 1;

	switch (res) {
	case AC_PROC_PID:
		(void) ea_attach_item(record, &pu->pu_pid,
		    sizeof (uint32_t), EXT_UINT32 | EXD_PROC_PID);
		break;
	case AC_PROC_UID:
		(void) ea_attach_item(record, &pu->pu_ruid,
		    sizeof (uint32_t), EXT_UINT32 | EXD_PROC_UID);
		break;
	case AC_PROC_FLAG:
		(void) ea_attach_item(record, &pu->pu_acflag,
		    sizeof (uint32_t), EXT_UINT32 | EXD_PROC_ACCT_FLAGS);
		break;
	case AC_PROC_GID:
		(void) ea_attach_item(record, &pu->pu_rgid,
		    sizeof (uint32_t), EXT_UINT32 | EXD_PROC_GID);
		break;
	case AC_PROC_PROJID:
		(void) ea_attach_item(record, &pu->pu_projid,
		    sizeof (uint32_t), EXT_UINT32 | EXD_PROC_PROJID);
		break;
	case AC_PROC_TASKID:
		(void) ea_attach_item(record, &pu->pu_taskid,
		    sizeof (uint32_t), EXT_UINT32 | EXD_PROC_TASKID);
		break;
	case AC_PROC_CPU:
		(void) ea_attach_item(record, &pu->pu_utimesec,
		    sizeof (uint64_t), EXT_UINT64 | EXD_PROC_CPU_USER_SEC);
		(void) ea_attach_item(record, &pu->pu_utimensec,
		    sizeof (uint64_t), EXT_UINT64 | EXD_PROC_CPU_USER_NSEC);
		(void) ea_attach_item(record, &pu->pu_stimesec,
		    sizeof (uint64_t), EXT_UINT64 | EXD_PROC_CPU_SYS_SEC);
		(void) ea_attach_item(record, &pu->pu_stimensec,
		    sizeof (uint64_t), EXT_UINT64 | EXD_PROC_CPU_SYS_NSEC);
		break;
	case AC_PROC_TIME:
		(void) ea_attach_item(record, &pu->pu_startsec,
		    sizeof (uint64_t), EXT_UINT64 | EXD_PROC_START_SEC);
		(void) ea_attach_item(record, &pu->pu_startnsec,
		    sizeof (uint64_t), EXT_UINT64 | EXD_PROC_START_NSEC);
		(void) ea_attach_item(record, &pu->pu_finishsec,
		    sizeof (uint64_t), EXT_UINT64 | EXD_PROC_FINISH_SEC);
		(void) ea_attach_item(record, &pu->pu_finishnsec,
		    sizeof (uint64_t), EXT_UINT64 | EXD_PROC_FINISH_NSEC);
		break;
	case AC_PROC_COMMAND:
		(void) ea_attach_item(record, pu->pu_command,
		    strlen(pu->pu_command) + 1, EXT_STRING | EXD_PROC_COMMAND);
		break;
	case AC_PROC_HOSTNAME:
		(void) ea_attach_item(record, pu->pu_nodename,
		    strlen(pu->pu_nodename) + 1,
		    EXT_STRING | EXD_PROC_HOSTNAME);
		break;
	case AC_PROC_TTY:
		(void) ea_attach_item(record, &pu->pu_major,
		    sizeof (uint32_t), EXT_UINT32 | EXD_PROC_TTY_MAJOR);
		(void) ea_attach_item(record, &pu->pu_minor,
		    sizeof (uint32_t), EXT_UINT32 | EXD_PROC_TTY_MINOR);
		break;
	case AC_PROC_MICROSTATE:
		(void) ea_attach_item(record, &pu->pu_majflt,
		    sizeof (uint64_t), EXT_UINT64 | EXD_PROC_FAULTS_MAJOR);
		(void) ea_attach_item(record, &pu->pu_minflt,
		    sizeof (uint64_t), EXT_UINT64 | EXD_PROC_FAULTS_MINOR);
		(void) ea_attach_item(record, &pu->pu_sndmsg,
		    sizeof (uint64_t), EXT_UINT64 | EXD_PROC_MESSAGES_SND);
		(void) ea_attach_item(record, &pu->pu_rcvmsg,
		    sizeof (uint64_t), EXT_UINT64 | EXD_PROC_MESSAGES_RCV);
		(void) ea_attach_item(record, &pu->pu_iblk,
		    sizeof (uint64_t), EXT_UINT64 | EXD_PROC_BLOCKS_IN);
		(void) ea_attach_item(record, &pu->pu_oblk,
		    sizeof (uint64_t), EXT_UINT64 | EXD_PROC_BLOCKS_OUT);
		(void) ea_attach_item(record, &pu->pu_ioch,
		    sizeof (uint64_t), EXT_UINT64 | EXD_PROC_CHARS_RDWR);
		(void) ea_attach_item(record, &pu->pu_vcsw,
		    sizeof (uint64_t), EXT_UINT64 | EXD_PROC_CONTEXT_VOL);
		(void) ea_attach_item(record, &pu->pu_icsw,
		    sizeof (uint64_t), EXT_UINT64 | EXD_PROC_CONTEXT_INV);
		(void) ea_attach_item(record, &pu->pu_nsig,
		    sizeof (uint64_t), EXT_UINT64 | EXD_PROC_SIGNALS);
		(void) ea_attach_item(record, &pu->pu_nswp,
		    sizeof (uint64_t), EXT_UINT64 | EXD_PROC_SWAPS);
		(void) ea_attach_item(record, &pu->pu_nscl,
		    sizeof (uint64_t), EXT_UINT64 | EXD_PROC_SYSCALLS);
		break;
	case AC_PROC_ANCPID:
		(void) ea_attach_item(record, &pu->pu_ancpid,
		    sizeof (uint32_t), EXT_UINT32 | EXD_PROC_ANCPID);
		break;
	case AC_PROC_WAIT_STATUS:
		(void) ea_attach_item(record, &pu->pu_wstat,
		    sizeof (uint32_t), EXT_UINT32 | EXD_PROC_WAIT_STATUS);
		break;
	case AC_PROC_ZONENAME:
		(void) ea_attach_item(record, pu->pu_zonename,
		    strlen(pu->pu_zonename) + 1,
		    EXT_STRING | EXD_PROC_ZONENAME);
		break;
	case AC_PROC_MEM:
		(void) ea_attach_item(record, &pu->pu_mem_rss_avg,
		    sizeof (uint64_t), EXT_UINT64 | EXD_PROC_MEM_RSS_AVG_K);
		(void) ea_attach_item(record, &pu->pu_mem_rss_max,
		    sizeof (uint64_t), EXT_UINT64 | EXD_PROC_MEM_RSS_MAX_K);
		break;
	default:
		attached = 0;
	}
	return (attached);
}

static ea_object_t *
exacct_assemble_proc_record(proc_usage_t *pu, ulong_t *mask,
    ea_catalog_t record_type)
{
	int res, count;
	ea_object_t *record;

	/*
	 * Assemble usage values into group.
	 */
	record = ea_alloc_group(EXT_GROUP | EXC_DEFAULT | record_type);
	for (res = 1, count = 0; res <= AC_PROC_MAX_RES; res++)
		if (BT_TEST(mask, res))
			count += exacct_attach_proc_item(pu, record, res);
	if (count == 0) {
		ea_free_object(record, EUP_ALLOC);
		record = NULL;
	}
	return (record);
}

/*
 * The following two routines assume that the process's p_lock is held, or
 * that exacct_commit_proc() has been called from exit() when all lwps are
 * stopped.
 */
static void
exacct_calculate_proc_mstate(proc_t *p, proc_usage_t *pu)
{
	kthread_t *t;

	ASSERT(MUTEX_HELD(&p->p_lock));
	if ((t = p->p_tlist) == NULL)
		return;

	do {
		pu->pu_minflt += t->t_lwp->lwp_ru.minflt;
		pu->pu_majflt += t->t_lwp->lwp_ru.majflt;
		pu->pu_sndmsg += t->t_lwp->lwp_ru.msgsnd;
		pu->pu_rcvmsg += t->t_lwp->lwp_ru.msgrcv;
		pu->pu_ioch += t->t_lwp->lwp_ru.ioch;
		pu->pu_iblk += t->t_lwp->lwp_ru.inblock;
		pu->pu_oblk += t->t_lwp->lwp_ru.oublock;
		pu->pu_vcsw += t->t_lwp->lwp_ru.nvcsw;
		pu->pu_icsw += t->t_lwp->lwp_ru.nivcsw;
		pu->pu_nsig += t->t_lwp->lwp_ru.nsignals;
		pu->pu_nswp += t->t_lwp->lwp_ru.nswap;
		pu->pu_nscl += t->t_lwp->lwp_ru.sysc;
	} while ((t = t->t_forw) != p->p_tlist);
}

static void
exacct_copy_proc_mstate(proc_t *p, proc_usage_t *pu)
{
	pu->pu_minflt = p->p_ru.minflt;
	pu->pu_majflt = p->p_ru.majflt;
	pu->pu_sndmsg = p->p_ru.msgsnd;
	pu->pu_rcvmsg = p->p_ru.msgrcv;
	pu->pu_ioch = p->p_ru.ioch;
	pu->pu_iblk = p->p_ru.inblock;
	pu->pu_oblk = p->p_ru.oublock;
	pu->pu_vcsw = p->p_ru.nvcsw;
	pu->pu_icsw = p->p_ru.nivcsw;
	pu->pu_nsig = p->p_ru.nsignals;
	pu->pu_nswp = p->p_ru.nswap;
	pu->pu_nscl = p->p_ru.sysc;
}

void
exacct_calculate_proc_usage(proc_t *p, proc_usage_t *pu, ulong_t *mask,
    int flag, int wstat)
{
	timestruc_t ts, ts_run;

	ASSERT(MUTEX_HELD(&p->p_lock));

	/*
	 * Convert CPU and execution times to sec/nsec format.
	 */
	if (BT_TEST(mask, AC_PROC_CPU)) {
		hrt2ts(mstate_aggr_state(p, LMS_USER), &ts);
		pu->pu_utimesec = (uint64_t)(ulong_t)ts.tv_sec;
		pu->pu_utimensec = (uint64_t)(ulong_t)ts.tv_nsec;
		hrt2ts(mstate_aggr_state(p, LMS_SYSTEM), &ts);
		pu->pu_stimesec = (uint64_t)(ulong_t)ts.tv_sec;
		pu->pu_stimensec = (uint64_t)(ulong_t)ts.tv_nsec;
	}
	if (BT_TEST(mask, AC_PROC_TIME)) {
		gethrestime(&ts);
		pu->pu_finishsec = (uint64_t)(ulong_t)ts.tv_sec;
		pu->pu_finishnsec = (uint64_t)(ulong_t)ts.tv_nsec;
		hrt2ts(gethrtime() - p->p_mstart, &ts_run);
		ts.tv_sec -= ts_run.tv_sec;
		ts.tv_nsec -= ts_run.tv_nsec;
		if (ts.tv_nsec < 0) {
			ts.tv_sec--;
			if ((ts.tv_nsec = ts.tv_nsec + NANOSEC) >= NANOSEC) {
				ts.tv_sec++;
				ts.tv_nsec -= NANOSEC;
			}
		}
		pu->pu_startsec = (uint64_t)(ulong_t)ts.tv_sec;
		pu->pu_startnsec = (uint64_t)(ulong_t)ts.tv_nsec;
	}

	pu->pu_pid = p->p_pidp->pid_id;
	pu->pu_acflag = p->p_user.u_acflag;
	pu->pu_projid = p->p_task->tk_proj->kpj_id;
	pu->pu_taskid = p->p_task->tk_tkid;
	pu->pu_major = getmajor(p->p_sessp->s_dev);
	pu->pu_minor = getminor(p->p_sessp->s_dev);
	pu->pu_ancpid = p->p_ancpid;
	pu->pu_wstat = wstat;
	/*
	 * Compute average RSS in K.  The denominator is the number of
	 * samples:  the number of clock ticks plus the initial value.
	 */
	pu->pu_mem_rss_avg = (PTOU(p)->u_mem / (p->p_stime + p->p_utime + 1)) *
	    (PAGESIZE / 1024);
	pu->pu_mem_rss_max = PTOU(p)->u_mem_max * (PAGESIZE / 1024);

	mutex_enter(&p->p_crlock);
	pu->pu_ruid = crgetruid(p->p_cred);
	pu->pu_rgid = crgetrgid(p->p_cred);
	mutex_exit(&p->p_crlock);

	bcopy(p->p_user.u_comm, pu->pu_command, strlen(p->p_user.u_comm) + 1);
	bcopy(p->p_zone->zone_name, pu->pu_zonename,
	    strlen(p->p_zone->zone_name) + 1);
	bcopy(p->p_zone->zone_nodename, pu->pu_nodename,
	    strlen(p->p_zone->zone_nodename) + 1);

	/*
	 * Calculate microstate accounting data for a process that is still
	 * running.  Presently, we explicitly collect all of the LWP usage into
	 * the proc usage structure here.
	 */
	if (flag & EW_PARTIAL)
		exacct_calculate_proc_mstate(p, pu);
	if (flag & EW_FINAL)
		exacct_copy_proc_mstate(p, pu);
}
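
/*
 * Worked example for the average RSS computation above (illustrative numbers
 * only): with 4K pages, if u_mem has accumulated 2,000,000 page samples over
 * 999 clock ticks of user plus system time, the average is
 * 2,000,000 / (999 + 1) = 2,000 pages, i.e. 2,000 * (4096 / 1024) = 8,000K.
 */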

/*
 * int exacct_assemble_proc_usage(ac_info_t *, proc_usage_t *,
 *	int (*)(ac_info_t *, void *, size_t, void *, size_t, size_t *),
 *	void *, size_t, size_t *, int)
 *
 * Overview
 *   Assemble a record with miscellaneous accounting information about the
 *   process and execute the callback on it.  It is the callback's job to set
 *   "actual" to the size of the record.
 *
 * Return values
 *   The result of the callback function, unless the extended process
 *   accounting feature is not active, in which case ENOTACTIVE is returned.
 *
 * Caller's context
 *   Suitable for KM_SLEEP allocations.
 */
int
exacct_assemble_proc_usage(ac_info_t *ac_proc, proc_usage_t *pu,
    int (*callback)(ac_info_t *, void *, size_t, void *, size_t, size_t *),
    void *ubuf, size_t ubufsize, size_t *actual, int flag)
{
	ulong_t mask[AC_MASK_SZ];
	ea_object_t *proc_record;
	ea_catalog_t record_type;
	void *buf;
	size_t bufsize;
	int ret;

	ASSERT(flag == EW_FINAL || flag == EW_PARTIAL);

	mutex_enter(&ac_proc->ac_lock);
	if (ac_proc->ac_state == AC_OFF) {
		mutex_exit(&ac_proc->ac_lock);
		return (ENOTACTIVE);
	}
	bt_copy(&ac_proc->ac_mask[0], mask, AC_MASK_SZ);
	mutex_exit(&ac_proc->ac_lock);

	switch (flag) {
	case EW_FINAL:
		record_type = EXD_GROUP_PROC;
		break;
	case EW_PARTIAL:
		record_type = EXD_GROUP_PROC_PARTIAL;
		break;
	}

	proc_record = exacct_assemble_proc_record(pu, mask, record_type);
	if (proc_record == NULL)
		return (0);

	/*
	 * Pack object into buffer and pass to callback.
	 */
	bufsize = ea_pack_object(proc_record, NULL, 0);
	buf = kmem_alloc(bufsize, KM_SLEEP);
	(void) ea_pack_object(proc_record, buf, bufsize);

	ret = callback(ac_proc, ubuf, ubufsize, buf, bufsize, actual);

	/*
	 * Free all previous allocations.
	 */
	kmem_free(buf, bufsize);
	ea_free_object(proc_record, EUP_ALLOC);
	return (ret);
}

/*
 * int exacct_commit_callback(ac_info_t *, void *, size_t, void *, size_t,
 *	size_t *)
 *
 * Overview
 *   exacct_commit_callback() writes the indicated buffer to the indicated
 *   extended accounting file.
 *
 * Return values
 *   The result of the write operation is returned.  "actual" is updated to
 *   contain the number of bytes actually written.
 *
 * Caller's context
 *   Suitable for a vn_rdwr() operation.
 */
/*ARGSUSED*/
int
exacct_commit_callback(ac_info_t *info, void *ubuf, size_t ubufsize,
    void *buf, size_t bufsize, size_t *actual)
{
	int error = 0;

	*actual = 0;
	if ((error = exacct_vn_write(info, buf, bufsize)) == 0)
		*actual = bufsize;
	return (error);
}

static void
exacct_do_commit_proc(ac_info_t *ac_proc, proc_t *p, int wstat)
{
	size_t size;
	proc_usage_t *pu;
	ulong_t mask[AC_MASK_SZ];

	mutex_enter(&ac_proc->ac_lock);
	if (ac_proc->ac_state == AC_ON) {
		bt_copy(&ac_proc->ac_mask[0], mask, AC_MASK_SZ);
		mutex_exit(&ac_proc->ac_lock);
	} else {
		mutex_exit(&ac_proc->ac_lock);
		return;
	}

	mutex_enter(&p->p_lock);
	size = strlen(p->p_user.u_comm) + 1;
	mutex_exit(&p->p_lock);

	pu = kmem_alloc(sizeof (proc_usage_t), KM_SLEEP);
	pu->pu_command = kmem_alloc(size, KM_SLEEP);
	mutex_enter(&p->p_lock);
	exacct_calculate_proc_usage(p, pu, mask, EW_FINAL, wstat);
	mutex_exit(&p->p_lock);

	(void) exacct_assemble_proc_usage(ac_proc, pu,
	    exacct_commit_callback, NULL, 0, &size, EW_FINAL);

	kmem_free(pu->pu_command, strlen(pu->pu_command) + 1);
	kmem_free(pu, sizeof (proc_usage_t));
}

/*
 * void exacct_commit_proc(proc_t *, int)
 *
 * Overview
 *   exacct_commit_proc() calculates the final usage for a process, updating
 *   the task usage if task accounting is active, and writing a process record
 *   if process accounting is active.  exacct_commit_proc() is intended to be
 *   called from proc_exit().
 *
 * Return values
 *   None.
 *
 * Caller's context
 *   Suitable for KM_SLEEP allocations.  p_lock must not be held at entry.
 */
void
exacct_commit_proc(proc_t *p, int wstat)
{
	zone_t *zone = p->p_zone;
	struct exacct_globals *acg, *gacg = NULL;

	if (exacct_zone_key == ZONE_KEY_UNINITIALIZED) {
		/*
		 * acctctl module not loaded.  Nothing to do.
		 */
		return;
	}
	acg = zone_getspecific(exacct_zone_key, zone);
	exacct_do_commit_proc(&acg->ac_proc, p, wstat);
	if (zone != global_zone) {
		gacg = zone_getspecific(exacct_zone_key, global_zone);
		exacct_do_commit_proc(&gacg->ac_proc, p, wstat);
	}
}

static int
exacct_attach_netstat_item(net_stat_t *ns, ea_object_t *record, int res)
{
	int attached = 1;

	switch (res) {
	case AC_NET_NAME:
		(void) ea_attach_item(record, ns->ns_name,
		    strlen(ns->ns_name) + 1, EXT_STRING | EXD_NET_STATS_NAME);
		break;
	case AC_NET_CURTIME:
		{
			uint64_t	now;
			timestruc_t	ts;

			gethrestime(&ts);
			now = (uint64_t)(ulong_t)ts.tv_sec;
			(void) ea_attach_item(record,  &now, sizeof (uint64_t),
			    EXT_UINT64 | EXD_NET_STATS_CURTIME);
		}
		break;
	case AC_NET_IBYTES:
		(void) ea_attach_item(record, &ns->ns_ibytes,
		    sizeof (uint64_t), EXT_UINT64 | EXD_NET_STATS_IBYTES);
		break;
	case AC_NET_OBYTES:
		(void) ea_attach_item(record, &ns->ns_obytes,
		    sizeof (uint64_t), EXT_UINT64 | EXD_NET_STATS_OBYTES);
		break;
	case AC_NET_IPKTS:
		(void) ea_attach_item(record, &ns->ns_ipackets,
		    sizeof (uint64_t), EXT_UINT64 | EXD_NET_STATS_IPKTS);
		break;
	case AC_NET_OPKTS:
		(void) ea_attach_item(record, &ns->ns_opackets,
		    sizeof (uint64_t), EXT_UINT64 | EXD_NET_STATS_OPKTS);
		break;
	case AC_NET_IERRPKTS:
		(void) ea_attach_item(record, &ns->ns_ierrors,
		    sizeof (uint64_t), EXT_UINT64 | EXD_NET_STATS_IERRPKTS);
		break;
	case AC_NET_OERRPKTS:
		(void) ea_attach_item(record, &ns->ns_oerrors,
		    sizeof (uint64_t), EXT_UINT64 | EXD_NET_STATS_OERRPKTS);
		break;
	default:
		attached = 0;
	}
	return (attached);
}

static int
exacct_attach_netdesc_item(net_desc_t *nd, ea_object_t *record, int res)
{
	int attached = 1;

	switch (res) {
	case AC_NET_NAME:
		(void) ea_attach_item(record, nd->nd_name,
		    strlen(nd->nd_name) + 1, EXT_STRING | EXD_NET_DESC_NAME);
		break;
	case AC_NET_DEVNAME:
		(void) ea_attach_item(record, nd->nd_devname,
		    strlen(nd->nd_devname) + 1, EXT_STRING |
		    EXD_NET_DESC_DEVNAME);
		break;
	case AC_NET_EHOST:
		(void) ea_attach_item(record, &nd->nd_ehost,
		    sizeof (nd->nd_ehost), EXT_RAW | EXD_NET_DESC_EHOST);
		break;
	case AC_NET_EDEST:
		(void) ea_attach_item(record, &nd->nd_edest,
		    sizeof (nd->nd_edest), EXT_RAW | EXD_NET_DESC_EDEST);
		break;
	case AC_NET_VLAN_TPID:
		(void) ea_attach_item(record, &nd->nd_vlan_tpid,
		    sizeof (ushort_t), EXT_UINT16 | EXD_NET_DESC_VLAN_TPID);
		break;
	case AC_NET_VLAN_TCI:
		(void) ea_attach_item(record, &nd->nd_vlan_tci,
		    sizeof (ushort_t), EXT_UINT16 | EXD_NET_DESC_VLAN_TCI);
		break;
	case AC_NET_SAP:
		(void) ea_attach_item(record, &nd->nd_sap,
		    sizeof (ushort_t), EXT_UINT16 | EXD_NET_DESC_SAP);
		break;
	case AC_NET_PRIORITY:
		(void) ea_attach_item(record, &nd->nd_priority,
		    sizeof (ushort_t), EXT_UINT16 | EXD_NET_DESC_PRIORITY);
		break;
	case AC_NET_BWLIMIT:
		(void) ea_attach_item(record, &nd->nd_bw_limit,
		    sizeof (uint64_t), EXT_UINT64 | EXD_NET_DESC_BWLIMIT);
		break;
	case AC_NET_SADDR:
		if (nd->nd_isv4) {
			(void) ea_attach_item(record, &nd->nd_saddr[3],
			    sizeof (uint32_t), EXT_UINT32 |
			    EXD_NET_DESC_V4SADDR);
		} else {
			(void) ea_attach_item(record, &nd->nd_saddr,
			    sizeof (nd->nd_saddr), EXT_RAW |
			    EXD_NET_DESC_V6SADDR);
		}
		break;
	case AC_NET_DADDR:
		if (nd->nd_isv4) {
			(void) ea_attach_item(record, &nd->nd_daddr[3],
			    sizeof (uint32_t), EXT_UINT32 |
			    EXD_NET_DESC_V4DADDR);
		} else {
			(void) ea_attach_item(record, &nd->nd_daddr,
			    sizeof (nd->nd_daddr), EXT_RAW |
			    EXD_NET_DESC_V6DADDR);
		}
		break;
	case AC_NET_SPORT:
		(void) ea_attach_item(record, &nd->nd_sport,
		    sizeof (uint16_t), EXT_UINT16 | EXD_NET_DESC_SPORT);
		break;
	case AC_NET_DPORT:
		(void) ea_attach_item(record, &nd->nd_dport,
		    sizeof (uint16_t), EXT_UINT16 | EXD_NET_DESC_DPORT);
		break;
	case AC_NET_PROTOCOL:
		(void) ea_attach_item(record, &nd->nd_protocol,
		    sizeof (uint8_t), EXT_UINT8 | EXD_NET_DESC_PROTOCOL);
		break;
	case AC_NET_DSFIELD:
		(void) ea_attach_item(record, &nd->nd_dsfield,
		    sizeof (uint8_t), EXT_UINT8 | EXD_NET_DESC_DSFIELD);
		break;
	default:
		attached = 0;
	}
	return (attached);
}

static ea_object_t *
exacct_assemble_net_record(void *ninfo, ulong_t *mask,
    ea_catalog_t record_type, int what)
{
	int res;
	int count;
	ea_object_t *record;

	/*
	 * Assemble usage values into group.
	 */
	record = ea_alloc_group(EXT_GROUP | EXC_DEFAULT | record_type);
	for (res = 1, count = 0; res <= AC_NET_MAX_RES; res++)
		if (BT_TEST(mask, res)) {
			if (what == EX_NET_LNDESC_REC ||
			    what == EX_NET_FLDESC_REC) {
				count += exacct_attach_netdesc_item(
				    (net_desc_t *)ninfo, record, res);
			} else {
				count += exacct_attach_netstat_item(
				    (net_stat_t *)ninfo, record, res);
			}
		}
	if (count == 0) {
		ea_free_object(record, EUP_ALLOC);
		record = NULL;
	}
	return (record);
}

int
exacct_assemble_net_usage(ac_info_t *ac_net, void *ninfo,
    int (*callback)(ac_info_t *, void *, size_t, void *, size_t, size_t *),
    void *ubuf, size_t ubufsize, size_t *actual, int what)
{
	ulong_t mask[AC_MASK_SZ];
	ea_object_t *net_desc;
	ea_catalog_t record_type;
	void *buf;
	size_t bufsize;
	int ret;

	mutex_enter(&ac_net->ac_lock);
	if (ac_net->ac_state == AC_OFF) {
		mutex_exit(&ac_net->ac_lock);
		return (ENOTACTIVE);
	}
	bt_copy(&ac_net->ac_mask[0], mask, AC_MASK_SZ);
	mutex_exit(&ac_net->ac_lock);

	switch (what) {
	case EX_NET_LNDESC_REC:
		record_type = EXD_GROUP_NET_LINK_DESC;
		break;
	case EX_NET_LNSTAT_REC:
		record_type = EXD_GROUP_NET_LINK_STATS;
		break;
	case EX_NET_FLDESC_REC:
		record_type = EXD_GROUP_NET_FLOW_DESC;
		break;
	case EX_NET_FLSTAT_REC:
		record_type = EXD_GROUP_NET_FLOW_STATS;
		break;
	}

	net_desc = exacct_assemble_net_record(ninfo, mask, record_type, what);
	if (net_desc == NULL)
		return (0);

	/*
	 * Pack object into buffer and pass to callback.
	 */
	bufsize = ea_pack_object(net_desc, NULL, 0);
	buf = kmem_alloc(bufsize, KM_NOSLEEP);
	if (buf == NULL) {
		/*
		 * Don't leak the assembled record on allocation failure.
		 */
		ea_free_object(net_desc, EUP_ALLOC);
		return (ENOMEM);
	}

	(void) ea_pack_object(net_desc, buf, bufsize);

	ret = callback(ac_net, ubuf, ubufsize, buf, bufsize, actual);

	/*
	 * Free all previous allocations.
	 */
	kmem_free(buf, bufsize);
	ea_free_object(net_desc, EUP_ALLOC);
	return (ret);
}

int
exacct_commit_netinfo(void *arg, int what)
{
	size_t size;
	ulong_t mask[AC_MASK_SZ];
	struct exacct_globals *acg;
	ac_info_t *ac_net;

	if (exacct_zone_key == ZONE_KEY_UNINITIALIZED) {
		/*
		 * acctctl module not loaded.  Nothing to do.
		 */
		return (ENOTACTIVE);
	}

	/*
	 * Even though each zone nominally has its own flow accounting settings
	 * (ac_flow), these are only maintained by and for the global zone.
	 *
	 * If this were to change in the future, this function should grow a
	 * second zoneid (or zone) argument, and use the corresponding zone's
	 * settings rather than always using those of the global zone.
	 */
	acg = zone_getspecific(exacct_zone_key, global_zone);
	ac_net = &acg->ac_net;

	mutex_enter(&ac_net->ac_lock);
	if (ac_net->ac_state == AC_OFF) {
		mutex_exit(&ac_net->ac_lock);
		return (ENOTACTIVE);
	}
	bt_copy(&ac_net->ac_mask[0], mask, AC_MASK_SZ);
	mutex_exit(&ac_net->ac_lock);

	return (exacct_assemble_net_usage(ac_net, arg, exacct_commit_callback,
	    NULL, 0, &size, what));
}

static int
exacct_attach_flow_item(flow_usage_t *fu, ea_object_t *record, int res)
{
	int attached = 1;

	switch (res) {
	case AC_FLOW_SADDR:
		if (fu->fu_isv4) {
			(void) ea_attach_item(record, &fu->fu_saddr[3],
			    sizeof (uint32_t), EXT_UINT32 | EXD_FLOW_V4SADDR);
		} else {
			(void) ea_attach_item(record, &fu->fu_saddr,
			    sizeof (fu->fu_saddr), EXT_RAW |
			    EXD_FLOW_V6SADDR);
		}
		break;
	case AC_FLOW_DADDR:
		if (fu->fu_isv4) {
			(void) ea_attach_item(record, &fu->fu_daddr[3],
			    sizeof (uint32_t), EXT_UINT32 | EXD_FLOW_V4DADDR);
		} else {
			(void) ea_attach_item(record, &fu->fu_daddr,
			    sizeof (fu->fu_daddr), EXT_RAW |
			    EXD_FLOW_V6DADDR);
		}
		break;
	case AC_FLOW_SPORT:
		(void) ea_attach_item(record, &fu->fu_sport,
		    sizeof (uint16_t), EXT_UINT16 | EXD_FLOW_SPORT);
		break;
	case AC_FLOW_DPORT:
		(void) ea_attach_item(record, &fu->fu_dport,
		    sizeof (uint16_t), EXT_UINT16 | EXD_FLOW_DPORT);
		break;
	case AC_FLOW_PROTOCOL:
		(void) ea_attach_item(record, &fu->fu_protocol,
		    sizeof (uint8_t), EXT_UINT8 | EXD_FLOW_PROTOCOL);
		break;
	case AC_FLOW_DSFIELD:
		(void) ea_attach_item(record, &fu->fu_dsfield,
		    sizeof (uint8_t), EXT_UINT8 | EXD_FLOW_DSFIELD);
		break;
	case AC_FLOW_CTIME:
		(void) ea_attach_item(record, &fu->fu_ctime,
		    sizeof (uint64_t), EXT_UINT64 | EXD_FLOW_CTIME);
		break;
	case AC_FLOW_LSEEN:
		(void) ea_attach_item(record, &fu->fu_lseen,
		    sizeof (uint64_t), EXT_UINT64 | EXD_FLOW_LSEEN);
		break;
	case AC_FLOW_NBYTES:
		(void) ea_attach_item(record, &fu->fu_nbytes,
		    sizeof (uint64_t), EXT_UINT32 | EXD_FLOW_NBYTES);
		break;
	case AC_FLOW_NPKTS:
		(void) ea_attach_item(record, &fu->fu_npackets,
		    sizeof (uint64_t), EXT_UINT32 | EXD_FLOW_NPKTS);
		break;
	case AC_FLOW_PROJID:
		if (fu->fu_projid >= 0) {
			(void) ea_attach_item(record, &fu->fu_projid,
			    sizeof (uint32_t), EXT_UINT32 | EXD_FLOW_PROJID);
		}
		break;
	case AC_FLOW_UID:
		if (fu->fu_userid >= 0) {
			(void) ea_attach_item(record, &fu->fu_userid,
			    sizeof (uint32_t), EXT_UINT32 | EXD_FLOW_UID);
		}
		break;
	case AC_FLOW_ANAME:
		(void) ea_attach_item(record, fu->fu_aname,
		    strlen(fu->fu_aname) + 1, EXT_STRING | EXD_FLOW_ANAME);
		break;
	default:
		attached = 0;
	}
	return (attached);
}

static ea_object_t *
exacct_assemble_flow_record(flow_usage_t *fu, ulong_t *mask,
    ea_catalog_t record_type)
{
	int res, count;
	ea_object_t *record;

	/*
	 * Assemble usage values into group.
	 */
	record = ea_alloc_group(EXT_GROUP | EXC_DEFAULT | record_type);
	for (res = 1, count = 0; res <= AC_FLOW_MAX_RES; res++)
		if (BT_TEST(mask, res))
			count += exacct_attach_flow_item(fu, record, res);
	if (count == 0) {
		ea_free_object(record, EUP_ALLOC);
		record = NULL;
	}
	return (record);
}

int
exacct_assemble_flow_usage(ac_info_t *ac_flow, flow_usage_t *fu,
    int (*callback)(ac_info_t *, void *, size_t, void *, size_t, size_t *),
    void *ubuf, size_t ubufsize, size_t *actual)
{
	ulong_t mask[AC_MASK_SZ];
	ea_object_t *flow_usage;
	ea_catalog_t record_type;
	void *buf;
	size_t bufsize;
	int ret;

	mutex_enter(&ac_flow->ac_lock);
	if (ac_flow->ac_state == AC_OFF) {
		mutex_exit(&ac_flow->ac_lock);
		return (ENOTACTIVE);
	}
	bt_copy(&ac_flow->ac_mask[0], mask, AC_MASK_SZ);
	mutex_exit(&ac_flow->ac_lock);

	record_type = EXD_GROUP_FLOW;

	flow_usage = exacct_assemble_flow_record(fu, mask, record_type);
	if (flow_usage == NULL) {
		return (0);
	}

	/*
	 * Pack object into buffer and pass to callback.
	 */
	bufsize = ea_pack_object(flow_usage, NULL, 0);
	buf = kmem_alloc(bufsize, KM_NOSLEEP);
	if (buf == NULL) {
		/*
		 * Don't leak the assembled record on allocation failure.
		 */
		ea_free_object(flow_usage, EUP_ALLOC);
		return (ENOMEM);
	}

	(void) ea_pack_object(flow_usage, buf, bufsize);

	ret = callback(ac_flow, ubuf, ubufsize, buf, bufsize, actual);

	/*
	 * Free all previous allocations.
	 */
	kmem_free(buf, bufsize);
	ea_free_object(flow_usage, EUP_ALLOC);
	return (ret);
}

void
exacct_commit_flow(void *arg)
{
	flow_usage_t *f = (flow_usage_t *)arg;
	size_t size;
	ulong_t mask[AC_MASK_SZ];
	struct exacct_globals *acg;
	ac_info_t *ac_flow;

	if (exacct_zone_key == ZONE_KEY_UNINITIALIZED) {
		/*
		 * acctctl module not loaded.  Nothing to do.
		 */
		return;
	}

	/*
	 * Even though each zone nominally has its own flow accounting settings
	 * (ac_flow), these are only maintained by and for the global zone.
	 *
	 * If this were to change in the future, this function should grow a
	 * second zoneid (or zone) argument, and use the corresponding zone's
	 * settings rather than always using those of the global zone.
	 */
	acg = zone_getspecific(exacct_zone_key, global_zone);
	ac_flow = &acg->ac_flow;

	mutex_enter(&ac_flow->ac_lock);
	if (ac_flow->ac_state == AC_OFF) {
		mutex_exit(&ac_flow->ac_lock);
		return;
	}
	bt_copy(&ac_flow->ac_mask[0], mask, AC_MASK_SZ);
	mutex_exit(&ac_flow->ac_lock);

	(void) exacct_assemble_flow_usage(ac_flow, f, exacct_commit_callback,
	    NULL, 0, &size);
}

/*
 * int exacct_tag_task(ac_info_t *, task_t *, void *, size_t, int)
 *
 * Overview
 *   exacct_tag_task() provides the exacct record construction and writing
 *   support required by putacct(2) for task entities.
 *
 * Return values
 *   The result of the write operation is returned, unless the extended
 *   accounting facility is not active, in which case ENOTACTIVE is returned.
 *
 * Caller's context
 *   Suitable for KM_SLEEP allocations.
 */
int
exacct_tag_task(ac_info_t *ac_task, task_t *tk, void *ubuf, size_t ubufsz,
    int flags)
{
	int error = 0;
	void *buf;
	size_t bufsize;
	ea_catalog_t cat;
	ea_object_t *tag;

	mutex_enter(&ac_task->ac_lock);
	if (ac_task->ac_state == AC_OFF || ac_task->ac_vnode == NULL) {
		mutex_exit(&ac_task->ac_lock);
		return (ENOTACTIVE);
	}
	mutex_exit(&ac_task->ac_lock);

	tag = ea_alloc_group(EXT_GROUP | EXC_DEFAULT | EXD_GROUP_TASK_TAG);
	(void) ea_attach_item(tag, &tk->tk_tkid, 0,
	    EXT_UINT32 | EXC_DEFAULT | EXD_TASK_TASKID);
	(void) ea_attach_item(tag, tk->tk_zone->zone_nodename, 0,
	    EXT_STRING | EXC_DEFAULT | EXD_TASK_HOSTNAME);
	if (flags == EP_RAW)
		cat = EXT_RAW | EXC_DEFAULT | EXD_TASK_TAG;
	else
		cat = EXT_EXACCT_OBJECT | EXC_DEFAULT | EXD_TASK_TAG;
	(void) ea_attach_item(tag, ubuf, ubufsz, cat);

	bufsize = ea_pack_object(tag, NULL, 0);
	buf = kmem_alloc(bufsize, KM_SLEEP);
	(void) ea_pack_object(tag, buf, bufsize);
	error = exacct_vn_write(ac_task, buf, bufsize);
	kmem_free(buf, bufsize);
	ea_free_object(tag, EUP_ALLOC);
	return (error);
}

/*
 * int exacct_tag_proc(ac_info_t *, pid_t, taskid_t, void *, size_t, int,
 *	const char *)
 *
 * Overview
 *   exacct_tag_proc() provides the exacct record construction and writing
 *   support required by putacct(2) for processes.
 *
 * Return values
 *   The result of the write operation is returned, unless the extended
 *   accounting facility is not active, in which case ENOTACTIVE is returned.
 *
 * Caller's context
 *   Suitable for KM_SLEEP allocations.
 */
int
exacct_tag_proc(ac_info_t *ac_proc, pid_t pid, taskid_t tkid, void *ubuf,
    size_t ubufsz, int flags, const char *hostname)
{
	int error = 0;
	void *buf;
	size_t bufsize;
	ea_catalog_t cat;
	ea_object_t *tag;

	mutex_enter(&ac_proc->ac_lock);
	if (ac_proc->ac_state == AC_OFF || ac_proc->ac_vnode == NULL) {
		mutex_exit(&ac_proc->ac_lock);
		return (ENOTACTIVE);
	}
	mutex_exit(&ac_proc->ac_lock);

	tag = ea_alloc_group(EXT_GROUP | EXC_DEFAULT | EXD_GROUP_PROC_TAG);
	(void) ea_attach_item(tag, &pid, sizeof (uint32_t),
	    EXT_UINT32 | EXC_DEFAULT | EXD_PROC_PID);
	(void) ea_attach_item(tag, &tkid, 0,
	    EXT_UINT32 | EXC_DEFAULT | EXD_TASK_TASKID);
	(void) ea_attach_item(tag, (void *)hostname, 0,
	    EXT_STRING | EXC_DEFAULT | EXD_TASK_HOSTNAME);
	if (flags == EP_RAW)
		cat = EXT_RAW | EXC_DEFAULT | EXD_PROC_TAG;
	else
		cat = EXT_EXACCT_OBJECT | EXC_DEFAULT | EXD_PROC_TAG;
	(void) ea_attach_item(tag, ubuf, ubufsz, cat);

	bufsize = ea_pack_object(tag, NULL, 0);
	buf = kmem_alloc(bufsize, KM_SLEEP);
	(void) ea_pack_object(tag, buf, bufsize);
	error = exacct_vn_write(ac_proc, buf, bufsize);
	kmem_free(buf, bufsize);
	ea_free_object(tag, EUP_ALLOC);
	return (error);
}

/*
 * void exacct_init(void)
 *
 * Overview
 *   Initializes the extended accounting subsystem.
 *
 * Return values
 *   None.
 *
 * Caller's context
 *   Suitable for KM_SLEEP allocations.
 */
void
exacct_init()
{
	exacct_queue = system_taskq;
	exacct_object_cache = kmem_cache_create("exacct_object_cache",
	    sizeof (ea_object_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
}

/*
 * exacct_snapshot_proc_mstate() copies a process's microstate accounting data
 * and resource usage counters into a given task_usage_t.  It differs from
 * exacct_copy_proc_mstate() in that here a) we are copying to a task_usage_t,
 * b) p_lock will have been acquired earlier in the call path and c) we
 * are here including the process's user and system times.
 */
static void
exacct_snapshot_proc_mstate(proc_t *p, task_usage_t *tu)
{
	tu->tu_utime = mstate_aggr_state(p, LMS_USER);
	tu->tu_stime = mstate_aggr_state(p, LMS_SYSTEM);
	tu->tu_minflt = p->p_ru.minflt;
	tu->tu_majflt = p->p_ru.majflt;
	tu->tu_sndmsg = p->p_ru.msgsnd;
	tu->tu_rcvmsg = p->p_ru.msgrcv;
	tu->tu_ioch = p->p_ru.ioch;
	tu->tu_iblk = p->p_ru.inblock;
	tu->tu_oblk = p->p_ru.oublock;
	tu->tu_vcsw = p->p_ru.nvcsw;
	tu->tu_icsw = p->p_ru.nivcsw;
	tu->tu_nsig = p->p_ru.nsignals;
	tu->tu_nswp = p->p_ru.nswap;
	tu->tu_nscl = p->p_ru.sysc;
}

/*
 * void exacct_move_mstate(proc_t *, task_t *, task_t *)
 *
 * Overview
 *   exacct_move_mstate() is called by task_change() and accounts for
 *   a process's resource usage when it is moved from one task to another.
 *
 *   The process's usage at this point is recorded in the new task so
 *   that it can be excluded from the calculation of resources consumed
 *   by that task.
 *
 *   The resource usage inherited by the new task is also added to the
 *   aggregate maintained by the old task for processes that have exited.
 *
 * Return values
 *   None.
 *
 * Caller's context
 *   pidlock and p_lock held across exacct_move_mstate().
 */
void
exacct_move_mstate(proc_t *p, task_t *oldtk, task_t *newtk)
{
	task_usage_t tu;

	/* Take a snapshot of this process's mstate and RU counters */
	exacct_snapshot_proc_mstate(p, &tu);

	/*
	 * Use the snapshot to increment the aggregate usage of the old
	 * task, and the inherited usage of the new one.
	 */
	mutex_enter(&oldtk->tk_usage_lock);
	exacct_add_task_mstate(oldtk->tk_usage, &tu);
	mutex_exit(&oldtk->tk_usage_lock);
	mutex_enter(&newtk->tk_usage_lock);
	exacct_add_task_mstate(newtk->tk_inherited, &tu);
	mutex_exit(&newtk->tk_usage_lock);
}