/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/exacct.h>
#include <sys/exacct_catalog.h>
#include <sys/disp.h>
#include <sys/task.h>
#include <sys/proc.h>
#include <sys/cmn_err.h>
#include <sys/kmem.h>
#include <sys/project.h>
#include <sys/systm.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/acctctl.h>
#include <sys/time.h>
#include <sys/utsname.h>
#include <sys/session.h>
#include <sys/sysmacros.h>
#include <sys/bitmap.h>
#include <sys/msacct.h>

/*
 * exacct usage and recording routines
 *
 * wracct(2), getacct(2), and the records written at process or task
 * termination are constructed using the exacct_assemble_[task,proc]_usage()
 * functions, which take a callback that takes the appropriate action on
 * the packed exacct record for the task or process.  For the process-related
 * actions, we partition the routines such that the data collecting component
 * can be performed while holding p_lock, and all sleeping or blocking
 * operations can be performed without acquiring p_lock.
 *
 * putacct(2), which allows an application to construct a customized record
 * associated with an existing process or task, has its own entry points:
 * exacct_tag_task() and exacct_tag_proc().
 */

taskq_t *exacct_queue;
kmem_cache_t *exacct_object_cache;

zone_key_t exacct_zone_key = ZONE_KEY_UNINITIALIZED;

static const uint32_t exacct_version = EXACCT_VERSION;
static const char exacct_header[] = "exacct";
static const char exacct_creator[] = "SunOS";
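
/*
 * The following three routines are kernel-side convenience wrappers for
 * exacct object construction: they allocate exacct items and groups from
 * exacct_object_cache and attach items to an existing group, covering the
 * common allocation pattern used by the record assembly routines below.
 */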
ea_object_t *
ea_alloc_item(ea_catalog_t catalog, void *buf, size_t bufsz)
{
	ea_object_t *item;

	item = kmem_cache_alloc(exacct_object_cache, KM_SLEEP);
	bzero(item, sizeof (ea_object_t));
	(void) ea_set_item(item, catalog, buf, bufsz);
	return (item);
}

ea_object_t *
ea_alloc_group(ea_catalog_t catalog)
{
	ea_object_t *group;

	group = kmem_cache_alloc(exacct_object_cache, KM_SLEEP);
	bzero(group, sizeof (ea_object_t));
	(void) ea_set_group(group, catalog);
	return (group);
}

ea_object_t *
ea_attach_item(ea_object_t *grp, void *buf, size_t bufsz, ea_catalog_t catalog)
{
	ea_object_t *item;

	item = ea_alloc_item(catalog, buf, bufsz);
	(void) ea_attach_to_group(grp, item);
	return (item);
}

/*
 * exacct_add_task_mstate() and exacct_sub_task_mstate() add and subtract the
 * microstate accounting data and resource usage counters supplied in one
 * task_usage_t (delta) to and from those accumulated in another (tu).  These
 * functions do not operate on *all* members of a task_usage_t: for some
 * (e.g. tu_anctaskid) it would not make sense.
 */
static void
exacct_add_task_mstate(task_usage_t *tu, task_usage_t *delta)
{
	tu->tu_utime += delta->tu_utime;
	tu->tu_stime += delta->tu_stime;
	tu->tu_minflt += delta->tu_minflt;
	tu->tu_majflt += delta->tu_majflt;
	tu->tu_sndmsg += delta->tu_sndmsg;
	tu->tu_rcvmsg += delta->tu_rcvmsg;
	tu->tu_ioch += delta->tu_ioch;
	tu->tu_iblk += delta->tu_iblk;
	tu->tu_oblk += delta->tu_oblk;
	tu->tu_vcsw += delta->tu_vcsw;
	tu->tu_icsw += delta->tu_icsw;
	tu->tu_nsig += delta->tu_nsig;
	tu->tu_nswp += delta->tu_nswp;
	tu->tu_nscl += delta->tu_nscl;
}

/*
 * See the comments for exacct_add_task_mstate(), above.
 */
static void
exacct_sub_task_mstate(task_usage_t *tu, task_usage_t *delta)
{
	tu->tu_utime -= delta->tu_utime;
	tu->tu_stime -= delta->tu_stime;
	tu->tu_minflt -= delta->tu_minflt;
	tu->tu_majflt -= delta->tu_majflt;
	tu->tu_sndmsg -= delta->tu_sndmsg;
	tu->tu_rcvmsg -= delta->tu_rcvmsg;
	tu->tu_ioch -= delta->tu_ioch;
	tu->tu_iblk -= delta->tu_iblk;
	tu->tu_oblk -= delta->tu_oblk;
	tu->tu_vcsw -= delta->tu_vcsw;
	tu->tu_icsw -= delta->tu_icsw;
	tu->tu_nsig -= delta->tu_nsig;
	tu->tu_nswp -= delta->tu_nswp;
	tu->tu_nscl -= delta->tu_nscl;
}

/*
 * Wrapper for vn_rdwr() used by exacct_vn_write() and exacct_write_header()
 * to write to the accounting file without corrupting it in case of an I/O or
 * filesystem error.
 */
static int
exacct_vn_write_impl(ac_info_t *info, void *buf, ssize_t bufsize)
{
	int error;
	ssize_t resid;
	struct vattr va;

	ASSERT(info != NULL);
	ASSERT(info->ac_vnode != NULL);
	ASSERT(MUTEX_HELD(&info->ac_lock));

	/*
	 * Save the size.  If vn_rdwr fails, reset the size to avoid corrupting
	 * the present accounting file.
	 */
	va.va_mask = AT_SIZE;
	error = VOP_GETATTR(info->ac_vnode, &va, 0, kcred, NULL);
	if (error == 0) {
		error = vn_rdwr(UIO_WRITE, info->ac_vnode, (caddr_t)buf,
		    bufsize, 0LL, UIO_SYSSPACE, FAPPEND, (rlim64_t)MAXOFFSET_T,
		    kcred, &resid);
		if (error) {
			(void) VOP_SETATTR(info->ac_vnode, &va, 0, kcred, NULL);
		} else if (resid != 0) {
			(void) VOP_SETATTR(info->ac_vnode, &va, 0, kcred, NULL);
			error = ENOSPC;
		}
	}
	return (error);
}

/*
 * exacct_vn_write() safely writes to an accounting file.  acctctl() prevents
 * the two accounting vnodes from being equal, and the appropriate ac_lock is
 * held across the call, so we're single threaded through this code for each
 * file.
 */
static int
exacct_vn_write(ac_info_t *info, void *buf, ssize_t bufsize)
{
	int error;

	if (info == NULL)
		return (0);

	mutex_enter(&info->ac_lock);

	/*
	 * Don't do anything unless accounting file is set.
	 */
	if (info->ac_vnode == NULL) {
		mutex_exit(&info->ac_lock);
		return (0);
	}
	error = exacct_vn_write_impl(info, buf, bufsize);
	mutex_exit(&info->ac_lock);

	return (error);
}

/*
 * void *exacct_create_header(size_t *)
 *
 * Overview
 *   exacct_create_header() constructs an exacct file header identifying the
 *   accounting file as the output of the kernel.  exacct_create_header() and
 *   the static write_header() and verify_header() routines in libexacct must
 *   remain synchronized.
 *
 * Return values
 *   A pointer to a packed exacct buffer containing the appropriate header is
 *   returned; the size of the buffer is placed in the location indicated by
 *   sizep.
 *
 * Caller's context
 *   Suitable for KM_SLEEP allocations.
 */
void *
exacct_create_header(size_t *sizep)
{
	ea_object_t *hdr_grp;
	uint32_t bskip;
	void *buf;
	size_t bufsize;

	hdr_grp = ea_alloc_group(EXT_GROUP | EXC_DEFAULT | EXD_GROUP_HEADER);
	(void) ea_attach_item(hdr_grp, (void *)&exacct_version, 0,
	    EXT_UINT32 | EXC_DEFAULT | EXD_VERSION);
	(void) ea_attach_item(hdr_grp, (void *)exacct_header, 0,
	    EXT_STRING | EXC_DEFAULT | EXD_FILETYPE);
	(void) ea_attach_item(hdr_grp, (void *)exacct_creator, 0,
	    EXT_STRING | EXC_DEFAULT | EXD_CREATOR);
	(void) ea_attach_item(hdr_grp, uts_nodename(), 0,
	    EXT_STRING | EXC_DEFAULT | EXD_HOSTNAME);

	bufsize = ea_pack_object(hdr_grp, NULL, 0);
	buf = kmem_alloc(bufsize, KM_SLEEP);
	(void) ea_pack_object(hdr_grp, buf, bufsize);
	ea_free_object(hdr_grp, EUP_ALLOC);

	/*
	 * To prevent reading the header when reading the file backwards,
	 * set the large backskip of the header group to 0 (last 4 bytes).
	 */
	bskip = 0;
	exacct_order32(&bskip);
	bcopy(&bskip, (char *)buf + bufsize - sizeof (bskip),
	    sizeof (bskip));

	*sizep = bufsize;
	return (buf);
}

/*
 * int exacct_write_header(ac_info_t *, void *, size_t)
 *
 * Overview
 *   exacct_write_header() writes the given header buffer to the indicated
 *   vnode.
 *
 * Return values
 *   The result of the write operation is returned.
 *
 * Caller's context
 *   Caller must hold the ac_lock of the appropriate accounting file
 *   information block (ac_info_t).
 */
int
exacct_write_header(ac_info_t *info, void *hdr, size_t hdrsize)
{
	if (info != NULL && info->ac_vnode != NULL)
		return (exacct_vn_write_impl(info, hdr, hdrsize));

	return (0);
}
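
/*
 * exacct_get_interval_task_usage() computes the interval usage for a task:
 * given the task's current aggregate usage in "tu", it subtracts the snapshot
 * saved when the previous interval record was taken, so that "tu" reports
 * only the usage accrued since then, and then saves the new snapshot for the
 * next interval.  Snapshots are kept separately for requests from the global
 * zone (tk_prevusage) and from a non-global zone (tk_zoneusage).  If no
 * previous snapshot exists, the caller's preallocated buffer (*tu_buf) is
 * consumed to hold one, and *tu_buf is set to NULL so the caller won't free
 * it.  Called with tk_usage_lock held.
 */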
static void
exacct_get_interval_task_usage(task_t *tk, task_usage_t *tu,
    task_usage_t **tu_buf)
{
	task_usage_t *oldtu, *newtu;
	task_usage_t **prevusage;

	ASSERT(MUTEX_HELD(&tk->tk_usage_lock));
	if (getzoneid() != GLOBAL_ZONEID) {
		prevusage = &tk->tk_zoneusage;
	} else {
		prevusage = &tk->tk_prevusage;
	}
	if ((oldtu = *prevusage) != NULL) {
		/*
		 * We have accounting information saved from the previous
		 * interval record.
		 */
		newtu = *tu_buf;
		bcopy(tu, newtu, sizeof (task_usage_t));
		tu->tu_minflt -= oldtu->tu_minflt;
		tu->tu_majflt -= oldtu->tu_majflt;
		tu->tu_sndmsg -= oldtu->tu_sndmsg;
		tu->tu_rcvmsg -= oldtu->tu_rcvmsg;
		tu->tu_ioch -= oldtu->tu_ioch;
		tu->tu_iblk -= oldtu->tu_iblk;
		tu->tu_oblk -= oldtu->tu_oblk;
		tu->tu_vcsw -= oldtu->tu_vcsw;
		tu->tu_icsw -= oldtu->tu_icsw;
		tu->tu_nsig -= oldtu->tu_nsig;
		tu->tu_nswp -= oldtu->tu_nswp;
		tu->tu_nscl -= oldtu->tu_nscl;
		tu->tu_utime -= oldtu->tu_utime;
		tu->tu_stime -= oldtu->tu_stime;

		tu->tu_startsec = oldtu->tu_finishsec;
		tu->tu_startnsec = oldtu->tu_finishnsec;
		/*
		 * Copy the data from our temporary storage to the task's
		 * previous interval usage structure for future reference.
		 */
		bcopy(newtu, oldtu, sizeof (task_usage_t));
	} else {
		/*
		 * Store current statistics in the task's previous interval
		 * usage structure for future reference.
		 */
		*prevusage = *tu_buf;
		bcopy(tu, *prevusage, sizeof (task_usage_t));
		*tu_buf = NULL;
	}
}

static void
exacct_snapshot_task_usage(task_t *tk, task_usage_t *tu)
{
	timestruc_t ts;
	proc_t *p;

	ASSERT(MUTEX_HELD(&pidlock));

	if ((p = tk->tk_memb_list) == NULL)
		return;

	/*
	 * exacct_snapshot_task_usage() provides an approximate snapshot of the
	 * usage of the potentially many members of the task.  Since we don't
	 * guarantee exactness, we don't acquire the p_lock of any of the
	 * member processes.
	 */
	do {
		mutex_enter(&p->p_lock);
		tu->tu_utime += mstate_aggr_state(p, LMS_USER);
		tu->tu_stime += mstate_aggr_state(p, LMS_SYSTEM);
		mutex_exit(&p->p_lock);
		tu->tu_minflt += p->p_ru.minflt;
		tu->tu_majflt += p->p_ru.majflt;
		tu->tu_sndmsg += p->p_ru.msgsnd;
		tu->tu_rcvmsg += p->p_ru.msgrcv;
		tu->tu_ioch += p->p_ru.ioch;
		tu->tu_iblk += p->p_ru.inblock;
		tu->tu_oblk += p->p_ru.oublock;
		tu->tu_vcsw += p->p_ru.nvcsw;
		tu->tu_icsw += p->p_ru.nivcsw;
		tu->tu_nsig += p->p_ru.nsignals;
		tu->tu_nswp += p->p_ru.nswap;
		tu->tu_nscl += p->p_ru.sysc;
	} while ((p = p->p_tasknext) != tk->tk_memb_list);

	/*
	 * The resource usage accounted for so far will include that
	 * contributed by the task's first process.  If this process
	 * came from another task, then its accumulated resource usage
	 * will include a contribution from work performed there.
	 * We must therefore subtract any resource usage that was
	 * inherited with the first process.
	 */
	exacct_sub_task_mstate(tu, tk->tk_inherited);

	gethrestime(&ts);
	tu->tu_finishsec = (uint64_t)(ulong_t)ts.tv_sec;
	tu->tu_finishnsec = (uint64_t)(ulong_t)ts.tv_nsec;
}

/*
 * void exacct_update_task_mstate(proc_t *)
 *
 * Overview
 *   exacct_update_task_mstate() updates the task usage; it is intended
 *   to be called from proc_exit().
 *
 * Return values
 *   None.
 *
 * Caller's context
 *   p_lock must be held at entry.
 */
void
exacct_update_task_mstate(proc_t *p)
{
	task_usage_t *tu;

	mutex_enter(&p->p_task->tk_usage_lock);
	tu = p->p_task->tk_usage;
	tu->tu_utime += mstate_aggr_state(p, LMS_USER);
	tu->tu_stime += mstate_aggr_state(p, LMS_SYSTEM);
	tu->tu_minflt += p->p_ru.minflt;
	tu->tu_majflt += p->p_ru.majflt;
	tu->tu_sndmsg += p->p_ru.msgsnd;
	tu->tu_rcvmsg += p->p_ru.msgrcv;
	tu->tu_ioch += p->p_ru.ioch;
	tu->tu_iblk += p->p_ru.inblock;
	tu->tu_oblk += p->p_ru.oublock;
	tu->tu_vcsw += p->p_ru.nvcsw;
	tu->tu_icsw += p->p_ru.nivcsw;
	tu->tu_nsig += p->p_ru.nsignals;
	tu->tu_nswp += p->p_ru.nswap;
	tu->tu_nscl += p->p_ru.sysc;
	mutex_exit(&p->p_task->tk_usage_lock);
}
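
/*
 * exacct_calculate_task_usage() fills in "tu" with the task usage appropriate
 * to the type of record being written: for EW_PARTIAL, the task's accumulated
 * usage plus a snapshot of its current members; for EW_INTERVAL, the same
 * minus the usage already reported in the previous interval record; for
 * EW_FINAL, the accumulated usage minus anything inherited from other tasks,
 * stamped with the task's finish time.
 */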
static void
exacct_calculate_task_usage(task_t *tk, task_usage_t *tu, int flag)
{
	timestruc_t ts;
	task_usage_t *tu_buf;

	switch (flag) {
	case EW_PARTIAL:
		/*
		 * For partial records we must report the sum of the current
		 * accounting statistics and the previously accumulated
		 * statistics.
		 */
		mutex_enter(&pidlock);
		mutex_enter(&tk->tk_usage_lock);

		(void) bcopy(tk->tk_usage, tu, sizeof (task_usage_t));
		exacct_snapshot_task_usage(tk, tu);

		mutex_exit(&tk->tk_usage_lock);
		mutex_exit(&pidlock);
		break;
	case EW_INTERVAL:
		/*
		 * We need to allocate a spare task_usage_t buffer before
		 * grabbing pidlock because we might need it later in
		 * exacct_get_interval_task_usage().
		 */
		tu_buf = kmem_zalloc(sizeof (task_usage_t), KM_SLEEP);
		mutex_enter(&pidlock);
		mutex_enter(&tk->tk_usage_lock);

		/*
		 * For interval records, we deduct the previous microstate
		 * accounting data and cpu usage times from previously saved
		 * results and update the previous task usage structure.
		 */
		(void) bcopy(tk->tk_usage, tu, sizeof (task_usage_t));
		exacct_snapshot_task_usage(tk, tu);
		exacct_get_interval_task_usage(tk, tu, &tu_buf);

		mutex_exit(&tk->tk_usage_lock);
		mutex_exit(&pidlock);

		if (tu_buf != NULL)
			kmem_free(tu_buf, sizeof (task_usage_t));
		break;
	case EW_FINAL:
		/*
		 * For final records, we deduct, from the task's current
		 * usage, any usage that was inherited with the arrival
		 * of a process from a previous task.  We then record
		 * the task's finish time.
		 */
		mutex_enter(&tk->tk_usage_lock);
		(void) bcopy(tk->tk_usage, tu, sizeof (task_usage_t));
		exacct_sub_task_mstate(tu, tk->tk_inherited);
		mutex_exit(&tk->tk_usage_lock);

		gethrestime(&ts);
		tu->tu_finishsec = (uint64_t)(ulong_t)ts.tv_sec;
		tu->tu_finishnsec = (uint64_t)(ulong_t)ts.tv_nsec;

		break;
	}
}
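
/*
 * exacct_attach_task_item() attaches the exacct item (or items) corresponding
 * to a single AC_TASK_* resource to the record group, drawing its values from
 * the task and the supplied task_usage_t.  It returns 1 if an item was
 * attached and 0 if the resource is not recognized.
 */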
static int
exacct_attach_task_item(task_t *tk, task_usage_t *tu, ea_object_t *record,
    int res)
{
	int attached = 1;

	switch (res) {
	case AC_TASK_TASKID:
		(void) ea_attach_item(record, &tk->tk_tkid,
		    sizeof (uint32_t), EXT_UINT32 | EXD_TASK_TASKID);
		break;
	case AC_TASK_PROJID:
		(void) ea_attach_item(record, &tk->tk_proj->kpj_id,
		    sizeof (uint32_t), EXT_UINT32 | EXD_TASK_PROJID);
		break;
	case AC_TASK_CPU: {
		timestruc_t ts;
		uint64_t ui;

		hrt2ts(tu->tu_stime, &ts);
		ui = ts.tv_sec;
		(void) ea_attach_item(record, &ui, sizeof (uint64_t),
		    EXT_UINT64 | EXD_TASK_CPU_SYS_SEC);
		ui = ts.tv_nsec;
		(void) ea_attach_item(record, &ui, sizeof (uint64_t),
		    EXT_UINT64 | EXD_TASK_CPU_SYS_NSEC);

		hrt2ts(tu->tu_utime, &ts);
		ui = ts.tv_sec;
		(void) ea_attach_item(record, &ui, sizeof (uint64_t),
		    EXT_UINT64 | EXD_TASK_CPU_USER_SEC);
		ui = ts.tv_nsec;
		(void) ea_attach_item(record, &ui, sizeof (uint64_t),
		    EXT_UINT64 | EXD_TASK_CPU_USER_NSEC);
		}
		break;
	case AC_TASK_TIME:
		(void) ea_attach_item(record, &tu->tu_startsec,
		    sizeof (uint64_t), EXT_UINT64 | EXD_TASK_START_SEC);
		(void) ea_attach_item(record, &tu->tu_startnsec,
		    sizeof (uint64_t), EXT_UINT64 | EXD_TASK_START_NSEC);
		(void) ea_attach_item(record, &tu->tu_finishsec,
		    sizeof (uint64_t), EXT_UINT64 | EXD_TASK_FINISH_SEC);
		(void) ea_attach_item(record, &tu->tu_finishnsec,
		    sizeof (uint64_t), EXT_UINT64 | EXD_TASK_FINISH_NSEC);
		break;
	case AC_TASK_HOSTNAME:
		(void) ea_attach_item(record, tk->tk_zone->zone_nodename,
		    strlen(tk->tk_zone->zone_nodename) + 1,
		    EXT_STRING | EXD_TASK_HOSTNAME);
		break;
	case AC_TASK_MICROSTATE:
		(void) ea_attach_item(record, &tu->tu_majflt,
		    sizeof (uint64_t), EXT_UINT64 | EXD_TASK_FAULTS_MAJOR);
		(void) ea_attach_item(record, &tu->tu_minflt,
		    sizeof (uint64_t), EXT_UINT64 | EXD_TASK_FAULTS_MINOR);
		(void) ea_attach_item(record, &tu->tu_sndmsg,
		    sizeof (uint64_t), EXT_UINT64 | EXD_TASK_MESSAGES_SND);
		(void) ea_attach_item(record, &tu->tu_rcvmsg,
		    sizeof (uint64_t), EXT_UINT64 | EXD_TASK_MESSAGES_RCV);
		(void) ea_attach_item(record, &tu->tu_iblk,
		    sizeof (uint64_t), EXT_UINT64 | EXD_TASK_BLOCKS_IN);
		(void) ea_attach_item(record, &tu->tu_oblk,
		    sizeof (uint64_t), EXT_UINT64 | EXD_TASK_BLOCKS_OUT);
		(void) ea_attach_item(record, &tu->tu_ioch,
		    sizeof (uint64_t), EXT_UINT64 | EXD_TASK_CHARS_RDWR);
		(void) ea_attach_item(record, &tu->tu_vcsw,
		    sizeof (uint64_t), EXT_UINT64 | EXD_TASK_CONTEXT_VOL);
		(void) ea_attach_item(record, &tu->tu_icsw,
		    sizeof (uint64_t), EXT_UINT64 | EXD_TASK_CONTEXT_INV);
		(void) ea_attach_item(record, &tu->tu_nsig,
		    sizeof (uint64_t), EXT_UINT64 | EXD_TASK_SIGNALS);
		(void) ea_attach_item(record, &tu->tu_nswp,
		    sizeof (uint64_t), EXT_UINT64 | EXD_TASK_SWAPS);
		(void) ea_attach_item(record, &tu->tu_nscl,
		    sizeof (uint64_t), EXT_UINT64 | EXD_TASK_SYSCALLS);
		break;
	case AC_TASK_ANCTASKID:
		(void) ea_attach_item(record, &tu->tu_anctaskid,
		    sizeof (uint32_t), EXT_UINT32 | EXD_TASK_ANCTASKID);
		break;
	case AC_TASK_ZONENAME:
		(void) ea_attach_item(record, tk->tk_zone->zone_name,
		    strlen(tk->tk_zone->zone_name) + 1,
		    EXT_STRING | EXD_TASK_ZONENAME);
		break;
	default:
		attached = 0;
	}
	return (attached);
}

static ea_object_t *
exacct_assemble_task_record(task_t *tk, task_usage_t *tu, ulong_t *mask,
    ea_catalog_t record_type)
{
	int res, count;
	ea_object_t *record;

	/*
	 * Assemble usage values into group.
	 */
	record = ea_alloc_group(EXT_GROUP | EXC_DEFAULT | record_type);
	for (res = 1, count = 0; res <= AC_TASK_MAX_RES; res++)
		if (BT_TEST(mask, res))
			count += exacct_attach_task_item(tk, tu, record, res);
	if (count == 0) {
		ea_free_object(record, EUP_ALLOC);
		record = NULL;
	}
	return (record);
}

/*
 * int exacct_assemble_task_usage(task_t *, int (*)(void *, size_t, void *,
 *	size_t, size_t *), void *, size_t, size_t *, int)
 *
 * Overview
 *   exacct_assemble_task_usage() builds the packed exacct buffer for the
 *   indicated task, executes the given callback function, and frees the
 *   packed buffer.
 *
 * Return values
 *   Returns 0 on success; otherwise the appropriate error code is returned.
 *
 * Caller's context
 *   Suitable for KM_SLEEP allocations.
 */
int
exacct_assemble_task_usage(ac_info_t *ac_task, task_t *tk,
    int (*callback)(ac_info_t *, void *, size_t, void *, size_t, size_t *),
    void *ubuf, size_t ubufsize, size_t *actual, int flag)
{
	ulong_t mask[AC_MASK_SZ];
	ea_object_t *task_record;
	ea_catalog_t record_type;
	task_usage_t *tu;
	void *buf;
	size_t bufsize;
	int ret;

	ASSERT(flag == EW_FINAL || flag == EW_PARTIAL || flag == EW_INTERVAL);

	mutex_enter(&ac_task->ac_lock);
	if (ac_task->ac_state == AC_OFF) {
		mutex_exit(&ac_task->ac_lock);
		return (ENOTACTIVE);
	}
	bt_copy(ac_task->ac_mask, mask, AC_MASK_SZ);
	mutex_exit(&ac_task->ac_lock);

	switch (flag) {
	case EW_FINAL:
		record_type = EXD_GROUP_TASK;
		break;
	case EW_PARTIAL:
		record_type = EXD_GROUP_TASK_PARTIAL;
		break;
	case EW_INTERVAL:
		record_type = EXD_GROUP_TASK_INTERVAL;
		break;
	}

	/*
	 * Calculate task usage and assemble it into the task record.
	 */
	tu = kmem_zalloc(sizeof (task_usage_t), KM_SLEEP);
	exacct_calculate_task_usage(tk, tu, flag);
	task_record = exacct_assemble_task_record(tk, tu, mask, record_type);
	if (task_record == NULL) {
		/*
		 * The current configuration of the accounting system has
		 * resulted in records with no data; accordingly, we don't
		 * write these, but we return success.
		 */
		kmem_free(tu, sizeof (task_usage_t));
		return (0);
	}

	/*
	 * Pack object into buffer and run callback on it.
	 */
	bufsize = ea_pack_object(task_record, NULL, 0);
	buf = kmem_alloc(bufsize, KM_SLEEP);
	(void) ea_pack_object(task_record, buf, bufsize);
	ret = callback(ac_task, ubuf, ubufsize, buf, bufsize, actual);

	/*
	 * Free all previously allocated structures.
	 */
	kmem_free(buf, bufsize);
	ea_free_object(task_record, EUP_ALLOC);
	kmem_free(tu, sizeof (task_usage_t));
	return (ret);
}

/*
 * void exacct_commit_task(void *)
 *
 * Overview
 *   exacct_commit_task() calculates the final usage for a task and, if task
 *   accounting is active, writes a task record.  exacct_commit_task() is
 *   intended to be called from a task queue (taskq_t).
 *
 * Return values
 *   None.
 *
 * Caller's context
 *   Suitable for KM_SLEEP allocations.
 */
void
exacct_commit_task(void *arg)
{
	task_t *tk = (task_t *)arg;
	size_t size;
	zone_t *zone = tk->tk_zone;
	struct exacct_globals *acg;

	ASSERT(tk != task0p);
	ASSERT(tk->tk_memb_list == NULL);

	/*
	 * Don't do any extra work if the acctctl module isn't loaded.
	 */
	if (exacct_zone_key != ZONE_KEY_UNINITIALIZED) {
		acg = zone_getspecific(exacct_zone_key, zone);
		(void) exacct_assemble_task_usage(&acg->ac_task, tk,
		    exacct_commit_callback, NULL, 0, &size, EW_FINAL);
		if (tk->tk_zone != global_zone) {
			acg = zone_getspecific(exacct_zone_key, global_zone);
			(void) exacct_assemble_task_usage(&acg->ac_task, tk,
			    exacct_commit_callback, NULL, 0, &size, EW_FINAL);
		}
	}
	/*
	 * Release the associated project and finalize the task.
	 */
	task_end(tk);
}
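
/*
 * exacct_attach_proc_item() attaches the exacct item (or items) corresponding
 * to a single AC_PROC_* resource to the record group, drawing its values from
 * the supplied proc_usage_t.  It returns 1 if an item was attached and 0 if
 * the resource is not recognized.
 */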
static int
exacct_attach_proc_item(proc_usage_t *pu, ea_object_t *record, int res)
{
	int attached = 1;

	switch (res) {
	case AC_PROC_PID:
		(void) ea_attach_item(record, &pu->pu_pid,
		    sizeof (uint32_t), EXT_UINT32 | EXD_PROC_PID);
		break;
	case AC_PROC_UID:
		(void) ea_attach_item(record, &pu->pu_ruid,
		    sizeof (uint32_t), EXT_UINT32 | EXD_PROC_UID);
		break;
	case AC_PROC_FLAG:
		(void) ea_attach_item(record, &pu->pu_acflag,
		    sizeof (uint32_t), EXT_UINT32 | EXD_PROC_ACCT_FLAGS);
		break;
	case AC_PROC_GID:
		(void) ea_attach_item(record, &pu->pu_rgid,
		    sizeof (uint32_t), EXT_UINT32 | EXD_PROC_GID);
		break;
	case AC_PROC_PROJID:
		(void) ea_attach_item(record, &pu->pu_projid,
		    sizeof (uint32_t), EXT_UINT32 | EXD_PROC_PROJID);
		break;
	case AC_PROC_TASKID:
		(void) ea_attach_item(record, &pu->pu_taskid,
		    sizeof (uint32_t), EXT_UINT32 | EXD_PROC_TASKID);
		break;
	case AC_PROC_CPU:
		(void) ea_attach_item(record, &pu->pu_utimesec,
		    sizeof (uint64_t), EXT_UINT64 | EXD_PROC_CPU_USER_SEC);
		(void) ea_attach_item(record, &pu->pu_utimensec,
		    sizeof (uint64_t), EXT_UINT64 | EXD_PROC_CPU_USER_NSEC);
		(void) ea_attach_item(record, &pu->pu_stimesec,
		    sizeof (uint64_t), EXT_UINT64 | EXD_PROC_CPU_SYS_SEC);
		(void) ea_attach_item(record, &pu->pu_stimensec,
		    sizeof (uint64_t), EXT_UINT64 | EXD_PROC_CPU_SYS_NSEC);
		break;
	case AC_PROC_TIME:
		(void) ea_attach_item(record, &pu->pu_startsec,
		    sizeof (uint64_t), EXT_UINT64 | EXD_PROC_START_SEC);
		(void) ea_attach_item(record, &pu->pu_startnsec,
		    sizeof (uint64_t), EXT_UINT64 | EXD_PROC_START_NSEC);
		(void) ea_attach_item(record, &pu->pu_finishsec,
		    sizeof (uint64_t), EXT_UINT64 | EXD_PROC_FINISH_SEC);
		(void) ea_attach_item(record, &pu->pu_finishnsec,
		    sizeof (uint64_t), EXT_UINT64 | EXD_PROC_FINISH_NSEC);
		break;
	case AC_PROC_COMMAND:
		(void) ea_attach_item(record, pu->pu_command,
		    strlen(pu->pu_command) + 1, EXT_STRING | EXD_PROC_COMMAND);
		break;
	case AC_PROC_HOSTNAME:
		(void) ea_attach_item(record, pu->pu_nodename,
		    strlen(pu->pu_nodename) + 1,
		    EXT_STRING | EXD_PROC_HOSTNAME);
		break;
	case AC_PROC_TTY:
		(void) ea_attach_item(record, &pu->pu_major,
		    sizeof (uint32_t), EXT_UINT32 | EXD_PROC_TTY_MAJOR);
		(void) ea_attach_item(record, &pu->pu_minor,
		    sizeof (uint32_t), EXT_UINT32 | EXD_PROC_TTY_MINOR);
		break;
	case AC_PROC_MICROSTATE:
		(void) ea_attach_item(record, &pu->pu_majflt,
		    sizeof (uint64_t), EXT_UINT64 | EXD_PROC_FAULTS_MAJOR);
		(void) ea_attach_item(record, &pu->pu_minflt,
		    sizeof (uint64_t), EXT_UINT64 | EXD_PROC_FAULTS_MINOR);
		(void) ea_attach_item(record, &pu->pu_sndmsg,
		    sizeof (uint64_t), EXT_UINT64 | EXD_PROC_MESSAGES_SND);
		(void) ea_attach_item(record, &pu->pu_rcvmsg,
		    sizeof (uint64_t), EXT_UINT64 | EXD_PROC_MESSAGES_RCV);
		(void) ea_attach_item(record, &pu->pu_iblk,
		    sizeof (uint64_t), EXT_UINT64 | EXD_PROC_BLOCKS_IN);
		(void) ea_attach_item(record, &pu->pu_oblk,
		    sizeof (uint64_t), EXT_UINT64 | EXD_PROC_BLOCKS_OUT);
		(void) ea_attach_item(record, &pu->pu_ioch,
		    sizeof (uint64_t), EXT_UINT64 | EXD_PROC_CHARS_RDWR);
		(void) ea_attach_item(record, &pu->pu_vcsw,
		    sizeof (uint64_t), EXT_UINT64 | EXD_PROC_CONTEXT_VOL);
		(void) ea_attach_item(record, &pu->pu_icsw,
		    sizeof (uint64_t), EXT_UINT64 | EXD_PROC_CONTEXT_INV);
		(void) ea_attach_item(record, &pu->pu_nsig,
		    sizeof (uint64_t), EXT_UINT64 | EXD_PROC_SIGNALS);
		(void) ea_attach_item(record, &pu->pu_nswp,
		    sizeof (uint64_t), EXT_UINT64 | EXD_PROC_SWAPS);
		(void) ea_attach_item(record, &pu->pu_nscl,
		    sizeof (uint64_t), EXT_UINT64 | EXD_PROC_SYSCALLS);
		break;
	case AC_PROC_ANCPID:
		(void) ea_attach_item(record, &pu->pu_ancpid,
		    sizeof (uint32_t), EXT_UINT32 | EXD_PROC_ANCPID);
		break;
	case AC_PROC_WAIT_STATUS:
		(void) ea_attach_item(record, &pu->pu_wstat,
		    sizeof (uint32_t), EXT_UINT32 | EXD_PROC_WAIT_STATUS);
		break;
	case AC_PROC_ZONENAME:
		(void) ea_attach_item(record, pu->pu_zonename,
		    strlen(pu->pu_zonename) + 1,
		    EXT_STRING | EXD_PROC_ZONENAME);
		break;
	case AC_PROC_MEM:
		(void) ea_attach_item(record, &pu->pu_mem_rss_avg,
		    sizeof (uint64_t), EXT_UINT64 | EXD_PROC_MEM_RSS_AVG_K);
		(void) ea_attach_item(record, &pu->pu_mem_rss_max,
		    sizeof (uint64_t), EXT_UINT64 | EXD_PROC_MEM_RSS_MAX_K);
		break;
	default:
		attached = 0;
	}
	return (attached);
}

static ea_object_t *
exacct_assemble_proc_record(proc_usage_t *pu, ulong_t *mask,
    ea_catalog_t record_type)
{
	int res, count;
	ea_object_t *record;

	/*
	 * Assemble usage values into group.
	 */
	record = ea_alloc_group(EXT_GROUP | EXC_DEFAULT | record_type);
	for (res = 1, count = 0; res <= AC_PROC_MAX_RES; res++)
		if (BT_TEST(mask, res))
			count += exacct_attach_proc_item(pu, record, res);
	if (count == 0) {
		ea_free_object(record, EUP_ALLOC);
		record = NULL;
	}
	return (record);
}

/*
 * The following two routines assume that the process's p_lock is held or
 * that exacct_commit_proc() has been called from exit() when all lwps are
 * stopped.
 */
static void
exacct_calculate_proc_mstate(proc_t *p, proc_usage_t *pu)
{
	kthread_t *t;

	ASSERT(MUTEX_HELD(&p->p_lock));
	if ((t = p->p_tlist) == NULL)
		return;

	do {
		pu->pu_minflt += t->t_lwp->lwp_ru.minflt;
		pu->pu_majflt += t->t_lwp->lwp_ru.majflt;
		pu->pu_sndmsg += t->t_lwp->lwp_ru.msgsnd;
		pu->pu_rcvmsg += t->t_lwp->lwp_ru.msgrcv;
		pu->pu_ioch += t->t_lwp->lwp_ru.ioch;
		pu->pu_iblk += t->t_lwp->lwp_ru.inblock;
		pu->pu_oblk += t->t_lwp->lwp_ru.oublock;
		pu->pu_vcsw += t->t_lwp->lwp_ru.nvcsw;
		pu->pu_icsw += t->t_lwp->lwp_ru.nivcsw;
		pu->pu_nsig += t->t_lwp->lwp_ru.nsignals;
		pu->pu_nswp += t->t_lwp->lwp_ru.nswap;
		pu->pu_nscl += t->t_lwp->lwp_ru.sysc;
	} while ((t = t->t_forw) != p->p_tlist);
}

static void
exacct_copy_proc_mstate(proc_t *p, proc_usage_t *pu)
{
	pu->pu_minflt = p->p_ru.minflt;
	pu->pu_majflt = p->p_ru.majflt;
	pu->pu_sndmsg = p->p_ru.msgsnd;
	pu->pu_rcvmsg = p->p_ru.msgrcv;
	pu->pu_ioch = p->p_ru.ioch;
	pu->pu_iblk = p->p_ru.inblock;
	pu->pu_oblk = p->p_ru.oublock;
	pu->pu_vcsw = p->p_ru.nvcsw;
	pu->pu_icsw = p->p_ru.nivcsw;
	pu->pu_nsig = p->p_ru.nsignals;
	pu->pu_nswp = p->p_ru.nswap;
	pu->pu_nscl = p->p_ru.sysc;
}
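
/*
 * exacct_calculate_proc_usage() fills in the given proc_usage_t for the
 * process: identifiers, accounting flags, controlling tty, wait status,
 * average and maximum RSS, credentials, command, zone and node names, and the
 * microstate counters, which are aggregated from the live lwps for EW_PARTIAL
 * records or copied from p_ru for EW_FINAL records.  CPU times and
 * start/finish times are computed only if selected in the mask; the remaining
 * fields are always filled in.  The caller must hold p_lock.
 */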
void
exacct_calculate_proc_usage(proc_t *p, proc_usage_t *pu, ulong_t *mask,
    int flag, int wstat)
{
	timestruc_t ts, ts_run;

	ASSERT(MUTEX_HELD(&p->p_lock));

	/*
	 * Convert CPU and execution times to sec/nsec format.
	 */
	if (BT_TEST(mask, AC_PROC_CPU)) {
		hrt2ts(mstate_aggr_state(p, LMS_USER), &ts);
		pu->pu_utimesec = (uint64_t)(ulong_t)ts.tv_sec;
		pu->pu_utimensec = (uint64_t)(ulong_t)ts.tv_nsec;
		hrt2ts(mstate_aggr_state(p, LMS_SYSTEM), &ts);
		pu->pu_stimesec = (uint64_t)(ulong_t)ts.tv_sec;
		pu->pu_stimensec = (uint64_t)(ulong_t)ts.tv_nsec;
	}
	if (BT_TEST(mask, AC_PROC_TIME)) {
		gethrestime(&ts);
		pu->pu_finishsec = (uint64_t)(ulong_t)ts.tv_sec;
		pu->pu_finishnsec = (uint64_t)(ulong_t)ts.tv_nsec;
		hrt2ts(gethrtime() - p->p_mstart, &ts_run);
		ts.tv_sec -= ts_run.tv_sec;
		ts.tv_nsec -= ts_run.tv_nsec;
		if (ts.tv_nsec < 0) {
			ts.tv_sec--;
			if ((ts.tv_nsec = ts.tv_nsec + NANOSEC) >= NANOSEC) {
				ts.tv_sec++;
				ts.tv_nsec -= NANOSEC;
			}
		}
		pu->pu_startsec = (uint64_t)(ulong_t)ts.tv_sec;
		pu->pu_startnsec = (uint64_t)(ulong_t)ts.tv_nsec;
	}

	pu->pu_pid = p->p_pidp->pid_id;
	pu->pu_acflag = p->p_user.u_acflag;
	pu->pu_projid = p->p_task->tk_proj->kpj_id;
	pu->pu_taskid = p->p_task->tk_tkid;
	pu->pu_major = getmajor(p->p_sessp->s_dev);
	pu->pu_minor = getminor(p->p_sessp->s_dev);
	pu->pu_ancpid = p->p_ancpid;
	pu->pu_wstat = wstat;
	/*
	 * Compute average RSS in K.  The denominator is the number of
	 * samples: the number of clock ticks plus the initial value.
	 */
	pu->pu_mem_rss_avg = (PTOU(p)->u_mem / (p->p_stime + p->p_utime + 1)) *
	    (PAGESIZE / 1024);
	pu->pu_mem_rss_max = PTOU(p)->u_mem_max * (PAGESIZE / 1024);

	mutex_enter(&p->p_crlock);
	pu->pu_ruid = crgetruid(p->p_cred);
	pu->pu_rgid = crgetrgid(p->p_cred);
	mutex_exit(&p->p_crlock);

	bcopy(p->p_user.u_comm, pu->pu_command, strlen(p->p_user.u_comm) + 1);
	bcopy(p->p_zone->zone_name, pu->pu_zonename,
	    strlen(p->p_zone->zone_name) + 1);
	bcopy(p->p_zone->zone_nodename, pu->pu_nodename,
	    strlen(p->p_zone->zone_nodename) + 1);

	/*
	 * Calculate microstate accounting data for a process that is still
	 * running.  Presently, we explicitly collect all of the LWP usage into
	 * the proc usage structure here.
	 */
	if (flag & EW_PARTIAL)
		exacct_calculate_proc_mstate(p, pu);
	if (flag & EW_FINAL)
		exacct_copy_proc_mstate(p, pu);
}

/*
 * int exacct_assemble_proc_usage(proc_usage_t *, int (*)(void *, size_t,
 *	void *, size_t, size_t *), void *, size_t, size_t *)
 *
 * Overview
 *   Assemble a record with miscellaneous accounting information about the
 *   process and execute the callback on it.  It is the callback's job to set
 *   "actual" to the size of the record.
 *
 * Return values
 *   The result of the callback function, unless the extended process
 *   accounting feature is not active, in which case ENOTACTIVE is returned.
 *
 * Caller's context
 *   Suitable for KM_SLEEP allocations.
 */
int
exacct_assemble_proc_usage(ac_info_t *ac_proc, proc_usage_t *pu,
    int (*callback)(ac_info_t *, void *, size_t, void *, size_t, size_t *),
    void *ubuf, size_t ubufsize, size_t *actual, int flag)
{
	ulong_t mask[AC_MASK_SZ];
	ea_object_t *proc_record;
	ea_catalog_t record_type;
	void *buf;
	size_t bufsize;
	int ret;

	ASSERT(flag == EW_FINAL || flag == EW_PARTIAL);

	mutex_enter(&ac_proc->ac_lock);
	if (ac_proc->ac_state == AC_OFF) {
		mutex_exit(&ac_proc->ac_lock);
		return (ENOTACTIVE);
	}
	bt_copy(&ac_proc->ac_mask[0], mask, AC_MASK_SZ);
	mutex_exit(&ac_proc->ac_lock);

	switch (flag) {
	case EW_FINAL:
		record_type = EXD_GROUP_PROC;
		break;
	case EW_PARTIAL:
		record_type = EXD_GROUP_PROC_PARTIAL;
		break;
	}

	proc_record = exacct_assemble_proc_record(pu, mask, record_type);
	if (proc_record == NULL)
		return (0);

	/*
	 * Pack object into buffer and pass to callback.
	 */
	bufsize = ea_pack_object(proc_record, NULL, 0);
	buf = kmem_alloc(bufsize, KM_SLEEP);
	(void) ea_pack_object(proc_record, buf, bufsize);

	ret = callback(ac_proc, ubuf, ubufsize, buf, bufsize, actual);

	/*
	 * Free all previously allocated structures.
	 */
	kmem_free(buf, bufsize);
	ea_free_object(proc_record, EUP_ALLOC);
	return (ret);
}

/*
 * int exacct_commit_callback(ac_info_t *, void *, size_t, void *, size_t,
 *	size_t *)
 *
 * Overview
 *   exacct_commit_callback() writes the indicated buffer to the indicated
 *   extended accounting file.
 *
 * Return values
 *   The result of the write operation is returned.  "actual" is updated to
 *   contain the number of bytes actually written.
 *
 * Caller's context
 *   Suitable for a vn_rdwr() operation.
 */
/*ARGSUSED*/
int
exacct_commit_callback(ac_info_t *info, void *ubuf, size_t ubufsize,
    void *buf, size_t bufsize, size_t *actual)
{
	int error = 0;

	*actual = 0;
	if ((error = exacct_vn_write(info, buf, bufsize)) == 0)
		*actual = bufsize;
	return (error);
}
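
/*
 * exacct_do_commit_proc() writes a final process record for p to the given
 * process accounting file, provided that file's accounting state is on.  It
 * snapshots the accounting mask, computes the process usage under p_lock, and
 * commits the record via exacct_assemble_proc_usage() and
 * exacct_commit_callback().
 */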
static void
exacct_do_commit_proc(ac_info_t *ac_proc, proc_t *p, int wstat)
{
	size_t size;
	proc_usage_t *pu;
	ulong_t mask[AC_MASK_SZ];

	mutex_enter(&ac_proc->ac_lock);
	if (ac_proc->ac_state == AC_ON) {
		bt_copy(&ac_proc->ac_mask[0], mask, AC_MASK_SZ);
		mutex_exit(&ac_proc->ac_lock);
	} else {
		mutex_exit(&ac_proc->ac_lock);
		return;
	}

	mutex_enter(&p->p_lock);
	size = strlen(p->p_user.u_comm) + 1;
	mutex_exit(&p->p_lock);

	pu = kmem_alloc(sizeof (proc_usage_t), KM_SLEEP);
	pu->pu_command = kmem_alloc(size, KM_SLEEP);
	mutex_enter(&p->p_lock);
	exacct_calculate_proc_usage(p, pu, mask, EW_FINAL, wstat);
	mutex_exit(&p->p_lock);

	(void) exacct_assemble_proc_usage(ac_proc, pu,
	    exacct_commit_callback, NULL, 0, &size, EW_FINAL);

	kmem_free(pu->pu_command, strlen(pu->pu_command) + 1);
	kmem_free(pu, sizeof (proc_usage_t));
}

/*
 * void exacct_commit_proc(proc_t *, int)
 *
 * Overview
 *   exacct_commit_proc() calculates the final usage for a process, updating
 *   the task usage if task accounting is active, and writing a process record
 *   if process accounting is active.  exacct_commit_proc() is intended to be
 *   called from proc_exit().
 *
 * Return values
 *   None.
 *
 * Caller's context
 *   Suitable for KM_SLEEP allocations.  p_lock must not be held at entry.
 */
void
exacct_commit_proc(proc_t *p, int wstat)
{
	zone_t *zone = p->p_zone;
	struct exacct_globals *acg, *gacg = NULL;

	if (exacct_zone_key == ZONE_KEY_UNINITIALIZED) {
		/*
		 * acctctl module not loaded.  Nothing to do.
		 */
		return;
	}
	acg = zone_getspecific(exacct_zone_key, zone);
	exacct_do_commit_proc(&acg->ac_proc, p, wstat);
	if (zone != global_zone) {
		gacg = zone_getspecific(exacct_zone_key, global_zone);
		exacct_do_commit_proc(&gacg->ac_proc, p, wstat);
	}
}

static int
exacct_attach_netstat_item(net_stat_t *ns, ea_object_t *record, int res)
{
	int attached = 1;

	switch (res) {
	case AC_NET_NAME:
		(void) ea_attach_item(record, ns->ns_name,
		    strlen(ns->ns_name) + 1, EXT_STRING | EXD_NET_STATS_NAME);
		break;
	case AC_NET_CURTIME:
		{
			uint64_t now;
			timestruc_t ts;

			gethrestime(&ts);
			now = (uint64_t)(ulong_t)ts.tv_sec;
			(void) ea_attach_item(record, &now, sizeof (uint64_t),
			    EXT_UINT64 | EXD_NET_STATS_CURTIME);
		}
		break;
	case AC_NET_IBYTES:
		(void) ea_attach_item(record, &ns->ns_ibytes,
		    sizeof (uint64_t), EXT_UINT64 | EXD_NET_STATS_IBYTES);
		break;
	case AC_NET_OBYTES:
		(void) ea_attach_item(record, &ns->ns_obytes,
		    sizeof (uint64_t), EXT_UINT64 | EXD_NET_STATS_OBYTES);
		break;
	case AC_NET_IPKTS:
		(void) ea_attach_item(record, &ns->ns_ipackets,
		    sizeof (uint64_t), EXT_UINT64 | EXD_NET_STATS_IPKTS);
		break;
	case AC_NET_OPKTS:
		(void) ea_attach_item(record, &ns->ns_opackets,
		    sizeof (uint64_t), EXT_UINT64 | EXD_NET_STATS_OPKTS);
		break;
	case AC_NET_IERRPKTS:
		(void) ea_attach_item(record, &ns->ns_ierrors,
		    sizeof (uint64_t), EXT_UINT64 | EXD_NET_STATS_IERRPKTS);
		break;
	case AC_NET_OERRPKTS:
		(void) ea_attach_item(record, &ns->ns_oerrors,
		    sizeof (uint64_t), EXT_UINT64 | EXD_NET_STATS_OERRPKTS);
		break;
	default:
		attached = 0;
	}
	return (attached);
}
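
/*
 * exacct_attach_netdesc_item() attaches the exacct item corresponding to a
 * single AC_NET_* resource of a link or flow descriptor (names, MAC
 * addresses, VLAN tags, SAP, priority, bandwidth limit, IP addresses, ports,
 * protocol and dsfield) to the record group.  It returns 1 if an item was
 * attached and 0 if the resource is not recognized.
 */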
static int
exacct_attach_netdesc_item(net_desc_t *nd, ea_object_t *record, int res)
{
	int attached = 1;

	switch (res) {
	case AC_NET_NAME:
		(void) ea_attach_item(record, nd->nd_name,
		    strlen(nd->nd_name) + 1, EXT_STRING | EXD_NET_DESC_NAME);
		break;
	case AC_NET_DEVNAME:
		(void) ea_attach_item(record, nd->nd_devname,
		    strlen(nd->nd_devname) + 1, EXT_STRING |
		    EXD_NET_DESC_DEVNAME);
		break;
	case AC_NET_EHOST:
		(void) ea_attach_item(record, &nd->nd_ehost,
		    sizeof (nd->nd_ehost), EXT_RAW | EXD_NET_DESC_EHOST);
		break;
	case AC_NET_EDEST:
		(void) ea_attach_item(record, &nd->nd_edest,
		    sizeof (nd->nd_edest), EXT_RAW | EXD_NET_DESC_EDEST);
		break;
	case AC_NET_VLAN_TPID:
		(void) ea_attach_item(record, &nd->nd_vlan_tpid,
		    sizeof (ushort_t), EXT_UINT16 | EXD_NET_DESC_VLAN_TPID);
		break;
	case AC_NET_VLAN_TCI:
		(void) ea_attach_item(record, &nd->nd_vlan_tci,
		    sizeof (ushort_t), EXT_UINT16 | EXD_NET_DESC_VLAN_TCI);
		break;
	case AC_NET_SAP:
		(void) ea_attach_item(record, &nd->nd_sap,
		    sizeof (ushort_t), EXT_UINT16 | EXD_NET_DESC_SAP);
		break;
	case AC_NET_PRIORITY:
		(void) ea_attach_item(record, &nd->nd_priority,
		    sizeof (ushort_t), EXT_UINT16 | EXD_NET_DESC_PRIORITY);
		break;
	case AC_NET_BWLIMIT:
		(void) ea_attach_item(record, &nd->nd_bw_limit,
		    sizeof (uint64_t), EXT_UINT64 | EXD_NET_DESC_BWLIMIT);
		break;
	case AC_NET_SADDR:
		if (nd->nd_isv4) {
			(void) ea_attach_item(record, &nd->nd_saddr[3],
			    sizeof (uint32_t), EXT_UINT32 |
			    EXD_NET_DESC_V4SADDR);
		} else {
			(void) ea_attach_item(record, &nd->nd_saddr,
			    sizeof (nd->nd_saddr), EXT_RAW |
			    EXD_NET_DESC_V6SADDR);
		}
		break;
	case AC_NET_DADDR:
		if (nd->nd_isv4) {
			(void) ea_attach_item(record, &nd->nd_daddr[3],
			    sizeof (uint32_t), EXT_UINT32 |
			    EXD_NET_DESC_V4DADDR);
		} else {
			(void) ea_attach_item(record, &nd->nd_daddr,
			    sizeof (nd->nd_daddr), EXT_RAW |
			    EXD_NET_DESC_V6DADDR);
		}
		break;
	case AC_NET_SPORT:
		(void) ea_attach_item(record, &nd->nd_sport,
		    sizeof (uint16_t), EXT_UINT16 | EXD_NET_DESC_SPORT);
		break;
	case AC_NET_DPORT:
		(void) ea_attach_item(record, &nd->nd_dport,
		    sizeof (uint16_t), EXT_UINT16 | EXD_NET_DESC_DPORT);
		break;
	case AC_NET_PROTOCOL:
		(void) ea_attach_item(record, &nd->nd_protocol,
		    sizeof (uint8_t), EXT_UINT8 | EXD_NET_DESC_PROTOCOL);
		break;
	case AC_NET_DSFIELD:
		(void) ea_attach_item(record, &nd->nd_dsfield,
		    sizeof (uint8_t), EXT_UINT8 | EXD_NET_DESC_DSFIELD);
		break;
	default:
		attached = 0;
	}
	return (attached);
}

static ea_object_t *
exacct_assemble_net_record(void *ninfo, ulong_t *mask,
    ea_catalog_t record_type, int what)
{
	int res;
	int count;
	ea_object_t *record;

	/*
	 * Assemble usage values into group.
	 */
	record = ea_alloc_group(EXT_GROUP | EXC_DEFAULT | record_type);
	for (res = 1, count = 0; res <= AC_NET_MAX_RES; res++)
		if (BT_TEST(mask, res)) {
			if (what == EX_NET_LNDESC_REC ||
			    what == EX_NET_FLDESC_REC) {
				count += exacct_attach_netdesc_item(
				    (net_desc_t *)ninfo, record, res);
			} else {
				count += exacct_attach_netstat_item(
				    (net_stat_t *)ninfo, record, res);
			}
		}
	if (count == 0) {
		ea_free_object(record, EUP_ALLOC);
		record = NULL;
	}
	return (record);
}
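
/*
 * exacct_assemble_net_usage() builds a network accounting record of the kind
 * selected by "what" (link or flow, descriptor or statistics), packs it, and
 * hands the packed buffer to the callback.  It returns ENOTACTIVE if network
 * accounting is not active, ENOMEM if the pack buffer cannot be allocated
 * (the allocation is KM_NOSLEEP), and otherwise the callback's return value.
 */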
int
exacct_assemble_net_usage(ac_info_t *ac_net, void *ninfo,
    int (*callback)(ac_info_t *, void *, size_t, void *, size_t, size_t *),
    void *ubuf, size_t ubufsize, size_t *actual, int what)
{
	ulong_t mask[AC_MASK_SZ];
	ea_object_t *net_desc;
	ea_catalog_t record_type;
	void *buf;
	size_t bufsize;
	int ret;

	mutex_enter(&ac_net->ac_lock);
	if (ac_net->ac_state == AC_OFF) {
		mutex_exit(&ac_net->ac_lock);
		return (ENOTACTIVE);
	}
	bt_copy(&ac_net->ac_mask[0], mask, AC_MASK_SZ);
	mutex_exit(&ac_net->ac_lock);

	switch (what) {
	case EX_NET_LNDESC_REC:
		record_type = EXD_GROUP_NET_LINK_DESC;
		break;
	case EX_NET_LNSTAT_REC:
		record_type = EXD_GROUP_NET_LINK_STATS;
		break;
	case EX_NET_FLDESC_REC:
		record_type = EXD_GROUP_NET_FLOW_DESC;
		break;
	case EX_NET_FLSTAT_REC:
		record_type = EXD_GROUP_NET_FLOW_STATS;
		break;
	}

	net_desc = exacct_assemble_net_record(ninfo, mask, record_type, what);
	if (net_desc == NULL)
		return (0);

	/*
	 * Pack object into buffer and pass to callback.
	 */
	bufsize = ea_pack_object(net_desc, NULL, 0);
	buf = kmem_alloc(bufsize, KM_NOSLEEP);
	if (buf == NULL)
		return (ENOMEM);

	(void) ea_pack_object(net_desc, buf, bufsize);

	ret = callback(ac_net, ubuf, ubufsize, buf, bufsize, actual);

	/*
	 * Free all previously allocated structures.
	 */
	kmem_free(buf, bufsize);
	ea_free_object(net_desc, EUP_ALLOC);
	return (ret);
}
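
/*
 * exacct_commit_netinfo() writes a network accounting record of type "what"
 * for the link or flow data in "arg" to the global zone's network accounting
 * file.  It returns ENOTACTIVE if the acctctl module is not loaded or network
 * accounting is not active; otherwise it returns the result of assembling and
 * writing the record.
 */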
int
exacct_commit_netinfo(void *arg, int what)
{
	size_t size;
	ulong_t mask[AC_MASK_SZ];
	struct exacct_globals *acg;
	ac_info_t *ac_net;

	if (exacct_zone_key == ZONE_KEY_UNINITIALIZED) {
		/*
		 * acctctl module not loaded.  Nothing to do.
		 */
		return (ENOTACTIVE);
	}

	/*
	 * Even though each zone nominally has its own network accounting
	 * settings (ac_net), these are only maintained by and for the global
	 * zone.
	 *
	 * If this were to change in the future, this function should grow a
	 * second zoneid (or zone) argument, and use the corresponding zone's
	 * settings rather than always using those of the global zone.
	 */
	acg = zone_getspecific(exacct_zone_key, global_zone);
	ac_net = &acg->ac_net;

	mutex_enter(&ac_net->ac_lock);
	if (ac_net->ac_state == AC_OFF) {
		mutex_exit(&ac_net->ac_lock);
		return (ENOTACTIVE);
	}
	bt_copy(&ac_net->ac_mask[0], mask, AC_MASK_SZ);
	mutex_exit(&ac_net->ac_lock);

	return (exacct_assemble_net_usage(ac_net, arg, exacct_commit_callback,
	    NULL, 0, &size, what));
}

static int
exacct_attach_flow_item(flow_usage_t *fu, ea_object_t *record, int res)
{
	int attached = 1;

	switch (res) {
	case AC_FLOW_SADDR:
		if (fu->fu_isv4) {
			(void) ea_attach_item(record, &fu->fu_saddr[3],
			    sizeof (uint32_t), EXT_UINT32 | EXD_FLOW_V4SADDR);
		} else {
			(void) ea_attach_item(record, &fu->fu_saddr,
			    sizeof (fu->fu_saddr), EXT_RAW |
			    EXD_FLOW_V6SADDR);
		}
		break;
	case AC_FLOW_DADDR:
		if (fu->fu_isv4) {
			(void) ea_attach_item(record, &fu->fu_daddr[3],
			    sizeof (uint32_t), EXT_UINT32 | EXD_FLOW_V4DADDR);
		} else {
			(void) ea_attach_item(record, &fu->fu_daddr,
			    sizeof (fu->fu_daddr), EXT_RAW |
			    EXD_FLOW_V6DADDR);
		}
		break;
	case AC_FLOW_SPORT:
		(void) ea_attach_item(record, &fu->fu_sport,
		    sizeof (uint16_t), EXT_UINT16 | EXD_FLOW_SPORT);
		break;
	case AC_FLOW_DPORT:
		(void) ea_attach_item(record, &fu->fu_dport,
		    sizeof (uint16_t), EXT_UINT16 | EXD_FLOW_DPORT);
		break;
	case AC_FLOW_PROTOCOL:
		(void) ea_attach_item(record, &fu->fu_protocol,
		    sizeof (uint8_t), EXT_UINT8 | EXD_FLOW_PROTOCOL);
		break;
	case AC_FLOW_DSFIELD:
		(void) ea_attach_item(record, &fu->fu_dsfield,
		    sizeof (uint8_t), EXT_UINT8 | EXD_FLOW_DSFIELD);
		break;
	case AC_FLOW_CTIME:
		(void) ea_attach_item(record, &fu->fu_ctime,
		    sizeof (uint64_t), EXT_UINT64 | EXD_FLOW_CTIME);
		break;
	case AC_FLOW_LSEEN:
		(void) ea_attach_item(record, &fu->fu_lseen,
		    sizeof (uint64_t), EXT_UINT64 | EXD_FLOW_LSEEN);
		break;
	case AC_FLOW_NBYTES:
		(void) ea_attach_item(record, &fu->fu_nbytes,
		    sizeof (uint64_t), EXT_UINT32 | EXD_FLOW_NBYTES);
		break;
	case AC_FLOW_NPKTS:
		(void) ea_attach_item(record, &fu->fu_npackets,
		    sizeof (uint64_t), EXT_UINT32 | EXD_FLOW_NPKTS);
		break;
	case AC_FLOW_PROJID:
		if (fu->fu_projid >= 0) {
			(void) ea_attach_item(record, &fu->fu_projid,
			    sizeof (uint32_t), EXT_UINT32 | EXD_FLOW_PROJID);
		}
		break;
	case AC_FLOW_UID:
		if (fu->fu_userid >= 0) {
			(void) ea_attach_item(record, &fu->fu_userid,
			    sizeof (uint32_t), EXT_UINT32 | EXD_FLOW_UID);
		}
		break;
	case AC_FLOW_ANAME:
		(void) ea_attach_item(record, fu->fu_aname,
		    strlen(fu->fu_aname) + 1, EXT_STRING | EXD_FLOW_ANAME);
		break;
	default:
		attached = 0;
	}
	return (attached);
}

static ea_object_t *
exacct_assemble_flow_record(flow_usage_t *fu, ulong_t *mask,
    ea_catalog_t record_type)
{
	int res, count;
	ea_object_t *record;

	/*
	 * Assemble usage values into group.
	 */
	record = ea_alloc_group(EXT_GROUP | EXC_DEFAULT | record_type);
	for (res = 1, count = 0; res <= AC_FLOW_MAX_RES; res++)
		if (BT_TEST(mask, res))
			count += exacct_attach_flow_item(fu, record, res);
	if (count == 0) {
		ea_free_object(record, EUP_ALLOC);
		record = NULL;
	}
	return (record);
}
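
/*
 * exacct_assemble_flow_usage() builds an EXD_GROUP_FLOW record for the given
 * flow usage, packs it, and hands the packed buffer to the callback.  It
 * returns ENOTACTIVE if flow accounting is not active, ENOMEM if the pack
 * buffer cannot be allocated (the allocation is KM_NOSLEEP), and otherwise
 * the callback's return value.
 */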
int
exacct_assemble_flow_usage(ac_info_t *ac_flow, flow_usage_t *fu,
    int (*callback)(ac_info_t *, void *, size_t, void *, size_t, size_t *),
    void *ubuf, size_t ubufsize, size_t *actual)
{
	ulong_t mask[AC_MASK_SZ];
	ea_object_t *flow_usage;
	ea_catalog_t record_type;
	void *buf;
	size_t bufsize;
	int ret;

	mutex_enter(&ac_flow->ac_lock);
	if (ac_flow->ac_state == AC_OFF) {
		mutex_exit(&ac_flow->ac_lock);
		return (ENOTACTIVE);
	}
	bt_copy(&ac_flow->ac_mask[0], mask, AC_MASK_SZ);
	mutex_exit(&ac_flow->ac_lock);

	record_type = EXD_GROUP_FLOW;

	flow_usage = exacct_assemble_flow_record(fu, mask, record_type);
	if (flow_usage == NULL) {
		return (0);
	}

	/*
	 * Pack object into buffer and pass to callback.
	 */
	bufsize = ea_pack_object(flow_usage, NULL, 0);
	buf = kmem_alloc(bufsize, KM_NOSLEEP);
	if (buf == NULL) {
		return (ENOMEM);
	}

	(void) ea_pack_object(flow_usage, buf, bufsize);

	ret = callback(ac_flow, ubuf, ubufsize, buf, bufsize, actual);

	/*
	 * Free all previously allocated structures.
	 */
	kmem_free(buf, bufsize);
	ea_free_object(flow_usage, EUP_ALLOC);
	return (ret);
}

void
exacct_commit_flow(void *arg)
{
	flow_usage_t *f = (flow_usage_t *)arg;
	size_t size;
	ulong_t mask[AC_MASK_SZ];
	struct exacct_globals *acg;
	ac_info_t *ac_flow;

	if (exacct_zone_key == ZONE_KEY_UNINITIALIZED) {
		/*
		 * acctctl module not loaded.  Nothing to do.
		 */
		return;
	}

	/*
	 * Even though each zone nominally has its own flow accounting settings
	 * (ac_flow), these are only maintained by and for the global zone.
	 *
	 * If this were to change in the future, this function should grow a
	 * second zoneid (or zone) argument, and use the corresponding zone's
	 * settings rather than always using those of the global zone.
	 */
	acg = zone_getspecific(exacct_zone_key, global_zone);
	ac_flow = &acg->ac_flow;

	mutex_enter(&ac_flow->ac_lock);
	if (ac_flow->ac_state == AC_OFF) {
		mutex_exit(&ac_flow->ac_lock);
		return;
	}
	bt_copy(&ac_flow->ac_mask[0], mask, AC_MASK_SZ);
	mutex_exit(&ac_flow->ac_lock);

	(void) exacct_assemble_flow_usage(ac_flow, f, exacct_commit_callback,
	    NULL, 0, &size);
}

/*
 * int exacct_tag_task(task_t *, void *, size_t, int)
 *
 * Overview
 *   exacct_tag_task() provides the exacct record construction and writing
 *   support required by putacct(2) for task entities.
 *
 * Return values
 *   The result of the write operation is returned, unless the extended
 *   accounting facility is not active, in which case ENOTACTIVE is returned.
 *
 * Caller's context
 *   Suitable for KM_SLEEP allocations.
 */
int
exacct_tag_task(ac_info_t *ac_task, task_t *tk, void *ubuf, size_t ubufsz,
    int flags)
{
	int error = 0;
	void *buf;
	size_t bufsize;
	ea_catalog_t cat;
	ea_object_t *tag;

	mutex_enter(&ac_task->ac_lock);
	if (ac_task->ac_state == AC_OFF || ac_task->ac_vnode == NULL) {
		mutex_exit(&ac_task->ac_lock);
		return (ENOTACTIVE);
	}
	mutex_exit(&ac_task->ac_lock);

	tag = ea_alloc_group(EXT_GROUP | EXC_DEFAULT | EXD_GROUP_TASK_TAG);
	(void) ea_attach_item(tag, &tk->tk_tkid, 0,
	    EXT_UINT32 | EXC_DEFAULT | EXD_TASK_TASKID);
	(void) ea_attach_item(tag, tk->tk_zone->zone_nodename, 0,
	    EXT_STRING | EXC_DEFAULT | EXD_TASK_HOSTNAME);
	if (flags == EP_RAW)
		cat = EXT_RAW | EXC_DEFAULT | EXD_TASK_TAG;
	else
		cat = EXT_EXACCT_OBJECT | EXC_DEFAULT | EXD_TASK_TAG;
	(void) ea_attach_item(tag, ubuf, ubufsz, cat);

	bufsize = ea_pack_object(tag, NULL, 0);
	buf = kmem_alloc(bufsize, KM_SLEEP);
	(void) ea_pack_object(tag, buf, bufsize);
	error = exacct_vn_write(ac_task, buf, bufsize);
	kmem_free(buf, bufsize);
	ea_free_object(tag, EUP_ALLOC);
	return (error);
}

/*
 * int exacct_tag_proc(pid_t, taskid_t, void *, size_t, int, char *)
 *
 * Overview
 *   exacct_tag_proc() provides the exacct record construction and writing
 *   support required by putacct(2) for processes.
 *
 * Return values
 *   The result of the write operation is returned, unless the extended
 *   accounting facility is not active, in which case ENOTACTIVE is returned.
 *
 * Caller's context
 *   Suitable for KM_SLEEP allocations.
 */
int
exacct_tag_proc(ac_info_t *ac_proc, pid_t pid, taskid_t tkid, void *ubuf,
    size_t ubufsz, int flags, const char *hostname)
{
	int error = 0;
	void *buf;
	size_t bufsize;
	ea_catalog_t cat;
	ea_object_t *tag;

	mutex_enter(&ac_proc->ac_lock);
	if (ac_proc->ac_state == AC_OFF || ac_proc->ac_vnode == NULL) {
		mutex_exit(&ac_proc->ac_lock);
		return (ENOTACTIVE);
	}
	mutex_exit(&ac_proc->ac_lock);

	tag = ea_alloc_group(EXT_GROUP | EXC_DEFAULT | EXD_GROUP_PROC_TAG);
	(void) ea_attach_item(tag, &pid, sizeof (uint32_t),
	    EXT_UINT32 | EXC_DEFAULT | EXD_PROC_PID);
	(void) ea_attach_item(tag, &tkid, 0,
	    EXT_UINT32 | EXC_DEFAULT | EXD_TASK_TASKID);
	(void) ea_attach_item(tag, (void *)hostname, 0,
	    EXT_STRING | EXC_DEFAULT | EXD_TASK_HOSTNAME);
	if (flags == EP_RAW)
		cat = EXT_RAW | EXC_DEFAULT | EXD_PROC_TAG;
	else
		cat = EXT_EXACCT_OBJECT | EXC_DEFAULT | EXD_PROC_TAG;
	(void) ea_attach_item(tag, ubuf, ubufsz, cat);

	bufsize = ea_pack_object(tag, NULL, 0);
	buf = kmem_alloc(bufsize, KM_SLEEP);
	(void) ea_pack_object(tag, buf, bufsize);
	error = exacct_vn_write(ac_proc, buf, bufsize);
	kmem_free(buf, bufsize);
	ea_free_object(tag, EUP_ALLOC);
	return (error);
}

/*
 * void exacct_init(void)
 *
 * Overview
 *   Initializes the extended accounting subsystem.
 *
 * Return values
 *   None.
 *
 * Caller's context
 *   Suitable for KM_SLEEP allocations.
 */
void
exacct_init()
{
	exacct_queue = system_taskq;
	exacct_object_cache = kmem_cache_create("exacct_object_cache",
	    sizeof (ea_object_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
}

/*
 * exacct_snapshot_proc_mstate() copies a process's microstate accounting data
 * and resource usage counters into a given task_usage_t.  It differs from
 * exacct_copy_proc_mstate() in that here a) we are copying to a task_usage_t,
 * b) p_lock will have been acquired earlier in the call path, and c) we also
 * include the process's user and system times.
 */
static void
exacct_snapshot_proc_mstate(proc_t *p, task_usage_t *tu)
{
	tu->tu_utime = mstate_aggr_state(p, LMS_USER);
	tu->tu_stime = mstate_aggr_state(p, LMS_SYSTEM);
	tu->tu_minflt = p->p_ru.minflt;
	tu->tu_majflt = p->p_ru.majflt;
	tu->tu_sndmsg = p->p_ru.msgsnd;
	tu->tu_rcvmsg = p->p_ru.msgrcv;
	tu->tu_ioch = p->p_ru.ioch;
	tu->tu_iblk = p->p_ru.inblock;
	tu->tu_oblk = p->p_ru.oublock;
	tu->tu_vcsw = p->p_ru.nvcsw;
	tu->tu_icsw = p->p_ru.nivcsw;
	tu->tu_nsig = p->p_ru.nsignals;
	tu->tu_nswp = p->p_ru.nswap;
	tu->tu_nscl = p->p_ru.sysc;
}

/*
 * void exacct_move_mstate(proc_t *, task_t *, task_t *)
 *
 * Overview
 *   exacct_move_mstate() is called by task_change() and accounts for
 *   a process's resource usage when it is moved from one task to another.
 *
 *   The process's usage at this point is recorded in the new task so
 *   that it can be excluded from the calculation of resources consumed
 *   by that task.
 *
 *   The resource usage inherited by the new task is also added to the
 *   aggregate maintained by the old task for processes that have exited.
 *
 * Return values
 *   None.
 *
 * Caller's context
 *   pidlock and p_lock held across exacct_move_mstate().
 */
void
exacct_move_mstate(proc_t *p, task_t *oldtk, task_t *newtk)
{
	task_usage_t tu;

	/* Take a snapshot of this process's mstate and RU counters */
	exacct_snapshot_proc_mstate(p, &tu);

	/*
	 * Use the snapshot to increment the aggregate usage of the old
	 * task, and the inherited usage of the new one.
	 */
	mutex_enter(&oldtk->tk_usage_lock);
	exacct_add_task_mstate(oldtk->tk_usage, &tu);
	mutex_exit(&oldtk->tk_usage_lock);
	mutex_enter(&newtk->tk_usage_lock);
	exacct_add_task_mstate(newtk->tk_inherited, &tu);
	mutex_exit(&newtk->tk_usage_lock);
}