1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/exacct.h> 29 #include <sys/exacct_catalog.h> 30 #include <sys/disp.h> 31 #include <sys/task.h> 32 #include <sys/proc.h> 33 #include <sys/cmn_err.h> 34 #include <sys/kmem.h> 35 #include <sys/project.h> 36 #include <sys/systm.h> 37 #include <sys/vnode.h> 38 #include <sys/file.h> 39 #include <sys/acctctl.h> 40 #include <sys/time.h> 41 #include <sys/utsname.h> 42 #include <sys/session.h> 43 #include <sys/sysmacros.h> 44 #include <sys/bitmap.h> 45 #include <sys/msacct.h> 46 47 /* 48 * exacct usage and recording routines 49 * 50 * wracct(2), getacct(2), and the records written at process or task 51 * termination are constructed using the exacct_assemble_[task,proc]_usage() 52 * functions, which take a callback that takes the appropriate action on 53 * the packed exacct record for the task or process. For the process-related 54 * actions, we partition the routines such that the data collecting component 55 * can be performed while holding p_lock, and all sleeping or blocking 56 * operations can be performed without acquiring p_lock. 57 * 58 * putacct(2), which allows an application to construct a customized record 59 * associated with an existing process or task, has its own entry points: 60 * exacct_tag_task() and exacct_tag_proc(). 61 */ 62 63 taskq_t *exacct_queue; 64 kmem_cache_t *exacct_object_cache; 65 66 zone_key_t exacct_zone_key = ZONE_KEY_UNINITIALIZED; 67 68 static const uint32_t exacct_version = EXACCT_VERSION; 69 static const char exacct_header[] = "exacct"; 70 static const char exacct_creator[] = "SunOS"; 71 72 ea_object_t * 73 ea_alloc_item(ea_catalog_t catalog, void *buf, size_t bufsz) 74 { 75 ea_object_t *item; 76 77 item = kmem_cache_alloc(exacct_object_cache, KM_SLEEP); 78 bzero(item, sizeof (ea_object_t)); 79 (void) ea_set_item(item, catalog, buf, bufsz); 80 return (item); 81 } 82 83 ea_object_t * 84 ea_alloc_group(ea_catalog_t catalog) 85 { 86 ea_object_t *group; 87 88 group = kmem_cache_alloc(exacct_object_cache, KM_SLEEP); 89 bzero(group, sizeof (ea_object_t)); 90 (void) ea_set_group(group, catalog); 91 return (group); 92 } 93 94 ea_object_t * 95 ea_attach_item(ea_object_t *grp, void *buf, size_t bufsz, ea_catalog_t catalog) 96 { 97 ea_object_t *item; 98 99 item = ea_alloc_item(catalog, buf, bufsz); 100 (void) ea_attach_to_group(grp, item); 101 return (item); 102 } 103 104 /* 105 * exacct_add_task_mstate() and exacct_sub_task_mstate() add and subtract 106 * microstate accounting data and resource usage counters from one task_usage_t 107 * from those supplied in another. These functions do not operate on *all* 108 * members of a task_usage_t: for some (e.g. tu_anctaskid) it would not make 109 * sense. 110 */ 111 static void 112 exacct_add_task_mstate(task_usage_t *tu, task_usage_t *delta) 113 { 114 tu->tu_utime += delta->tu_utime; 115 tu->tu_stime += delta->tu_stime; 116 tu->tu_minflt += delta->tu_minflt; 117 tu->tu_majflt += delta->tu_majflt; 118 tu->tu_sndmsg += delta->tu_sndmsg; 119 tu->tu_rcvmsg += delta->tu_rcvmsg; 120 tu->tu_ioch += delta->tu_ioch; 121 tu->tu_iblk += delta->tu_iblk; 122 tu->tu_oblk += delta->tu_oblk; 123 tu->tu_vcsw += delta->tu_vcsw; 124 tu->tu_icsw += delta->tu_icsw; 125 tu->tu_nsig += delta->tu_nsig; 126 tu->tu_nswp += delta->tu_nswp; 127 tu->tu_nscl += delta->tu_nscl; 128 } 129 130 /* 131 * See the comments for exacct_add_task_mstate(), above. 132 */ 133 static void 134 exacct_sub_task_mstate(task_usage_t *tu, task_usage_t *delta) 135 { 136 tu->tu_utime -= delta->tu_utime; 137 tu->tu_stime -= delta->tu_stime; 138 tu->tu_minflt -= delta->tu_minflt; 139 tu->tu_majflt -= delta->tu_majflt; 140 tu->tu_sndmsg -= delta->tu_sndmsg; 141 tu->tu_rcvmsg -= delta->tu_rcvmsg; 142 tu->tu_ioch -= delta->tu_ioch; 143 tu->tu_iblk -= delta->tu_iblk; 144 tu->tu_oblk -= delta->tu_oblk; 145 tu->tu_vcsw -= delta->tu_vcsw; 146 tu->tu_icsw -= delta->tu_icsw; 147 tu->tu_nsig -= delta->tu_nsig; 148 tu->tu_nswp -= delta->tu_nswp; 149 tu->tu_nscl -= delta->tu_nscl; 150 } 151 152 /* 153 * exacct_vn_write() is a vn_rdwr wrapper that protects us from corrupting the 154 * accounting file in case of an I/O or filesystem error. acctctl() prevents 155 * the two accounting vnodes from being equal, and the appropriate ac_lock is 156 * held across the call, so we're single threaded through this code for each 157 * file. 158 */ 159 static int 160 exacct_vn_write(ac_info_t *info, void *buf, ssize_t bufsize) 161 { 162 int error = 0; 163 ssize_t resid; 164 struct vattr va; 165 166 if (info == NULL) 167 return (0); 168 169 mutex_enter(&info->ac_lock); 170 171 /* 172 * Don't do anything unless accounting file is set. 173 */ 174 if (info->ac_vnode == NULL) { 175 mutex_exit(&info->ac_lock); 176 return (0); 177 } 178 179 /* 180 * Save the size. If vn_rdwr fails, reset the size to avoid corrupting 181 * the present accounting file. 182 */ 183 va.va_mask = AT_SIZE; 184 error = VOP_GETATTR(info->ac_vnode, &va, 0, kcred); 185 if (error == 0) { 186 error = vn_rdwr(UIO_WRITE, info->ac_vnode, (caddr_t)buf, 187 bufsize, 0LL, UIO_SYSSPACE, FAPPEND, (rlim64_t)MAXOFFSET_T, 188 kcred, &resid); 189 if (error) { 190 (void) VOP_SETATTR(info->ac_vnode, &va, 0, kcred, NULL); 191 } else if (resid != 0) { 192 (void) VOP_SETATTR(info->ac_vnode, &va, 0, kcred, NULL); 193 error = ENOSPC; 194 } 195 } 196 mutex_exit(&info->ac_lock); 197 198 return (error); 199 } 200 201 /* 202 * void *exacct_create_header(size_t *) 203 * 204 * Overview 205 * exacct_create_header() constructs an exacct file header identifying the 206 * accounting file as the output of the kernel. exacct_create_header() and 207 * the static write_header() and verify_header() routines in libexacct must 208 * remain synchronized. 209 * 210 * Return values 211 * A pointer to a packed exacct buffer containing the appropriate header is 212 * returned; the size of the buffer is placed in the location indicated by 213 * sizep. 214 * 215 * Caller's context 216 * Suitable for KM_SLEEP allocations. 217 */ 218 void * 219 exacct_create_header(size_t *sizep) 220 { 221 ea_object_t *hdr_grp; 222 uint32_t bskip; 223 void *buf; 224 size_t bufsize; 225 226 hdr_grp = ea_alloc_group(EXT_GROUP | EXC_DEFAULT | EXD_GROUP_HEADER); 227 (void) ea_attach_item(hdr_grp, (void *)&exacct_version, 0, 228 EXT_UINT32 | EXC_DEFAULT | EXD_VERSION); 229 (void) ea_attach_item(hdr_grp, (void *)exacct_header, 0, 230 EXT_STRING | EXC_DEFAULT | EXD_FILETYPE); 231 (void) ea_attach_item(hdr_grp, (void *)exacct_creator, 0, 232 EXT_STRING | EXC_DEFAULT | EXD_CREATOR); 233 (void) ea_attach_item(hdr_grp, uts_nodename(), 0, 234 EXT_STRING | EXC_DEFAULT | EXD_HOSTNAME); 235 236 bufsize = ea_pack_object(hdr_grp, NULL, 0); 237 buf = kmem_alloc(bufsize, KM_SLEEP); 238 (void) ea_pack_object(hdr_grp, buf, bufsize); 239 ea_free_object(hdr_grp, EUP_ALLOC); 240 241 /* 242 * To prevent reading the header when reading the file backwards, 243 * set the large backskip of the header group to 0 (last 4 bytes). 244 */ 245 bskip = 0; 246 exacct_order32(&bskip); 247 bcopy(&bskip, (char *)buf + bufsize - sizeof (bskip), 248 sizeof (bskip)); 249 250 *sizep = bufsize; 251 return (buf); 252 } 253 254 /* 255 * int exacct_write_header(ac_info_t *, void *, size_t) 256 * 257 * Overview 258 * exacct_write_header() writes the given header buffer to the indicated 259 * vnode, and frees the buffer. 260 * 261 * Return values 262 * The result of the write operation is returned. 263 * 264 * Caller's context 265 * Caller must not hold the ac_lock of the appropriate accounting file 266 * information block (ac_info_t). 267 */ 268 int 269 exacct_write_header(ac_info_t *info, void *hdr, size_t hdrsize) 270 { 271 int error; 272 273 error = exacct_vn_write(info, hdr, hdrsize); 274 kmem_free(hdr, hdrsize); 275 return (error); 276 } 277 278 static void 279 exacct_get_interval_task_usage(task_t *tk, task_usage_t *tu, 280 task_usage_t **tu_buf) 281 { 282 task_usage_t *oldtu, *newtu; 283 task_usage_t **prevusage; 284 285 ASSERT(MUTEX_HELD(&tk->tk_usage_lock)); 286 if (getzoneid() != GLOBAL_ZONEID) { 287 prevusage = &tk->tk_zoneusage; 288 } else { 289 prevusage = &tk->tk_prevusage; 290 } 291 if ((oldtu = *prevusage) != NULL) { 292 /* 293 * In case we have any accounting information 294 * saved from the previous interval record. 295 */ 296 newtu = *tu_buf; 297 bcopy(tu, newtu, sizeof (task_usage_t)); 298 tu->tu_minflt -= oldtu->tu_minflt; 299 tu->tu_majflt -= oldtu->tu_majflt; 300 tu->tu_sndmsg -= oldtu->tu_sndmsg; 301 tu->tu_rcvmsg -= oldtu->tu_rcvmsg; 302 tu->tu_ioch -= oldtu->tu_ioch; 303 tu->tu_iblk -= oldtu->tu_iblk; 304 tu->tu_oblk -= oldtu->tu_oblk; 305 tu->tu_vcsw -= oldtu->tu_vcsw; 306 tu->tu_icsw -= oldtu->tu_icsw; 307 tu->tu_nsig -= oldtu->tu_nsig; 308 tu->tu_nswp -= oldtu->tu_nswp; 309 tu->tu_nscl -= oldtu->tu_nscl; 310 tu->tu_utime -= oldtu->tu_utime; 311 tu->tu_stime -= oldtu->tu_stime; 312 313 tu->tu_startsec = oldtu->tu_finishsec; 314 tu->tu_startnsec = oldtu->tu_finishnsec; 315 /* 316 * Copy the data from our temporary storage to the task's 317 * previous interval usage structure for future reference. 318 */ 319 bcopy(newtu, oldtu, sizeof (task_usage_t)); 320 } else { 321 /* 322 * Store current statistics in the task's previous interval 323 * usage structure for future references. 324 */ 325 *prevusage = *tu_buf; 326 bcopy(tu, *prevusage, sizeof (task_usage_t)); 327 *tu_buf = NULL; 328 } 329 } 330 331 static void 332 exacct_snapshot_task_usage(task_t *tk, task_usage_t *tu) 333 { 334 timestruc_t ts; 335 proc_t *p; 336 337 ASSERT(MUTEX_HELD(&pidlock)); 338 339 if ((p = tk->tk_memb_list) == NULL) 340 return; 341 342 /* 343 * exacct_snapshot_task_usage() provides an approximate snapshot of the 344 * usage of the potentially many members of the task. Since we don't 345 * guarantee exactness, we don't acquire the p_lock of any of the member 346 * processes. 347 */ 348 do { 349 mutex_enter(&p->p_lock); 350 tu->tu_utime += mstate_aggr_state(p, LMS_USER); 351 tu->tu_stime += mstate_aggr_state(p, LMS_SYSTEM); 352 mutex_exit(&p->p_lock); 353 tu->tu_minflt += p->p_ru.minflt; 354 tu->tu_majflt += p->p_ru.majflt; 355 tu->tu_sndmsg += p->p_ru.msgsnd; 356 tu->tu_rcvmsg += p->p_ru.msgrcv; 357 tu->tu_ioch += p->p_ru.ioch; 358 tu->tu_iblk += p->p_ru.inblock; 359 tu->tu_oblk += p->p_ru.oublock; 360 tu->tu_vcsw += p->p_ru.nvcsw; 361 tu->tu_icsw += p->p_ru.nivcsw; 362 tu->tu_nsig += p->p_ru.nsignals; 363 tu->tu_nswp += p->p_ru.nswap; 364 tu->tu_nscl += p->p_ru.sysc; 365 } while ((p = p->p_tasknext) != tk->tk_memb_list); 366 367 /* 368 * The resource usage accounted for so far will include that 369 * contributed by the task's first process. If this process 370 * came from another task, then its accumulated resource usage 371 * will include a contribution from work performed there. 372 * We must therefore subtract any resource usage that was 373 * inherited with the first process. 374 */ 375 exacct_sub_task_mstate(tu, tk->tk_inherited); 376 377 gethrestime(&ts); 378 tu->tu_finishsec = (uint64_t)(ulong_t)ts.tv_sec; 379 tu->tu_finishnsec = (uint64_t)(ulong_t)ts.tv_nsec; 380 } 381 382 /* 383 * void exacct_update_task_mstate(proc_t *) 384 * 385 * Overview 386 * exacct_update_task_mstate() updates the task usage; it is intended 387 * to be called from proc_exit(). 388 * 389 * Return values 390 * None. 391 * 392 * Caller's context 393 * p_lock must be held at entry. 394 */ 395 void 396 exacct_update_task_mstate(proc_t *p) 397 { 398 task_usage_t *tu; 399 400 mutex_enter(&p->p_task->tk_usage_lock); 401 tu = p->p_task->tk_usage; 402 tu->tu_utime += mstate_aggr_state(p, LMS_USER); 403 tu->tu_stime += mstate_aggr_state(p, LMS_SYSTEM); 404 tu->tu_minflt += p->p_ru.minflt; 405 tu->tu_majflt += p->p_ru.majflt; 406 tu->tu_sndmsg += p->p_ru.msgsnd; 407 tu->tu_rcvmsg += p->p_ru.msgrcv; 408 tu->tu_ioch += p->p_ru.ioch; 409 tu->tu_iblk += p->p_ru.inblock; 410 tu->tu_oblk += p->p_ru.oublock; 411 tu->tu_vcsw += p->p_ru.nvcsw; 412 tu->tu_icsw += p->p_ru.nivcsw; 413 tu->tu_nsig += p->p_ru.nsignals; 414 tu->tu_nswp += p->p_ru.nswap; 415 tu->tu_nscl += p->p_ru.sysc; 416 mutex_exit(&p->p_task->tk_usage_lock); 417 } 418 419 static void 420 exacct_calculate_task_usage(task_t *tk, task_usage_t *tu, int flag) 421 { 422 timestruc_t ts; 423 task_usage_t *tu_buf; 424 425 switch (flag) { 426 case EW_PARTIAL: 427 /* 428 * For partial records we must report the sum of current 429 * accounting statistics with previously accumulated 430 * statistics. 431 */ 432 mutex_enter(&pidlock); 433 mutex_enter(&tk->tk_usage_lock); 434 435 (void) bcopy(tk->tk_usage, tu, sizeof (task_usage_t)); 436 exacct_snapshot_task_usage(tk, tu); 437 438 mutex_exit(&tk->tk_usage_lock); 439 mutex_exit(&pidlock); 440 break; 441 case EW_INTERVAL: 442 /* 443 * We need to allocate spare task_usage_t buffer before 444 * grabbing pidlock because we might need it later in 445 * exacct_get_interval_task_usage(). 446 */ 447 tu_buf = kmem_zalloc(sizeof (task_usage_t), KM_SLEEP); 448 mutex_enter(&pidlock); 449 mutex_enter(&tk->tk_usage_lock); 450 451 /* 452 * For interval records, we deduct the previous microstate 453 * accounting data and cpu usage times from previously saved 454 * results and update the previous task usage structure. 455 */ 456 (void) bcopy(tk->tk_usage, tu, sizeof (task_usage_t)); 457 exacct_snapshot_task_usage(tk, tu); 458 exacct_get_interval_task_usage(tk, tu, &tu_buf); 459 460 mutex_exit(&tk->tk_usage_lock); 461 mutex_exit(&pidlock); 462 463 if (tu_buf != NULL) 464 kmem_free(tu_buf, sizeof (task_usage_t)); 465 break; 466 case EW_FINAL: 467 /* 468 * For final records, we deduct, from the task's current 469 * usage, any usage that was inherited with the arrival 470 * of a process from a previous task. We then record 471 * the task's finish time. 472 */ 473 mutex_enter(&tk->tk_usage_lock); 474 (void) bcopy(tk->tk_usage, tu, sizeof (task_usage_t)); 475 exacct_sub_task_mstate(tu, tk->tk_inherited); 476 mutex_exit(&tk->tk_usage_lock); 477 478 gethrestime(&ts); 479 tu->tu_finishsec = (uint64_t)(ulong_t)ts.tv_sec; 480 tu->tu_finishnsec = (uint64_t)(ulong_t)ts.tv_nsec; 481 482 break; 483 } 484 } 485 486 static int 487 exacct_attach_task_item(task_t *tk, task_usage_t *tu, ea_object_t *record, 488 int res) 489 { 490 int attached = 1; 491 492 switch (res) { 493 case AC_TASK_TASKID: 494 (void) ea_attach_item(record, &tk->tk_tkid, 495 sizeof (uint32_t), EXT_UINT32 | EXD_TASK_TASKID); 496 break; 497 case AC_TASK_PROJID: 498 (void) ea_attach_item(record, &tk->tk_proj->kpj_id, 499 sizeof (uint32_t), EXT_UINT32 | EXD_TASK_PROJID); 500 break; 501 case AC_TASK_CPU: { 502 timestruc_t ts; 503 uint64_t ui; 504 505 hrt2ts(tu->tu_stime, &ts); 506 ui = ts.tv_sec; 507 (void) ea_attach_item(record, &ui, sizeof (uint64_t), 508 EXT_UINT64 | EXD_TASK_CPU_SYS_SEC); 509 ui = ts.tv_nsec; 510 (void) ea_attach_item(record, &ui, sizeof (uint64_t), 511 EXT_UINT64 | EXD_TASK_CPU_SYS_NSEC); 512 513 hrt2ts(tu->tu_utime, &ts); 514 ui = ts.tv_sec; 515 (void) ea_attach_item(record, &ui, sizeof (uint64_t), 516 EXT_UINT64 | EXD_TASK_CPU_USER_SEC); 517 ui = ts.tv_nsec; 518 (void) ea_attach_item(record, &ui, sizeof (uint64_t), 519 EXT_UINT64 | EXD_TASK_CPU_USER_NSEC); 520 } 521 break; 522 case AC_TASK_TIME: 523 (void) ea_attach_item(record, &tu->tu_startsec, 524 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_START_SEC); 525 (void) ea_attach_item(record, &tu->tu_startnsec, 526 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_START_NSEC); 527 (void) ea_attach_item(record, &tu->tu_finishsec, 528 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_FINISH_SEC); 529 (void) ea_attach_item(record, &tu->tu_finishnsec, 530 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_FINISH_NSEC); 531 break; 532 case AC_TASK_HOSTNAME: 533 (void) ea_attach_item(record, tk->tk_zone->zone_nodename, 534 strlen(tk->tk_zone->zone_nodename) + 1, 535 EXT_STRING | EXD_TASK_HOSTNAME); 536 break; 537 case AC_TASK_MICROSTATE: 538 (void) ea_attach_item(record, &tu->tu_majflt, 539 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_FAULTS_MAJOR); 540 (void) ea_attach_item(record, &tu->tu_minflt, 541 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_FAULTS_MINOR); 542 (void) ea_attach_item(record, &tu->tu_sndmsg, 543 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_MESSAGES_SND); 544 (void) ea_attach_item(record, &tu->tu_rcvmsg, 545 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_MESSAGES_RCV); 546 (void) ea_attach_item(record, &tu->tu_iblk, 547 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_BLOCKS_IN); 548 (void) ea_attach_item(record, &tu->tu_oblk, 549 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_BLOCKS_OUT); 550 (void) ea_attach_item(record, &tu->tu_ioch, 551 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_CHARS_RDWR); 552 (void) ea_attach_item(record, &tu->tu_vcsw, 553 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_CONTEXT_VOL); 554 (void) ea_attach_item(record, &tu->tu_icsw, 555 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_CONTEXT_INV); 556 (void) ea_attach_item(record, &tu->tu_nsig, 557 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_SIGNALS); 558 (void) ea_attach_item(record, &tu->tu_nswp, 559 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_SWAPS); 560 (void) ea_attach_item(record, &tu->tu_nscl, 561 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_SYSCALLS); 562 break; 563 case AC_TASK_ANCTASKID: 564 (void) ea_attach_item(record, &tu->tu_anctaskid, 565 sizeof (uint32_t), EXT_UINT32 | EXD_TASK_ANCTASKID); 566 break; 567 case AC_TASK_ZONENAME: 568 (void) ea_attach_item(record, tk->tk_zone->zone_name, 569 strlen(tk->tk_zone->zone_name) + 1, 570 EXT_STRING | EXD_TASK_ZONENAME); 571 break; 572 default: 573 attached = 0; 574 } 575 return (attached); 576 } 577 578 static ea_object_t * 579 exacct_assemble_task_record(task_t *tk, task_usage_t *tu, ulong_t *mask, 580 ea_catalog_t record_type) 581 { 582 int res, count; 583 ea_object_t *record; 584 585 /* 586 * Assemble usage values into group. 587 */ 588 record = ea_alloc_group(EXT_GROUP | EXC_DEFAULT | record_type); 589 for (res = 1, count = 0; res <= AC_TASK_MAX_RES; res++) 590 if (BT_TEST(mask, res)) 591 count += exacct_attach_task_item(tk, tu, record, res); 592 if (count == 0) { 593 ea_free_object(record, EUP_ALLOC); 594 record = NULL; 595 } 596 return (record); 597 } 598 599 /* 600 * int exacct_assemble_task_usage(task_t *, int (*)(void *, size_t, void *, 601 * size_t, size_t *), void *, size_t, size_t *, int) 602 * 603 * Overview 604 * exacct_assemble_task_usage() builds the packed exacct buffer for the 605 * indicated task, executes the given callback function, and free the packed 606 * buffer. 607 * 608 * Return values 609 * Returns 0 on success; otherwise the appropriate error code is returned. 610 * 611 * Caller's context 612 * Suitable for KM_SLEEP allocations. 613 */ 614 int 615 exacct_assemble_task_usage(ac_info_t *ac_task, task_t *tk, 616 int (*callback)(ac_info_t *, void *, size_t, void *, size_t, size_t *), 617 void *ubuf, size_t ubufsize, size_t *actual, int flag) 618 { 619 ulong_t mask[AC_MASK_SZ]; 620 ea_object_t *task_record; 621 ea_catalog_t record_type; 622 task_usage_t *tu; 623 void *buf; 624 size_t bufsize; 625 int ret; 626 627 ASSERT(flag == EW_FINAL || flag == EW_PARTIAL || flag == EW_INTERVAL); 628 629 mutex_enter(&ac_task->ac_lock); 630 if (ac_task->ac_state == AC_OFF) { 631 mutex_exit(&ac_task->ac_lock); 632 return (ENOTACTIVE); 633 } 634 bt_copy(ac_task->ac_mask, mask, AC_MASK_SZ); 635 mutex_exit(&ac_task->ac_lock); 636 637 switch (flag) { 638 case EW_FINAL: 639 record_type = EXD_GROUP_TASK; 640 break; 641 case EW_PARTIAL: 642 record_type = EXD_GROUP_TASK_PARTIAL; 643 break; 644 case EW_INTERVAL: 645 record_type = EXD_GROUP_TASK_INTERVAL; 646 break; 647 } 648 649 /* 650 * Calculate task usage and assemble it into the task record. 651 */ 652 tu = kmem_zalloc(sizeof (task_usage_t), KM_SLEEP); 653 exacct_calculate_task_usage(tk, tu, flag); 654 task_record = exacct_assemble_task_record(tk, tu, mask, record_type); 655 if (task_record == NULL) { 656 /* 657 * The current configuration of the accounting system has 658 * resulted in records with no data; accordingly, we don't write 659 * these, but we return success. 660 */ 661 kmem_free(tu, sizeof (task_usage_t)); 662 return (0); 663 } 664 665 /* 666 * Pack object into buffer and run callback on it. 667 */ 668 bufsize = ea_pack_object(task_record, NULL, 0); 669 buf = kmem_alloc(bufsize, KM_SLEEP); 670 (void) ea_pack_object(task_record, buf, bufsize); 671 ret = callback(ac_task, ubuf, ubufsize, buf, bufsize, actual); 672 673 /* 674 * Free all previously allocated structures. 675 */ 676 kmem_free(buf, bufsize); 677 ea_free_object(task_record, EUP_ALLOC); 678 kmem_free(tu, sizeof (task_usage_t)); 679 return (ret); 680 } 681 682 /* 683 * void exacct_commit_task(void *) 684 * 685 * Overview 686 * exacct_commit_task() calculates the final usage for a task, updating the 687 * task usage if task accounting is active, and writing a task record if task 688 * accounting is active. exacct_commit_task() is intended for being called 689 * from a task queue (taskq_t). 690 * 691 * Return values 692 * None. 693 * 694 * Caller's context 695 * Suitable for KM_SLEEP allocations. 696 */ 697 698 void 699 exacct_commit_task(void *arg) 700 { 701 task_t *tk = (task_t *)arg; 702 size_t size; 703 zone_t *zone = tk->tk_zone; 704 struct exacct_globals *acg; 705 706 ASSERT(tk != task0p); 707 ASSERT(tk->tk_memb_list == NULL); 708 709 /* 710 * Don't do any extra work if the acctctl module isn't loaded. 711 */ 712 if (exacct_zone_key != ZONE_KEY_UNINITIALIZED) { 713 acg = zone_getspecific(exacct_zone_key, zone); 714 (void) exacct_assemble_task_usage(&acg->ac_task, tk, 715 exacct_commit_callback, NULL, 0, &size, EW_FINAL); 716 if (tk->tk_zone != global_zone) { 717 acg = zone_getspecific(exacct_zone_key, global_zone); 718 (void) exacct_assemble_task_usage(&acg->ac_task, tk, 719 exacct_commit_callback, NULL, 0, &size, EW_FINAL); 720 } 721 } 722 /* 723 * Release associated project and finalize task. 724 */ 725 task_end(tk); 726 } 727 728 static int 729 exacct_attach_proc_item(proc_usage_t *pu, ea_object_t *record, int res) 730 { 731 int attached = 1; 732 733 switch (res) { 734 case AC_PROC_PID: 735 (void) ea_attach_item(record, &pu->pu_pid, 736 sizeof (uint32_t), EXT_UINT32 | EXD_PROC_PID); 737 break; 738 case AC_PROC_UID: 739 (void) ea_attach_item(record, &pu->pu_ruid, 740 sizeof (uint32_t), EXT_UINT32 | EXD_PROC_UID); 741 break; 742 case AC_PROC_FLAG: 743 (void) ea_attach_item(record, &pu->pu_acflag, 744 sizeof (uint32_t), EXT_UINT32 | EXD_PROC_ACCT_FLAGS); 745 break; 746 case AC_PROC_GID: 747 (void) ea_attach_item(record, &pu->pu_rgid, 748 sizeof (uint32_t), EXT_UINT32 | EXD_PROC_GID); 749 break; 750 case AC_PROC_PROJID: 751 (void) ea_attach_item(record, &pu->pu_projid, 752 sizeof (uint32_t), EXT_UINT32 | EXD_PROC_PROJID); 753 break; 754 case AC_PROC_TASKID: 755 (void) ea_attach_item(record, &pu->pu_taskid, 756 sizeof (uint32_t), EXT_UINT32 | EXD_PROC_TASKID); 757 break; 758 case AC_PROC_CPU: 759 (void) ea_attach_item(record, &pu->pu_utimesec, 760 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_CPU_USER_SEC); 761 (void) ea_attach_item(record, &pu->pu_utimensec, 762 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_CPU_USER_NSEC); 763 (void) ea_attach_item(record, &pu->pu_stimesec, 764 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_CPU_SYS_SEC); 765 (void) ea_attach_item(record, &pu->pu_stimensec, 766 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_CPU_SYS_NSEC); 767 break; 768 case AC_PROC_TIME: 769 (void) ea_attach_item(record, &pu->pu_startsec, 770 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_START_SEC); 771 (void) ea_attach_item(record, &pu->pu_startnsec, 772 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_START_NSEC); 773 (void) ea_attach_item(record, &pu->pu_finishsec, 774 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_FINISH_SEC); 775 (void) ea_attach_item(record, &pu->pu_finishnsec, 776 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_FINISH_NSEC); 777 break; 778 case AC_PROC_COMMAND: 779 (void) ea_attach_item(record, pu->pu_command, 780 strlen(pu->pu_command) + 1, EXT_STRING | EXD_PROC_COMMAND); 781 break; 782 case AC_PROC_HOSTNAME: 783 (void) ea_attach_item(record, pu->pu_nodename, 784 strlen(pu->pu_nodename) + 1, 785 EXT_STRING | EXD_PROC_HOSTNAME); 786 break; 787 case AC_PROC_TTY: 788 (void) ea_attach_item(record, &pu->pu_major, 789 sizeof (uint32_t), EXT_UINT32 | EXD_PROC_TTY_MAJOR); 790 (void) ea_attach_item(record, &pu->pu_minor, 791 sizeof (uint32_t), EXT_UINT32 | EXD_PROC_TTY_MINOR); 792 break; 793 case AC_PROC_MICROSTATE: 794 (void) ea_attach_item(record, &pu->pu_majflt, 795 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_FAULTS_MAJOR); 796 (void) ea_attach_item(record, &pu->pu_minflt, 797 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_FAULTS_MINOR); 798 (void) ea_attach_item(record, &pu->pu_sndmsg, 799 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_MESSAGES_SND); 800 (void) ea_attach_item(record, &pu->pu_rcvmsg, 801 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_MESSAGES_RCV); 802 (void) ea_attach_item(record, &pu->pu_iblk, 803 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_BLOCKS_IN); 804 (void) ea_attach_item(record, &pu->pu_oblk, 805 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_BLOCKS_OUT); 806 (void) ea_attach_item(record, &pu->pu_ioch, 807 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_CHARS_RDWR); 808 (void) ea_attach_item(record, &pu->pu_vcsw, 809 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_CONTEXT_VOL); 810 (void) ea_attach_item(record, &pu->pu_icsw, 811 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_CONTEXT_INV); 812 (void) ea_attach_item(record, &pu->pu_nsig, 813 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_SIGNALS); 814 (void) ea_attach_item(record, &pu->pu_nswp, 815 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_SWAPS); 816 (void) ea_attach_item(record, &pu->pu_nscl, 817 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_SYSCALLS); 818 break; 819 case AC_PROC_ANCPID: 820 (void) ea_attach_item(record, &pu->pu_ancpid, 821 sizeof (uint32_t), EXT_UINT32 | EXD_PROC_ANCPID); 822 break; 823 case AC_PROC_WAIT_STATUS: 824 (void) ea_attach_item(record, &pu->pu_wstat, 825 sizeof (uint32_t), EXT_UINT32 | EXD_PROC_WAIT_STATUS); 826 break; 827 case AC_PROC_ZONENAME: 828 (void) ea_attach_item(record, pu->pu_zonename, 829 strlen(pu->pu_zonename) + 1, 830 EXT_STRING | EXD_PROC_ZONENAME); 831 break; 832 case AC_PROC_MEM: 833 (void) ea_attach_item(record, &pu->pu_mem_rss_avg, 834 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_MEM_RSS_AVG_K); 835 (void) ea_attach_item(record, &pu->pu_mem_rss_max, 836 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_MEM_RSS_MAX_K); 837 break; 838 default: 839 attached = 0; 840 } 841 return (attached); 842 } 843 844 static ea_object_t * 845 exacct_assemble_proc_record(proc_usage_t *pu, ulong_t *mask, 846 ea_catalog_t record_type) 847 { 848 int res, count; 849 ea_object_t *record; 850 851 /* 852 * Assemble usage values into group. 853 */ 854 record = ea_alloc_group(EXT_GROUP | EXC_DEFAULT | record_type); 855 for (res = 1, count = 0; res <= AC_PROC_MAX_RES; res++) 856 if (BT_TEST(mask, res)) 857 count += exacct_attach_proc_item(pu, record, res); 858 if (count == 0) { 859 ea_free_object(record, EUP_ALLOC); 860 record = NULL; 861 } 862 return (record); 863 } 864 865 /* 866 * The following two routines assume that process's p_lock is held or 867 * exacct_commit_proc has been called from exit() when all lwps are stopped. 868 */ 869 static void 870 exacct_calculate_proc_mstate(proc_t *p, proc_usage_t *pu) 871 { 872 kthread_t *t; 873 874 ASSERT(MUTEX_HELD(&p->p_lock)); 875 if ((t = p->p_tlist) == NULL) 876 return; 877 878 do { 879 pu->pu_minflt += t->t_lwp->lwp_ru.minflt; 880 pu->pu_majflt += t->t_lwp->lwp_ru.majflt; 881 pu->pu_sndmsg += t->t_lwp->lwp_ru.msgsnd; 882 pu->pu_rcvmsg += t->t_lwp->lwp_ru.msgrcv; 883 pu->pu_ioch += t->t_lwp->lwp_ru.ioch; 884 pu->pu_iblk += t->t_lwp->lwp_ru.inblock; 885 pu->pu_oblk += t->t_lwp->lwp_ru.oublock; 886 pu->pu_vcsw += t->t_lwp->lwp_ru.nvcsw; 887 pu->pu_icsw += t->t_lwp->lwp_ru.nivcsw; 888 pu->pu_nsig += t->t_lwp->lwp_ru.nsignals; 889 pu->pu_nswp += t->t_lwp->lwp_ru.nswap; 890 pu->pu_nscl += t->t_lwp->lwp_ru.sysc; 891 } while ((t = t->t_forw) != p->p_tlist); 892 } 893 894 static void 895 exacct_copy_proc_mstate(proc_t *p, proc_usage_t *pu) 896 { 897 pu->pu_minflt = p->p_ru.minflt; 898 pu->pu_majflt = p->p_ru.majflt; 899 pu->pu_sndmsg = p->p_ru.msgsnd; 900 pu->pu_rcvmsg = p->p_ru.msgrcv; 901 pu->pu_ioch = p->p_ru.ioch; 902 pu->pu_iblk = p->p_ru.inblock; 903 pu->pu_oblk = p->p_ru.oublock; 904 pu->pu_vcsw = p->p_ru.nvcsw; 905 pu->pu_icsw = p->p_ru.nivcsw; 906 pu->pu_nsig = p->p_ru.nsignals; 907 pu->pu_nswp = p->p_ru.nswap; 908 pu->pu_nscl = p->p_ru.sysc; 909 } 910 911 void 912 exacct_calculate_proc_usage(proc_t *p, proc_usage_t *pu, ulong_t *mask, 913 int flag, int wstat) 914 { 915 timestruc_t ts, ts_run; 916 917 ASSERT(MUTEX_HELD(&p->p_lock)); 918 919 /* 920 * Convert CPU and execution times to sec/nsec format. 921 */ 922 if (BT_TEST(mask, AC_PROC_CPU)) { 923 hrt2ts(mstate_aggr_state(p, LMS_USER), &ts); 924 pu->pu_utimesec = (uint64_t)(ulong_t)ts.tv_sec; 925 pu->pu_utimensec = (uint64_t)(ulong_t)ts.tv_nsec; 926 hrt2ts(mstate_aggr_state(p, LMS_SYSTEM), &ts); 927 pu->pu_stimesec = (uint64_t)(ulong_t)ts.tv_sec; 928 pu->pu_stimensec = (uint64_t)(ulong_t)ts.tv_nsec; 929 } 930 if (BT_TEST(mask, AC_PROC_TIME)) { 931 gethrestime(&ts); 932 pu->pu_finishsec = (uint64_t)(ulong_t)ts.tv_sec; 933 pu->pu_finishnsec = (uint64_t)(ulong_t)ts.tv_nsec; 934 hrt2ts(gethrtime() - p->p_mstart, &ts_run); 935 ts.tv_sec -= ts_run.tv_sec; 936 ts.tv_nsec -= ts_run.tv_nsec; 937 if (ts.tv_nsec < 0) { 938 ts.tv_sec--; 939 if ((ts.tv_nsec = ts.tv_nsec + NANOSEC) >= NANOSEC) { 940 ts.tv_sec++; 941 ts.tv_nsec -= NANOSEC; 942 } 943 } 944 pu->pu_startsec = (uint64_t)(ulong_t)ts.tv_sec; 945 pu->pu_startnsec = (uint64_t)(ulong_t)ts.tv_nsec; 946 } 947 948 pu->pu_pid = p->p_pidp->pid_id; 949 pu->pu_acflag = p->p_user.u_acflag; 950 pu->pu_projid = p->p_task->tk_proj->kpj_id; 951 pu->pu_taskid = p->p_task->tk_tkid; 952 pu->pu_major = getmajor(p->p_sessp->s_dev); 953 pu->pu_minor = getminor(p->p_sessp->s_dev); 954 pu->pu_ancpid = p->p_ancpid; 955 pu->pu_wstat = wstat; 956 /* 957 * Compute average RSS in K. The denominator is the number of 958 * samples: the number of clock ticks plus the initial value. 959 */ 960 pu->pu_mem_rss_avg = (PTOU(p)->u_mem / (p->p_stime + p->p_utime + 1)) * 961 (PAGESIZE / 1024); 962 pu->pu_mem_rss_max = PTOU(p)->u_mem_max * (PAGESIZE / 1024); 963 964 mutex_enter(&p->p_crlock); 965 pu->pu_ruid = crgetruid(p->p_cred); 966 pu->pu_rgid = crgetrgid(p->p_cred); 967 mutex_exit(&p->p_crlock); 968 969 bcopy(p->p_user.u_comm, pu->pu_command, strlen(p->p_user.u_comm) + 1); 970 bcopy(p->p_zone->zone_name, pu->pu_zonename, 971 strlen(p->p_zone->zone_name) + 1); 972 bcopy(p->p_zone->zone_nodename, pu->pu_nodename, 973 strlen(p->p_zone->zone_nodename) + 1); 974 975 /* 976 * Calculate microstate accounting data for a process that is still 977 * running. Presently, we explicitly collect all of the LWP usage into 978 * the proc usage structure here. 979 */ 980 if (flag & EW_PARTIAL) 981 exacct_calculate_proc_mstate(p, pu); 982 if (flag & EW_FINAL) 983 exacct_copy_proc_mstate(p, pu); 984 } 985 986 /* 987 * int exacct_assemble_proc_usage(proc_usage_t *, int (*)(void *, size_t, void 988 * *, size_t, size_t *), void *, size_t, size_t *) 989 * 990 * Overview 991 * Assemble record with miscellaneous accounting information about the process 992 * and execute the callback on it. It is the callback's job to set "actual" to 993 * the size of record. 994 * 995 * Return values 996 * The result of the callback function, unless the extended process accounting 997 * feature is not active, in which case ENOTACTIVE is returned. 998 * 999 * Caller's context 1000 * Suitable for KM_SLEEP allocations. 1001 */ 1002 int 1003 exacct_assemble_proc_usage(ac_info_t *ac_proc, proc_usage_t *pu, 1004 int (*callback)(ac_info_t *, void *, size_t, void *, size_t, size_t *), 1005 void *ubuf, size_t ubufsize, size_t *actual, int flag) 1006 { 1007 ulong_t mask[AC_MASK_SZ]; 1008 ea_object_t *proc_record; 1009 ea_catalog_t record_type; 1010 void *buf; 1011 size_t bufsize; 1012 int ret; 1013 1014 ASSERT(flag == EW_FINAL || flag == EW_PARTIAL); 1015 1016 mutex_enter(&ac_proc->ac_lock); 1017 if (ac_proc->ac_state == AC_OFF) { 1018 mutex_exit(&ac_proc->ac_lock); 1019 return (ENOTACTIVE); 1020 } 1021 bt_copy(&ac_proc->ac_mask[0], mask, AC_MASK_SZ); 1022 mutex_exit(&ac_proc->ac_lock); 1023 1024 switch (flag) { 1025 case EW_FINAL: 1026 record_type = EXD_GROUP_PROC; 1027 break; 1028 case EW_PARTIAL: 1029 record_type = EXD_GROUP_PROC_PARTIAL; 1030 break; 1031 } 1032 1033 proc_record = exacct_assemble_proc_record(pu, mask, record_type); 1034 if (proc_record == NULL) 1035 return (0); 1036 1037 /* 1038 * Pack object into buffer and pass to callback. 1039 */ 1040 bufsize = ea_pack_object(proc_record, NULL, 0); 1041 buf = kmem_alloc(bufsize, KM_SLEEP); 1042 (void) ea_pack_object(proc_record, buf, bufsize); 1043 1044 ret = callback(ac_proc, ubuf, ubufsize, buf, bufsize, actual); 1045 1046 /* 1047 * Free all previously allocations. 1048 */ 1049 kmem_free(buf, bufsize); 1050 ea_free_object(proc_record, EUP_ALLOC); 1051 return (ret); 1052 } 1053 1054 /* 1055 * int exacct_commit_callback(ac_info_t *, void *, size_t, void *, size_t, 1056 * size_t *) 1057 * 1058 * Overview 1059 * exacct_commit_callback() writes the indicated buffer to the indicated 1060 * extended accounting file. 1061 * 1062 * Return values 1063 * The result of the write operation is returned. "actual" is updated to 1064 * contain the number of bytes actually written. 1065 * 1066 * Caller's context 1067 * Suitable for a vn_rdwr() operation. 1068 */ 1069 /*ARGSUSED*/ 1070 int 1071 exacct_commit_callback(ac_info_t *info, void *ubuf, size_t ubufsize, 1072 void *buf, size_t bufsize, size_t *actual) 1073 { 1074 int error = 0; 1075 1076 *actual = 0; 1077 if ((error = exacct_vn_write(info, buf, bufsize)) == 0) 1078 *actual = bufsize; 1079 return (error); 1080 } 1081 1082 static void 1083 exacct_do_commit_proc(ac_info_t *ac_proc, proc_t *p, int wstat) 1084 { 1085 size_t size; 1086 proc_usage_t *pu; 1087 ulong_t mask[AC_MASK_SZ]; 1088 1089 mutex_enter(&ac_proc->ac_lock); 1090 if (ac_proc->ac_state == AC_ON) { 1091 bt_copy(&ac_proc->ac_mask[0], mask, AC_MASK_SZ); 1092 mutex_exit(&ac_proc->ac_lock); 1093 } else { 1094 mutex_exit(&ac_proc->ac_lock); 1095 return; 1096 } 1097 1098 mutex_enter(&p->p_lock); 1099 size = strlen(p->p_user.u_comm) + 1; 1100 mutex_exit(&p->p_lock); 1101 1102 pu = kmem_alloc(sizeof (proc_usage_t), KM_SLEEP); 1103 pu->pu_command = kmem_alloc(size, KM_SLEEP); 1104 mutex_enter(&p->p_lock); 1105 exacct_calculate_proc_usage(p, pu, mask, EW_FINAL, wstat); 1106 mutex_exit(&p->p_lock); 1107 1108 (void) exacct_assemble_proc_usage(ac_proc, pu, 1109 exacct_commit_callback, NULL, 0, &size, EW_FINAL); 1110 1111 kmem_free(pu->pu_command, strlen(pu->pu_command) + 1); 1112 kmem_free(pu, sizeof (proc_usage_t)); 1113 } 1114 1115 /* 1116 * void exacct_commit_proc(proc_t *, int) 1117 * 1118 * Overview 1119 * exacct_commit_proc() calculates the final usage for a process, updating the 1120 * task usage if task accounting is active, and writing a process record if 1121 * process accounting is active. exacct_commit_proc() is intended for being 1122 * called from proc_exit(). 1123 * 1124 * Return values 1125 * None. 1126 * 1127 * Caller's context 1128 * Suitable for KM_SLEEP allocations. p_lock must not be held at entry. 1129 */ 1130 void 1131 exacct_commit_proc(proc_t *p, int wstat) 1132 { 1133 zone_t *zone = p->p_zone; 1134 struct exacct_globals *acg, *gacg = NULL; 1135 1136 if (exacct_zone_key == ZONE_KEY_UNINITIALIZED) { 1137 /* 1138 * acctctl module not loaded. Nothing to do. 1139 */ 1140 return; 1141 } 1142 acg = zone_getspecific(exacct_zone_key, zone); 1143 exacct_do_commit_proc(&acg->ac_proc, p, wstat); 1144 if (zone != global_zone) { 1145 gacg = zone_getspecific(exacct_zone_key, global_zone); 1146 exacct_do_commit_proc(&gacg->ac_proc, p, wstat); 1147 } 1148 } 1149 1150 static int 1151 exacct_attach_flow_item(flow_usage_t *fu, ea_object_t *record, int res) 1152 { 1153 int attached = 1; 1154 1155 switch (res) { 1156 case AC_FLOW_SADDR: 1157 if (fu->fu_isv4) { 1158 (void) ea_attach_item(record, &fu->fu_saddr[3], 1159 sizeof (uint32_t), EXT_UINT32 | EXD_FLOW_V4SADDR); 1160 } else { 1161 (void) ea_attach_item(record, &fu->fu_saddr, 1162 sizeof (fu->fu_saddr), EXT_RAW | 1163 EXD_FLOW_V6SADDR); 1164 } 1165 break; 1166 case AC_FLOW_DADDR: 1167 if (fu->fu_isv4) { 1168 (void) ea_attach_item(record, &fu->fu_daddr[3], 1169 sizeof (uint32_t), EXT_UINT32 | EXD_FLOW_V4DADDR); 1170 } else { 1171 (void) ea_attach_item(record, &fu->fu_daddr, 1172 sizeof (fu->fu_daddr), EXT_RAW | 1173 EXD_FLOW_V6DADDR); 1174 } 1175 break; 1176 case AC_FLOW_SPORT: 1177 (void) ea_attach_item(record, &fu->fu_sport, 1178 sizeof (uint16_t), EXT_UINT16 | EXD_FLOW_SPORT); 1179 break; 1180 case AC_FLOW_DPORT: 1181 (void) ea_attach_item(record, &fu->fu_dport, 1182 sizeof (uint16_t), EXT_UINT16 | EXD_FLOW_DPORT); 1183 break; 1184 case AC_FLOW_PROTOCOL: 1185 (void) ea_attach_item(record, &fu->fu_protocol, 1186 sizeof (uint8_t), EXT_UINT8 | EXD_FLOW_PROTOCOL); 1187 break; 1188 case AC_FLOW_DSFIELD: 1189 (void) ea_attach_item(record, &fu->fu_dsfield, 1190 sizeof (uint8_t), EXT_UINT8 | EXD_FLOW_DSFIELD); 1191 break; 1192 case AC_FLOW_CTIME: 1193 (void) ea_attach_item(record, &fu->fu_ctime, 1194 sizeof (uint64_t), EXT_UINT64 | EXD_FLOW_CTIME); 1195 break; 1196 case AC_FLOW_LSEEN: 1197 (void) ea_attach_item(record, &fu->fu_lseen, 1198 sizeof (uint64_t), EXT_UINT64 | EXD_FLOW_LSEEN); 1199 break; 1200 case AC_FLOW_NBYTES: 1201 (void) ea_attach_item(record, &fu->fu_nbytes, 1202 sizeof (uint64_t), EXT_UINT32 | EXD_FLOW_NBYTES); 1203 break; 1204 case AC_FLOW_NPKTS: 1205 (void) ea_attach_item(record, &fu->fu_npackets, 1206 sizeof (uint64_t), EXT_UINT32 | EXD_FLOW_NPKTS); 1207 break; 1208 case AC_FLOW_PROJID: 1209 if (fu->fu_projid >= 0) { 1210 (void) ea_attach_item(record, &fu->fu_projid, 1211 sizeof (uint32_t), EXT_UINT32 | EXD_FLOW_PROJID); 1212 } 1213 break; 1214 case AC_FLOW_UID: 1215 if (fu->fu_userid >= 0) { 1216 (void) ea_attach_item(record, &fu->fu_userid, 1217 sizeof (uint32_t), EXT_UINT32 | EXD_FLOW_UID); 1218 } 1219 break; 1220 case AC_FLOW_ANAME: 1221 (void) ea_attach_item(record, fu->fu_aname, 1222 strlen(fu->fu_aname) + 1, EXT_STRING | EXD_FLOW_ANAME); 1223 break; 1224 default: 1225 attached = 0; 1226 } 1227 return (attached); 1228 } 1229 1230 static ea_object_t * 1231 exacct_assemble_flow_record(flow_usage_t *fu, ulong_t *mask, 1232 ea_catalog_t record_type) 1233 { 1234 int res, count; 1235 ea_object_t *record; 1236 1237 /* 1238 * Assemble usage values into group. 1239 */ 1240 record = ea_alloc_group(EXT_GROUP | EXC_DEFAULT | record_type); 1241 for (res = 1, count = 0; res <= AC_FLOW_MAX_RES; res++) 1242 if (BT_TEST(mask, res)) 1243 count += exacct_attach_flow_item(fu, record, res); 1244 if (count == 0) { 1245 ea_free_object(record, EUP_ALLOC); 1246 record = NULL; 1247 } 1248 return (record); 1249 } 1250 1251 int 1252 exacct_assemble_flow_usage(ac_info_t *ac_flow, flow_usage_t *fu, 1253 int (*callback)(ac_info_t *, void *, size_t, void *, size_t, size_t *), 1254 void *ubuf, size_t ubufsize, size_t *actual) 1255 { 1256 ulong_t mask[AC_MASK_SZ]; 1257 ea_object_t *flow_usage; 1258 ea_catalog_t record_type; 1259 void *buf; 1260 size_t bufsize; 1261 int ret; 1262 1263 mutex_enter(&ac_flow->ac_lock); 1264 if (ac_flow->ac_state == AC_OFF) { 1265 mutex_exit(&ac_flow->ac_lock); 1266 return (ENOTACTIVE); 1267 } 1268 bt_copy(&ac_flow->ac_mask[0], mask, AC_MASK_SZ); 1269 mutex_exit(&ac_flow->ac_lock); 1270 1271 record_type = EXD_GROUP_FLOW; 1272 1273 flow_usage = exacct_assemble_flow_record(fu, mask, record_type); 1274 if (flow_usage == NULL) { 1275 return (0); 1276 } 1277 1278 /* 1279 * Pack object into buffer and pass to callback. 1280 */ 1281 bufsize = ea_pack_object(flow_usage, NULL, 0); 1282 buf = kmem_alloc(bufsize, KM_NOSLEEP); 1283 if (buf == NULL) { 1284 return (ENOMEM); 1285 } 1286 1287 (void) ea_pack_object(flow_usage, buf, bufsize); 1288 1289 ret = callback(ac_flow, ubuf, ubufsize, buf, bufsize, actual); 1290 1291 /* 1292 * Free all previously allocations. 1293 */ 1294 kmem_free(buf, bufsize); 1295 ea_free_object(flow_usage, EUP_ALLOC); 1296 return (ret); 1297 } 1298 1299 void 1300 exacct_commit_flow(void *arg) 1301 { 1302 flow_usage_t *f = (flow_usage_t *)arg; 1303 size_t size; 1304 ulong_t mask[AC_MASK_SZ]; 1305 struct exacct_globals *acg; 1306 ac_info_t *ac_flow; 1307 1308 if (exacct_zone_key == ZONE_KEY_UNINITIALIZED) { 1309 /* 1310 * acctctl module not loaded. Nothing to do. 1311 */ 1312 return; 1313 } 1314 1315 /* 1316 * Even though each zone nominally has its own flow accounting settings 1317 * (ac_flow), these are only maintained by and for the global zone. 1318 * 1319 * If this were to change in the future, this function should grow a 1320 * second zoneid (or zone) argument, and use the corresponding zone's 1321 * settings rather than always using those of the global zone. 1322 */ 1323 acg = zone_getspecific(exacct_zone_key, global_zone); 1324 ac_flow = &acg->ac_flow; 1325 1326 mutex_enter(&ac_flow->ac_lock); 1327 if (ac_flow->ac_state == AC_OFF) { 1328 mutex_exit(&ac_flow->ac_lock); 1329 return; 1330 } 1331 bt_copy(&ac_flow->ac_mask[0], mask, AC_MASK_SZ); 1332 mutex_exit(&ac_flow->ac_lock); 1333 1334 (void) exacct_assemble_flow_usage(ac_flow, f, exacct_commit_callback, 1335 NULL, 0, &size); 1336 } 1337 1338 /* 1339 * int exacct_tag_task(task_t *, void *, size_t, int) 1340 * 1341 * Overview 1342 * exacct_tag_task() provides the exacct record construction and writing 1343 * support required by putacct(2) for task entities. 1344 * 1345 * Return values 1346 * The result of the write operation is returned, unless the extended 1347 * accounting facility is not active, in which case ENOTACTIVE is returned. 1348 * 1349 * Caller's context 1350 * Suitable for KM_SLEEP allocations. 1351 */ 1352 int 1353 exacct_tag_task(ac_info_t *ac_task, task_t *tk, void *ubuf, size_t ubufsz, 1354 int flags) 1355 { 1356 int error = 0; 1357 void *buf; 1358 size_t bufsize; 1359 ea_catalog_t cat; 1360 ea_object_t *tag; 1361 1362 mutex_enter(&ac_task->ac_lock); 1363 if (ac_task->ac_state == AC_OFF || ac_task->ac_vnode == NULL) { 1364 mutex_exit(&ac_task->ac_lock); 1365 return (ENOTACTIVE); 1366 } 1367 mutex_exit(&ac_task->ac_lock); 1368 1369 tag = ea_alloc_group(EXT_GROUP | EXC_DEFAULT | EXD_GROUP_TASK_TAG); 1370 (void) ea_attach_item(tag, &tk->tk_tkid, 0, 1371 EXT_UINT32 | EXC_DEFAULT | EXD_TASK_TASKID); 1372 (void) ea_attach_item(tag, tk->tk_zone->zone_nodename, 0, 1373 EXT_STRING | EXC_DEFAULT | EXD_TASK_HOSTNAME); 1374 if (flags == EP_RAW) 1375 cat = EXT_RAW | EXC_DEFAULT | EXD_TASK_TAG; 1376 else 1377 cat = EXT_EXACCT_OBJECT | EXC_DEFAULT | EXD_TASK_TAG; 1378 (void) ea_attach_item(tag, ubuf, ubufsz, cat); 1379 1380 bufsize = ea_pack_object(tag, NULL, 0); 1381 buf = kmem_alloc(bufsize, KM_SLEEP); 1382 (void) ea_pack_object(tag, buf, bufsize); 1383 error = exacct_vn_write(ac_task, buf, bufsize); 1384 kmem_free(buf, bufsize); 1385 ea_free_object(tag, EUP_ALLOC); 1386 return (error); 1387 } 1388 1389 /* 1390 * exacct_tag_proc(pid_t, taskid_t, void *, size_t, int, char *) 1391 * 1392 * Overview 1393 * exacct_tag_proc() provides the exacct record construction and writing 1394 * support required by putacct(2) for processes. 1395 * 1396 * Return values 1397 * The result of the write operation is returned, unless the extended 1398 * accounting facility is not active, in which case ENOTACTIVE is returned. 1399 * 1400 * Caller's context 1401 * Suitable for KM_SLEEP allocations. 1402 */ 1403 int 1404 exacct_tag_proc(ac_info_t *ac_proc, pid_t pid, taskid_t tkid, void *ubuf, 1405 size_t ubufsz, int flags, const char *hostname) 1406 { 1407 int error = 0; 1408 void *buf; 1409 size_t bufsize; 1410 ea_catalog_t cat; 1411 ea_object_t *tag; 1412 1413 mutex_enter(&ac_proc->ac_lock); 1414 if (ac_proc->ac_state == AC_OFF || ac_proc->ac_vnode == NULL) { 1415 mutex_exit(&ac_proc->ac_lock); 1416 return (ENOTACTIVE); 1417 } 1418 mutex_exit(&ac_proc->ac_lock); 1419 1420 tag = ea_alloc_group(EXT_GROUP | EXC_DEFAULT | EXD_GROUP_PROC_TAG); 1421 (void) ea_attach_item(tag, &pid, sizeof (uint32_t), 1422 EXT_UINT32 | EXC_DEFAULT | EXD_PROC_PID); 1423 (void) ea_attach_item(tag, &tkid, 0, 1424 EXT_UINT32 | EXC_DEFAULT | EXD_TASK_TASKID); 1425 (void) ea_attach_item(tag, (void *)hostname, 0, 1426 EXT_STRING | EXC_DEFAULT | EXD_TASK_HOSTNAME); 1427 if (flags == EP_RAW) 1428 cat = EXT_RAW | EXC_DEFAULT | EXD_PROC_TAG; 1429 else 1430 cat = EXT_EXACCT_OBJECT | EXC_DEFAULT | EXD_PROC_TAG; 1431 (void) ea_attach_item(tag, ubuf, ubufsz, cat); 1432 1433 bufsize = ea_pack_object(tag, NULL, 0); 1434 buf = kmem_alloc(bufsize, KM_SLEEP); 1435 (void) ea_pack_object(tag, buf, bufsize); 1436 error = exacct_vn_write(ac_proc, buf, bufsize); 1437 kmem_free(buf, bufsize); 1438 ea_free_object(tag, EUP_ALLOC); 1439 return (error); 1440 } 1441 1442 /* 1443 * void exacct_init(void) 1444 * 1445 * Overview 1446 * Initialized the extended accounting subsystem. 1447 * 1448 * Return values 1449 * None. 1450 * 1451 * Caller's context 1452 * Suitable for KM_SLEEP allocations. 1453 */ 1454 void 1455 exacct_init() 1456 { 1457 exacct_queue = system_taskq; 1458 exacct_object_cache = kmem_cache_create("exacct_object_cache", 1459 sizeof (ea_object_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 1460 } 1461 1462 /* 1463 * exacct_snapshot_proc_mstate() copies a process's microstate accounting data 1464 * and resource usage counters into a given task_usage_t. It differs from 1465 * exacct_copy_proc_mstate() in that here a) we are copying to a task_usage_t, 1466 * b) p_lock will have been acquired earlier in the call path and c) we 1467 * are here including the process's user and system times. 1468 */ 1469 static void 1470 exacct_snapshot_proc_mstate(proc_t *p, task_usage_t *tu) 1471 { 1472 tu->tu_utime = mstate_aggr_state(p, LMS_USER); 1473 tu->tu_stime = mstate_aggr_state(p, LMS_SYSTEM); 1474 tu->tu_minflt = p->p_ru.minflt; 1475 tu->tu_majflt = p->p_ru.majflt; 1476 tu->tu_sndmsg = p->p_ru.msgsnd; 1477 tu->tu_rcvmsg = p->p_ru.msgrcv; 1478 tu->tu_ioch = p->p_ru.ioch; 1479 tu->tu_iblk = p->p_ru.inblock; 1480 tu->tu_oblk = p->p_ru.oublock; 1481 tu->tu_vcsw = p->p_ru.nvcsw; 1482 tu->tu_icsw = p->p_ru.nivcsw; 1483 tu->tu_nsig = p->p_ru.nsignals; 1484 tu->tu_nswp = p->p_ru.nswap; 1485 tu->tu_nscl = p->p_ru.sysc; 1486 } 1487 1488 /* 1489 * void exacct_move_mstate(proc_t *, task_t *, task_t *) 1490 * 1491 * Overview 1492 * exacct_move_mstate() is called by task_change() and accounts for 1493 * a process's resource usage when it is moved from one task to another. 1494 * 1495 * The process's usage at this point is recorded in the new task so 1496 * that it can be excluded from the calculation of resources consumed 1497 * by that task. 1498 * 1499 * The resource usage inherited by the new task is also added to the 1500 * aggregate maintained by the old task for processes that have exited. 1501 * 1502 * Return values 1503 * None. 1504 * 1505 * Caller's context 1506 * pidlock and p_lock held across exacct_move_mstate(). 1507 */ 1508 void 1509 exacct_move_mstate(proc_t *p, task_t *oldtk, task_t *newtk) 1510 { 1511 task_usage_t tu; 1512 1513 /* Take a snapshot of this process's mstate and RU counters */ 1514 exacct_snapshot_proc_mstate(p, &tu); 1515 1516 /* 1517 * Use the snapshot to increment the aggregate usage of the old 1518 * task, and the inherited usage of the new one. 1519 */ 1520 mutex_enter(&oldtk->tk_usage_lock); 1521 exacct_add_task_mstate(oldtk->tk_usage, &tu); 1522 mutex_exit(&oldtk->tk_usage_lock); 1523 mutex_enter(&newtk->tk_usage_lock); 1524 exacct_add_task_mstate(newtk->tk_inherited, &tu); 1525 mutex_exit(&newtk->tk_usage_lock); 1526 } 1527