1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/exacct.h> 30 #include <sys/exacct_catalog.h> 31 #include <sys/disp.h> 32 #include <sys/task.h> 33 #include <sys/proc.h> 34 #include <sys/cmn_err.h> 35 #include <sys/kmem.h> 36 #include <sys/project.h> 37 #include <sys/systm.h> 38 #include <sys/vnode.h> 39 #include <sys/file.h> 40 #include <sys/acctctl.h> 41 #include <sys/time.h> 42 #include <sys/utsname.h> 43 #include <sys/session.h> 44 #include <sys/sysmacros.h> 45 #include <sys/bitmap.h> 46 #include <sys/msacct.h> 47 48 /* 49 * exacct usage and recording routines 50 * 51 * wracct(2), getacct(2), and the records written at process or task 52 * termination are constructed using the exacct_assemble_[task,proc]_usage() 53 * functions, which take a callback that takes the appropriate action on 54 * the packed exacct record for the task or process. For the process-related 55 * actions, we partition the routines such that the data collecting component 56 * can be performed while holding p_lock, and all sleeping or blocking 57 * operations can be performed without acquiring p_lock. 58 * 59 * putacct(2), which allows an application to construct a customized record 60 * associated with an existing process or task, has its own entry points: 61 * exacct_tag_task() and exacct_tag_proc(). 62 */ 63 64 taskq_t *exacct_queue; 65 kmem_cache_t *exacct_object_cache; 66 67 zone_key_t exacct_zone_key = ZONE_KEY_UNINITIALIZED; 68 69 static const uint32_t exacct_version = EXACCT_VERSION; 70 static const char exacct_header[] = "exacct"; 71 static const char exacct_creator[] = "SunOS"; 72 73 ea_object_t * 74 ea_alloc_item(ea_catalog_t catalog, void *buf, size_t bufsz) 75 { 76 ea_object_t *item; 77 78 item = kmem_cache_alloc(exacct_object_cache, KM_SLEEP); 79 bzero(item, sizeof (ea_object_t)); 80 (void) ea_set_item(item, catalog, buf, bufsz); 81 return (item); 82 } 83 84 ea_object_t * 85 ea_alloc_group(ea_catalog_t catalog) 86 { 87 ea_object_t *group; 88 89 group = kmem_cache_alloc(exacct_object_cache, KM_SLEEP); 90 bzero(group, sizeof (ea_object_t)); 91 (void) ea_set_group(group, catalog); 92 return (group); 93 } 94 95 ea_object_t * 96 ea_attach_item(ea_object_t *grp, void *buf, size_t bufsz, ea_catalog_t catalog) 97 { 98 ea_object_t *item; 99 100 item = ea_alloc_item(catalog, buf, bufsz); 101 (void) ea_attach_to_group(grp, item); 102 return (item); 103 } 104 105 /* 106 * exacct_vn_write() is a vn_rdwr wrapper that protects us from corrupting the 107 * accounting file in case of an I/O or filesystem error. acctctl() prevents 108 * the two accounting vnodes from being equal, and the appropriate ac_lock is 109 * held across the call, so we're single threaded through this code for each 110 * file. 111 */ 112 static int 113 exacct_vn_write(ac_info_t *info, void *buf, ssize_t bufsize) 114 { 115 int error = 0; 116 ssize_t resid; 117 struct vattr va; 118 119 if (info == NULL) 120 return (0); 121 122 mutex_enter(&info->ac_lock); 123 124 /* 125 * Don't do anything unless accounting file is set. 126 */ 127 if (info->ac_vnode == NULL) { 128 mutex_exit(&info->ac_lock); 129 return (0); 130 } 131 132 /* 133 * Save the size. If vn_rdwr fails, reset the size to avoid corrupting 134 * the present accounting file. 135 */ 136 va.va_mask = AT_SIZE; 137 error = VOP_GETATTR(info->ac_vnode, &va, 0, kcred); 138 if (error == 0) { 139 error = vn_rdwr(UIO_WRITE, info->ac_vnode, (caddr_t)buf, 140 bufsize, 0LL, UIO_SYSSPACE, FAPPEND, (rlim64_t)MAXOFFSET_T, 141 kcred, &resid); 142 if (error) { 143 (void) VOP_SETATTR(info->ac_vnode, &va, 0, kcred, NULL); 144 } else if (resid != 0) { 145 (void) VOP_SETATTR(info->ac_vnode, &va, 0, kcred, NULL); 146 error = ENOSPC; 147 } 148 } 149 mutex_exit(&info->ac_lock); 150 151 return (error); 152 } 153 154 /* 155 * void *exacct_create_header(size_t *) 156 * 157 * Overview 158 * exacct_create_header() constructs an exacct file header identifying the 159 * accounting file as the output of the kernel. exacct_create_header() and 160 * the static write_header() and verify_header() routines in libexacct must 161 * remain synchronized. 162 * 163 * Return values 164 * A pointer to a packed exacct buffer containing the appropriate header is 165 * returned; the size of the buffer is placed in the location indicated by 166 * sizep. 167 * 168 * Caller's context 169 * Suitable for KM_SLEEP allocations. 170 */ 171 void * 172 exacct_create_header(size_t *sizep) 173 { 174 ea_object_t *hdr_grp; 175 uint32_t bskip; 176 void *buf; 177 size_t bufsize; 178 179 hdr_grp = ea_alloc_group(EXT_GROUP | EXC_DEFAULT | EXD_GROUP_HEADER); 180 (void) ea_attach_item(hdr_grp, (void *)&exacct_version, 0, 181 EXT_UINT32 | EXC_DEFAULT | EXD_VERSION); 182 (void) ea_attach_item(hdr_grp, (void *)exacct_header, 0, 183 EXT_STRING | EXC_DEFAULT | EXD_FILETYPE); 184 (void) ea_attach_item(hdr_grp, (void *)exacct_creator, 0, 185 EXT_STRING | EXC_DEFAULT | EXD_CREATOR); 186 (void) ea_attach_item(hdr_grp, uts_nodename(), 0, 187 EXT_STRING | EXC_DEFAULT | EXD_HOSTNAME); 188 189 bufsize = ea_pack_object(hdr_grp, NULL, 0); 190 buf = kmem_alloc(bufsize, KM_SLEEP); 191 (void) ea_pack_object(hdr_grp, buf, bufsize); 192 ea_free_object(hdr_grp, EUP_ALLOC); 193 194 /* 195 * To prevent reading the header when reading the file backwards, 196 * set the large backskip of the header group to 0 (last 4 bytes). 197 */ 198 bskip = 0; 199 exacct_order32(&bskip); 200 bcopy(&bskip, (char *)buf + bufsize - sizeof (bskip), 201 sizeof (bskip)); 202 203 *sizep = bufsize; 204 return (buf); 205 } 206 207 /* 208 * int exacct_write_header(ac_info_t *, void *, size_t) 209 * 210 * Overview 211 * exacct_write_header() writes the given header buffer to the indicated 212 * vnode, and frees the buffer. 213 * 214 * Return values 215 * The result of the write operation is returned. 216 * 217 * Caller's context 218 * Caller must not hold the ac_lock of the appropriate accounting file 219 * information block (ac_info_t). 220 */ 221 int 222 exacct_write_header(ac_info_t *info, void *hdr, size_t hdrsize) 223 { 224 int error; 225 226 error = exacct_vn_write(info, hdr, hdrsize); 227 kmem_free(hdr, hdrsize); 228 return (error); 229 } 230 231 static void 232 exacct_get_interval_task_usage(task_t *tk, task_usage_t *tu, 233 task_usage_t **tu_buf) 234 { 235 task_usage_t *oldtu, *newtu; 236 task_usage_t **prevusage; 237 238 ASSERT(MUTEX_HELD(&tk->tk_usage_lock)); 239 if (getzoneid() != GLOBAL_ZONEID) { 240 prevusage = &tk->tk_zoneusage; 241 } else { 242 prevusage = &tk->tk_prevusage; 243 } 244 if ((oldtu = *prevusage) != NULL) { 245 /* 246 * In case we have any accounting information 247 * saved from the previous interval record. 248 */ 249 newtu = *tu_buf; 250 bcopy(tu, newtu, sizeof (task_usage_t)); 251 tu->tu_minflt -= oldtu->tu_minflt; 252 tu->tu_majflt -= oldtu->tu_majflt; 253 tu->tu_sndmsg -= oldtu->tu_sndmsg; 254 tu->tu_rcvmsg -= oldtu->tu_rcvmsg; 255 tu->tu_ioch -= oldtu->tu_ioch; 256 tu->tu_iblk -= oldtu->tu_iblk; 257 tu->tu_oblk -= oldtu->tu_oblk; 258 tu->tu_vcsw -= oldtu->tu_vcsw; 259 tu->tu_icsw -= oldtu->tu_icsw; 260 tu->tu_nsig -= oldtu->tu_nsig; 261 tu->tu_nswp -= oldtu->tu_nswp; 262 tu->tu_nscl -= oldtu->tu_nscl; 263 tu->tu_utime -= oldtu->tu_utime; 264 tu->tu_stime -= oldtu->tu_stime; 265 266 tu->tu_startsec = oldtu->tu_finishsec; 267 tu->tu_startnsec = oldtu->tu_finishnsec; 268 /* 269 * Copy the data from our temporary storage to the task's 270 * previous interval usage structure for future reference. 271 */ 272 bcopy(newtu, oldtu, sizeof (task_usage_t)); 273 } else { 274 /* 275 * Store current statistics in the task's previous interval 276 * usage structure for future references. 277 */ 278 *prevusage = *tu_buf; 279 bcopy(tu, *prevusage, sizeof (task_usage_t)); 280 *tu_buf = NULL; 281 } 282 } 283 284 static void 285 exacct_snapshot_task_usage(task_t *tk, task_usage_t *tu) 286 { 287 timestruc_t ts; 288 proc_t *p; 289 290 ASSERT(MUTEX_HELD(&pidlock)); 291 292 if ((p = tk->tk_memb_list) == NULL) 293 return; 294 295 /* 296 * exacct_snapshot_task_usage() provides an approximate snapshot of the 297 * usage of the potentially many members of the task. Since we don't 298 * guarantee exactness, we don't acquire the p_lock of any of the member 299 * processes. 300 */ 301 do { 302 mutex_enter(&p->p_lock); 303 tu->tu_utime += mstate_aggr_state(p, LMS_USER); 304 tu->tu_stime += mstate_aggr_state(p, LMS_SYSTEM); 305 mutex_exit(&p->p_lock); 306 tu->tu_minflt += p->p_ru.minflt; 307 tu->tu_majflt += p->p_ru.majflt; 308 tu->tu_sndmsg += p->p_ru.msgsnd; 309 tu->tu_rcvmsg += p->p_ru.msgrcv; 310 tu->tu_ioch += p->p_ru.ioch; 311 tu->tu_iblk += p->p_ru.inblock; 312 tu->tu_oblk += p->p_ru.oublock; 313 tu->tu_vcsw += p->p_ru.nvcsw; 314 tu->tu_icsw += p->p_ru.nivcsw; 315 tu->tu_nsig += p->p_ru.nsignals; 316 tu->tu_nswp += p->p_ru.nswap; 317 tu->tu_nscl += p->p_ru.sysc; 318 } while ((p = p->p_tasknext) != tk->tk_memb_list); 319 320 gethrestime(&ts); 321 tu->tu_finishsec = (uint64_t)(ulong_t)ts.tv_sec; 322 tu->tu_finishnsec = (uint64_t)(ulong_t)ts.tv_nsec; 323 } 324 325 /* 326 * exacct_update_task_mstate() updates the task's microstate accounting 327 * statistics with accumulated counters for the exiting process. 328 */ 329 static void 330 exacct_update_task_mstate(proc_t *p) 331 { 332 task_usage_t *tu; 333 334 mutex_enter(&p->p_task->tk_usage_lock); 335 tu = p->p_task->tk_usage; 336 mutex_enter(&p->p_lock); 337 tu->tu_utime += mstate_aggr_state(p, LMS_USER); 338 tu->tu_stime += mstate_aggr_state(p, LMS_SYSTEM); 339 mutex_exit(&p->p_lock); 340 tu->tu_minflt += p->p_ru.minflt; 341 tu->tu_majflt += p->p_ru.majflt; 342 tu->tu_sndmsg += p->p_ru.msgsnd; 343 tu->tu_rcvmsg += p->p_ru.msgrcv; 344 tu->tu_ioch += p->p_ru.ioch; 345 tu->tu_iblk += p->p_ru.inblock; 346 tu->tu_oblk += p->p_ru.oublock; 347 tu->tu_vcsw += p->p_ru.nvcsw; 348 tu->tu_icsw += p->p_ru.nivcsw; 349 tu->tu_nsig += p->p_ru.nsignals; 350 tu->tu_nswp += p->p_ru.nswap; 351 tu->tu_nscl += p->p_ru.sysc; 352 mutex_exit(&p->p_task->tk_usage_lock); 353 } 354 355 static void 356 exacct_calculate_task_usage(task_t *tk, task_usage_t *tu, int flag) 357 { 358 timestruc_t ts; 359 task_usage_t *tu_buf; 360 361 switch (flag) { 362 case EW_PARTIAL: 363 /* 364 * For partial records we must report the sum of current 365 * accounting statistics with previously accumulated 366 * statistics. 367 */ 368 mutex_enter(&pidlock); 369 mutex_enter(&tk->tk_usage_lock); 370 371 (void) bcopy(tk->tk_usage, tu, sizeof (task_usage_t)); 372 exacct_snapshot_task_usage(tk, tu); 373 374 mutex_exit(&tk->tk_usage_lock); 375 mutex_exit(&pidlock); 376 break; 377 case EW_INTERVAL: 378 /* 379 * We need to allocate spare task_usage_t buffer before 380 * grabbing pidlock because we might need it later in 381 * exacct_get_interval_task_usage(). 382 */ 383 tu_buf = kmem_zalloc(sizeof (task_usage_t), KM_SLEEP); 384 mutex_enter(&pidlock); 385 mutex_enter(&tk->tk_usage_lock); 386 387 /* 388 * For interval records, we deduct the previous microstate 389 * accounting data and cpu usage times from previously saved 390 * results and update the previous task usage structure. 391 */ 392 (void) bcopy(tk->tk_usage, tu, sizeof (task_usage_t)); 393 exacct_snapshot_task_usage(tk, tu); 394 exacct_get_interval_task_usage(tk, tu, &tu_buf); 395 396 mutex_exit(&tk->tk_usage_lock); 397 mutex_exit(&pidlock); 398 399 if (tu_buf != NULL) 400 kmem_free(tu_buf, sizeof (task_usage_t)); 401 break; 402 case EW_FINAL: 403 /* 404 * For final records, we only have to record task's finish 405 * time because all other stuff has been calculated already. 406 */ 407 mutex_enter(&tk->tk_usage_lock); 408 (void) bcopy(tk->tk_usage, tu, sizeof (task_usage_t)); 409 mutex_exit(&tk->tk_usage_lock); 410 411 gethrestime(&ts); 412 tu->tu_finishsec = (uint64_t)(ulong_t)ts.tv_sec; 413 tu->tu_finishnsec = (uint64_t)(ulong_t)ts.tv_nsec; 414 415 break; 416 } 417 } 418 419 static int 420 exacct_attach_task_item(task_t *tk, task_usage_t *tu, ea_object_t *record, 421 int res) 422 { 423 int attached = 1; 424 425 switch (res) { 426 case AC_TASK_TASKID: 427 (void) ea_attach_item(record, &tk->tk_tkid, 428 sizeof (uint32_t), EXT_UINT32 | EXD_TASK_TASKID); 429 break; 430 case AC_TASK_PROJID: 431 (void) ea_attach_item(record, &tk->tk_proj->kpj_id, 432 sizeof (uint32_t), EXT_UINT32 | EXD_TASK_PROJID); 433 break; 434 case AC_TASK_CPU: { 435 timestruc_t ts; 436 uint64_t ui; 437 438 hrt2ts(tu->tu_stime, &ts); 439 ui = ts.tv_sec; 440 (void) ea_attach_item(record, &ui, sizeof (uint64_t), 441 EXT_UINT64 | EXD_TASK_CPU_SYS_SEC); 442 ui = ts.tv_nsec; 443 (void) ea_attach_item(record, &ui, sizeof (uint64_t), 444 EXT_UINT64 | EXD_TASK_CPU_SYS_NSEC); 445 446 hrt2ts(tu->tu_utime, &ts); 447 ui = ts.tv_sec; 448 (void) ea_attach_item(record, &ui, sizeof (uint64_t), 449 EXT_UINT64 | EXD_TASK_CPU_USER_SEC); 450 ui = ts.tv_nsec; 451 (void) ea_attach_item(record, &ui, sizeof (uint64_t), 452 EXT_UINT64 | EXD_TASK_CPU_USER_NSEC); 453 } 454 break; 455 case AC_TASK_TIME: 456 (void) ea_attach_item(record, &tu->tu_startsec, 457 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_START_SEC); 458 (void) ea_attach_item(record, &tu->tu_startnsec, 459 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_START_NSEC); 460 (void) ea_attach_item(record, &tu->tu_finishsec, 461 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_FINISH_SEC); 462 (void) ea_attach_item(record, &tu->tu_finishnsec, 463 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_FINISH_NSEC); 464 break; 465 case AC_TASK_HOSTNAME: 466 (void) ea_attach_item(record, tk->tk_zone->zone_nodename, 467 strlen(tk->tk_zone->zone_nodename) + 1, 468 EXT_STRING | EXD_TASK_HOSTNAME); 469 break; 470 case AC_TASK_MICROSTATE: 471 (void) ea_attach_item(record, &tu->tu_majflt, 472 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_FAULTS_MAJOR); 473 (void) ea_attach_item(record, &tu->tu_minflt, 474 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_FAULTS_MINOR); 475 (void) ea_attach_item(record, &tu->tu_sndmsg, 476 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_MESSAGES_SND); 477 (void) ea_attach_item(record, &tu->tu_rcvmsg, 478 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_MESSAGES_RCV); 479 (void) ea_attach_item(record, &tu->tu_iblk, 480 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_BLOCKS_IN); 481 (void) ea_attach_item(record, &tu->tu_oblk, 482 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_BLOCKS_OUT); 483 (void) ea_attach_item(record, &tu->tu_ioch, 484 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_CHARS_RDWR); 485 (void) ea_attach_item(record, &tu->tu_vcsw, 486 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_CONTEXT_VOL); 487 (void) ea_attach_item(record, &tu->tu_icsw, 488 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_CONTEXT_INV); 489 (void) ea_attach_item(record, &tu->tu_nsig, 490 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_SIGNALS); 491 (void) ea_attach_item(record, &tu->tu_nswp, 492 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_SWAPS); 493 (void) ea_attach_item(record, &tu->tu_nscl, 494 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_SYSCALLS); 495 break; 496 case AC_TASK_ANCTASKID: 497 (void) ea_attach_item(record, &tu->tu_anctaskid, 498 sizeof (uint32_t), EXT_UINT32 | EXD_TASK_ANCTASKID); 499 break; 500 case AC_TASK_ZONENAME: 501 (void) ea_attach_item(record, tk->tk_zone->zone_name, 502 strlen(tk->tk_zone->zone_name) + 1, 503 EXT_STRING | EXD_TASK_ZONENAME); 504 break; 505 default: 506 attached = 0; 507 } 508 return (attached); 509 } 510 511 static ea_object_t * 512 exacct_assemble_task_record(task_t *tk, task_usage_t *tu, ulong_t *mask, 513 ea_catalog_t record_type) 514 { 515 int res, count; 516 ea_object_t *record; 517 518 /* 519 * Assemble usage values into group. 520 */ 521 record = ea_alloc_group(EXT_GROUP | EXC_DEFAULT | record_type); 522 for (res = 1, count = 0; res <= AC_TASK_MAX_RES; res++) 523 if (BT_TEST(mask, res)) 524 count += exacct_attach_task_item(tk, tu, record, res); 525 if (count == 0) { 526 ea_free_object(record, EUP_ALLOC); 527 record = NULL; 528 } 529 return (record); 530 } 531 532 /* 533 * int exacct_assemble_task_usage(task_t *, int (*)(void *, size_t, void *, 534 * size_t, size_t *), void *, size_t, size_t *, int) 535 * 536 * Overview 537 * exacct_assemble_task_usage() builds the packed exacct buffer for the 538 * indicated task, executes the given callback function, and free the packed 539 * buffer. 540 * 541 * Return values 542 * Returns 0 on success; otherwise the appropriate error code is returned. 543 * 544 * Caller's context 545 * Suitable for KM_SLEEP allocations. 546 */ 547 int 548 exacct_assemble_task_usage(ac_info_t *ac_task, task_t *tk, 549 int (*callback)(ac_info_t *, void *, size_t, void *, size_t, size_t *), 550 void *ubuf, size_t ubufsize, size_t *actual, int flag) 551 { 552 ulong_t mask[AC_MASK_SZ]; 553 ea_object_t *task_record; 554 ea_catalog_t record_type; 555 task_usage_t *tu; 556 void *buf; 557 size_t bufsize; 558 int ret; 559 560 ASSERT(flag == EW_FINAL || flag == EW_PARTIAL || flag == EW_INTERVAL); 561 562 mutex_enter(&ac_task->ac_lock); 563 if (ac_task->ac_state == AC_OFF) { 564 mutex_exit(&ac_task->ac_lock); 565 return (ENOTACTIVE); 566 } 567 bt_copy(ac_task->ac_mask, mask, AC_MASK_SZ); 568 mutex_exit(&ac_task->ac_lock); 569 570 switch (flag) { 571 case EW_FINAL: 572 record_type = EXD_GROUP_TASK; 573 break; 574 case EW_PARTIAL: 575 record_type = EXD_GROUP_TASK_PARTIAL; 576 break; 577 case EW_INTERVAL: 578 record_type = EXD_GROUP_TASK_INTERVAL; 579 break; 580 } 581 582 /* 583 * Calculate task usage and assemble it into the task record. 584 */ 585 tu = kmem_zalloc(sizeof (task_usage_t), KM_SLEEP); 586 exacct_calculate_task_usage(tk, tu, flag); 587 task_record = exacct_assemble_task_record(tk, tu, mask, record_type); 588 if (task_record == NULL) { 589 /* 590 * The current configuration of the accounting system has 591 * resulted in records with no data; accordingly, we don't write 592 * these, but we return success. 593 */ 594 kmem_free(tu, sizeof (task_usage_t)); 595 return (0); 596 } 597 598 /* 599 * Pack object into buffer and run callback on it. 600 */ 601 bufsize = ea_pack_object(task_record, NULL, 0); 602 buf = kmem_alloc(bufsize, KM_SLEEP); 603 (void) ea_pack_object(task_record, buf, bufsize); 604 ret = callback(ac_task, ubuf, ubufsize, buf, bufsize, actual); 605 606 /* 607 * Free all previously allocated structures. 608 */ 609 kmem_free(buf, bufsize); 610 ea_free_object(task_record, EUP_ALLOC); 611 kmem_free(tu, sizeof (task_usage_t)); 612 return (ret); 613 } 614 615 /* 616 * void exacct_commit_task(void *) 617 * 618 * Overview 619 * exacct_commit_task() calculates the final usage for a task, updating the 620 * task usage if task accounting is active, and writing a task record if task 621 * accounting is active. exacct_commit_task() is intended for being called 622 * from a task queue (taskq_t). 623 * 624 * Return values 625 * None. 626 * 627 * Caller's context 628 * Suitable for KM_SLEEP allocations. 629 */ 630 631 void 632 exacct_commit_task(void *arg) 633 { 634 task_t *tk = (task_t *)arg; 635 size_t size; 636 zone_t *zone = tk->tk_zone; 637 struct exacct_globals *acg; 638 639 ASSERT(tk != task0p); 640 ASSERT(tk->tk_memb_list == NULL); 641 642 /* 643 * Don't do any extra work if the acctctl module isn't loaded. 644 */ 645 if (exacct_zone_key != ZONE_KEY_UNINITIALIZED) { 646 acg = zone_getspecific(exacct_zone_key, zone); 647 (void) exacct_assemble_task_usage(&acg->ac_task, tk, 648 exacct_commit_callback, NULL, 0, &size, EW_FINAL); 649 if (tk->tk_zone != global_zone) { 650 acg = zone_getspecific(exacct_zone_key, global_zone); 651 (void) exacct_assemble_task_usage(&acg->ac_task, tk, 652 exacct_commit_callback, NULL, 0, &size, EW_FINAL); 653 } 654 } 655 /* 656 * Release associated project and finalize task. 657 */ 658 task_end(tk); 659 } 660 661 static int 662 exacct_attach_proc_item(proc_usage_t *pu, ea_object_t *record, int res) 663 { 664 int attached = 1; 665 666 switch (res) { 667 case AC_PROC_PID: 668 (void) ea_attach_item(record, &pu->pu_pid, 669 sizeof (uint32_t), EXT_UINT32 | EXD_PROC_PID); 670 break; 671 case AC_PROC_UID: 672 (void) ea_attach_item(record, &pu->pu_ruid, 673 sizeof (uint32_t), EXT_UINT32 | EXD_PROC_UID); 674 break; 675 case AC_PROC_FLAG: 676 (void) ea_attach_item(record, &pu->pu_acflag, 677 sizeof (uint32_t), EXT_UINT32 | EXD_PROC_ACCT_FLAGS); 678 break; 679 case AC_PROC_GID: 680 (void) ea_attach_item(record, &pu->pu_rgid, 681 sizeof (uint32_t), EXT_UINT32 | EXD_PROC_GID); 682 break; 683 case AC_PROC_PROJID: 684 (void) ea_attach_item(record, &pu->pu_projid, 685 sizeof (uint32_t), EXT_UINT32 | EXD_PROC_PROJID); 686 break; 687 case AC_PROC_TASKID: 688 (void) ea_attach_item(record, &pu->pu_taskid, 689 sizeof (uint32_t), EXT_UINT32 | EXD_PROC_TASKID); 690 break; 691 case AC_PROC_CPU: 692 (void) ea_attach_item(record, &pu->pu_utimesec, 693 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_CPU_USER_SEC); 694 (void) ea_attach_item(record, &pu->pu_utimensec, 695 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_CPU_USER_NSEC); 696 (void) ea_attach_item(record, &pu->pu_stimesec, 697 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_CPU_SYS_SEC); 698 (void) ea_attach_item(record, &pu->pu_stimensec, 699 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_CPU_SYS_NSEC); 700 break; 701 case AC_PROC_TIME: 702 (void) ea_attach_item(record, &pu->pu_startsec, 703 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_START_SEC); 704 (void) ea_attach_item(record, &pu->pu_startnsec, 705 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_START_NSEC); 706 (void) ea_attach_item(record, &pu->pu_finishsec, 707 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_FINISH_SEC); 708 (void) ea_attach_item(record, &pu->pu_finishnsec, 709 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_FINISH_NSEC); 710 break; 711 case AC_PROC_COMMAND: 712 (void) ea_attach_item(record, pu->pu_command, 713 strlen(pu->pu_command) + 1, EXT_STRING | EXD_PROC_COMMAND); 714 break; 715 case AC_PROC_HOSTNAME: 716 (void) ea_attach_item(record, pu->pu_nodename, 717 strlen(pu->pu_nodename) + 1, 718 EXT_STRING | EXD_PROC_HOSTNAME); 719 break; 720 case AC_PROC_TTY: 721 (void) ea_attach_item(record, &pu->pu_major, 722 sizeof (uint32_t), EXT_UINT32 | EXD_PROC_TTY_MAJOR); 723 (void) ea_attach_item(record, &pu->pu_minor, 724 sizeof (uint32_t), EXT_UINT32 | EXD_PROC_TTY_MINOR); 725 break; 726 case AC_PROC_MICROSTATE: 727 (void) ea_attach_item(record, &pu->pu_majflt, 728 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_FAULTS_MAJOR); 729 (void) ea_attach_item(record, &pu->pu_minflt, 730 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_FAULTS_MINOR); 731 (void) ea_attach_item(record, &pu->pu_sndmsg, 732 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_MESSAGES_SND); 733 (void) ea_attach_item(record, &pu->pu_rcvmsg, 734 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_MESSAGES_RCV); 735 (void) ea_attach_item(record, &pu->pu_iblk, 736 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_BLOCKS_IN); 737 (void) ea_attach_item(record, &pu->pu_oblk, 738 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_BLOCKS_OUT); 739 (void) ea_attach_item(record, &pu->pu_ioch, 740 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_CHARS_RDWR); 741 (void) ea_attach_item(record, &pu->pu_vcsw, 742 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_CONTEXT_VOL); 743 (void) ea_attach_item(record, &pu->pu_icsw, 744 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_CONTEXT_INV); 745 (void) ea_attach_item(record, &pu->pu_nsig, 746 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_SIGNALS); 747 (void) ea_attach_item(record, &pu->pu_nswp, 748 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_SWAPS); 749 (void) ea_attach_item(record, &pu->pu_nscl, 750 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_SYSCALLS); 751 break; 752 case AC_PROC_ANCPID: 753 (void) ea_attach_item(record, &pu->pu_ancpid, 754 sizeof (uint32_t), EXT_UINT32 | EXD_PROC_ANCPID); 755 break; 756 case AC_PROC_WAIT_STATUS: 757 (void) ea_attach_item(record, &pu->pu_wstat, 758 sizeof (uint32_t), EXT_UINT32 | EXD_PROC_WAIT_STATUS); 759 break; 760 case AC_PROC_ZONENAME: 761 (void) ea_attach_item(record, pu->pu_zonename, 762 strlen(pu->pu_zonename) + 1, 763 EXT_STRING | EXD_PROC_ZONENAME); 764 break; 765 case AC_PROC_MEM: 766 (void) ea_attach_item(record, &pu->pu_mem_rss_avg, 767 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_MEM_RSS_AVG_K); 768 (void) ea_attach_item(record, &pu->pu_mem_rss_max, 769 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_MEM_RSS_MAX_K); 770 break; 771 default: 772 attached = 0; 773 } 774 return (attached); 775 } 776 777 static ea_object_t * 778 exacct_assemble_proc_record(proc_usage_t *pu, ulong_t *mask, 779 ea_catalog_t record_type) 780 { 781 int res, count; 782 ea_object_t *record; 783 784 /* 785 * Assemble usage values into group. 786 */ 787 record = ea_alloc_group(EXT_GROUP | EXC_DEFAULT | record_type); 788 for (res = 1, count = 0; res <= AC_PROC_MAX_RES; res++) 789 if (BT_TEST(mask, res)) 790 count += exacct_attach_proc_item(pu, record, res); 791 if (count == 0) { 792 ea_free_object(record, EUP_ALLOC); 793 record = NULL; 794 } 795 return (record); 796 } 797 798 /* 799 * The following two routines assume that process's p_lock is held or 800 * exacct_commit_proc has been called from exit() when all lwps are stopped. 801 */ 802 static void 803 exacct_calculate_proc_mstate(proc_t *p, proc_usage_t *pu) 804 { 805 kthread_t *t; 806 807 ASSERT(MUTEX_HELD(&p->p_lock)); 808 if ((t = p->p_tlist) == NULL) 809 return; 810 811 do { 812 pu->pu_minflt += t->t_lwp->lwp_ru.minflt; 813 pu->pu_majflt += t->t_lwp->lwp_ru.majflt; 814 pu->pu_sndmsg += t->t_lwp->lwp_ru.msgsnd; 815 pu->pu_rcvmsg += t->t_lwp->lwp_ru.msgrcv; 816 pu->pu_ioch += t->t_lwp->lwp_ru.ioch; 817 pu->pu_iblk += t->t_lwp->lwp_ru.inblock; 818 pu->pu_oblk += t->t_lwp->lwp_ru.oublock; 819 pu->pu_vcsw += t->t_lwp->lwp_ru.nvcsw; 820 pu->pu_icsw += t->t_lwp->lwp_ru.nivcsw; 821 pu->pu_nsig += t->t_lwp->lwp_ru.nsignals; 822 pu->pu_nswp += t->t_lwp->lwp_ru.nswap; 823 pu->pu_nscl += t->t_lwp->lwp_ru.sysc; 824 } while ((t = t->t_forw) != p->p_tlist); 825 } 826 827 static void 828 exacct_copy_proc_mstate(proc_t *p, proc_usage_t *pu) 829 { 830 pu->pu_minflt = p->p_ru.minflt; 831 pu->pu_majflt = p->p_ru.majflt; 832 pu->pu_sndmsg = p->p_ru.msgsnd; 833 pu->pu_rcvmsg = p->p_ru.msgrcv; 834 pu->pu_ioch = p->p_ru.ioch; 835 pu->pu_iblk = p->p_ru.inblock; 836 pu->pu_oblk = p->p_ru.oublock; 837 pu->pu_vcsw = p->p_ru.nvcsw; 838 pu->pu_icsw = p->p_ru.nivcsw; 839 pu->pu_nsig = p->p_ru.nsignals; 840 pu->pu_nswp = p->p_ru.nswap; 841 pu->pu_nscl = p->p_ru.sysc; 842 } 843 844 void 845 exacct_calculate_proc_usage(proc_t *p, proc_usage_t *pu, ulong_t *mask, 846 int flag, int wstat) 847 { 848 timestruc_t ts, ts_run; 849 850 ASSERT(MUTEX_HELD(&p->p_lock)); 851 852 /* 853 * Convert CPU and execution times to sec/nsec format. 854 */ 855 if (BT_TEST(mask, AC_PROC_CPU)) { 856 hrt2ts(mstate_aggr_state(p, LMS_USER), &ts); 857 pu->pu_utimesec = (uint64_t)(ulong_t)ts.tv_sec; 858 pu->pu_utimensec = (uint64_t)(ulong_t)ts.tv_nsec; 859 hrt2ts(mstate_aggr_state(p, LMS_SYSTEM), &ts); 860 pu->pu_stimesec = (uint64_t)(ulong_t)ts.tv_sec; 861 pu->pu_stimensec = (uint64_t)(ulong_t)ts.tv_nsec; 862 } 863 if (BT_TEST(mask, AC_PROC_TIME)) { 864 gethrestime(&ts); 865 pu->pu_finishsec = (uint64_t)(ulong_t)ts.tv_sec; 866 pu->pu_finishnsec = (uint64_t)(ulong_t)ts.tv_nsec; 867 hrt2ts(gethrtime() - p->p_mstart, &ts_run); 868 ts.tv_sec -= ts_run.tv_sec; 869 ts.tv_nsec -= ts_run.tv_nsec; 870 if (ts.tv_nsec < 0) { 871 ts.tv_sec--; 872 if ((ts.tv_nsec = ts.tv_nsec + NANOSEC) >= NANOSEC) { 873 ts.tv_sec++; 874 ts.tv_nsec -= NANOSEC; 875 } 876 } 877 pu->pu_startsec = (uint64_t)(ulong_t)ts.tv_sec; 878 pu->pu_startnsec = (uint64_t)(ulong_t)ts.tv_nsec; 879 } 880 881 pu->pu_pid = p->p_pidp->pid_id; 882 pu->pu_acflag = p->p_user.u_acflag; 883 pu->pu_projid = p->p_task->tk_proj->kpj_id; 884 pu->pu_taskid = p->p_task->tk_tkid; 885 pu->pu_major = getmajor(p->p_sessp->s_dev); 886 pu->pu_minor = getminor(p->p_sessp->s_dev); 887 pu->pu_ancpid = p->p_ancpid; 888 pu->pu_wstat = wstat; 889 /* 890 * Compute average RSS in K. The denominator is the number of 891 * samples: the number of clock ticks plus the initial value. 892 */ 893 pu->pu_mem_rss_avg = (PTOU(p)->u_mem / (p->p_stime + p->p_utime + 1)) * 894 (PAGESIZE / 1024); 895 pu->pu_mem_rss_max = PTOU(p)->u_mem_max * (PAGESIZE / 1024); 896 897 mutex_enter(&p->p_crlock); 898 pu->pu_ruid = crgetruid(p->p_cred); 899 pu->pu_rgid = crgetrgid(p->p_cred); 900 mutex_exit(&p->p_crlock); 901 902 bcopy(p->p_user.u_comm, pu->pu_command, strlen(p->p_user.u_comm) + 1); 903 bcopy(p->p_zone->zone_name, pu->pu_zonename, 904 strlen(p->p_zone->zone_name) + 1); 905 bcopy(p->p_zone->zone_nodename, pu->pu_nodename, 906 strlen(p->p_zone->zone_nodename) + 1); 907 908 /* 909 * Calculate microstate accounting data for a process that is still 910 * running. Presently, we explicitly collect all of the LWP usage into 911 * the proc usage structure here. 912 */ 913 if (flag & EW_PARTIAL) 914 exacct_calculate_proc_mstate(p, pu); 915 if (flag & EW_FINAL) 916 exacct_copy_proc_mstate(p, pu); 917 } 918 919 /* 920 * int exacct_assemble_proc_usage(proc_usage_t *, int (*)(void *, size_t, void 921 * *, size_t, size_t *), void *, size_t, size_t *) 922 * 923 * Overview 924 * Assemble record with miscellaneous accounting information about the process 925 * and execute the callback on it. It is the callback's job to set "actual" to 926 * the size of record. 927 * 928 * Return values 929 * The result of the callback function, unless the extended process accounting 930 * feature is not active, in which case ENOTACTIVE is returned. 931 * 932 * Caller's context 933 * Suitable for KM_SLEEP allocations. 934 */ 935 int 936 exacct_assemble_proc_usage(ac_info_t *ac_proc, proc_usage_t *pu, 937 int (*callback)(ac_info_t *, void *, size_t, void *, size_t, size_t *), 938 void *ubuf, size_t ubufsize, size_t *actual, int flag) 939 { 940 ulong_t mask[AC_MASK_SZ]; 941 ea_object_t *proc_record; 942 ea_catalog_t record_type; 943 void *buf; 944 size_t bufsize; 945 int ret; 946 947 ASSERT(flag == EW_FINAL || flag == EW_PARTIAL); 948 949 mutex_enter(&ac_proc->ac_lock); 950 if (ac_proc->ac_state == AC_OFF) { 951 mutex_exit(&ac_proc->ac_lock); 952 return (ENOTACTIVE); 953 } 954 bt_copy(&ac_proc->ac_mask[0], mask, AC_MASK_SZ); 955 mutex_exit(&ac_proc->ac_lock); 956 957 switch (flag) { 958 case EW_FINAL: 959 record_type = EXD_GROUP_PROC; 960 break; 961 case EW_PARTIAL: 962 record_type = EXD_GROUP_PROC_PARTIAL; 963 break; 964 } 965 966 proc_record = exacct_assemble_proc_record(pu, mask, record_type); 967 if (proc_record == NULL) 968 return (0); 969 970 /* 971 * Pack object into buffer and pass to callback. 972 */ 973 bufsize = ea_pack_object(proc_record, NULL, 0); 974 buf = kmem_alloc(bufsize, KM_SLEEP); 975 (void) ea_pack_object(proc_record, buf, bufsize); 976 977 ret = callback(ac_proc, ubuf, ubufsize, buf, bufsize, actual); 978 979 /* 980 * Free all previously allocations. 981 */ 982 kmem_free(buf, bufsize); 983 ea_free_object(proc_record, EUP_ALLOC); 984 return (ret); 985 } 986 987 /* 988 * int exacct_commit_callback(ac_info_t *, void *, size_t, void *, size_t, 989 * size_t *) 990 * 991 * Overview 992 * exacct_commit_callback() writes the indicated buffer to the indicated 993 * extended accounting file. 994 * 995 * Return values 996 * The result of the write operation is returned. "actual" is updated to 997 * contain the number of bytes actually written. 998 * 999 * Caller's context 1000 * Suitable for a vn_rdwr() operation. 1001 */ 1002 /*ARGSUSED*/ 1003 int 1004 exacct_commit_callback(ac_info_t *info, void *ubuf, size_t ubufsize, 1005 void *buf, size_t bufsize, size_t *actual) 1006 { 1007 int error = 0; 1008 1009 *actual = 0; 1010 if ((error = exacct_vn_write(info, buf, bufsize)) == 0) 1011 *actual = bufsize; 1012 return (error); 1013 } 1014 1015 static void 1016 exacct_do_commit_proc(ac_info_t *ac_proc, proc_t *p, int wstat) 1017 { 1018 size_t size; 1019 proc_usage_t *pu; 1020 ulong_t mask[AC_MASK_SZ]; 1021 1022 mutex_enter(&ac_proc->ac_lock); 1023 if (ac_proc->ac_state == AC_ON) { 1024 bt_copy(&ac_proc->ac_mask[0], mask, AC_MASK_SZ); 1025 mutex_exit(&ac_proc->ac_lock); 1026 } else { 1027 mutex_exit(&ac_proc->ac_lock); 1028 return; 1029 } 1030 1031 mutex_enter(&p->p_lock); 1032 size = strlen(p->p_user.u_comm) + 1; 1033 mutex_exit(&p->p_lock); 1034 1035 pu = kmem_alloc(sizeof (proc_usage_t), KM_SLEEP); 1036 pu->pu_command = kmem_alloc(size, KM_SLEEP); 1037 mutex_enter(&p->p_lock); 1038 exacct_calculate_proc_usage(p, pu, mask, EW_FINAL, wstat); 1039 mutex_exit(&p->p_lock); 1040 1041 (void) exacct_assemble_proc_usage(ac_proc, pu, 1042 exacct_commit_callback, NULL, 0, &size, EW_FINAL); 1043 1044 kmem_free(pu->pu_command, strlen(pu->pu_command) + 1); 1045 kmem_free(pu, sizeof (proc_usage_t)); 1046 } 1047 /* 1048 * void exacct_commit_proc(proc_t *, int) 1049 * 1050 * Overview 1051 * exacct_commit_proc() calculates the final usage for a process, updating the 1052 * task usage if task accounting is active, and writing a process record if 1053 * process accounting is active. exacct_commit_proc() is intended for being 1054 * called from proc_exit(). 1055 * 1056 * Return values 1057 * None. 1058 * 1059 * Caller's context 1060 * Suitable for KM_SLEEP allocations. p_lock must not be held at entry. 1061 */ 1062 void 1063 exacct_commit_proc(proc_t *p, int wstat) 1064 { 1065 zone_t *zone = p->p_zone; 1066 struct exacct_globals *acg, *gacg = NULL; 1067 1068 if (exacct_zone_key == ZONE_KEY_UNINITIALIZED) { 1069 /* 1070 * acctctl module not loaded. Nothing to do. 1071 */ 1072 return; 1073 } 1074 acg = zone_getspecific(exacct_zone_key, zone); 1075 if (zone != global_zone) 1076 gacg = zone_getspecific(exacct_zone_key, global_zone); 1077 if (acg->ac_task.ac_state == AC_ON || 1078 (gacg != NULL && gacg->ac_task.ac_state == AC_ON)) { 1079 exacct_update_task_mstate(p); 1080 } 1081 1082 exacct_do_commit_proc(&acg->ac_proc, p, wstat); 1083 if (p->p_zone != global_zone) 1084 exacct_do_commit_proc(&gacg->ac_proc, p, wstat); 1085 } 1086 1087 static int 1088 exacct_attach_flow_item(flow_usage_t *fu, ea_object_t *record, int res) 1089 { 1090 int attached = 1; 1091 1092 switch (res) { 1093 case AC_FLOW_SADDR: 1094 if (fu->fu_isv4) { 1095 (void) ea_attach_item(record, &fu->fu_saddr[3], 1096 sizeof (uint32_t), EXT_UINT32 | EXD_FLOW_V4SADDR); 1097 } else { 1098 (void) ea_attach_item(record, &fu->fu_saddr, 1099 sizeof (fu->fu_saddr), EXT_RAW | 1100 EXD_FLOW_V6SADDR); 1101 } 1102 break; 1103 case AC_FLOW_DADDR: 1104 if (fu->fu_isv4) { 1105 (void) ea_attach_item(record, &fu->fu_daddr[3], 1106 sizeof (uint32_t), EXT_UINT32 | EXD_FLOW_V4DADDR); 1107 } else { 1108 (void) ea_attach_item(record, &fu->fu_daddr, 1109 sizeof (fu->fu_daddr), EXT_RAW | 1110 EXD_FLOW_V6DADDR); 1111 } 1112 break; 1113 case AC_FLOW_SPORT: 1114 (void) ea_attach_item(record, &fu->fu_sport, 1115 sizeof (uint16_t), EXT_UINT16 | EXD_FLOW_SPORT); 1116 break; 1117 case AC_FLOW_DPORT: 1118 (void) ea_attach_item(record, &fu->fu_dport, 1119 sizeof (uint16_t), EXT_UINT16 | EXD_FLOW_DPORT); 1120 break; 1121 case AC_FLOW_PROTOCOL: 1122 (void) ea_attach_item(record, &fu->fu_protocol, 1123 sizeof (uint8_t), EXT_UINT8 | EXD_FLOW_PROTOCOL); 1124 break; 1125 case AC_FLOW_DSFIELD: 1126 (void) ea_attach_item(record, &fu->fu_dsfield, 1127 sizeof (uint8_t), EXT_UINT8 | EXD_FLOW_DSFIELD); 1128 break; 1129 case AC_FLOW_CTIME: 1130 (void) ea_attach_item(record, &fu->fu_ctime, 1131 sizeof (uint64_t), EXT_UINT64 | EXD_FLOW_CTIME); 1132 break; 1133 case AC_FLOW_LSEEN: 1134 (void) ea_attach_item(record, &fu->fu_lseen, 1135 sizeof (uint64_t), EXT_UINT64 | EXD_FLOW_LSEEN); 1136 break; 1137 case AC_FLOW_NBYTES: 1138 (void) ea_attach_item(record, &fu->fu_nbytes, 1139 sizeof (uint64_t), EXT_UINT32 | EXD_FLOW_NBYTES); 1140 break; 1141 case AC_FLOW_NPKTS: 1142 (void) ea_attach_item(record, &fu->fu_npackets, 1143 sizeof (uint64_t), EXT_UINT32 | EXD_FLOW_NPKTS); 1144 break; 1145 case AC_FLOW_PROJID: 1146 if (fu->fu_projid >= 0) { 1147 (void) ea_attach_item(record, &fu->fu_projid, 1148 sizeof (uint32_t), EXT_UINT32 | EXD_FLOW_PROJID); 1149 } 1150 break; 1151 case AC_FLOW_UID: 1152 if (fu->fu_userid >= 0) { 1153 (void) ea_attach_item(record, &fu->fu_userid, 1154 sizeof (uint32_t), EXT_UINT32 | EXD_FLOW_UID); 1155 } 1156 break; 1157 case AC_FLOW_ANAME: 1158 (void) ea_attach_item(record, fu->fu_aname, 1159 strlen(fu->fu_aname) + 1, EXT_STRING | EXD_FLOW_ANAME); 1160 break; 1161 default: 1162 attached = 0; 1163 } 1164 return (attached); 1165 } 1166 1167 static ea_object_t * 1168 exacct_assemble_flow_record(flow_usage_t *fu, ulong_t *mask, 1169 ea_catalog_t record_type) 1170 { 1171 int res, count; 1172 ea_object_t *record; 1173 1174 /* 1175 * Assemble usage values into group. 1176 */ 1177 record = ea_alloc_group(EXT_GROUP | EXC_DEFAULT | record_type); 1178 for (res = 1, count = 0; res <= AC_FLOW_MAX_RES; res++) 1179 if (BT_TEST(mask, res)) 1180 count += exacct_attach_flow_item(fu, record, res); 1181 if (count == 0) { 1182 ea_free_object(record, EUP_ALLOC); 1183 record = NULL; 1184 } 1185 return (record); 1186 } 1187 1188 int 1189 exacct_assemble_flow_usage(ac_info_t *ac_flow, flow_usage_t *fu, 1190 int (*callback)(ac_info_t *, void *, size_t, void *, size_t, size_t *), 1191 void *ubuf, size_t ubufsize, size_t *actual) 1192 { 1193 ulong_t mask[AC_MASK_SZ]; 1194 ea_object_t *flow_usage; 1195 ea_catalog_t record_type; 1196 void *buf; 1197 size_t bufsize; 1198 int ret; 1199 1200 mutex_enter(&ac_flow->ac_lock); 1201 if (ac_flow->ac_state == AC_OFF) { 1202 mutex_exit(&ac_flow->ac_lock); 1203 return (ENOTACTIVE); 1204 } 1205 bt_copy(&ac_flow->ac_mask[0], mask, AC_MASK_SZ); 1206 mutex_exit(&ac_flow->ac_lock); 1207 1208 record_type = EXD_GROUP_FLOW; 1209 1210 flow_usage = exacct_assemble_flow_record(fu, mask, record_type); 1211 if (flow_usage == NULL) { 1212 return (0); 1213 } 1214 1215 /* 1216 * Pack object into buffer and pass to callback. 1217 */ 1218 bufsize = ea_pack_object(flow_usage, NULL, 0); 1219 buf = kmem_alloc(bufsize, KM_NOSLEEP); 1220 if (buf == NULL) { 1221 return (ENOMEM); 1222 } 1223 1224 (void) ea_pack_object(flow_usage, buf, bufsize); 1225 1226 ret = callback(ac_flow, ubuf, ubufsize, buf, bufsize, actual); 1227 1228 /* 1229 * Free all previously allocations. 1230 */ 1231 kmem_free(buf, bufsize); 1232 ea_free_object(flow_usage, EUP_ALLOC); 1233 return (ret); 1234 } 1235 1236 void 1237 exacct_commit_flow(void *arg) 1238 { 1239 flow_usage_t *f = (flow_usage_t *)arg; 1240 size_t size; 1241 ulong_t mask[AC_MASK_SZ]; 1242 struct exacct_globals *acg; 1243 ac_info_t *ac_flow; 1244 1245 if (exacct_zone_key == ZONE_KEY_UNINITIALIZED) { 1246 /* 1247 * acctctl module not loaded. Nothing to do. 1248 */ 1249 return; 1250 } 1251 1252 /* 1253 * Even though each zone nominally has its own flow accounting settings 1254 * (ac_flow), these are only maintained by and for the global zone. 1255 * 1256 * If this were to change in the future, this function should grow a 1257 * second zoneid (or zone) argument, and use the corresponding zone's 1258 * settings rather than always using those of the global zone. 1259 */ 1260 acg = zone_getspecific(exacct_zone_key, global_zone); 1261 ac_flow = &acg->ac_flow; 1262 1263 mutex_enter(&ac_flow->ac_lock); 1264 if (ac_flow->ac_state == AC_OFF) { 1265 mutex_exit(&ac_flow->ac_lock); 1266 return; 1267 } 1268 bt_copy(&ac_flow->ac_mask[0], mask, AC_MASK_SZ); 1269 mutex_exit(&ac_flow->ac_lock); 1270 1271 (void) exacct_assemble_flow_usage(ac_flow, f, exacct_commit_callback, 1272 NULL, 0, &size); 1273 } 1274 1275 /* 1276 * int exacct_tag_task(task_t *, void *, size_t, int) 1277 * 1278 * Overview 1279 * exacct_tag_task() provides the exacct record construction and writing 1280 * support required by putacct(2) for task entities. 1281 * 1282 * Return values 1283 * The result of the write operation is returned, unless the extended 1284 * accounting facility is not active, in which case ENOTACTIVE is returned. 1285 * 1286 * Caller's context 1287 * Suitable for KM_SLEEP allocations. 1288 */ 1289 int 1290 exacct_tag_task(ac_info_t *ac_task, task_t *tk, void *ubuf, size_t ubufsz, 1291 int flags) 1292 { 1293 int error = 0; 1294 void *buf; 1295 size_t bufsize; 1296 ea_catalog_t cat; 1297 ea_object_t *tag; 1298 1299 mutex_enter(&ac_task->ac_lock); 1300 if (ac_task->ac_state == AC_OFF || ac_task->ac_vnode == NULL) { 1301 mutex_exit(&ac_task->ac_lock); 1302 return (ENOTACTIVE); 1303 } 1304 mutex_exit(&ac_task->ac_lock); 1305 1306 tag = ea_alloc_group(EXT_GROUP | EXC_DEFAULT | EXD_GROUP_TASK_TAG); 1307 (void) ea_attach_item(tag, &tk->tk_tkid, 0, 1308 EXT_UINT32 | EXC_DEFAULT | EXD_TASK_TASKID); 1309 (void) ea_attach_item(tag, tk->tk_zone->zone_nodename, 0, 1310 EXT_STRING | EXC_DEFAULT | EXD_TASK_HOSTNAME); 1311 if (flags == EP_RAW) 1312 cat = EXT_RAW | EXC_DEFAULT | EXD_TASK_TAG; 1313 else 1314 cat = EXT_EXACCT_OBJECT | EXC_DEFAULT | EXD_TASK_TAG; 1315 (void) ea_attach_item(tag, ubuf, ubufsz, cat); 1316 1317 bufsize = ea_pack_object(tag, NULL, 0); 1318 buf = kmem_alloc(bufsize, KM_SLEEP); 1319 (void) ea_pack_object(tag, buf, bufsize); 1320 error = exacct_vn_write(ac_task, buf, bufsize); 1321 kmem_free(buf, bufsize); 1322 ea_free_object(tag, EUP_ALLOC); 1323 return (error); 1324 } 1325 1326 /* 1327 * exacct_tag_proc(pid_t, taskid_t, void *, size_t, int, char *) 1328 * 1329 * Overview 1330 * exacct_tag_proc() provides the exacct record construction and writing 1331 * support required by putacct(2) for processes. 1332 * 1333 * Return values 1334 * The result of the write operation is returned, unless the extended 1335 * accounting facility is not active, in which case ENOTACTIVE is returned. 1336 * 1337 * Caller's context 1338 * Suitable for KM_SLEEP allocations. 1339 */ 1340 int 1341 exacct_tag_proc(ac_info_t *ac_proc, pid_t pid, taskid_t tkid, void *ubuf, 1342 size_t ubufsz, int flags, const char *hostname) 1343 { 1344 int error = 0; 1345 void *buf; 1346 size_t bufsize; 1347 ea_catalog_t cat; 1348 ea_object_t *tag; 1349 1350 mutex_enter(&ac_proc->ac_lock); 1351 if (ac_proc->ac_state == AC_OFF || ac_proc->ac_vnode == NULL) { 1352 mutex_exit(&ac_proc->ac_lock); 1353 return (ENOTACTIVE); 1354 } 1355 mutex_exit(&ac_proc->ac_lock); 1356 1357 tag = ea_alloc_group(EXT_GROUP | EXC_DEFAULT | EXD_GROUP_PROC_TAG); 1358 (void) ea_attach_item(tag, &pid, sizeof (uint32_t), 1359 EXT_UINT32 | EXC_DEFAULT | EXD_PROC_PID); 1360 (void) ea_attach_item(tag, &tkid, 0, 1361 EXT_UINT32 | EXC_DEFAULT | EXD_TASK_TASKID); 1362 (void) ea_attach_item(tag, (void *)hostname, 0, 1363 EXT_STRING | EXC_DEFAULT | EXD_TASK_HOSTNAME); 1364 if (flags == EP_RAW) 1365 cat = EXT_RAW | EXC_DEFAULT | EXD_PROC_TAG; 1366 else 1367 cat = EXT_EXACCT_OBJECT | EXC_DEFAULT | EXD_PROC_TAG; 1368 (void) ea_attach_item(tag, ubuf, ubufsz, cat); 1369 1370 bufsize = ea_pack_object(tag, NULL, 0); 1371 buf = kmem_alloc(bufsize, KM_SLEEP); 1372 (void) ea_pack_object(tag, buf, bufsize); 1373 error = exacct_vn_write(ac_proc, buf, bufsize); 1374 kmem_free(buf, bufsize); 1375 ea_free_object(tag, EUP_ALLOC); 1376 return (error); 1377 } 1378 1379 /* 1380 * void exacct_init(void) 1381 * 1382 * Overview 1383 * Initialized the extended accounting subsystem. 1384 * 1385 * Return values 1386 * None. 1387 * 1388 * Caller's context 1389 * Suitable for KM_SLEEP allocations. 1390 */ 1391 void 1392 exacct_init() 1393 { 1394 exacct_queue = system_taskq; 1395 exacct_object_cache = kmem_cache_create("exacct_object_cache", 1396 sizeof (ea_object_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 1397 } 1398