/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * exacct usage and recording routines * * wracct(2), getacct(2), and the records written at process or task * termination are constructed using the exacct_assemble_[task,proc]_usage() * functions, which take a callback that takes the appropriate action on * the packed exacct record for the task or process. For the process-related * actions, we partition the routines such that the data collecting component * can be performed while holding p_lock, and all sleeping or blocking * operations can be performed without acquiring p_lock. * * putacct(2), which allows an application to construct a customized record * associated with an existing process or task, has its own entry points: * exacct_tag_task() and exacct_tag_proc(). */ taskq_t *exacct_queue; kmem_cache_t *exacct_object_cache; zone_key_t exacct_zone_key = ZONE_KEY_UNINITIALIZED; static const uint32_t exacct_version = EXACCT_VERSION; static const char exacct_header[] = "exacct"; static const char exacct_creator[] = "SunOS"; ea_object_t * ea_alloc_item(ea_catalog_t catalog, void *buf, size_t bufsz) { ea_object_t *item; item = kmem_cache_alloc(exacct_object_cache, KM_SLEEP); bzero(item, sizeof (ea_object_t)); (void) ea_set_item(item, catalog, buf, bufsz); return (item); } ea_object_t * ea_alloc_group(ea_catalog_t catalog) { ea_object_t *group; group = kmem_cache_alloc(exacct_object_cache, KM_SLEEP); bzero(group, sizeof (ea_object_t)); (void) ea_set_group(group, catalog); return (group); } ea_object_t * ea_attach_item(ea_object_t *grp, void *buf, size_t bufsz, ea_catalog_t catalog) { ea_object_t *item; item = ea_alloc_item(catalog, buf, bufsz); (void) ea_attach_to_group(grp, item); return (item); } /* * exacct_add_task_mstate() and exacct_sub_task_mstate() add and subtract * microstate accounting data and resource usage counters from one task_usage_t * from those supplied in another. These functions do not operate on *all* * members of a task_usage_t: for some (e.g. tu_anctaskid) it would not make * sense. */ static void exacct_add_task_mstate(task_usage_t *tu, task_usage_t *delta) { tu->tu_utime += delta->tu_utime; tu->tu_stime += delta->tu_stime; tu->tu_minflt += delta->tu_minflt; tu->tu_majflt += delta->tu_majflt; tu->tu_sndmsg += delta->tu_sndmsg; tu->tu_rcvmsg += delta->tu_rcvmsg; tu->tu_ioch += delta->tu_ioch; tu->tu_iblk += delta->tu_iblk; tu->tu_oblk += delta->tu_oblk; tu->tu_vcsw += delta->tu_vcsw; tu->tu_icsw += delta->tu_icsw; tu->tu_nsig += delta->tu_nsig; tu->tu_nswp += delta->tu_nswp; tu->tu_nscl += delta->tu_nscl; } /* * See the comments for exacct_add_task_mstate(), above. */ static void exacct_sub_task_mstate(task_usage_t *tu, task_usage_t *delta) { tu->tu_utime -= delta->tu_utime; tu->tu_stime -= delta->tu_stime; tu->tu_minflt -= delta->tu_minflt; tu->tu_majflt -= delta->tu_majflt; tu->tu_sndmsg -= delta->tu_sndmsg; tu->tu_rcvmsg -= delta->tu_rcvmsg; tu->tu_ioch -= delta->tu_ioch; tu->tu_iblk -= delta->tu_iblk; tu->tu_oblk -= delta->tu_oblk; tu->tu_vcsw -= delta->tu_vcsw; tu->tu_icsw -= delta->tu_icsw; tu->tu_nsig -= delta->tu_nsig; tu->tu_nswp -= delta->tu_nswp; tu->tu_nscl -= delta->tu_nscl; } /* * Wrapper for vn_rdwr() used by exacct_vn_write() and exacct_write_header() * to write to the accounting file without corrupting it in case of an I/O or * filesystem error. */ static int exacct_vn_write_impl(ac_info_t *info, void *buf, ssize_t bufsize) { int error; ssize_t resid; struct vattr va; ASSERT(info != NULL); ASSERT(info->ac_vnode != NULL); ASSERT(MUTEX_HELD(&info->ac_lock)); /* * Save the size. If vn_rdwr fails, reset the size to avoid corrupting * the present accounting file. */ va.va_mask = AT_SIZE; error = VOP_GETATTR(info->ac_vnode, &va, 0, kcred, NULL); if (error == 0) { error = vn_rdwr(UIO_WRITE, info->ac_vnode, (caddr_t)buf, bufsize, 0LL, UIO_SYSSPACE, FAPPEND, (rlim64_t)MAXOFFSET_T, kcred, &resid); if (error) { (void) VOP_SETATTR(info->ac_vnode, &va, 0, kcred, NULL); } else if (resid != 0) { (void) VOP_SETATTR(info->ac_vnode, &va, 0, kcred, NULL); error = ENOSPC; } } return (error); } /* * exacct_vn_write() safely writes to an accounting file. acctctl() prevents * the two accounting vnodes from being equal, and the appropriate ac_lock is * held across the call, so we're single threaded through this code for each * file. */ static int exacct_vn_write(ac_info_t *info, void *buf, ssize_t bufsize) { int error; if (info == NULL) return (0); mutex_enter(&info->ac_lock); /* * Don't do anything unless accounting file is set. */ if (info->ac_vnode == NULL) { mutex_exit(&info->ac_lock); return (0); } error = exacct_vn_write_impl(info, buf, bufsize); mutex_exit(&info->ac_lock); return (error); } /* * void *exacct_create_header(size_t *) * * Overview * exacct_create_header() constructs an exacct file header identifying the * accounting file as the output of the kernel. exacct_create_header() and * the static write_header() and verify_header() routines in libexacct must * remain synchronized. * * Return values * A pointer to a packed exacct buffer containing the appropriate header is * returned; the size of the buffer is placed in the location indicated by * sizep. * * Caller's context * Suitable for KM_SLEEP allocations. */ void * exacct_create_header(size_t *sizep) { ea_object_t *hdr_grp; uint32_t bskip; void *buf; size_t bufsize; hdr_grp = ea_alloc_group(EXT_GROUP | EXC_DEFAULT | EXD_GROUP_HEADER); (void) ea_attach_item(hdr_grp, (void *)&exacct_version, 0, EXT_UINT32 | EXC_DEFAULT | EXD_VERSION); (void) ea_attach_item(hdr_grp, (void *)exacct_header, 0, EXT_STRING | EXC_DEFAULT | EXD_FILETYPE); (void) ea_attach_item(hdr_grp, (void *)exacct_creator, 0, EXT_STRING | EXC_DEFAULT | EXD_CREATOR); (void) ea_attach_item(hdr_grp, uts_nodename(), 0, EXT_STRING | EXC_DEFAULT | EXD_HOSTNAME); bufsize = ea_pack_object(hdr_grp, NULL, 0); buf = kmem_alloc(bufsize, KM_SLEEP); (void) ea_pack_object(hdr_grp, buf, bufsize); ea_free_object(hdr_grp, EUP_ALLOC); /* * To prevent reading the header when reading the file backwards, * set the large backskip of the header group to 0 (last 4 bytes). */ bskip = 0; exacct_order32(&bskip); bcopy(&bskip, (char *)buf + bufsize - sizeof (bskip), sizeof (bskip)); *sizep = bufsize; return (buf); } /* * int exacct_write_header(ac_info_t *, void *, size_t) * * Overview * exacct_write_header() writes the given header buffer to the indicated * vnode. * * Return values * The result of the write operation is returned. * * Caller's context * Caller must hold the ac_lock of the appropriate accounting file * information block (ac_info_t). */ int exacct_write_header(ac_info_t *info, void *hdr, size_t hdrsize) { if (info != NULL && info->ac_vnode != NULL) return (exacct_vn_write_impl(info, hdr, hdrsize)); return (0); } static void exacct_get_interval_task_usage(task_t *tk, task_usage_t *tu, task_usage_t **tu_buf) { task_usage_t *oldtu, *newtu; task_usage_t **prevusage; ASSERT(MUTEX_HELD(&tk->tk_usage_lock)); if (getzoneid() != GLOBAL_ZONEID) { prevusage = &tk->tk_zoneusage; } else { prevusage = &tk->tk_prevusage; } if ((oldtu = *prevusage) != NULL) { /* * In case we have any accounting information * saved from the previous interval record. */ newtu = *tu_buf; bcopy(tu, newtu, sizeof (task_usage_t)); tu->tu_minflt -= oldtu->tu_minflt; tu->tu_majflt -= oldtu->tu_majflt; tu->tu_sndmsg -= oldtu->tu_sndmsg; tu->tu_rcvmsg -= oldtu->tu_rcvmsg; tu->tu_ioch -= oldtu->tu_ioch; tu->tu_iblk -= oldtu->tu_iblk; tu->tu_oblk -= oldtu->tu_oblk; tu->tu_vcsw -= oldtu->tu_vcsw; tu->tu_icsw -= oldtu->tu_icsw; tu->tu_nsig -= oldtu->tu_nsig; tu->tu_nswp -= oldtu->tu_nswp; tu->tu_nscl -= oldtu->tu_nscl; tu->tu_utime -= oldtu->tu_utime; tu->tu_stime -= oldtu->tu_stime; tu->tu_startsec = oldtu->tu_finishsec; tu->tu_startnsec = oldtu->tu_finishnsec; /* * Copy the data from our temporary storage to the task's * previous interval usage structure for future reference. */ bcopy(newtu, oldtu, sizeof (task_usage_t)); } else { /* * Store current statistics in the task's previous interval * usage structure for future references. */ *prevusage = *tu_buf; bcopy(tu, *prevusage, sizeof (task_usage_t)); *tu_buf = NULL; } } static void exacct_snapshot_task_usage(task_t *tk, task_usage_t *tu) { timestruc_t ts; proc_t *p; ASSERT(MUTEX_HELD(&pidlock)); if ((p = tk->tk_memb_list) == NULL) return; /* * exacct_snapshot_task_usage() provides an approximate snapshot of the * usage of the potentially many members of the task. Since we don't * guarantee exactness, we don't acquire the p_lock of any of the member * processes. */ do { mutex_enter(&p->p_lock); tu->tu_utime += mstate_aggr_state(p, LMS_USER); tu->tu_stime += mstate_aggr_state(p, LMS_SYSTEM); mutex_exit(&p->p_lock); tu->tu_minflt += p->p_ru.minflt; tu->tu_majflt += p->p_ru.majflt; tu->tu_sndmsg += p->p_ru.msgsnd; tu->tu_rcvmsg += p->p_ru.msgrcv; tu->tu_ioch += p->p_ru.ioch; tu->tu_iblk += p->p_ru.inblock; tu->tu_oblk += p->p_ru.oublock; tu->tu_vcsw += p->p_ru.nvcsw; tu->tu_icsw += p->p_ru.nivcsw; tu->tu_nsig += p->p_ru.nsignals; tu->tu_nswp += p->p_ru.nswap; tu->tu_nscl += p->p_ru.sysc; } while ((p = p->p_tasknext) != tk->tk_memb_list); /* * The resource usage accounted for so far will include that * contributed by the task's first process. If this process * came from another task, then its accumulated resource usage * will include a contribution from work performed there. * We must therefore subtract any resource usage that was * inherited with the first process. */ exacct_sub_task_mstate(tu, tk->tk_inherited); gethrestime(&ts); tu->tu_finishsec = (uint64_t)(ulong_t)ts.tv_sec; tu->tu_finishnsec = (uint64_t)(ulong_t)ts.tv_nsec; } /* * void exacct_update_task_mstate(proc_t *) * * Overview * exacct_update_task_mstate() updates the task usage; it is intended * to be called from proc_exit(). * * Return values * None. * * Caller's context * p_lock must be held at entry. */ void exacct_update_task_mstate(proc_t *p) { task_usage_t *tu; mutex_enter(&p->p_task->tk_usage_lock); tu = p->p_task->tk_usage; tu->tu_utime += mstate_aggr_state(p, LMS_USER); tu->tu_stime += mstate_aggr_state(p, LMS_SYSTEM); tu->tu_minflt += p->p_ru.minflt; tu->tu_majflt += p->p_ru.majflt; tu->tu_sndmsg += p->p_ru.msgsnd; tu->tu_rcvmsg += p->p_ru.msgrcv; tu->tu_ioch += p->p_ru.ioch; tu->tu_iblk += p->p_ru.inblock; tu->tu_oblk += p->p_ru.oublock; tu->tu_vcsw += p->p_ru.nvcsw; tu->tu_icsw += p->p_ru.nivcsw; tu->tu_nsig += p->p_ru.nsignals; tu->tu_nswp += p->p_ru.nswap; tu->tu_nscl += p->p_ru.sysc; mutex_exit(&p->p_task->tk_usage_lock); } static void exacct_calculate_task_usage(task_t *tk, task_usage_t *tu, int flag) { timestruc_t ts; task_usage_t *tu_buf; switch (flag) { case EW_PARTIAL: /* * For partial records we must report the sum of current * accounting statistics with previously accumulated * statistics. */ mutex_enter(&pidlock); mutex_enter(&tk->tk_usage_lock); (void) bcopy(tk->tk_usage, tu, sizeof (task_usage_t)); exacct_snapshot_task_usage(tk, tu); mutex_exit(&tk->tk_usage_lock); mutex_exit(&pidlock); break; case EW_INTERVAL: /* * We need to allocate spare task_usage_t buffer before * grabbing pidlock because we might need it later in * exacct_get_interval_task_usage(). */ tu_buf = kmem_zalloc(sizeof (task_usage_t), KM_SLEEP); mutex_enter(&pidlock); mutex_enter(&tk->tk_usage_lock); /* * For interval records, we deduct the previous microstate * accounting data and cpu usage times from previously saved * results and update the previous task usage structure. */ (void) bcopy(tk->tk_usage, tu, sizeof (task_usage_t)); exacct_snapshot_task_usage(tk, tu); exacct_get_interval_task_usage(tk, tu, &tu_buf); mutex_exit(&tk->tk_usage_lock); mutex_exit(&pidlock); if (tu_buf != NULL) kmem_free(tu_buf, sizeof (task_usage_t)); break; case EW_FINAL: /* * For final records, we deduct, from the task's current * usage, any usage that was inherited with the arrival * of a process from a previous task. We then record * the task's finish time. */ mutex_enter(&tk->tk_usage_lock); (void) bcopy(tk->tk_usage, tu, sizeof (task_usage_t)); exacct_sub_task_mstate(tu, tk->tk_inherited); mutex_exit(&tk->tk_usage_lock); gethrestime(&ts); tu->tu_finishsec = (uint64_t)(ulong_t)ts.tv_sec; tu->tu_finishnsec = (uint64_t)(ulong_t)ts.tv_nsec; break; } } static int exacct_attach_task_item(task_t *tk, task_usage_t *tu, ea_object_t *record, int res) { int attached = 1; switch (res) { case AC_TASK_TASKID: (void) ea_attach_item(record, &tk->tk_tkid, sizeof (uint32_t), EXT_UINT32 | EXD_TASK_TASKID); break; case AC_TASK_PROJID: (void) ea_attach_item(record, &tk->tk_proj->kpj_id, sizeof (uint32_t), EXT_UINT32 | EXD_TASK_PROJID); break; case AC_TASK_CPU: { timestruc_t ts; uint64_t ui; hrt2ts(tu->tu_stime, &ts); ui = ts.tv_sec; (void) ea_attach_item(record, &ui, sizeof (uint64_t), EXT_UINT64 | EXD_TASK_CPU_SYS_SEC); ui = ts.tv_nsec; (void) ea_attach_item(record, &ui, sizeof (uint64_t), EXT_UINT64 | EXD_TASK_CPU_SYS_NSEC); hrt2ts(tu->tu_utime, &ts); ui = ts.tv_sec; (void) ea_attach_item(record, &ui, sizeof (uint64_t), EXT_UINT64 | EXD_TASK_CPU_USER_SEC); ui = ts.tv_nsec; (void) ea_attach_item(record, &ui, sizeof (uint64_t), EXT_UINT64 | EXD_TASK_CPU_USER_NSEC); } break; case AC_TASK_TIME: (void) ea_attach_item(record, &tu->tu_startsec, sizeof (uint64_t), EXT_UINT64 | EXD_TASK_START_SEC); (void) ea_attach_item(record, &tu->tu_startnsec, sizeof (uint64_t), EXT_UINT64 | EXD_TASK_START_NSEC); (void) ea_attach_item(record, &tu->tu_finishsec, sizeof (uint64_t), EXT_UINT64 | EXD_TASK_FINISH_SEC); (void) ea_attach_item(record, &tu->tu_finishnsec, sizeof (uint64_t), EXT_UINT64 | EXD_TASK_FINISH_NSEC); break; case AC_TASK_HOSTNAME: (void) ea_attach_item(record, tk->tk_zone->zone_nodename, strlen(tk->tk_zone->zone_nodename) + 1, EXT_STRING | EXD_TASK_HOSTNAME); break; case AC_TASK_MICROSTATE: (void) ea_attach_item(record, &tu->tu_majflt, sizeof (uint64_t), EXT_UINT64 | EXD_TASK_FAULTS_MAJOR); (void) ea_attach_item(record, &tu->tu_minflt, sizeof (uint64_t), EXT_UINT64 | EXD_TASK_FAULTS_MINOR); (void) ea_attach_item(record, &tu->tu_sndmsg, sizeof (uint64_t), EXT_UINT64 | EXD_TASK_MESSAGES_SND); (void) ea_attach_item(record, &tu->tu_rcvmsg, sizeof (uint64_t), EXT_UINT64 | EXD_TASK_MESSAGES_RCV); (void) ea_attach_item(record, &tu->tu_iblk, sizeof (uint64_t), EXT_UINT64 | EXD_TASK_BLOCKS_IN); (void) ea_attach_item(record, &tu->tu_oblk, sizeof (uint64_t), EXT_UINT64 | EXD_TASK_BLOCKS_OUT); (void) ea_attach_item(record, &tu->tu_ioch, sizeof (uint64_t), EXT_UINT64 | EXD_TASK_CHARS_RDWR); (void) ea_attach_item(record, &tu->tu_vcsw, sizeof (uint64_t), EXT_UINT64 | EXD_TASK_CONTEXT_VOL); (void) ea_attach_item(record, &tu->tu_icsw, sizeof (uint64_t), EXT_UINT64 | EXD_TASK_CONTEXT_INV); (void) ea_attach_item(record, &tu->tu_nsig, sizeof (uint64_t), EXT_UINT64 | EXD_TASK_SIGNALS); (void) ea_attach_item(record, &tu->tu_nswp, sizeof (uint64_t), EXT_UINT64 | EXD_TASK_SWAPS); (void) ea_attach_item(record, &tu->tu_nscl, sizeof (uint64_t), EXT_UINT64 | EXD_TASK_SYSCALLS); break; case AC_TASK_ANCTASKID: (void) ea_attach_item(record, &tu->tu_anctaskid, sizeof (uint32_t), EXT_UINT32 | EXD_TASK_ANCTASKID); break; case AC_TASK_ZONENAME: (void) ea_attach_item(record, tk->tk_zone->zone_name, strlen(tk->tk_zone->zone_name) + 1, EXT_STRING | EXD_TASK_ZONENAME); break; default: attached = 0; } return (attached); } static ea_object_t * exacct_assemble_task_record(task_t *tk, task_usage_t *tu, ulong_t *mask, ea_catalog_t record_type) { int res, count; ea_object_t *record; /* * Assemble usage values into group. */ record = ea_alloc_group(EXT_GROUP | EXC_DEFAULT | record_type); for (res = 1, count = 0; res <= AC_TASK_MAX_RES; res++) if (BT_TEST(mask, res)) count += exacct_attach_task_item(tk, tu, record, res); if (count == 0) { ea_free_object(record, EUP_ALLOC); record = NULL; } return (record); } /* * int exacct_assemble_task_usage(task_t *, int (*)(void *, size_t, void *, * size_t, size_t *), void *, size_t, size_t *, int) * * Overview * exacct_assemble_task_usage() builds the packed exacct buffer for the * indicated task, executes the given callback function, and free the packed * buffer. * * Return values * Returns 0 on success; otherwise the appropriate error code is returned. * * Caller's context * Suitable for KM_SLEEP allocations. */ int exacct_assemble_task_usage(ac_info_t *ac_task, task_t *tk, int (*callback)(ac_info_t *, void *, size_t, void *, size_t, size_t *), void *ubuf, size_t ubufsize, size_t *actual, int flag) { ulong_t mask[AC_MASK_SZ]; ea_object_t *task_record; ea_catalog_t record_type; task_usage_t *tu; void *buf; size_t bufsize; int ret; ASSERT(flag == EW_FINAL || flag == EW_PARTIAL || flag == EW_INTERVAL); mutex_enter(&ac_task->ac_lock); if (ac_task->ac_state == AC_OFF) { mutex_exit(&ac_task->ac_lock); return (ENOTACTIVE); } bt_copy(ac_task->ac_mask, mask, AC_MASK_SZ); mutex_exit(&ac_task->ac_lock); switch (flag) { case EW_FINAL: record_type = EXD_GROUP_TASK; break; case EW_PARTIAL: record_type = EXD_GROUP_TASK_PARTIAL; break; case EW_INTERVAL: record_type = EXD_GROUP_TASK_INTERVAL; break; } /* * Calculate task usage and assemble it into the task record. */ tu = kmem_zalloc(sizeof (task_usage_t), KM_SLEEP); exacct_calculate_task_usage(tk, tu, flag); task_record = exacct_assemble_task_record(tk, tu, mask, record_type); if (task_record == NULL) { /* * The current configuration of the accounting system has * resulted in records with no data; accordingly, we don't write * these, but we return success. */ kmem_free(tu, sizeof (task_usage_t)); return (0); } /* * Pack object into buffer and run callback on it. */ bufsize = ea_pack_object(task_record, NULL, 0); buf = kmem_alloc(bufsize, KM_SLEEP); (void) ea_pack_object(task_record, buf, bufsize); ret = callback(ac_task, ubuf, ubufsize, buf, bufsize, actual); /* * Free all previously allocated structures. */ kmem_free(buf, bufsize); ea_free_object(task_record, EUP_ALLOC); kmem_free(tu, sizeof (task_usage_t)); return (ret); } /* * void exacct_commit_task(void *) * * Overview * exacct_commit_task() calculates the final usage for a task, updating the * task usage if task accounting is active, and writing a task record if task * accounting is active. exacct_commit_task() is intended for being called * from a task queue (taskq_t). * * Return values * None. * * Caller's context * Suitable for KM_SLEEP allocations. */ void exacct_commit_task(void *arg) { task_t *tk = (task_t *)arg; size_t size; zone_t *zone = tk->tk_zone; struct exacct_globals *acg; ASSERT(tk != task0p); ASSERT(tk->tk_memb_list == NULL); /* * Don't do any extra work if the acctctl module isn't loaded. */ if (exacct_zone_key != ZONE_KEY_UNINITIALIZED) { acg = zone_getspecific(exacct_zone_key, zone); (void) exacct_assemble_task_usage(&acg->ac_task, tk, exacct_commit_callback, NULL, 0, &size, EW_FINAL); if (tk->tk_zone != global_zone) { acg = zone_getspecific(exacct_zone_key, global_zone); (void) exacct_assemble_task_usage(&acg->ac_task, tk, exacct_commit_callback, NULL, 0, &size, EW_FINAL); } } /* * Release associated project and finalize task. */ task_end(tk); } static int exacct_attach_proc_item(proc_usage_t *pu, ea_object_t *record, int res) { int attached = 1; switch (res) { case AC_PROC_PID: (void) ea_attach_item(record, &pu->pu_pid, sizeof (uint32_t), EXT_UINT32 | EXD_PROC_PID); break; case AC_PROC_UID: (void) ea_attach_item(record, &pu->pu_ruid, sizeof (uint32_t), EXT_UINT32 | EXD_PROC_UID); break; case AC_PROC_FLAG: (void) ea_attach_item(record, &pu->pu_acflag, sizeof (uint32_t), EXT_UINT32 | EXD_PROC_ACCT_FLAGS); break; case AC_PROC_GID: (void) ea_attach_item(record, &pu->pu_rgid, sizeof (uint32_t), EXT_UINT32 | EXD_PROC_GID); break; case AC_PROC_PROJID: (void) ea_attach_item(record, &pu->pu_projid, sizeof (uint32_t), EXT_UINT32 | EXD_PROC_PROJID); break; case AC_PROC_TASKID: (void) ea_attach_item(record, &pu->pu_taskid, sizeof (uint32_t), EXT_UINT32 | EXD_PROC_TASKID); break; case AC_PROC_CPU: (void) ea_attach_item(record, &pu->pu_utimesec, sizeof (uint64_t), EXT_UINT64 | EXD_PROC_CPU_USER_SEC); (void) ea_attach_item(record, &pu->pu_utimensec, sizeof (uint64_t), EXT_UINT64 | EXD_PROC_CPU_USER_NSEC); (void) ea_attach_item(record, &pu->pu_stimesec, sizeof (uint64_t), EXT_UINT64 | EXD_PROC_CPU_SYS_SEC); (void) ea_attach_item(record, &pu->pu_stimensec, sizeof (uint64_t), EXT_UINT64 | EXD_PROC_CPU_SYS_NSEC); break; case AC_PROC_TIME: (void) ea_attach_item(record, &pu->pu_startsec, sizeof (uint64_t), EXT_UINT64 | EXD_PROC_START_SEC); (void) ea_attach_item(record, &pu->pu_startnsec, sizeof (uint64_t), EXT_UINT64 | EXD_PROC_START_NSEC); (void) ea_attach_item(record, &pu->pu_finishsec, sizeof (uint64_t), EXT_UINT64 | EXD_PROC_FINISH_SEC); (void) ea_attach_item(record, &pu->pu_finishnsec, sizeof (uint64_t), EXT_UINT64 | EXD_PROC_FINISH_NSEC); break; case AC_PROC_COMMAND: (void) ea_attach_item(record, pu->pu_command, strlen(pu->pu_command) + 1, EXT_STRING | EXD_PROC_COMMAND); break; case AC_PROC_HOSTNAME: (void) ea_attach_item(record, pu->pu_nodename, strlen(pu->pu_nodename) + 1, EXT_STRING | EXD_PROC_HOSTNAME); break; case AC_PROC_TTY: (void) ea_attach_item(record, &pu->pu_major, sizeof (uint32_t), EXT_UINT32 | EXD_PROC_TTY_MAJOR); (void) ea_attach_item(record, &pu->pu_minor, sizeof (uint32_t), EXT_UINT32 | EXD_PROC_TTY_MINOR); break; case AC_PROC_MICROSTATE: (void) ea_attach_item(record, &pu->pu_majflt, sizeof (uint64_t), EXT_UINT64 | EXD_PROC_FAULTS_MAJOR); (void) ea_attach_item(record, &pu->pu_minflt, sizeof (uint64_t), EXT_UINT64 | EXD_PROC_FAULTS_MINOR); (void) ea_attach_item(record, &pu->pu_sndmsg, sizeof (uint64_t), EXT_UINT64 | EXD_PROC_MESSAGES_SND); (void) ea_attach_item(record, &pu->pu_rcvmsg, sizeof (uint64_t), EXT_UINT64 | EXD_PROC_MESSAGES_RCV); (void) ea_attach_item(record, &pu->pu_iblk, sizeof (uint64_t), EXT_UINT64 | EXD_PROC_BLOCKS_IN); (void) ea_attach_item(record, &pu->pu_oblk, sizeof (uint64_t), EXT_UINT64 | EXD_PROC_BLOCKS_OUT); (void) ea_attach_item(record, &pu->pu_ioch, sizeof (uint64_t), EXT_UINT64 | EXD_PROC_CHARS_RDWR); (void) ea_attach_item(record, &pu->pu_vcsw, sizeof (uint64_t), EXT_UINT64 | EXD_PROC_CONTEXT_VOL); (void) ea_attach_item(record, &pu->pu_icsw, sizeof (uint64_t), EXT_UINT64 | EXD_PROC_CONTEXT_INV); (void) ea_attach_item(record, &pu->pu_nsig, sizeof (uint64_t), EXT_UINT64 | EXD_PROC_SIGNALS); (void) ea_attach_item(record, &pu->pu_nswp, sizeof (uint64_t), EXT_UINT64 | EXD_PROC_SWAPS); (void) ea_attach_item(record, &pu->pu_nscl, sizeof (uint64_t), EXT_UINT64 | EXD_PROC_SYSCALLS); break; case AC_PROC_ANCPID: (void) ea_attach_item(record, &pu->pu_ancpid, sizeof (uint32_t), EXT_UINT32 | EXD_PROC_ANCPID); break; case AC_PROC_WAIT_STATUS: (void) ea_attach_item(record, &pu->pu_wstat, sizeof (uint32_t), EXT_UINT32 | EXD_PROC_WAIT_STATUS); break; case AC_PROC_ZONENAME: (void) ea_attach_item(record, pu->pu_zonename, strlen(pu->pu_zonename) + 1, EXT_STRING | EXD_PROC_ZONENAME); break; case AC_PROC_MEM: (void) ea_attach_item(record, &pu->pu_mem_rss_avg, sizeof (uint64_t), EXT_UINT64 | EXD_PROC_MEM_RSS_AVG_K); (void) ea_attach_item(record, &pu->pu_mem_rss_max, sizeof (uint64_t), EXT_UINT64 | EXD_PROC_MEM_RSS_MAX_K); break; default: attached = 0; } return (attached); } static ea_object_t * exacct_assemble_proc_record(proc_usage_t *pu, ulong_t *mask, ea_catalog_t record_type) { int res, count; ea_object_t *record; /* * Assemble usage values into group. */ record = ea_alloc_group(EXT_GROUP | EXC_DEFAULT | record_type); for (res = 1, count = 0; res <= AC_PROC_MAX_RES; res++) if (BT_TEST(mask, res)) count += exacct_attach_proc_item(pu, record, res); if (count == 0) { ea_free_object(record, EUP_ALLOC); record = NULL; } return (record); } /* * The following two routines assume that process's p_lock is held or * exacct_commit_proc has been called from exit() when all lwps are stopped. */ static void exacct_calculate_proc_mstate(proc_t *p, proc_usage_t *pu) { kthread_t *t; ASSERT(MUTEX_HELD(&p->p_lock)); if ((t = p->p_tlist) == NULL) return; do { pu->pu_minflt += t->t_lwp->lwp_ru.minflt; pu->pu_majflt += t->t_lwp->lwp_ru.majflt; pu->pu_sndmsg += t->t_lwp->lwp_ru.msgsnd; pu->pu_rcvmsg += t->t_lwp->lwp_ru.msgrcv; pu->pu_ioch += t->t_lwp->lwp_ru.ioch; pu->pu_iblk += t->t_lwp->lwp_ru.inblock; pu->pu_oblk += t->t_lwp->lwp_ru.oublock; pu->pu_vcsw += t->t_lwp->lwp_ru.nvcsw; pu->pu_icsw += t->t_lwp->lwp_ru.nivcsw; pu->pu_nsig += t->t_lwp->lwp_ru.nsignals; pu->pu_nswp += t->t_lwp->lwp_ru.nswap; pu->pu_nscl += t->t_lwp->lwp_ru.sysc; } while ((t = t->t_forw) != p->p_tlist); } static void exacct_copy_proc_mstate(proc_t *p, proc_usage_t *pu) { pu->pu_minflt = p->p_ru.minflt; pu->pu_majflt = p->p_ru.majflt; pu->pu_sndmsg = p->p_ru.msgsnd; pu->pu_rcvmsg = p->p_ru.msgrcv; pu->pu_ioch = p->p_ru.ioch; pu->pu_iblk = p->p_ru.inblock; pu->pu_oblk = p->p_ru.oublock; pu->pu_vcsw = p->p_ru.nvcsw; pu->pu_icsw = p->p_ru.nivcsw; pu->pu_nsig = p->p_ru.nsignals; pu->pu_nswp = p->p_ru.nswap; pu->pu_nscl = p->p_ru.sysc; } void exacct_calculate_proc_usage(proc_t *p, proc_usage_t *pu, ulong_t *mask, int flag, int wstat) { timestruc_t ts, ts_run; ASSERT(MUTEX_HELD(&p->p_lock)); /* * Convert CPU and execution times to sec/nsec format. */ if (BT_TEST(mask, AC_PROC_CPU)) { hrt2ts(mstate_aggr_state(p, LMS_USER), &ts); pu->pu_utimesec = (uint64_t)(ulong_t)ts.tv_sec; pu->pu_utimensec = (uint64_t)(ulong_t)ts.tv_nsec; hrt2ts(mstate_aggr_state(p, LMS_SYSTEM), &ts); pu->pu_stimesec = (uint64_t)(ulong_t)ts.tv_sec; pu->pu_stimensec = (uint64_t)(ulong_t)ts.tv_nsec; } if (BT_TEST(mask, AC_PROC_TIME)) { gethrestime(&ts); pu->pu_finishsec = (uint64_t)(ulong_t)ts.tv_sec; pu->pu_finishnsec = (uint64_t)(ulong_t)ts.tv_nsec; hrt2ts(gethrtime() - p->p_mstart, &ts_run); ts.tv_sec -= ts_run.tv_sec; ts.tv_nsec -= ts_run.tv_nsec; if (ts.tv_nsec < 0) { ts.tv_sec--; if ((ts.tv_nsec = ts.tv_nsec + NANOSEC) >= NANOSEC) { ts.tv_sec++; ts.tv_nsec -= NANOSEC; } } pu->pu_startsec = (uint64_t)(ulong_t)ts.tv_sec; pu->pu_startnsec = (uint64_t)(ulong_t)ts.tv_nsec; } pu->pu_pid = p->p_pidp->pid_id; pu->pu_acflag = p->p_user.u_acflag; pu->pu_projid = p->p_task->tk_proj->kpj_id; pu->pu_taskid = p->p_task->tk_tkid; pu->pu_major = getmajor(p->p_sessp->s_dev); pu->pu_minor = getminor(p->p_sessp->s_dev); pu->pu_ancpid = p->p_ancpid; pu->pu_wstat = wstat; /* * Compute average RSS in K. The denominator is the number of * samples: the number of clock ticks plus the initial value. */ pu->pu_mem_rss_avg = (PTOU(p)->u_mem / (p->p_stime + p->p_utime + 1)) * (PAGESIZE / 1024); pu->pu_mem_rss_max = PTOU(p)->u_mem_max * (PAGESIZE / 1024); mutex_enter(&p->p_crlock); pu->pu_ruid = crgetruid(p->p_cred); pu->pu_rgid = crgetrgid(p->p_cred); mutex_exit(&p->p_crlock); bcopy(p->p_user.u_comm, pu->pu_command, strlen(p->p_user.u_comm) + 1); bcopy(p->p_zone->zone_name, pu->pu_zonename, strlen(p->p_zone->zone_name) + 1); bcopy(p->p_zone->zone_nodename, pu->pu_nodename, strlen(p->p_zone->zone_nodename) + 1); /* * Calculate microstate accounting data for a process that is still * running. Presently, we explicitly collect all of the LWP usage into * the proc usage structure here. */ if (flag & EW_PARTIAL) exacct_calculate_proc_mstate(p, pu); if (flag & EW_FINAL) exacct_copy_proc_mstate(p, pu); } /* * int exacct_assemble_proc_usage(proc_usage_t *, int (*)(void *, size_t, void * *, size_t, size_t *), void *, size_t, size_t *) * * Overview * Assemble record with miscellaneous accounting information about the process * and execute the callback on it. It is the callback's job to set "actual" to * the size of record. * * Return values * The result of the callback function, unless the extended process accounting * feature is not active, in which case ENOTACTIVE is returned. * * Caller's context * Suitable for KM_SLEEP allocations. */ int exacct_assemble_proc_usage(ac_info_t *ac_proc, proc_usage_t *pu, int (*callback)(ac_info_t *, void *, size_t, void *, size_t, size_t *), void *ubuf, size_t ubufsize, size_t *actual, int flag) { ulong_t mask[AC_MASK_SZ]; ea_object_t *proc_record; ea_catalog_t record_type; void *buf; size_t bufsize; int ret; ASSERT(flag == EW_FINAL || flag == EW_PARTIAL); mutex_enter(&ac_proc->ac_lock); if (ac_proc->ac_state == AC_OFF) { mutex_exit(&ac_proc->ac_lock); return (ENOTACTIVE); } bt_copy(&ac_proc->ac_mask[0], mask, AC_MASK_SZ); mutex_exit(&ac_proc->ac_lock); switch (flag) { case EW_FINAL: record_type = EXD_GROUP_PROC; break; case EW_PARTIAL: record_type = EXD_GROUP_PROC_PARTIAL; break; } proc_record = exacct_assemble_proc_record(pu, mask, record_type); if (proc_record == NULL) return (0); /* * Pack object into buffer and pass to callback. */ bufsize = ea_pack_object(proc_record, NULL, 0); buf = kmem_alloc(bufsize, KM_SLEEP); (void) ea_pack_object(proc_record, buf, bufsize); ret = callback(ac_proc, ubuf, ubufsize, buf, bufsize, actual); /* * Free all previously allocations. */ kmem_free(buf, bufsize); ea_free_object(proc_record, EUP_ALLOC); return (ret); } /* * int exacct_commit_callback(ac_info_t *, void *, size_t, void *, size_t, * size_t *) * * Overview * exacct_commit_callback() writes the indicated buffer to the indicated * extended accounting file. * * Return values * The result of the write operation is returned. "actual" is updated to * contain the number of bytes actually written. * * Caller's context * Suitable for a vn_rdwr() operation. */ /*ARGSUSED*/ int exacct_commit_callback(ac_info_t *info, void *ubuf, size_t ubufsize, void *buf, size_t bufsize, size_t *actual) { int error = 0; *actual = 0; if ((error = exacct_vn_write(info, buf, bufsize)) == 0) *actual = bufsize; return (error); } static void exacct_do_commit_proc(ac_info_t *ac_proc, proc_t *p, int wstat) { size_t size; proc_usage_t *pu; ulong_t mask[AC_MASK_SZ]; mutex_enter(&ac_proc->ac_lock); if (ac_proc->ac_state == AC_ON) { bt_copy(&ac_proc->ac_mask[0], mask, AC_MASK_SZ); mutex_exit(&ac_proc->ac_lock); } else { mutex_exit(&ac_proc->ac_lock); return; } mutex_enter(&p->p_lock); size = strlen(p->p_user.u_comm) + 1; mutex_exit(&p->p_lock); pu = kmem_alloc(sizeof (proc_usage_t), KM_SLEEP); pu->pu_command = kmem_alloc(size, KM_SLEEP); mutex_enter(&p->p_lock); exacct_calculate_proc_usage(p, pu, mask, EW_FINAL, wstat); mutex_exit(&p->p_lock); (void) exacct_assemble_proc_usage(ac_proc, pu, exacct_commit_callback, NULL, 0, &size, EW_FINAL); kmem_free(pu->pu_command, strlen(pu->pu_command) + 1); kmem_free(pu, sizeof (proc_usage_t)); } /* * void exacct_commit_proc(proc_t *, int) * * Overview * exacct_commit_proc() calculates the final usage for a process, updating the * task usage if task accounting is active, and writing a process record if * process accounting is active. exacct_commit_proc() is intended for being * called from proc_exit(). * * Return values * None. * * Caller's context * Suitable for KM_SLEEP allocations. p_lock must not be held at entry. */ void exacct_commit_proc(proc_t *p, int wstat) { zone_t *zone = p->p_zone; struct exacct_globals *acg, *gacg = NULL; if (exacct_zone_key == ZONE_KEY_UNINITIALIZED) { /* * acctctl module not loaded. Nothing to do. */ return; } acg = zone_getspecific(exacct_zone_key, zone); exacct_do_commit_proc(&acg->ac_proc, p, wstat); if (zone != global_zone) { gacg = zone_getspecific(exacct_zone_key, global_zone); exacct_do_commit_proc(&gacg->ac_proc, p, wstat); } } static int exacct_attach_netstat_item(net_stat_t *ns, ea_object_t *record, int res) { int attached = 1; switch (res) { case AC_NET_NAME: (void) ea_attach_item(record, ns->ns_name, strlen(ns->ns_name) + 1, EXT_STRING | EXD_NET_STATS_NAME); break; case AC_NET_CURTIME: { uint64_t now; timestruc_t ts; gethrestime(&ts); now = (uint64_t)(ulong_t)ts.tv_sec; (void) ea_attach_item(record, &now, sizeof (uint64_t), EXT_UINT64 | EXD_NET_STATS_CURTIME); } break; case AC_NET_IBYTES: (void) ea_attach_item(record, &ns->ns_ibytes, sizeof (uint64_t), EXT_UINT64 | EXD_NET_STATS_IBYTES); break; case AC_NET_OBYTES: (void) ea_attach_item(record, &ns->ns_obytes, sizeof (uint64_t), EXT_UINT64 | EXD_NET_STATS_OBYTES); break; case AC_NET_IPKTS: (void) ea_attach_item(record, &ns->ns_ipackets, sizeof (uint64_t), EXT_UINT64 | EXD_NET_STATS_IPKTS); break; case AC_NET_OPKTS: (void) ea_attach_item(record, &ns->ns_opackets, sizeof (uint64_t), EXT_UINT64 | EXD_NET_STATS_OPKTS); break; case AC_NET_IERRPKTS: (void) ea_attach_item(record, &ns->ns_ierrors, sizeof (uint64_t), EXT_UINT64 | EXD_NET_STATS_IERRPKTS); break; case AC_NET_OERRPKTS: (void) ea_attach_item(record, &ns->ns_oerrors, sizeof (uint64_t), EXT_UINT64 | EXD_NET_STATS_OERRPKTS); break; default: attached = 0; } return (attached); } static int exacct_attach_netdesc_item(net_desc_t *nd, ea_object_t *record, int res) { int attached = 1; switch (res) { case AC_NET_NAME: (void) ea_attach_item(record, nd->nd_name, strlen(nd->nd_name) + 1, EXT_STRING | EXD_NET_DESC_NAME); break; case AC_NET_DEVNAME: (void) ea_attach_item(record, nd->nd_devname, strlen(nd->nd_devname) + 1, EXT_STRING | EXD_NET_DESC_DEVNAME); break; case AC_NET_EHOST: (void) ea_attach_item(record, &nd->nd_ehost, sizeof (nd->nd_ehost), EXT_RAW | EXD_NET_DESC_EHOST); break; case AC_NET_EDEST: (void) ea_attach_item(record, &nd->nd_edest, sizeof (nd->nd_edest), EXT_RAW | EXD_NET_DESC_EDEST); break; case AC_NET_VLAN_TPID: (void) ea_attach_item(record, &nd->nd_vlan_tpid, sizeof (ushort_t), EXT_UINT16 | EXD_NET_DESC_VLAN_TPID); break; case AC_NET_VLAN_TCI: (void) ea_attach_item(record, &nd->nd_vlan_tci, sizeof (ushort_t), EXT_UINT16 | EXD_NET_DESC_VLAN_TCI); break; case AC_NET_SAP: (void) ea_attach_item(record, &nd->nd_sap, sizeof (ushort_t), EXT_UINT16 | EXD_NET_DESC_SAP); break; case AC_NET_PRIORITY: (void) ea_attach_item(record, &nd->nd_priority, sizeof (ushort_t), EXT_UINT16 | EXD_NET_DESC_PRIORITY); break; case AC_NET_BWLIMIT: (void) ea_attach_item(record, &nd->nd_bw_limit, sizeof (uint64_t), EXT_UINT64 | EXD_NET_DESC_BWLIMIT); break; case AC_NET_SADDR: if (nd->nd_isv4) { (void) ea_attach_item(record, &nd->nd_saddr[3], sizeof (uint32_t), EXT_UINT32 | EXD_NET_DESC_V4SADDR); } else { (void) ea_attach_item(record, &nd->nd_saddr, sizeof (nd->nd_saddr), EXT_RAW | EXD_NET_DESC_V6SADDR); } break; case AC_NET_DADDR: if (nd->nd_isv4) { (void) ea_attach_item(record, &nd->nd_daddr[3], sizeof (uint32_t), EXT_UINT32 | EXD_NET_DESC_V4DADDR); } else { (void) ea_attach_item(record, &nd->nd_daddr, sizeof (nd->nd_daddr), EXT_RAW | EXD_NET_DESC_V6DADDR); } break; case AC_NET_SPORT: (void) ea_attach_item(record, &nd->nd_sport, sizeof (uint16_t), EXT_UINT16 | EXD_NET_DESC_SPORT); break; case AC_NET_DPORT: (void) ea_attach_item(record, &nd->nd_dport, sizeof (uint16_t), EXT_UINT16 | EXD_NET_DESC_DPORT); break; case AC_NET_PROTOCOL: (void) ea_attach_item(record, &nd->nd_protocol, sizeof (uint8_t), EXT_UINT8 | EXD_NET_DESC_PROTOCOL); break; case AC_NET_DSFIELD: (void) ea_attach_item(record, &nd->nd_dsfield, sizeof (uint8_t), EXT_UINT8 | EXD_NET_DESC_DSFIELD); break; default: attached = 0; } return (attached); } static ea_object_t * exacct_assemble_net_record(void *ninfo, ulong_t *mask, ea_catalog_t record_type, int what) { int res; int count; ea_object_t *record; /* * Assemble usage values into group. */ record = ea_alloc_group(EXT_GROUP | EXC_DEFAULT | record_type); for (res = 1, count = 0; res <= AC_NET_MAX_RES; res++) if (BT_TEST(mask, res)) { if (what == EX_NET_LNDESC_REC || what == EX_NET_FLDESC_REC) { count += exacct_attach_netdesc_item( (net_desc_t *)ninfo, record, res); } else { count += exacct_attach_netstat_item( (net_stat_t *)ninfo, record, res); } } if (count == 0) { ea_free_object(record, EUP_ALLOC); record = NULL; } return (record); } int exacct_assemble_net_usage(ac_info_t *ac_net, void *ninfo, int (*callback)(ac_info_t *, void *, size_t, void *, size_t, size_t *), void *ubuf, size_t ubufsize, size_t *actual, int what) { ulong_t mask[AC_MASK_SZ]; ea_object_t *net_desc; ea_catalog_t record_type; void *buf; size_t bufsize; int ret; mutex_enter(&ac_net->ac_lock); if (ac_net->ac_state == AC_OFF) { mutex_exit(&ac_net->ac_lock); return (ENOTACTIVE); } bt_copy(&ac_net->ac_mask[0], mask, AC_MASK_SZ); mutex_exit(&ac_net->ac_lock); switch (what) { case EX_NET_LNDESC_REC: record_type = EXD_GROUP_NET_LINK_DESC; break; case EX_NET_LNSTAT_REC: record_type = EXD_GROUP_NET_LINK_STATS; break; case EX_NET_FLDESC_REC: record_type = EXD_GROUP_NET_FLOW_DESC; break; case EX_NET_FLSTAT_REC: record_type = EXD_GROUP_NET_FLOW_STATS; break; } net_desc = exacct_assemble_net_record(ninfo, mask, record_type, what); if (net_desc == NULL) return (0); /* * Pack object into buffer and pass to callback. */ bufsize = ea_pack_object(net_desc, NULL, 0); buf = kmem_alloc(bufsize, KM_NOSLEEP); if (buf == NULL) return (ENOMEM); (void) ea_pack_object(net_desc, buf, bufsize); ret = callback(ac_net, ubuf, ubufsize, buf, bufsize, actual); /* * Free all previously allocations. */ kmem_free(buf, bufsize); ea_free_object(net_desc, EUP_ALLOC); return (ret); } int exacct_commit_netinfo(void *arg, int what) { size_t size; ulong_t mask[AC_MASK_SZ]; struct exacct_globals *acg; ac_info_t *ac_net; if (exacct_zone_key == ZONE_KEY_UNINITIALIZED) { /* * acctctl module not loaded. Nothing to do. */ return (ENOTACTIVE); } /* * Even though each zone nominally has its own flow accounting settings * (ac_flow), these are only maintained by and for the global zone. * * If this were to change in the future, this function should grow a * second zoneid (or zone) argument, and use the corresponding zone's * settings rather than always using those of the global zone. */ acg = zone_getspecific(exacct_zone_key, global_zone); ac_net = &acg->ac_net; mutex_enter(&ac_net->ac_lock); if (ac_net->ac_state == AC_OFF) { mutex_exit(&ac_net->ac_lock); return (ENOTACTIVE); } bt_copy(&ac_net->ac_mask[0], mask, AC_MASK_SZ); mutex_exit(&ac_net->ac_lock); return (exacct_assemble_net_usage(ac_net, arg, exacct_commit_callback, NULL, 0, &size, what)); } static int exacct_attach_flow_item(flow_usage_t *fu, ea_object_t *record, int res) { int attached = 1; switch (res) { case AC_FLOW_SADDR: if (fu->fu_isv4) { (void) ea_attach_item(record, &fu->fu_saddr[3], sizeof (uint32_t), EXT_UINT32 | EXD_FLOW_V4SADDR); } else { (void) ea_attach_item(record, &fu->fu_saddr, sizeof (fu->fu_saddr), EXT_RAW | EXD_FLOW_V6SADDR); } break; case AC_FLOW_DADDR: if (fu->fu_isv4) { (void) ea_attach_item(record, &fu->fu_daddr[3], sizeof (uint32_t), EXT_UINT32 | EXD_FLOW_V4DADDR); } else { (void) ea_attach_item(record, &fu->fu_daddr, sizeof (fu->fu_daddr), EXT_RAW | EXD_FLOW_V6DADDR); } break; case AC_FLOW_SPORT: (void) ea_attach_item(record, &fu->fu_sport, sizeof (uint16_t), EXT_UINT16 | EXD_FLOW_SPORT); break; case AC_FLOW_DPORT: (void) ea_attach_item(record, &fu->fu_dport, sizeof (uint16_t), EXT_UINT16 | EXD_FLOW_DPORT); break; case AC_FLOW_PROTOCOL: (void) ea_attach_item(record, &fu->fu_protocol, sizeof (uint8_t), EXT_UINT8 | EXD_FLOW_PROTOCOL); break; case AC_FLOW_DSFIELD: (void) ea_attach_item(record, &fu->fu_dsfield, sizeof (uint8_t), EXT_UINT8 | EXD_FLOW_DSFIELD); break; case AC_FLOW_CTIME: (void) ea_attach_item(record, &fu->fu_ctime, sizeof (uint64_t), EXT_UINT64 | EXD_FLOW_CTIME); break; case AC_FLOW_LSEEN: (void) ea_attach_item(record, &fu->fu_lseen, sizeof (uint64_t), EXT_UINT64 | EXD_FLOW_LSEEN); break; case AC_FLOW_NBYTES: (void) ea_attach_item(record, &fu->fu_nbytes, sizeof (uint64_t), EXT_UINT32 | EXD_FLOW_NBYTES); break; case AC_FLOW_NPKTS: (void) ea_attach_item(record, &fu->fu_npackets, sizeof (uint64_t), EXT_UINT32 | EXD_FLOW_NPKTS); break; case AC_FLOW_PROJID: if (fu->fu_projid >= 0) { (void) ea_attach_item(record, &fu->fu_projid, sizeof (uint32_t), EXT_UINT32 | EXD_FLOW_PROJID); } break; case AC_FLOW_UID: if (fu->fu_userid >= 0) { (void) ea_attach_item(record, &fu->fu_userid, sizeof (uint32_t), EXT_UINT32 | EXD_FLOW_UID); } break; case AC_FLOW_ANAME: (void) ea_attach_item(record, fu->fu_aname, strlen(fu->fu_aname) + 1, EXT_STRING | EXD_FLOW_ANAME); break; default: attached = 0; } return (attached); } static ea_object_t * exacct_assemble_flow_record(flow_usage_t *fu, ulong_t *mask, ea_catalog_t record_type) { int res, count; ea_object_t *record; /* * Assemble usage values into group. */ record = ea_alloc_group(EXT_GROUP | EXC_DEFAULT | record_type); for (res = 1, count = 0; res <= AC_FLOW_MAX_RES; res++) if (BT_TEST(mask, res)) count += exacct_attach_flow_item(fu, record, res); if (count == 0) { ea_free_object(record, EUP_ALLOC); record = NULL; } return (record); } int exacct_assemble_flow_usage(ac_info_t *ac_flow, flow_usage_t *fu, int (*callback)(ac_info_t *, void *, size_t, void *, size_t, size_t *), void *ubuf, size_t ubufsize, size_t *actual) { ulong_t mask[AC_MASK_SZ]; ea_object_t *flow_usage; ea_catalog_t record_type; void *buf; size_t bufsize; int ret; mutex_enter(&ac_flow->ac_lock); if (ac_flow->ac_state == AC_OFF) { mutex_exit(&ac_flow->ac_lock); return (ENOTACTIVE); } bt_copy(&ac_flow->ac_mask[0], mask, AC_MASK_SZ); mutex_exit(&ac_flow->ac_lock); record_type = EXD_GROUP_FLOW; flow_usage = exacct_assemble_flow_record(fu, mask, record_type); if (flow_usage == NULL) { return (0); } /* * Pack object into buffer and pass to callback. */ bufsize = ea_pack_object(flow_usage, NULL, 0); buf = kmem_alloc(bufsize, KM_NOSLEEP); if (buf == NULL) { return (ENOMEM); } (void) ea_pack_object(flow_usage, buf, bufsize); ret = callback(ac_flow, ubuf, ubufsize, buf, bufsize, actual); /* * Free all previously allocations. */ kmem_free(buf, bufsize); ea_free_object(flow_usage, EUP_ALLOC); return (ret); } void exacct_commit_flow(void *arg) { flow_usage_t *f = (flow_usage_t *)arg; size_t size; ulong_t mask[AC_MASK_SZ]; struct exacct_globals *acg; ac_info_t *ac_flow; if (exacct_zone_key == ZONE_KEY_UNINITIALIZED) { /* * acctctl module not loaded. Nothing to do. */ return; } /* * Even though each zone nominally has its own flow accounting settings * (ac_flow), these are only maintained by and for the global zone. * * If this were to change in the future, this function should grow a * second zoneid (or zone) argument, and use the corresponding zone's * settings rather than always using those of the global zone. */ acg = zone_getspecific(exacct_zone_key, global_zone); ac_flow = &acg->ac_flow; mutex_enter(&ac_flow->ac_lock); if (ac_flow->ac_state == AC_OFF) { mutex_exit(&ac_flow->ac_lock); return; } bt_copy(&ac_flow->ac_mask[0], mask, AC_MASK_SZ); mutex_exit(&ac_flow->ac_lock); (void) exacct_assemble_flow_usage(ac_flow, f, exacct_commit_callback, NULL, 0, &size); } /* * int exacct_tag_task(task_t *, void *, size_t, int) * * Overview * exacct_tag_task() provides the exacct record construction and writing * support required by putacct(2) for task entities. * * Return values * The result of the write operation is returned, unless the extended * accounting facility is not active, in which case ENOTACTIVE is returned. * * Caller's context * Suitable for KM_SLEEP allocations. */ int exacct_tag_task(ac_info_t *ac_task, task_t *tk, void *ubuf, size_t ubufsz, int flags) { int error = 0; void *buf; size_t bufsize; ea_catalog_t cat; ea_object_t *tag; mutex_enter(&ac_task->ac_lock); if (ac_task->ac_state == AC_OFF || ac_task->ac_vnode == NULL) { mutex_exit(&ac_task->ac_lock); return (ENOTACTIVE); } mutex_exit(&ac_task->ac_lock); tag = ea_alloc_group(EXT_GROUP | EXC_DEFAULT | EXD_GROUP_TASK_TAG); (void) ea_attach_item(tag, &tk->tk_tkid, 0, EXT_UINT32 | EXC_DEFAULT | EXD_TASK_TASKID); (void) ea_attach_item(tag, tk->tk_zone->zone_nodename, 0, EXT_STRING | EXC_DEFAULT | EXD_TASK_HOSTNAME); if (flags == EP_RAW) cat = EXT_RAW | EXC_DEFAULT | EXD_TASK_TAG; else cat = EXT_EXACCT_OBJECT | EXC_DEFAULT | EXD_TASK_TAG; (void) ea_attach_item(tag, ubuf, ubufsz, cat); bufsize = ea_pack_object(tag, NULL, 0); buf = kmem_alloc(bufsize, KM_SLEEP); (void) ea_pack_object(tag, buf, bufsize); error = exacct_vn_write(ac_task, buf, bufsize); kmem_free(buf, bufsize); ea_free_object(tag, EUP_ALLOC); return (error); } /* * exacct_tag_proc(pid_t, taskid_t, void *, size_t, int, char *) * * Overview * exacct_tag_proc() provides the exacct record construction and writing * support required by putacct(2) for processes. * * Return values * The result of the write operation is returned, unless the extended * accounting facility is not active, in which case ENOTACTIVE is returned. * * Caller's context * Suitable for KM_SLEEP allocations. */ int exacct_tag_proc(ac_info_t *ac_proc, pid_t pid, taskid_t tkid, void *ubuf, size_t ubufsz, int flags, const char *hostname) { int error = 0; void *buf; size_t bufsize; ea_catalog_t cat; ea_object_t *tag; mutex_enter(&ac_proc->ac_lock); if (ac_proc->ac_state == AC_OFF || ac_proc->ac_vnode == NULL) { mutex_exit(&ac_proc->ac_lock); return (ENOTACTIVE); } mutex_exit(&ac_proc->ac_lock); tag = ea_alloc_group(EXT_GROUP | EXC_DEFAULT | EXD_GROUP_PROC_TAG); (void) ea_attach_item(tag, &pid, sizeof (uint32_t), EXT_UINT32 | EXC_DEFAULT | EXD_PROC_PID); (void) ea_attach_item(tag, &tkid, 0, EXT_UINT32 | EXC_DEFAULT | EXD_TASK_TASKID); (void) ea_attach_item(tag, (void *)hostname, 0, EXT_STRING | EXC_DEFAULT | EXD_TASK_HOSTNAME); if (flags == EP_RAW) cat = EXT_RAW | EXC_DEFAULT | EXD_PROC_TAG; else cat = EXT_EXACCT_OBJECT | EXC_DEFAULT | EXD_PROC_TAG; (void) ea_attach_item(tag, ubuf, ubufsz, cat); bufsize = ea_pack_object(tag, NULL, 0); buf = kmem_alloc(bufsize, KM_SLEEP); (void) ea_pack_object(tag, buf, bufsize); error = exacct_vn_write(ac_proc, buf, bufsize); kmem_free(buf, bufsize); ea_free_object(tag, EUP_ALLOC); return (error); } /* * void exacct_init(void) * * Overview * Initialized the extended accounting subsystem. * * Return values * None. * * Caller's context * Suitable for KM_SLEEP allocations. */ void exacct_init() { exacct_queue = system_taskq; exacct_object_cache = kmem_cache_create("exacct_object_cache", sizeof (ea_object_t), 0, NULL, NULL, NULL, NULL, NULL, 0); } /* * exacct_snapshot_proc_mstate() copies a process's microstate accounting data * and resource usage counters into a given task_usage_t. It differs from * exacct_copy_proc_mstate() in that here a) we are copying to a task_usage_t, * b) p_lock will have been acquired earlier in the call path and c) we * are here including the process's user and system times. */ static void exacct_snapshot_proc_mstate(proc_t *p, task_usage_t *tu) { tu->tu_utime = mstate_aggr_state(p, LMS_USER); tu->tu_stime = mstate_aggr_state(p, LMS_SYSTEM); tu->tu_minflt = p->p_ru.minflt; tu->tu_majflt = p->p_ru.majflt; tu->tu_sndmsg = p->p_ru.msgsnd; tu->tu_rcvmsg = p->p_ru.msgrcv; tu->tu_ioch = p->p_ru.ioch; tu->tu_iblk = p->p_ru.inblock; tu->tu_oblk = p->p_ru.oublock; tu->tu_vcsw = p->p_ru.nvcsw; tu->tu_icsw = p->p_ru.nivcsw; tu->tu_nsig = p->p_ru.nsignals; tu->tu_nswp = p->p_ru.nswap; tu->tu_nscl = p->p_ru.sysc; } /* * void exacct_move_mstate(proc_t *, task_t *, task_t *) * * Overview * exacct_move_mstate() is called by task_change() and accounts for * a process's resource usage when it is moved from one task to another. * * The process's usage at this point is recorded in the new task so * that it can be excluded from the calculation of resources consumed * by that task. * * The resource usage inherited by the new task is also added to the * aggregate maintained by the old task for processes that have exited. * * Return values * None. * * Caller's context * pidlock and p_lock held across exacct_move_mstate(). */ void exacct_move_mstate(proc_t *p, task_t *oldtk, task_t *newtk) { task_usage_t tu; /* Take a snapshot of this process's mstate and RU counters */ exacct_snapshot_proc_mstate(p, &tu); /* * Use the snapshot to increment the aggregate usage of the old * task, and the inherited usage of the new one. */ mutex_enter(&oldtk->tk_usage_lock); exacct_add_task_mstate(oldtk->tk_usage, &tu); mutex_exit(&oldtk->tk_usage_lock); mutex_enter(&newtk->tk_usage_lock); exacct_add_task_mstate(newtk->tk_inherited, &tu); mutex_exit(&newtk->tk_usage_lock); }