1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24
25 #include <sys/exacct.h>
26 #include <sys/exacct_catalog.h>
27 #include <sys/disp.h>
28 #include <sys/task.h>
29 #include <sys/proc.h>
30 #include <sys/cmn_err.h>
31 #include <sys/kmem.h>
32 #include <sys/project.h>
33 #include <sys/systm.h>
34 #include <sys/vnode.h>
35 #include <sys/file.h>
36 #include <sys/acctctl.h>
37 #include <sys/time.h>
38 #include <sys/utsname.h>
39 #include <sys/session.h>
40 #include <sys/sysmacros.h>
41 #include <sys/bitmap.h>
42 #include <sys/msacct.h>
43
44 /*
45 * exacct usage and recording routines
46 *
47 * wracct(2), getacct(2), and the records written at process or task
48 * termination are constructed using the exacct_assemble_[task,proc]_usage()
49 * functions, which take a callback that takes the appropriate action on
50 * the packed exacct record for the task or process. For the process-related
51 * actions, we partition the routines such that the data collecting component
52 * can be performed while holding p_lock, and all sleeping or blocking
53 * operations can be performed without acquiring p_lock.
54 *
55 * putacct(2), which allows an application to construct a customized record
56 * associated with an existing process or task, has its own entry points:
57 * exacct_tag_task() and exacct_tag_proc().
58 */
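/*
 * The usual flow, sketched here with a final task record (the calling context
 * is an assumption for illustration; exacct_commit_task() below is the real
 * consumer):
 *
 *	size_t actual;
 *
 *	(void) exacct_assemble_task_usage(&acg->ac_task, tk,
 *	    exacct_commit_callback, NULL, 0, &actual, EW_FINAL);
 *
 * The assembly routine snapshots the usage, packs it into an exacct buffer,
 * and hands that buffer to the callback, which either appends it to the
 * accounting file (exacct_commit_callback()) or copies it out to the buffer
 * supplied by getacct(2).
 */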
59
60 taskq_t *exacct_queue;
61 kmem_cache_t *exacct_object_cache;
62
63 zone_key_t exacct_zone_key = ZONE_KEY_UNINITIALIZED;
64
65 static const uint32_t exacct_version = EXACCT_VERSION;
66 static const char exacct_header[] = "exacct";
67 static const char exacct_creator[] = "SunOS";
68
69 ea_object_t *
70 ea_alloc_item(ea_catalog_t catalog, void *buf, size_t bufsz)
71 {
72 ea_object_t *item;
73
74 item = kmem_cache_alloc(exacct_object_cache, KM_SLEEP);
75 bzero(item, sizeof (ea_object_t));
76 (void) ea_set_item(item, catalog, buf, bufsz);
77 return (item);
78 }
79
80 ea_object_t *
81 ea_alloc_group(ea_catalog_t catalog)
82 {
83 ea_object_t *group;
84
85 group = kmem_cache_alloc(exacct_object_cache, KM_SLEEP);
86 bzero(group, sizeof (ea_object_t));
87 (void) ea_set_group(group, catalog);
88 return (group);
89 }
90
91 ea_object_t *
92 ea_attach_item(ea_object_t *grp, void *buf, size_t bufsz, ea_catalog_t catalog)
93 {
94 ea_object_t *item;
95
96 item = ea_alloc_item(catalog, buf, bufsz);
97 (void) ea_attach_to_group(grp, item);
98 return (item);
99 }
100
101 /*
102 * exacct_add_task_mstate() and exacct_sub_task_mstate() add to or subtract
103 * from one task_usage_t the microstate accounting data and resource usage
104 * counters supplied in another. These functions do not operate on *all*
105 * members of a task_usage_t: for some (e.g. tu_anctaskid) it would not make
106 * sense.
107 */
108 static void
109 exacct_add_task_mstate(task_usage_t *tu, task_usage_t *delta)
110 {
111 tu->tu_utime += delta->tu_utime;
112 tu->tu_stime += delta->tu_stime;
113 tu->tu_minflt += delta->tu_minflt;
114 tu->tu_majflt += delta->tu_majflt;
115 tu->tu_sndmsg += delta->tu_sndmsg;
116 tu->tu_rcvmsg += delta->tu_rcvmsg;
117 tu->tu_ioch += delta->tu_ioch;
118 tu->tu_iblk += delta->tu_iblk;
119 tu->tu_oblk += delta->tu_oblk;
120 tu->tu_vcsw += delta->tu_vcsw;
121 tu->tu_icsw += delta->tu_icsw;
122 tu->tu_nsig += delta->tu_nsig;
123 tu->tu_nswp += delta->tu_nswp;
124 tu->tu_nscl += delta->tu_nscl;
125 }
126
127 /*
128 * See the comments for exacct_add_task_mstate(), above.
129 */
130 static void
131 exacct_sub_task_mstate(task_usage_t *tu, task_usage_t *delta)
132 {
133 tu->tu_utime -= delta->tu_utime;
134 tu->tu_stime -= delta->tu_stime;
135 tu->tu_minflt -= delta->tu_minflt;
136 tu->tu_majflt -= delta->tu_majflt;
137 tu->tu_sndmsg -= delta->tu_sndmsg;
138 tu->tu_rcvmsg -= delta->tu_rcvmsg;
139 tu->tu_ioch -= delta->tu_ioch;
140 tu->tu_iblk -= delta->tu_iblk;
141 tu->tu_oblk -= delta->tu_oblk;
142 tu->tu_vcsw -= delta->tu_vcsw;
143 tu->tu_icsw -= delta->tu_icsw;
144 tu->tu_nsig -= delta->tu_nsig;
145 tu->tu_nswp -= delta->tu_nswp;
146 tu->tu_nscl -= delta->tu_nscl;
147 }
148
149 /*
150 * Wrapper for vn_rdwr() used by exacct_vn_write() and exacct_write_header()
151 * to write to the accounting file without corrupting it in case of an I/O or
152 * filesystem error.
153 */
154 static int
155 exacct_vn_write_impl(ac_info_t *info, void *buf, ssize_t bufsize)
156 {
157 int error;
158 ssize_t resid;
159 struct vattr va;
160
161 ASSERT(info != NULL);
162 ASSERT(info->ac_vnode != NULL);
163 ASSERT(MUTEX_HELD(&info->ac_lock));
164
165 /*
166 * Save the size. If vn_rdwr fails, reset the size to avoid corrupting
167 * the present accounting file.
168 */
169 va.va_mask = AT_SIZE;
170 error = VOP_GETATTR(info->ac_vnode, &va, 0, kcred, NULL);
171 if (error == 0) {
172 error = vn_rdwr(UIO_WRITE, info->ac_vnode, (caddr_t)buf,
173 bufsize, 0LL, UIO_SYSSPACE, FAPPEND, (rlim64_t)MAXOFFSET_T,
174 kcred, &resid);
175 if (error) {
176 (void) VOP_SETATTR(info->ac_vnode, &va, 0, kcred, NULL);
177 } else if (resid != 0) {
178 (void) VOP_SETATTR(info->ac_vnode, &va, 0, kcred, NULL);
179 error = ENOSPC;
180 }
181 }
182 return (error);
183 }
184
185 /*
186 * exacct_vn_write() safely writes to an accounting file. acctctl() prevents
187 * the two accounting vnodes from being equal, and the appropriate ac_lock is
188 * held across the call, so we're single threaded through this code for each
189 * file.
190 */
191 static int
192 exacct_vn_write(ac_info_t *info, void *buf, ssize_t bufsize)
193 {
194 int error;
195
196 if (info == NULL)
197 return (0);
198
199 mutex_enter(&info->ac_lock);
200
201 /*
202 * Don't do anything unless an accounting file is set.
203 */
204 if (info->ac_vnode == NULL) {
205 mutex_exit(&info->ac_lock);
206 return (0);
207 }
208 error = exacct_vn_write_impl(info, buf, bufsize);
209 mutex_exit(&info->ac_lock);
210
211 return (error);
212 }
213
214 /*
215 * void *exacct_create_header(size_t *)
216 *
217 * Overview
218 * exacct_create_header() constructs an exacct file header identifying the
219 * accounting file as the output of the kernel. exacct_create_header() and
220 * the static write_header() and verify_header() routines in libexacct must
221 * remain synchronized.
222 *
223 * Return values
224 * A pointer to a packed exacct buffer containing the appropriate header is
225 * returned; the size of the buffer is placed in the location indicated by
226 * sizep.
227 *
228 * Caller's context
229 * Suitable for KM_SLEEP allocations.
230 */
231 void *
232 exacct_create_header(size_t *sizep)
233 {
234 ea_object_t *hdr_grp;
235 uint32_t bskip;
236 void *buf;
237 size_t bufsize;
238
239 hdr_grp = ea_alloc_group(EXT_GROUP | EXC_DEFAULT | EXD_GROUP_HEADER);
240 (void) ea_attach_item(hdr_grp, (void *)&exacct_version, 0,
241 EXT_UINT32 | EXC_DEFAULT | EXD_VERSION);
242 (void) ea_attach_item(hdr_grp, (void *)exacct_header, 0,
243 EXT_STRING | EXC_DEFAULT | EXD_FILETYPE);
244 (void) ea_attach_item(hdr_grp, (void *)exacct_creator, 0,
245 EXT_STRING | EXC_DEFAULT | EXD_CREATOR);
246 (void) ea_attach_item(hdr_grp, uts_nodename(), 0,
247 EXT_STRING | EXC_DEFAULT | EXD_HOSTNAME);
248
249 bufsize = ea_pack_object(hdr_grp, NULL, 0);
250 buf = kmem_alloc(bufsize, KM_SLEEP);
251 (void) ea_pack_object(hdr_grp, buf, bufsize);
252 ea_free_object(hdr_grp, EUP_ALLOC);
253
254 /*
255 * To prevent reading the header when reading the file backwards,
256 * set the large backskip of the header group to 0 (last 4 bytes).
257 */
258 bskip = 0;
259 exacct_order32(&bskip);
260 bcopy(&bskip, (char *)buf + bufsize - sizeof (bskip),
261 sizeof (bskip));
262
263 *sizep = bufsize;
264 return (buf);
265 }
266
267 /*
268 * int exacct_write_header(ac_info_t *, void *, size_t)
269 *
270 * Overview
271 * exacct_write_header() writes the given header buffer to the indicated
272 * vnode.
273 *
274 * Return values
275 * The result of the write operation is returned.
276 *
277 * Caller's context
278 * Caller must hold the ac_lock of the appropriate accounting file
279 * information block (ac_info_t).
280 */
281 int
282 exacct_write_header(ac_info_t *info, void *hdr, size_t hdrsize)
283 {
284 if (info != NULL && info->ac_vnode != NULL)
285 return (exacct_vn_write_impl(info, hdr, hdrsize));
286
287 return (0);
288 }
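/*
 * A minimal sketch of how the header routines are normally paired when an
 * accounting file is activated (the surrounding acctctl(2) handling and the
 * point at which ac_lock is taken are assumptions for illustration):
 *
 *	void *hdr;
 *	size_t hdrsize;
 *	int error;
 *
 *	hdr = exacct_create_header(&hdrsize);
 *	mutex_enter(&info->ac_lock);
 *	error = exacct_write_header(info, hdr, hdrsize);
 *	mutex_exit(&info->ac_lock);
 *	kmem_free(hdr, hdrsize);
 */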
289
290 static void
291 exacct_get_interval_task_usage(task_t *tk, task_usage_t *tu,
292 task_usage_t **tu_buf)
293 {
294 task_usage_t *oldtu, *newtu;
295 task_usage_t **prevusage;
296
297 ASSERT(MUTEX_HELD(&tk->tk_usage_lock));
298 if (getzoneid() != GLOBAL_ZONEID) {
299 prevusage = &tk->tk_zoneusage;
300 } else {
301 prevusage = &tk->tk_prevusage;
302 }
303 if ((oldtu = *prevusage) != NULL) {
304 /*
305 * We have accounting information saved from the previous interval
306 * record; report only the usage accumulated since then.
307 */
308 newtu = *tu_buf;
309 bcopy(tu, newtu, sizeof (task_usage_t));
310 tu->tu_minflt -= oldtu->tu_minflt;
311 tu->tu_majflt -= oldtu->tu_majflt;
312 tu->tu_sndmsg -= oldtu->tu_sndmsg;
313 tu->tu_rcvmsg -= oldtu->tu_rcvmsg;
314 tu->tu_ioch -= oldtu->tu_ioch;
315 tu->tu_iblk -= oldtu->tu_iblk;
316 tu->tu_oblk -= oldtu->tu_oblk;
317 tu->tu_vcsw -= oldtu->tu_vcsw;
318 tu->tu_icsw -= oldtu->tu_icsw;
319 tu->tu_nsig -= oldtu->tu_nsig;
320 tu->tu_nswp -= oldtu->tu_nswp;
321 tu->tu_nscl -= oldtu->tu_nscl;
322 tu->tu_utime -= oldtu->tu_utime;
323 tu->tu_stime -= oldtu->tu_stime;
324
325 tu->tu_startsec = oldtu->tu_finishsec;
326 tu->tu_startnsec = oldtu->tu_finishnsec;
327 /*
328 * Copy the data from our temporary storage to the task's
329 * previous interval usage structure for future reference.
330 */
331 bcopy(newtu, oldtu, sizeof (task_usage_t));
332 } else {
333 /*
334 * Store current statistics in the task's previous interval
335 * usage structure for future reference.
336 */
337 *prevusage = *tu_buf;
338 bcopy(tu, *prevusage, sizeof (task_usage_t));
339 *tu_buf = NULL;
340 }
341 }
342
343 static void
344 exacct_snapshot_task_usage(task_t *tk, task_usage_t *tu)
345 {
346 timestruc_t ts;
347 proc_t *p;
348
349 ASSERT(MUTEX_HELD(&pidlock));
350
351 if ((p = tk->tk_memb_list) == NULL)
352 return;
353
354 /*
355 * exacct_snapshot_task_usage() provides an approximate snapshot of the
356 * usage of the potentially many members of the task. Since we don't
357 * guarantee exactness, each member's p_lock is held only briefly (for the
358 * microstate aggregation) and the remaining counters are read unlocked.
359 */
360 do {
361 mutex_enter(&p->p_lock);
362 tu->tu_utime += mstate_aggr_state(p, LMS_USER);
363 tu->tu_stime += mstate_aggr_state(p, LMS_SYSTEM);
364 mutex_exit(&p->p_lock);
365 tu->tu_minflt += p->p_ru.minflt;
366 tu->tu_majflt += p->p_ru.majflt;
367 tu->tu_sndmsg += p->p_ru.msgsnd;
368 tu->tu_rcvmsg += p->p_ru.msgrcv;
369 tu->tu_ioch += p->p_ru.ioch;
370 tu->tu_iblk += p->p_ru.inblock;
371 tu->tu_oblk += p->p_ru.oublock;
372 tu->tu_vcsw += p->p_ru.nvcsw;
373 tu->tu_icsw += p->p_ru.nivcsw;
374 tu->tu_nsig += p->p_ru.nsignals;
375 tu->tu_nswp += p->p_ru.nswap;
376 tu->tu_nscl += p->p_ru.sysc;
377 } while ((p = p->p_tasknext) != tk->tk_memb_list);
378
379 /*
380 * The resource usage accounted for so far will include that
381 * contributed by the task's first process. If this process
382 * came from another task, then its accumulated resource usage
383 * will include a contribution from work performed there.
384 * We must therefore subtract any resource usage that was
385 * inherited with the first process.
386 */
387 exacct_sub_task_mstate(tu, tk->tk_inherited);
388
389 gethrestime(&ts);
390 tu->tu_finishsec = (uint64_t)(ulong_t)ts.tv_sec;
391 tu->tu_finishnsec = (uint64_t)(ulong_t)ts.tv_nsec;
392 }
393
394 /*
395 * void exacct_update_task_mstate(proc_t *)
396 *
397 * Overview
398 * exacct_update_task_mstate() updates the task usage; it is intended
399 * to be called from proc_exit().
400 *
401 * Return values
402 * None.
403 *
404 * Caller's context
405 * p_lock must be held at entry.
406 */
407 void
408 exacct_update_task_mstate(proc_t *p)
409 {
410 task_usage_t *tu;
411
412 mutex_enter(&p->p_task->tk_usage_lock);
413 tu = p->p_task->tk_usage;
414 tu->tu_utime += mstate_aggr_state(p, LMS_USER);
415 tu->tu_stime += mstate_aggr_state(p, LMS_SYSTEM);
416 tu->tu_minflt += p->p_ru.minflt;
417 tu->tu_majflt += p->p_ru.majflt;
418 tu->tu_sndmsg += p->p_ru.msgsnd;
419 tu->tu_rcvmsg += p->p_ru.msgrcv;
420 tu->tu_ioch += p->p_ru.ioch;
421 tu->tu_iblk += p->p_ru.inblock;
422 tu->tu_oblk += p->p_ru.oublock;
423 tu->tu_vcsw += p->p_ru.nvcsw;
424 tu->tu_icsw += p->p_ru.nivcsw;
425 tu->tu_nsig += p->p_ru.nsignals;
426 tu->tu_nswp += p->p_ru.nswap;
427 tu->tu_nscl += p->p_ru.sysc;
428 mutex_exit(&p->p_task->tk_usage_lock);
429 }
430
431 static void
432 exacct_calculate_task_usage(task_t *tk, task_usage_t *tu, int flag)
433 {
434 timestruc_t ts;
435 task_usage_t *tu_buf;
436
437 switch (flag) {
438 case EW_PARTIAL:
439 /*
440 * For partial records we must report the sum of current
441 * accounting statistics with previously accumulated
442 * statistics.
443 */
444 mutex_enter(&pidlock);
445 mutex_enter(&tk->tk_usage_lock);
446
447 (void) bcopy(tk->tk_usage, tu, sizeof (task_usage_t));
448 exacct_snapshot_task_usage(tk, tu);
449
450 mutex_exit(&tk->tk_usage_lock);
451 mutex_exit(&pidlock);
452 break;
453 case EW_INTERVAL:
454 /*
455 * We need to allocate a spare task_usage_t buffer before
456 * grabbing pidlock because we might need it later in
457 * exacct_get_interval_task_usage().
458 */
459 tu_buf = kmem_zalloc(sizeof (task_usage_t), KM_SLEEP);
460 mutex_enter(&pidlock);
461 mutex_enter(&tk->tk_usage_lock);
462
463 /*
464 * For interval records, we deduct the previous microstate
465 * accounting data and cpu usage times from previously saved
466 * results and update the previous task usage structure.
467 */
468 (void) bcopy(tk->tk_usage, tu, sizeof (task_usage_t));
469 exacct_snapshot_task_usage(tk, tu);
470 exacct_get_interval_task_usage(tk, tu, &tu_buf);
471
472 mutex_exit(&tk->tk_usage_lock);
473 mutex_exit(&pidlock);
474
475 if (tu_buf != NULL)
476 kmem_free(tu_buf, sizeof (task_usage_t));
477 break;
478 case EW_FINAL:
479 /*
480 * For final records, we deduct, from the task's current
481 * usage, any usage that was inherited with the arrival
482 * of a process from a previous task. We then record
483 * the task's finish time.
484 */
485 mutex_enter(&tk->tk_usage_lock);
486 (void) bcopy(tk->tk_usage, tu, sizeof (task_usage_t));
487 exacct_sub_task_mstate(tu, tk->tk_inherited);
488 mutex_exit(&tk->tk_usage_lock);
489
490 gethrestime(&ts);
491 tu->tu_finishsec = (uint64_t)(ulong_t)ts.tv_sec;
492 tu->tu_finishnsec = (uint64_t)(ulong_t)ts.tv_nsec;
493
494 break;
495 }
496 }
497
498 static int
499 exacct_attach_task_item(task_t *tk, task_usage_t *tu, ea_object_t *record,
500 int res)
501 {
502 int attached = 1;
503
504 switch (res) {
505 case AC_TASK_TASKID:
506 (void) ea_attach_item(record, &tk->tk_tkid,
507 sizeof (uint32_t), EXT_UINT32 | EXD_TASK_TASKID);
508 break;
509 case AC_TASK_PROJID:
510 (void) ea_attach_item(record, &tk->tk_proj->kpj_id,
511 sizeof (uint32_t), EXT_UINT32 | EXD_TASK_PROJID);
512 break;
513 case AC_TASK_CPU: {
514 timestruc_t ts;
515 uint64_t ui;
516
517 hrt2ts(tu->tu_stime, &ts);
518 ui = ts.tv_sec;
519 (void) ea_attach_item(record, &ui, sizeof (uint64_t),
520 EXT_UINT64 | EXD_TASK_CPU_SYS_SEC);
521 ui = ts.tv_nsec;
522 (void) ea_attach_item(record, &ui, sizeof (uint64_t),
523 EXT_UINT64 | EXD_TASK_CPU_SYS_NSEC);
524
525 hrt2ts(tu->tu_utime, &ts);
526 ui = ts.tv_sec;
527 (void) ea_attach_item(record, &ui, sizeof (uint64_t),
528 EXT_UINT64 | EXD_TASK_CPU_USER_SEC);
529 ui = ts.tv_nsec;
530 (void) ea_attach_item(record, &ui, sizeof (uint64_t),
531 EXT_UINT64 | EXD_TASK_CPU_USER_NSEC);
532 }
533 break;
534 case AC_TASK_TIME:
535 (void) ea_attach_item(record, &tu->tu_startsec,
536 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_START_SEC);
537 (void) ea_attach_item(record, &tu->tu_startnsec,
538 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_START_NSEC);
539 (void) ea_attach_item(record, &tu->tu_finishsec,
540 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_FINISH_SEC);
541 (void) ea_attach_item(record, &tu->tu_finishnsec,
542 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_FINISH_NSEC);
543 break;
544 case AC_TASK_HOSTNAME:
545 (void) ea_attach_item(record, tk->tk_zone->zone_nodename,
546 strlen(tk->tk_zone->zone_nodename) + 1,
547 EXT_STRING | EXD_TASK_HOSTNAME);
548 break;
549 case AC_TASK_MICROSTATE:
550 (void) ea_attach_item(record, &tu->tu_majflt,
551 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_FAULTS_MAJOR);
552 (void) ea_attach_item(record, &tu->tu_minflt,
553 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_FAULTS_MINOR);
554 (void) ea_attach_item(record, &tu->tu_sndmsg,
555 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_MESSAGES_SND);
556 (void) ea_attach_item(record, &tu->tu_rcvmsg,
557 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_MESSAGES_RCV);
558 (void) ea_attach_item(record, &tu->tu_iblk,
559 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_BLOCKS_IN);
560 (void) ea_attach_item(record, &tu->tu_oblk,
561 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_BLOCKS_OUT);
562 (void) ea_attach_item(record, &tu->tu_ioch,
563 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_CHARS_RDWR);
564 (void) ea_attach_item(record, &tu->tu_vcsw,
565 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_CONTEXT_VOL);
566 (void) ea_attach_item(record, &tu->tu_icsw,
567 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_CONTEXT_INV);
568 (void) ea_attach_item(record, &tu->tu_nsig,
569 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_SIGNALS);
570 (void) ea_attach_item(record, &tu->tu_nswp,
571 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_SWAPS);
572 (void) ea_attach_item(record, &tu->tu_nscl,
573 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_SYSCALLS);
574 break;
575 case AC_TASK_ANCTASKID:
576 (void) ea_attach_item(record, &tu->tu_anctaskid,
577 sizeof (uint32_t), EXT_UINT32 | EXD_TASK_ANCTASKID);
578 break;
579 case AC_TASK_ZONENAME:
580 (void) ea_attach_item(record, tk->tk_zone->zone_name,
581 strlen(tk->tk_zone->zone_name) + 1,
582 EXT_STRING | EXD_TASK_ZONENAME);
583 break;
584 default:
585 attached = 0;
586 }
587 return (attached);
588 }
589
590 static ea_object_t *
591 exacct_assemble_task_record(task_t *tk, task_usage_t *tu, ulong_t *mask,
592 ea_catalog_t record_type)
593 {
594 int res, count;
595 ea_object_t *record;
596
597 /*
598 * Assemble usage values into group.
599 */
600 record = ea_alloc_group(EXT_GROUP | EXC_DEFAULT | record_type);
601 for (res = 1, count = 0; res <= AC_TASK_MAX_RES; res++)
602 if (BT_TEST(mask, res))
603 count += exacct_attach_task_item(tk, tu, record, res);
604 if (count == 0) {
605 ea_free_object(record, EUP_ALLOC);
606 record = NULL;
607 }
608 return (record);
609 }
610
611 /*
612 * int exacct_assemble_task_usage(task_t *, int (*)(void *, size_t, void *,
613 * size_t, size_t *), void *, size_t, size_t *, int)
614 *
615 * Overview
616 * exacct_assemble_task_usage() builds the packed exacct buffer for the
617 * indicated task, executes the given callback function, and frees the packed
618 * buffer.
619 *
620 * Return values
621 * Returns 0 on success; otherwise the appropriate error code is returned.
622 *
623 * Caller's context
624 * Suitable for KM_SLEEP allocations.
625 */
626 int
627 exacct_assemble_task_usage(ac_info_t *ac_task, task_t *tk,
628 int (*callback)(ac_info_t *, void *, size_t, void *, size_t, size_t *),
629 void *ubuf, size_t ubufsize, size_t *actual, int flag)
630 {
631 ulong_t mask[AC_MASK_SZ];
632 ea_object_t *task_record;
633 ea_catalog_t record_type;
634 task_usage_t *tu;
635 void *buf;
636 size_t bufsize;
637 int ret;
638
639 ASSERT(flag == EW_FINAL || flag == EW_PARTIAL || flag == EW_INTERVAL);
640
641 mutex_enter(&ac_task->ac_lock);
642 if (ac_task->ac_state == AC_OFF) {
643 mutex_exit(&ac_task->ac_lock);
644 return (ENOTACTIVE);
645 }
646 bt_copy(ac_task->ac_mask, mask, AC_MASK_SZ);
647 mutex_exit(&ac_task->ac_lock);
648
649 switch (flag) {
650 case EW_FINAL:
651 record_type = EXD_GROUP_TASK;
652 break;
653 case EW_PARTIAL:
654 record_type = EXD_GROUP_TASK_PARTIAL;
655 break;
656 case EW_INTERVAL:
657 record_type = EXD_GROUP_TASK_INTERVAL;
658 break;
659 }
660
661 /*
662 * Calculate task usage and assemble it into the task record.
663 */
664 tu = kmem_zalloc(sizeof (task_usage_t), KM_SLEEP);
665 exacct_calculate_task_usage(tk, tu, flag);
666 task_record = exacct_assemble_task_record(tk, tu, mask, record_type);
667 if (task_record == NULL) {
668 /*
669 * The current configuration of the accounting system has
670 * resulted in records with no data; accordingly, we don't write
671 * these, but we return success.
672 */
673 kmem_free(tu, sizeof (task_usage_t));
674 return (0);
675 }
676
677 /*
678 * Pack object into buffer and run callback on it.
679 */
680 bufsize = ea_pack_object(task_record, NULL, 0);
681 buf = kmem_alloc(bufsize, KM_SLEEP);
682 (void) ea_pack_object(task_record, buf, bufsize);
683 ret = callback(ac_task, ubuf, ubufsize, buf, bufsize, actual);
684
685 /*
686 * Free all previously allocated structures.
687 */
688 kmem_free(buf, bufsize);
689 ea_free_object(task_record, EUP_ALLOC);
690 kmem_free(tu, sizeof (task_usage_t));
691 return (ret);
692 }
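/*
 * For the getacct(2) path the callback copies the packed record out to the
 * caller rather than writing it to a file. A hypothetical callback of that
 * shape might look like the following sketch (the name and the truncation
 * policy shown are illustrative assumptions, not the system call's actual
 * implementation):
 *
 *	static int
 *	example_getacct_callback(ac_info_t *info, void *ubuf, size_t ubufsize,
 *	    void *buf, size_t bufsize, size_t *actual)
 *	{
 *		size_t size = MIN(bufsize, ubufsize);
 *
 *		if (ubuf != NULL && copyout(buf, ubuf, size) != 0)
 *			return (EFAULT);
 *		*actual = bufsize;
 *		return (0);
 *	}
 */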
693
694 /*
695 * void exacct_commit_task(void *)
696 *
697 * Overview
698 * exacct_commit_task() calculates the final usage for a task and, if task
699 * accounting is active, writes a final task record (to both the task's zone
700 * and the global zone when they differ). exacct_commit_task() is intended
701 * to be called from a task queue (taskq_t).
702 *
703 * Return values
704 * None.
705 *
706 * Caller's context
707 * Suitable for KM_SLEEP allocations.
708 */
709
710 void
711 exacct_commit_task(void *arg)
712 {
713 task_t *tk = (task_t *)arg;
714 size_t size;
715 zone_t *zone = tk->tk_zone;
716 struct exacct_globals *acg;
717
718 ASSERT(tk != task0p);
719 ASSERT(tk->tk_memb_list == NULL);
720
721 /*
722 * Don't do any extra work if the acctctl module isn't loaded.
723 * If the acctctl module is loaded while the zone is in the down state,
724 * zone_getspecific() can return NULL for that zone.
725 */
726 if (exacct_zone_key != ZONE_KEY_UNINITIALIZED) {
727 acg = zone_getspecific(exacct_zone_key, zone);
728 if (acg == NULL)
729 goto err;
730 (void) exacct_assemble_task_usage(&acg->ac_task, tk,
731 exacct_commit_callback, NULL, 0, &size, EW_FINAL);
732 if (tk->tk_zone != global_zone) {
733 acg = zone_getspecific(exacct_zone_key, global_zone);
734 (void) exacct_assemble_task_usage(&acg->ac_task, tk,
735 exacct_commit_callback, NULL, 0, &size, EW_FINAL);
736 }
737 }
738 /*
739 * Release associated project and finalize task.
740 */
741 err:
742 task_end(tk);
743 }
744
745 static int
746 exacct_attach_proc_item(proc_usage_t *pu, ea_object_t *record, int res)
747 {
748 int attached = 1;
749
750 switch (res) {
751 case AC_PROC_PID:
752 (void) ea_attach_item(record, &pu->pu_pid,
753 sizeof (uint32_t), EXT_UINT32 | EXD_PROC_PID);
754 break;
755 case AC_PROC_UID:
756 (void) ea_attach_item(record, &pu->pu_ruid,
757 sizeof (uint32_t), EXT_UINT32 | EXD_PROC_UID);
758 break;
759 case AC_PROC_FLAG:
760 (void) ea_attach_item(record, &pu->pu_acflag,
761 sizeof (uint32_t), EXT_UINT32 | EXD_PROC_ACCT_FLAGS);
762 break;
763 case AC_PROC_GID:
764 (void) ea_attach_item(record, &pu->pu_rgid,
765 sizeof (uint32_t), EXT_UINT32 | EXD_PROC_GID);
766 break;
767 case AC_PROC_PROJID:
768 (void) ea_attach_item(record, &pu->pu_projid,
769 sizeof (uint32_t), EXT_UINT32 | EXD_PROC_PROJID);
770 break;
771 case AC_PROC_TASKID:
772 (void) ea_attach_item(record, &pu->pu_taskid,
773 sizeof (uint32_t), EXT_UINT32 | EXD_PROC_TASKID);
774 break;
775 case AC_PROC_CPU:
776 (void) ea_attach_item(record, &pu->pu_utimesec,
777 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_CPU_USER_SEC);
778 (void) ea_attach_item(record, &pu->pu_utimensec,
779 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_CPU_USER_NSEC);
780 (void) ea_attach_item(record, &pu->pu_stimesec,
781 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_CPU_SYS_SEC);
782 (void) ea_attach_item(record, &pu->pu_stimensec,
783 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_CPU_SYS_NSEC);
784 break;
785 case AC_PROC_TIME:
786 (void) ea_attach_item(record, &pu->pu_startsec,
787 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_START_SEC);
788 (void) ea_attach_item(record, &pu->pu_startnsec,
789 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_START_NSEC);
790 (void) ea_attach_item(record, &pu->pu_finishsec,
791 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_FINISH_SEC);
792 (void) ea_attach_item(record, &pu->pu_finishnsec,
793 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_FINISH_NSEC);
794 break;
795 case AC_PROC_COMMAND:
796 (void) ea_attach_item(record, pu->pu_command,
797 strlen(pu->pu_command) + 1, EXT_STRING | EXD_PROC_COMMAND);
798 break;
799 case AC_PROC_HOSTNAME:
800 (void) ea_attach_item(record, pu->pu_nodename,
801 strlen(pu->pu_nodename) + 1,
802 EXT_STRING | EXD_PROC_HOSTNAME);
803 break;
804 case AC_PROC_TTY:
805 (void) ea_attach_item(record, &pu->pu_major,
806 sizeof (uint32_t), EXT_UINT32 | EXD_PROC_TTY_MAJOR);
807 (void) ea_attach_item(record, &pu->pu_minor,
808 sizeof (uint32_t), EXT_UINT32 | EXD_PROC_TTY_MINOR);
809 break;
810 case AC_PROC_MICROSTATE:
811 (void) ea_attach_item(record, &pu->pu_majflt,
812 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_FAULTS_MAJOR);
813 (void) ea_attach_item(record, &pu->pu_minflt,
814 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_FAULTS_MINOR);
815 (void) ea_attach_item(record, &pu->pu_sndmsg,
816 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_MESSAGES_SND);
817 (void) ea_attach_item(record, &pu->pu_rcvmsg,
818 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_MESSAGES_RCV);
819 (void) ea_attach_item(record, &pu->pu_iblk,
820 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_BLOCKS_IN);
821 (void) ea_attach_item(record, &pu->pu_oblk,
822 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_BLOCKS_OUT);
823 (void) ea_attach_item(record, &pu->pu_ioch,
824 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_CHARS_RDWR);
825 (void) ea_attach_item(record, &pu->pu_vcsw,
826 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_CONTEXT_VOL);
827 (void) ea_attach_item(record, &pu->pu_icsw,
828 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_CONTEXT_INV);
829 (void) ea_attach_item(record, &pu->pu_nsig,
830 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_SIGNALS);
831 (void) ea_attach_item(record, &pu->pu_nswp,
832 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_SWAPS);
833 (void) ea_attach_item(record, &pu->pu_nscl,
834 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_SYSCALLS);
835 break;
836 case AC_PROC_ANCPID:
837 (void) ea_attach_item(record, &pu->pu_ancpid,
838 sizeof (uint32_t), EXT_UINT32 | EXD_PROC_ANCPID);
839 break;
840 case AC_PROC_WAIT_STATUS:
841 (void) ea_attach_item(record, &pu->pu_wstat,
842 sizeof (uint32_t), EXT_UINT32 | EXD_PROC_WAIT_STATUS);
843 break;
844 case AC_PROC_ZONENAME:
845 (void) ea_attach_item(record, pu->pu_zonename,
846 strlen(pu->pu_zonename) + 1,
847 EXT_STRING | EXD_PROC_ZONENAME);
848 break;
849 case AC_PROC_MEM:
850 (void) ea_attach_item(record, &pu->pu_mem_rss_avg,
851 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_MEM_RSS_AVG_K);
852 (void) ea_attach_item(record, &pu->pu_mem_rss_max,
853 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_MEM_RSS_MAX_K);
854 break;
855 default:
856 attached = 0;
857 }
858 return (attached);
859 }
860
861 static ea_object_t *
862 exacct_assemble_proc_record(proc_usage_t *pu, ulong_t *mask,
863 ea_catalog_t record_type)
864 {
865 int res, count;
866 ea_object_t *record;
867
868 /*
869 * Assemble usage values into group.
870 */
871 record = ea_alloc_group(EXT_GROUP | EXC_DEFAULT | record_type);
872 for (res = 1, count = 0; res <= AC_PROC_MAX_RES; res++)
873 if (BT_TEST(mask, res))
874 count += exacct_attach_proc_item(pu, record, res);
875 if (count == 0) {
876 ea_free_object(record, EUP_ALLOC);
877 record = NULL;
878 }
879 return (record);
880 }
881
882 /*
883 * The following two routines assume that the process's p_lock is held or
884 * that exacct_commit_proc() has been called from exit() with all LWPs stopped.
885 */
886 static void
887 exacct_calculate_proc_mstate(proc_t *p, proc_usage_t *pu)
888 {
889 kthread_t *t;
890
891 ASSERT(MUTEX_HELD(&p->p_lock));
892 if ((t = p->p_tlist) == NULL)
893 return;
894
895 do {
896 pu->pu_minflt += t->t_lwp->lwp_ru.minflt;
897 pu->pu_majflt += t->t_lwp->lwp_ru.majflt;
898 pu->pu_sndmsg += t->t_lwp->lwp_ru.msgsnd;
899 pu->pu_rcvmsg += t->t_lwp->lwp_ru.msgrcv;
900 pu->pu_ioch += t->t_lwp->lwp_ru.ioch;
901 pu->pu_iblk += t->t_lwp->lwp_ru.inblock;
902 pu->pu_oblk += t->t_lwp->lwp_ru.oublock;
903 pu->pu_vcsw += t->t_lwp->lwp_ru.nvcsw;
904 pu->pu_icsw += t->t_lwp->lwp_ru.nivcsw;
905 pu->pu_nsig += t->t_lwp->lwp_ru.nsignals;
906 pu->pu_nswp += t->t_lwp->lwp_ru.nswap;
907 pu->pu_nscl += t->t_lwp->lwp_ru.sysc;
908 } while ((t = t->t_forw) != p->p_tlist);
909 }
910
911 static void
912 exacct_copy_proc_mstate(proc_t *p, proc_usage_t *pu)
913 {
914 pu->pu_minflt = p->p_ru.minflt;
915 pu->pu_majflt = p->p_ru.majflt;
916 pu->pu_sndmsg = p->p_ru.msgsnd;
917 pu->pu_rcvmsg = p->p_ru.msgrcv;
918 pu->pu_ioch = p->p_ru.ioch;
919 pu->pu_iblk = p->p_ru.inblock;
920 pu->pu_oblk = p->p_ru.oublock;
921 pu->pu_vcsw = p->p_ru.nvcsw;
922 pu->pu_icsw = p->p_ru.nivcsw;
923 pu->pu_nsig = p->p_ru.nsignals;
924 pu->pu_nswp = p->p_ru.nswap;
925 pu->pu_nscl = p->p_ru.sysc;
926 }
927
928 void
929 exacct_calculate_proc_usage(proc_t *p, proc_usage_t *pu, ulong_t *mask,
930 int flag, int wstat)
931 {
932 timestruc_t ts, ts_run;
933
934 ASSERT(MUTEX_HELD(&p->p_lock));
935
936 /*
937 * Convert CPU and execution times to sec/nsec format.
938 */
939 if (BT_TEST(mask, AC_PROC_CPU)) {
940 hrt2ts(mstate_aggr_state(p, LMS_USER), &ts);
941 pu->pu_utimesec = (uint64_t)(ulong_t)ts.tv_sec;
942 pu->pu_utimensec = (uint64_t)(ulong_t)ts.tv_nsec;
943 hrt2ts(mstate_aggr_state(p, LMS_SYSTEM), &ts);
944 pu->pu_stimesec = (uint64_t)(ulong_t)ts.tv_sec;
945 pu->pu_stimensec = (uint64_t)(ulong_t)ts.tv_nsec;
946 }
947 if (BT_TEST(mask, AC_PROC_TIME)) {
948 gethrestime(&ts);
949 pu->pu_finishsec = (uint64_t)(ulong_t)ts.tv_sec;
950 pu->pu_finishnsec = (uint64_t)(ulong_t)ts.tv_nsec;
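/*
 * Derive the start time by subtracting the elapsed run time
 * (gethrtime() - p_mstart) from the current wall-clock time, then
 * normalize tv_nsec back into the range [0, NANOSEC).
 */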
951 hrt2ts(gethrtime() - p->p_mstart, &ts_run);
952 ts.tv_sec -= ts_run.tv_sec;
953 ts.tv_nsec -= ts_run.tv_nsec;
954 if (ts.tv_nsec < 0) {
955 ts.tv_sec--;
956 if ((ts.tv_nsec = ts.tv_nsec + NANOSEC) >= NANOSEC) {
957 ts.tv_sec++;
958 ts.tv_nsec -= NANOSEC;
959 }
960 }
961 pu->pu_startsec = (uint64_t)(ulong_t)ts.tv_sec;
962 pu->pu_startnsec = (uint64_t)(ulong_t)ts.tv_nsec;
963 }
964
965 pu->pu_pid = p->p_pidp->pid_id;
966 pu->pu_acflag = p->p_user.u_acflag;
967 pu->pu_projid = p->p_task->tk_proj->kpj_id;
968 pu->pu_taskid = p->p_task->tk_tkid;
969 pu->pu_major = getmajor(p->p_sessp->s_dev);
970 pu->pu_minor = getminor(p->p_sessp->s_dev);
971 pu->pu_ancpid = p->p_ancpid;
972 pu->pu_wstat = wstat;
973 /*
974 * Compute average RSS in K. The denominator is the number of
975 * samples: the number of clock ticks plus the initial value.
976 */
977 pu->pu_mem_rss_avg = (PTOU(p)->u_mem / (p->p_stime + p->p_utime + 1)) *
978 (PAGESIZE / 1024);
979 pu->pu_mem_rss_max = PTOU(p)->u_mem_max * (PAGESIZE / 1024);
980
981 mutex_enter(&p->p_crlock);
982 pu->pu_ruid = crgetruid(p->p_cred);
983 pu->pu_rgid = crgetrgid(p->p_cred);
984 mutex_exit(&p->p_crlock);
985
986 bcopy(p->p_user.u_comm, pu->pu_command, strlen(p->p_user.u_comm) + 1);
987 bcopy(p->p_zone->zone_name, pu->pu_zonename,
988 strlen(p->p_zone->zone_name) + 1);
989 bcopy(p->p_zone->zone_nodename, pu->pu_nodename,
990 strlen(p->p_zone->zone_nodename) + 1);
991
992 /*
993 * Calculate microstate accounting data for a process that is still
994 * running. Presently, we explicitly collect all of the LWP usage into
995 * the proc usage structure here.
996 */
997 if (flag & EW_PARTIAL)
998 exacct_calculate_proc_mstate(p, pu);
999 if (flag & EW_FINAL)
1000 exacct_copy_proc_mstate(p, pu);
1001 }
1002
1003 /*
1004 * int exacct_assemble_proc_usage(proc_usage_t *, int (*)(void *, size_t, void
1005 * *, size_t, size_t *), void *, size_t, size_t *)
1006 *
1007 * Overview
1008 * Assembles a record containing miscellaneous accounting information about
1009 * the process and executes the callback on it. It is the callback's job to
1010 * set "actual" to the size of the record.
1011 *
1012 * Return values
1013 * The result of the callback function, unless the extended process accounting
1014 * feature is not active, in which case ENOTACTIVE is returned.
1015 *
1016 * Caller's context
1017 * Suitable for KM_SLEEP allocations.
1018 */
1019 int
1020 exacct_assemble_proc_usage(ac_info_t *ac_proc, proc_usage_t *pu,
1021 int (*callback)(ac_info_t *, void *, size_t, void *, size_t, size_t *),
1022 void *ubuf, size_t ubufsize, size_t *actual, int flag)
1023 {
1024 ulong_t mask[AC_MASK_SZ];
1025 ea_object_t *proc_record;
1026 ea_catalog_t record_type;
1027 void *buf;
1028 size_t bufsize;
1029 int ret;
1030
1031 ASSERT(flag == EW_FINAL || flag == EW_PARTIAL);
1032
1033 mutex_enter(&ac_proc->ac_lock);
1034 if (ac_proc->ac_state == AC_OFF) {
1035 mutex_exit(&ac_proc->ac_lock);
1036 return (ENOTACTIVE);
1037 }
1038 bt_copy(&ac_proc->ac_mask[0], mask, AC_MASK_SZ);
1039 mutex_exit(&ac_proc->ac_lock);
1040
1041 switch (flag) {
1042 case EW_FINAL:
1043 record_type = EXD_GROUP_PROC;
1044 break;
1045 case EW_PARTIAL:
1046 record_type = EXD_GROUP_PROC_PARTIAL;
1047 break;
1048 }
1049
1050 proc_record = exacct_assemble_proc_record(pu, mask, record_type);
1051 if (proc_record == NULL)
1052 return (0);
1053
1054 /*
1055 * Pack object into buffer and pass to callback.
1056 */
1057 bufsize = ea_pack_object(proc_record, NULL, 0);
1058 buf = kmem_alloc(bufsize, KM_SLEEP);
1059 (void) ea_pack_object(proc_record, buf, bufsize);
1060
1061 ret = callback(ac_proc, ubuf, ubufsize, buf, bufsize, actual);
1062
1063 /*
1064 * Free all previous allocations.
1065 */
1066 kmem_free(buf, bufsize);
1067 ea_free_object(proc_record, EUP_ALLOC);
1068 return (ret);
1069 }
1070
1071 /*
1072 * int exacct_commit_callback(ac_info_t *, void *, size_t, void *, size_t,
1073 * size_t *)
1074 *
1075 * Overview
1076 * exacct_commit_callback() writes the indicated buffer to the indicated
1077 * extended accounting file.
1078 *
1079 * Return values
1080 * The result of the write operation is returned. "actual" is updated to
1081 * contain the number of bytes actually written.
1082 *
1083 * Caller's context
1084 * Suitable for a vn_rdwr() operation.
1085 */
1086 /*ARGSUSED*/
1087 int
1088 exacct_commit_callback(ac_info_t *info, void *ubuf, size_t ubufsize,
1089 void *buf, size_t bufsize, size_t *actual)
1090 {
1091 int error = 0;
1092
1093 *actual = 0;
1094 if ((error = exacct_vn_write(info, buf, bufsize)) == 0)
1095 *actual = bufsize;
1096 return (error);
1097 }
1098
1099 static void
1100 exacct_do_commit_proc(ac_info_t *ac_proc, proc_t *p, int wstat)
1101 {
1102 size_t size;
1103 proc_usage_t *pu;
1104 ulong_t mask[AC_MASK_SZ];
1105
1106 mutex_enter(&ac_proc->ac_lock);
1107 if (ac_proc->ac_state == AC_ON) {
1108 bt_copy(&ac_proc->ac_mask[0], mask, AC_MASK_SZ);
1109 mutex_exit(&ac_proc->ac_lock);
1110 } else {
1111 mutex_exit(&ac_proc->ac_lock);
1112 return;
1113 }
1114
1115 mutex_enter(&p->p_lock);
1116 size = strlen(p->p_user.u_comm) + 1;
1117 mutex_exit(&p->p_lock);
1118
1119 pu = kmem_alloc(sizeof (proc_usage_t), KM_SLEEP);
1120 pu->pu_command = kmem_alloc(size, KM_SLEEP);
1121 mutex_enter(&p->p_lock);
1122 exacct_calculate_proc_usage(p, pu, mask, EW_FINAL, wstat);
1123 mutex_exit(&p->p_lock);
1124
1125 (void) exacct_assemble_proc_usage(ac_proc, pu,
1126 exacct_commit_callback, NULL, 0, &size, EW_FINAL);
1127
1128 kmem_free(pu->pu_command, strlen(pu->pu_command) + 1);
1129 kmem_free(pu, sizeof (proc_usage_t));
1130 }
1131
1132 /*
1133 * void exacct_commit_proc(proc_t *, int)
1134 *
1135 * Overview
1136 * exacct_commit_proc() calculates the final usage for a process, updating the
1137 * task usage if task accounting is active, and writing a process record if
1138 * process accounting is active. exacct_commit_proc() is intended to be
1139 * called from proc_exit().
1140 *
1141 * Return values
1142 * None.
1143 *
1144 * Caller's context
1145 * Suitable for KM_SLEEP allocations. p_lock must not be held at entry.
1146 */
1147 void
1148 exacct_commit_proc(proc_t *p, int wstat)
1149 {
1150 zone_t *zone = p->p_zone;
1151 struct exacct_globals *acg, *gacg = NULL;
1152
1153 if (exacct_zone_key == ZONE_KEY_UNINITIALIZED) {
1154 /*
1155 * acctctl module not loaded. Nothing to do.
1156 */
1157 return;
1158 }
1159
1160 /*
1161 * If the acctctl module is loaded while the zone is in the down state,
1162 * zone_getspecific() can return NULL for that zone.
1163 */
1164 acg = zone_getspecific(exacct_zone_key, zone);
1165 if (acg == NULL)
1166 return;
1167 exacct_do_commit_proc(&acg->ac_proc, p, wstat);
1168 if (zone != global_zone) {
1169 gacg = zone_getspecific(exacct_zone_key, global_zone);
1170 exacct_do_commit_proc(&gacg->ac_proc, p, wstat);
1171 }
1172 }
1173
1174 static int
1175 exacct_attach_netstat_item(net_stat_t *ns, ea_object_t *record, int res)
1176 {
1177 int attached = 1;
1178
1179 switch (res) {
1180 case AC_NET_NAME:
1181 (void) ea_attach_item(record, ns->ns_name,
1182 strlen(ns->ns_name) + 1, EXT_STRING | EXD_NET_STATS_NAME);
1183 break;
1184 case AC_NET_CURTIME:
1185 {
1186 uint64_t now;
1187 timestruc_t ts;
1188
1189 gethrestime(&ts);
1190 now = (uint64_t)(ulong_t)ts.tv_sec;
1191 (void) ea_attach_item(record, &now, sizeof (uint64_t),
1192 EXT_UINT64 | EXD_NET_STATS_CURTIME);
1193 }
1194 break;
1195 case AC_NET_IBYTES:
1196 (void) ea_attach_item(record, &ns->ns_ibytes,
1197 sizeof (uint64_t), EXT_UINT64 | EXD_NET_STATS_IBYTES);
1198 break;
1199 case AC_NET_OBYTES:
1200 (void) ea_attach_item(record, &ns->ns_obytes,
1201 sizeof (uint64_t), EXT_UINT64 | EXD_NET_STATS_OBYTES);
1202 break;
1203 case AC_NET_IPKTS:
1204 (void) ea_attach_item(record, &ns->ns_ipackets,
1205 sizeof (uint64_t), EXT_UINT64 | EXD_NET_STATS_IPKTS);
1206 break;
1207 case AC_NET_OPKTS:
1208 (void) ea_attach_item(record, &ns->ns_opackets,
1209 sizeof (uint64_t), EXT_UINT64 | EXD_NET_STATS_OPKTS);
1210 break;
1211 case AC_NET_IERRPKTS:
1212 (void) ea_attach_item(record, &ns->ns_ierrors,
1213 sizeof (uint64_t), EXT_UINT64 | EXD_NET_STATS_IERRPKTS);
1214 break;
1215 case AC_NET_OERRPKTS:
1216 (void) ea_attach_item(record, &ns->ns_oerrors,
1217 sizeof (uint64_t), EXT_UINT64 | EXD_NET_STATS_OERRPKTS);
1218 break;
1219 default:
1220 attached = 0;
1221 }
1222 return (attached);
1223 }
1224
1225 static int
1226 exacct_attach_netdesc_item(net_desc_t *nd, ea_object_t *record, int res)
1227 {
1228 int attached = 1;
1229
1230 switch (res) {
1231 case AC_NET_NAME:
1232 (void) ea_attach_item(record, nd->nd_name,
1233 strlen(nd->nd_name) + 1, EXT_STRING | EXD_NET_DESC_NAME);
1234 break;
1235 case AC_NET_DEVNAME:
1236 (void) ea_attach_item(record, nd->nd_devname,
1237 strlen(nd->nd_devname) + 1, EXT_STRING |
1238 EXD_NET_DESC_DEVNAME);
1239 break;
1240 case AC_NET_EHOST:
1241 (void) ea_attach_item(record, &nd->nd_ehost,
1242 sizeof (nd->nd_ehost), EXT_RAW | EXD_NET_DESC_EHOST);
1243 break;
1244 case AC_NET_EDEST:
1245 (void) ea_attach_item(record, &nd->nd_edest,
1246 sizeof (nd->nd_edest), EXT_RAW | EXD_NET_DESC_EDEST);
1247 break;
1248 case AC_NET_VLAN_TPID:
1249 (void) ea_attach_item(record, &nd->nd_vlan_tpid,
1250 sizeof (ushort_t), EXT_UINT16 | EXD_NET_DESC_VLAN_TPID);
1251 break;
1252 case AC_NET_VLAN_TCI:
1253 (void) ea_attach_item(record, &nd->nd_vlan_tci,
1254 sizeof (ushort_t), EXT_UINT16 | EXD_NET_DESC_VLAN_TCI);
1255 break;
1256 case AC_NET_SAP:
1257 (void) ea_attach_item(record, &nd->nd_sap,
1258 sizeof (ushort_t), EXT_UINT16 | EXD_NET_DESC_SAP);
1259 break;
1260 case AC_NET_PRIORITY:
1261 (void) ea_attach_item(record, &nd->nd_priority,
1262 sizeof (ushort_t), EXT_UINT16 | EXD_NET_DESC_PRIORITY);
1263 break;
1264 case AC_NET_BWLIMIT:
1265 (void) ea_attach_item(record, &nd->nd_bw_limit,
1266 sizeof (uint64_t), EXT_UINT64 | EXD_NET_DESC_BWLIMIT);
1267 break;
1268 case AC_NET_SADDR:
1269 if (nd->nd_isv4) {
1270 (void) ea_attach_item(record, &nd->nd_saddr[3],
1271 sizeof (uint32_t), EXT_UINT32 |
1272 EXD_NET_DESC_V4SADDR);
1273 } else {
1274 (void) ea_attach_item(record, &nd->nd_saddr,
1275 sizeof (nd->nd_saddr), EXT_RAW |
1276 EXD_NET_DESC_V6SADDR);
1277 }
1278 break;
1279 case AC_NET_DADDR:
1280 if (nd->nd_isv4) {
1281 (void) ea_attach_item(record, &nd->nd_daddr[3],
1282 sizeof (uint32_t), EXT_UINT32 |
1283 EXD_NET_DESC_V4DADDR);
1284 } else {
1285 (void) ea_attach_item(record, &nd->nd_daddr,
1286 sizeof (nd->nd_daddr), EXT_RAW |
1287 EXD_NET_DESC_V6DADDR);
1288 }
1289 break;
1290 case AC_NET_SPORT:
1291 (void) ea_attach_item(record, &nd->nd_sport,
1292 sizeof (uint16_t), EXT_UINT16 | EXD_NET_DESC_SPORT);
1293 break;
1294 case AC_NET_DPORT:
1295 (void) ea_attach_item(record, &nd->nd_dport,
1296 sizeof (uint16_t), EXT_UINT16 | EXD_NET_DESC_DPORT);
1297 break;
1298 case AC_NET_PROTOCOL:
1299 (void) ea_attach_item(record, &nd->nd_protocol,
1300 sizeof (uint8_t), EXT_UINT8 | EXD_NET_DESC_PROTOCOL);
1301 break;
1302 case AC_NET_DSFIELD:
1303 (void) ea_attach_item(record, &nd->nd_dsfield,
1304 sizeof (uint8_t), EXT_UINT8 | EXD_NET_DESC_DSFIELD);
1305 break;
1306 default:
1307 attached = 0;
1308 }
1309 return (attached);
1310 }
1311
1312 static ea_object_t *
1313 exacct_assemble_net_record(void *ninfo, ulong_t *mask, ea_catalog_t record_type,
1314 int what)
1315 {
1316 int res;
1317 int count;
1318 ea_object_t *record;
1319
1320 /*
1321 * Assemble usage values into group.
1322 */
1323 record = ea_alloc_group(EXT_GROUP | EXC_DEFAULT | record_type);
1324 for (res = 1, count = 0; res <= AC_NET_MAX_RES; res++)
1325 if (BT_TEST(mask, res)) {
1326 if (what == EX_NET_LNDESC_REC ||
1327 what == EX_NET_FLDESC_REC) {
1328 count += exacct_attach_netdesc_item(
1329 (net_desc_t *)ninfo, record, res);
1330 } else {
1331 count += exacct_attach_netstat_item(
1332 (net_stat_t *)ninfo, record, res);
1333 }
1334 }
1335 if (count == 0) {
1336 ea_free_object(record, EUP_ALLOC);
1337 record = NULL;
1338 }
1339 return (record);
1340 }
1341
1342 int
1343 exacct_assemble_net_usage(ac_info_t *ac_net, void *ninfo,
1344 int (*callback)(ac_info_t *, void *, size_t, void *, size_t, size_t *),
1345 void *ubuf, size_t ubufsize, size_t *actual, int what)
1346 {
1347 ulong_t mask[AC_MASK_SZ];
1348 ea_object_t *net_desc;
1349 ea_catalog_t record_type;
1350 void *buf;
1351 size_t bufsize;
1352 int ret;
1353
1354 mutex_enter(&ac_net->ac_lock);
1355 if (ac_net->ac_state == AC_OFF) {
1356 mutex_exit(&ac_net->ac_lock);
1357 return (ENOTACTIVE);
1358 }
1359 bt_copy(&ac_net->ac_mask[0], mask, AC_MASK_SZ);
1360 mutex_exit(&ac_net->ac_lock);
1361
1362 switch (what) {
1363 case EX_NET_LNDESC_REC:
1364 record_type = EXD_GROUP_NET_LINK_DESC;
1365 break;
1366 case EX_NET_LNSTAT_REC:
1367 record_type = EXD_GROUP_NET_LINK_STATS;
1368 break;
1369 case EX_NET_FLDESC_REC:
1370 record_type = EXD_GROUP_NET_FLOW_DESC;
1371 break;
1372 case EX_NET_FLSTAT_REC:
1373 record_type = EXD_GROUP_NET_FLOW_STATS;
1374 break;
1375 }
1376
1377 net_desc = exacct_assemble_net_record(ninfo, mask, record_type, what);
1378 if (net_desc == NULL)
1379 return (0);
1380
1381 /*
1382 * Pack object into buffer and pass to callback.
1383 */
1384 bufsize = ea_pack_object(net_desc, NULL, 0);
1385 buf = kmem_alloc(bufsize, KM_NOSLEEP);
1386 if (buf == NULL)
1387 return (ENOMEM);
1388
1389 (void) ea_pack_object(net_desc, buf, bufsize);
1390
1391 ret = callback(ac_net, ubuf, ubufsize, buf, bufsize, actual);
1392
1393 /*
1394 * Free all previous allocations.
1395 */
1396 kmem_free(buf, bufsize);
1397 ea_free_object(net_desc, EUP_ALLOC);
1398 return (ret);
1399 }
1400
1401 int
1402 exacct_commit_netinfo(void *arg, int what)
1403 {
1404 size_t size;
1405 ulong_t mask[AC_MASK_SZ];
1406 struct exacct_globals *acg;
1407 ac_info_t *ac_net;
1408
1409 if (exacct_zone_key == ZONE_KEY_UNINITIALIZED) {
1410 /*
1411 * acctctl module not loaded. Nothing to do.
1412 */
1413 return (ENOTACTIVE);
1414 }
1415
1416 /*
1417 * Even though each zone nominally has its own flow accounting settings
1418 * (ac_flow), these are only maintained by and for the global zone.
1419 *
1420 * If this were to change in the future, this function should grow a
1421 * second zoneid (or zone) argument, and use the corresponding zone's
1422 * settings rather than always using those of the global zone.
1423 */
1424 acg = zone_getspecific(exacct_zone_key, global_zone);
1425 ac_net = &acg->ac_net;
1426
1427 mutex_enter(&ac_net->ac_lock);
1428 if (ac_net->ac_state == AC_OFF) {
1429 mutex_exit(&ac_net->ac_lock);
1430 return (ENOTACTIVE);
1431 }
1432 bt_copy(&ac_net->ac_mask[0], mask, AC_MASK_SZ);
1433 mutex_exit(&ac_net->ac_lock);
1434
1435 return (exacct_assemble_net_usage(ac_net, arg, exacct_commit_callback,
1436 NULL, 0, &size, what));
1437 }
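/*
 * A link or flow accounting client is expected to fill in a net_desc_t (or
 * net_stat_t) and hand it to exacct_commit_netinfo() with the matching record
 * type. A minimal sketch for a link descriptor (the field values to supply
 * are the caller's; only the call shape is shown):
 *
 *	net_desc_t nd;
 *
 *	bzero(&nd, sizeof (nd));
 *	... fill in nd_name, nd_devname, addresses, etc. ...
 *	(void) exacct_commit_netinfo(&nd, EX_NET_LNDESC_REC);
 */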
1438
1439 static int
1440 exacct_attach_flow_item(flow_usage_t *fu, ea_object_t *record, int res)
1441 {
1442 int attached = 1;
1443
1444 switch (res) {
1445 case AC_FLOW_SADDR:
1446 if (fu->fu_isv4) {
1447 (void) ea_attach_item(record, &fu->fu_saddr[3],
1448 sizeof (uint32_t), EXT_UINT32 | EXD_FLOW_V4SADDR);
1449 } else {
1450 (void) ea_attach_item(record, &fu->fu_saddr,
1451 sizeof (fu->fu_saddr), EXT_RAW |
1452 EXD_FLOW_V6SADDR);
1453 }
1454 break;
1455 case AC_FLOW_DADDR:
1456 if (fu->fu_isv4) {
1457 (void) ea_attach_item(record, &fu->fu_daddr[3],
1458 sizeof (uint32_t), EXT_UINT32 | EXD_FLOW_V4DADDR);
1459 } else {
1460 (void) ea_attach_item(record, &fu->fu_daddr,
1461 sizeof (fu->fu_daddr), EXT_RAW |
1462 EXD_FLOW_V6DADDR);
1463 }
1464 break;
1465 case AC_FLOW_SPORT:
1466 (void) ea_attach_item(record, &fu->fu_sport,
1467 sizeof (uint16_t), EXT_UINT16 | EXD_FLOW_SPORT);
1468 break;
1469 case AC_FLOW_DPORT:
1470 (void) ea_attach_item(record, &fu->fu_dport,
1471 sizeof (uint16_t), EXT_UINT16 | EXD_FLOW_DPORT);
1472 break;
1473 case AC_FLOW_PROTOCOL:
1474 (void) ea_attach_item(record, &fu->fu_protocol,
1475 sizeof (uint8_t), EXT_UINT8 | EXD_FLOW_PROTOCOL);
1476 break;
1477 case AC_FLOW_DSFIELD:
1478 (void) ea_attach_item(record, &fu->fu_dsfield,
1479 sizeof (uint8_t), EXT_UINT8 | EXD_FLOW_DSFIELD);
1480 break;
1481 case AC_FLOW_CTIME:
1482 (void) ea_attach_item(record, &fu->fu_ctime,
1483 sizeof (uint64_t), EXT_UINT64 | EXD_FLOW_CTIME);
1484 break;
1485 case AC_FLOW_LSEEN:
1486 (void) ea_attach_item(record, &fu->fu_lseen,
1487 sizeof (uint64_t), EXT_UINT64 | EXD_FLOW_LSEEN);
1488 break;
1489 case AC_FLOW_NBYTES:
1490 (void) ea_attach_item(record, &fu->fu_nbytes,
1491 sizeof (uint64_t), EXT_UINT32 | EXD_FLOW_NBYTES);
1492 break;
1493 case AC_FLOW_NPKTS:
1494 (void) ea_attach_item(record, &fu->fu_npackets,
1495 sizeof (uint64_t), EXT_UINT32 | EXD_FLOW_NPKTS);
1496 break;
1497 case AC_FLOW_PROJID:
1498 if (fu->fu_projid >= 0) {
1499 (void) ea_attach_item(record, &fu->fu_projid,
1500 sizeof (uint32_t), EXT_UINT32 | EXD_FLOW_PROJID);
1501 }
1502 break;
1503 case AC_FLOW_UID:
1504 if (fu->fu_userid >= 0) {
1505 (void) ea_attach_item(record, &fu->fu_userid,
1506 sizeof (uint32_t), EXT_UINT32 | EXD_FLOW_UID);
1507 }
1508 break;
1509 case AC_FLOW_ANAME:
1510 (void) ea_attach_item(record, fu->fu_aname,
1511 strlen(fu->fu_aname) + 1, EXT_STRING | EXD_FLOW_ANAME);
1512 break;
1513 default:
1514 attached = 0;
1515 }
1516 return (attached);
1517 }
1518
1519 static ea_object_t *
1520 exacct_assemble_flow_record(flow_usage_t *fu, ulong_t *mask,
1521 ea_catalog_t record_type)
1522 {
1523 int res, count;
1524 ea_object_t *record;
1525
1526 /*
1527 * Assemble usage values into group.
1528 */
1529 record = ea_alloc_group(EXT_GROUP | EXC_DEFAULT | record_type);
1530 for (res = 1, count = 0; res <= AC_FLOW_MAX_RES; res++)
1531 if (BT_TEST(mask, res))
1532 count += exacct_attach_flow_item(fu, record, res);
1533 if (count == 0) {
1534 ea_free_object(record, EUP_ALLOC);
1535 record = NULL;
1536 }
1537 return (record);
1538 }
1539
1540 int
1541 exacct_assemble_flow_usage(ac_info_t *ac_flow, flow_usage_t *fu,
1542 int (*callback)(ac_info_t *, void *, size_t, void *, size_t, size_t *),
1543 void *ubuf, size_t ubufsize, size_t *actual)
1544 {
1545 ulong_t mask[AC_MASK_SZ];
1546 ea_object_t *flow_usage;
1547 ea_catalog_t record_type;
1548 void *buf;
1549 size_t bufsize;
1550 int ret;
1551
1552 mutex_enter(&ac_flow->ac_lock);
1553 if (ac_flow->ac_state == AC_OFF) {
1554 mutex_exit(&ac_flow->ac_lock);
1555 return (ENOTACTIVE);
1556 }
1557 bt_copy(&ac_flow->ac_mask[0], mask, AC_MASK_SZ);
1558 mutex_exit(&ac_flow->ac_lock);
1559
1560 record_type = EXD_GROUP_FLOW;
1561
1562 flow_usage = exacct_assemble_flow_record(fu, mask, record_type);
1563 if (flow_usage == NULL) {
1564 return (0);
1565 }
1566
1567 /*
1568 * Pack object into buffer and pass to callback.
1569 */
1570 bufsize = ea_pack_object(flow_usage, NULL, 0);
1571 buf = kmem_alloc(bufsize, KM_NOSLEEP);
1572 if (buf == NULL) {
1573 return (ENOMEM);
1574 }
1575
1576 (void) ea_pack_object(flow_usage, buf, bufsize);
1577
1578 ret = callback(ac_flow, ubuf, ubufsize, buf, bufsize, actual);
1579
1580 /*
1581 * Free all previous allocations.
1582 */
1583 kmem_free(buf, bufsize);
1584 ea_free_object(flow_usage, EUP_ALLOC);
1585 return (ret);
1586 }
1587
1588 void
1589 exacct_commit_flow(void *arg)
1590 {
1591 flow_usage_t *f = (flow_usage_t *)arg;
1592 size_t size;
1593 ulong_t mask[AC_MASK_SZ];
1594 struct exacct_globals *acg;
1595 ac_info_t *ac_flow;
1596
1597 if (exacct_zone_key == ZONE_KEY_UNINITIALIZED) {
1598 /*
1599 * acctctl module not loaded. Nothing to do.
1600 */
1601 return;
1602 }
1603
1604 /*
1605 * Even though each zone nominally has its own flow accounting settings
1606 * (ac_flow), these are only maintained by and for the global zone.
1607 *
1608 * If this were to change in the future, this function should grow a
1609 * second zoneid (or zone) argument, and use the corresponding zone's
1610 * settings rather than always using those of the global zone.
1611 */
1612 acg = zone_getspecific(exacct_zone_key, global_zone);
1613 ac_flow = &acg->ac_flow;
1614
1615 mutex_enter(&ac_flow->ac_lock);
1616 if (ac_flow->ac_state == AC_OFF) {
1617 mutex_exit(&ac_flow->ac_lock);
1618 return;
1619 }
1620 bt_copy(&ac_flow->ac_mask[0], mask, AC_MASK_SZ);
1621 mutex_exit(&ac_flow->ac_lock);
1622
1623 (void) exacct_assemble_flow_usage(ac_flow, f, exacct_commit_callback,
1624 NULL, 0, &size);
1625 }
1626
1627 /*
1628 * int exacct_tag_task(task_t *, void *, size_t, int)
1629 *
1630 * Overview
1631 * exacct_tag_task() provides the exacct record construction and writing
1632 * support required by putacct(2) for task entities.
1633 *
1634 * Return values
1635 * The result of the write operation is returned, unless the extended
1636 * accounting facility is not active, in which case ENOTACTIVE is returned.
1637 *
1638 * Caller's context
1639 * Suitable for KM_SLEEP allocations.
1640 */
1641 int
1642 exacct_tag_task(ac_info_t *ac_task, task_t *tk, void *ubuf, size_t ubufsz,
1643 int flags)
1644 {
1645 int error = 0;
1646 void *buf;
1647 size_t bufsize;
1648 ea_catalog_t cat;
1649 ea_object_t *tag;
1650
1651 mutex_enter(&ac_task->ac_lock);
1652 if (ac_task->ac_state == AC_OFF || ac_task->ac_vnode == NULL) {
1653 mutex_exit(&ac_task->ac_lock);
1654 return (ENOTACTIVE);
1655 }
1656 mutex_exit(&ac_task->ac_lock);
1657
1658 tag = ea_alloc_group(EXT_GROUP | EXC_DEFAULT | EXD_GROUP_TASK_TAG);
1659 (void) ea_attach_item(tag, &tk->tk_tkid, 0,
1660 EXT_UINT32 | EXC_DEFAULT | EXD_TASK_TASKID);
1661 (void) ea_attach_item(tag, tk->tk_zone->zone_nodename, 0,
1662 EXT_STRING | EXC_DEFAULT | EXD_TASK_HOSTNAME);
1663 if (flags == EP_RAW)
1664 cat = EXT_RAW | EXC_DEFAULT | EXD_TASK_TAG;
1665 else
1666 cat = EXT_EXACCT_OBJECT | EXC_DEFAULT | EXD_TASK_TAG;
1667 (void) ea_attach_item(tag, ubuf, ubufsz, cat);
1668
1669 bufsize = ea_pack_object(tag, NULL, 0);
1670 buf = kmem_alloc(bufsize, KM_SLEEP);
1671 (void) ea_pack_object(tag, buf, bufsize);
1672 error = exacct_vn_write(ac_task, buf, bufsize);
1673 kmem_free(buf, bufsize);
1674 ea_free_object(tag, EUP_ALLOC);
1675 return (error);
1676 }
1677
1678 /*
1679 * exacct_tag_proc(pid_t, taskid_t, void *, size_t, int, char *)
1680 *
1681 * Overview
1682 * exacct_tag_proc() provides the exacct record construction and writing
1683 * support required by putacct(2) for processes.
1684 *
1685 * Return values
1686 * The result of the write operation is returned, unless the extended
1687 * accounting facility is not active, in which case ENOTACTIVE is returned.
1688 *
1689 * Caller's context
1690 * Suitable for KM_SLEEP allocations.
1691 */
1692 int
1693 exacct_tag_proc(ac_info_t *ac_proc, pid_t pid, taskid_t tkid, void *ubuf,
1694 size_t ubufsz, int flags, const char *hostname)
1695 {
1696 int error = 0;
1697 void *buf;
1698 size_t bufsize;
1699 ea_catalog_t cat;
1700 ea_object_t *tag;
1701
1702 mutex_enter(&ac_proc->ac_lock);
1703 if (ac_proc->ac_state == AC_OFF || ac_proc->ac_vnode == NULL) {
1704 mutex_exit(&ac_proc->ac_lock);
1705 return (ENOTACTIVE);
1706 }
1707 mutex_exit(&ac_proc->ac_lock);
1708
1709 tag = ea_alloc_group(EXT_GROUP | EXC_DEFAULT | EXD_GROUP_PROC_TAG);
1710 (void) ea_attach_item(tag, &pid, sizeof (uint32_t),
1711 EXT_UINT32 | EXC_DEFAULT | EXD_PROC_PID);
1712 (void) ea_attach_item(tag, &tkid, 0,
1713 EXT_UINT32 | EXC_DEFAULT | EXD_TASK_TASKID);
1714 (void) ea_attach_item(tag, (void *)hostname, 0,
1715 EXT_STRING | EXC_DEFAULT | EXD_TASK_HOSTNAME);
1716 if (flags == EP_RAW)
1717 cat = EXT_RAW | EXC_DEFAULT | EXD_PROC_TAG;
1718 else
1719 cat = EXT_EXACCT_OBJECT | EXC_DEFAULT | EXD_PROC_TAG;
1720 (void) ea_attach_item(tag, ubuf, ubufsz, cat);
1721
1722 bufsize = ea_pack_object(tag, NULL, 0);
1723 buf = kmem_alloc(bufsize, KM_SLEEP);
1724 (void) ea_pack_object(tag, buf, bufsize);
1725 error = exacct_vn_write(ac_proc, buf, bufsize);
1726 kmem_free(buf, bufsize);
1727 ea_free_object(tag, EUP_ALLOC);
1728 return (error);
1729 }
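/*
 * putacct(2) ultimately lands in one of the two tagging routines above. A
 * sketch of the task case, assuming the caller has already looked up the
 * zone's task accounting state (acg) and copied the user payload into
 * kbuf/kbufsz:
 *
 *	error = exacct_tag_task(&acg->ac_task, tk, kbuf, kbufsz, EP_RAW);
 *
 * With EP_RAW the payload is recorded as an opaque EXT_RAW item; otherwise it
 * is recorded as an embedded exacct object (EXT_EXACCT_OBJECT).
 */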
1730
1731 /*
1732 * void exacct_init(void)
1733 *
1734 * Overview
1735 * Initializes the extended accounting subsystem.
1736 *
1737 * Return values
1738 * None.
1739 *
1740 * Caller's context
1741 * Suitable for KM_SLEEP allocations.
1742 */
1743 void
1744 exacct_init()
1745 {
1746 exacct_queue = system_taskq;
1747 exacct_object_cache = kmem_cache_create("exacct_object_cache",
1748 sizeof (ea_object_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
1749 task_commit_thread_init();
1750 }
1751
1752 /*
1753 * exacct_snapshot_proc_mstate() copies a process's microstate accounting data
1754 * and resource usage counters into a given task_usage_t. It differs from
1755 * exacct_copy_proc_mstate() in that here a) we are copying to a task_usage_t,
1756 * b) p_lock will have been acquired earlier in the call path and c) we also
1757 * include the process's user and system times.
1758 */
1759 static void
1760 exacct_snapshot_proc_mstate(proc_t *p, task_usage_t *tu)
1761 {
1762 tu->tu_utime = mstate_aggr_state(p, LMS_USER);
1763 tu->tu_stime = mstate_aggr_state(p, LMS_SYSTEM);
1764 tu->tu_minflt = p->p_ru.minflt;
1765 tu->tu_majflt = p->p_ru.majflt;
1766 tu->tu_sndmsg = p->p_ru.msgsnd;
1767 tu->tu_rcvmsg = p->p_ru.msgrcv;
1768 tu->tu_ioch = p->p_ru.ioch;
1769 tu->tu_iblk = p->p_ru.inblock;
1770 tu->tu_oblk = p->p_ru.oublock;
1771 tu->tu_vcsw = p->p_ru.nvcsw;
1772 tu->tu_icsw = p->p_ru.nivcsw;
1773 tu->tu_nsig = p->p_ru.nsignals;
1774 tu->tu_nswp = p->p_ru.nswap;
1775 tu->tu_nscl = p->p_ru.sysc;
1776 }
1777
1778 /*
1779 * void exacct_move_mstate(proc_t *, task_t *, task_t *)
1780 *
1781 * Overview
1782 * exacct_move_mstate() is called by task_change() and accounts for
1783 * a process's resource usage when it is moved from one task to another.
1784 *
1785 * The process's usage at this point is recorded in the new task so
1786 * that it can be excluded from the calculation of resources consumed
1787 * by that task.
1788 *
1789 * The resource usage inherited by the new task is also added to the
1790 * aggregate maintained by the old task for processes that have exited.
1791 *
1792 * Return values
1793 * None.
1794 *
1795 * Caller's context
1796 * pidlock and p_lock held across exacct_move_mstate().
1797 */
1798 void
1799 exacct_move_mstate(proc_t *p, task_t *oldtk, task_t *newtk)
1800 {
1801 task_usage_t tu;
1802
1803 /* Take a snapshot of this process's mstate and RU counters */
1804 exacct_snapshot_proc_mstate(p, &tu);
1805
1806 /*
1807 * Use the snapshot to increment the aggregate usage of the old
1808 * task, and the inherited usage of the new one.
1809 */
1810 mutex_enter(&oldtk->tk_usage_lock);
1811 exacct_add_task_mstate(oldtk->tk_usage, &tu);
1812 mutex_exit(&oldtk->tk_usage_lock);
1813 mutex_enter(&newtk->tk_usage_lock);
1814 exacct_add_task_mstate(newtk->tk_inherited, &tu);
1815 mutex_exit(&newtk->tk_usage_lock);
1816 }
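/*
 * For example (illustrative numbers): a process that has already accumulated
 * 5 seconds of user time is moved to a new task. Those 5 seconds are added
 * both to the old task's tk_usage, so the old task's totals reflect the work
 * done while the process belonged to it, and to the new task's tk_inherited,
 * so that exacct_sub_task_mstate() can remove them again when the new task's
 * partial, interval, or final records are assembled.
 */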
1817