1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24
25 #include <sys/exacct.h>
26 #include <sys/exacct_catalog.h>
27 #include <sys/disp.h>
28 #include <sys/task.h>
29 #include <sys/proc.h>
30 #include <sys/cmn_err.h>
31 #include <sys/kmem.h>
32 #include <sys/project.h>
33 #include <sys/systm.h>
34 #include <sys/vnode.h>
35 #include <sys/file.h>
36 #include <sys/acctctl.h>
37 #include <sys/time.h>
38 #include <sys/utsname.h>
39 #include <sys/session.h>
40 #include <sys/sysmacros.h>
41 #include <sys/bitmap.h>
42 #include <sys/msacct.h>
43
44 /*
45 * exacct usage and recording routines
46 *
47 * wracct(2), getacct(2), and the records written at process or task
48 * termination are constructed using the exacct_assemble_[task,proc]_usage()
49 * functions, which take a callback that takes the appropriate action on
50 * the packed exacct record for the task or process. For the process-related
51 * actions, we partition the routines such that the data collecting component
52 * can be performed while holding p_lock, and all sleeping or blocking
53 * operations can be performed without acquiring p_lock.
54 *
55 * putacct(2), which allows an application to construct a customized record
56 * associated with an existing process or task, has its own entry points:
57 * exacct_tag_task() and exacct_tag_proc().
58 */
59
/* Task queue onto which final task-accounting work is dispatched. */
taskq_t *exacct_queue;
/* Kmem cache backing the ea_object_t allocations made by this subsystem. */
kmem_cache_t *exacct_object_cache;

/* Zone key for per-zone accounting state; set when the acctctl module loads. */
zone_key_t exacct_zone_key = ZONE_KEY_UNINITIALIZED;

/* Constant fields written into the exacct file header record. */
static const uint32_t exacct_version = EXACCT_VERSION;
static const char exacct_header[] = "exacct";
static const char exacct_creator[] = "SunOS";
68
69 ea_object_t *
ea_alloc_item(ea_catalog_t catalog,void * buf,size_t bufsz)70 ea_alloc_item(ea_catalog_t catalog, void *buf, size_t bufsz)
71 {
72 ea_object_t *item;
73
74 item = kmem_cache_alloc(exacct_object_cache, KM_SLEEP);
75 bzero(item, sizeof (ea_object_t));
76 (void) ea_set_item(item, catalog, buf, bufsz);
77 return (item);
78 }
79
80 ea_object_t *
ea_alloc_group(ea_catalog_t catalog)81 ea_alloc_group(ea_catalog_t catalog)
82 {
83 ea_object_t *group;
84
85 group = kmem_cache_alloc(exacct_object_cache, KM_SLEEP);
86 bzero(group, sizeof (ea_object_t));
87 (void) ea_set_group(group, catalog);
88 return (group);
89 }
90
91 ea_object_t *
ea_attach_item(ea_object_t * grp,void * buf,size_t bufsz,ea_catalog_t catalog)92 ea_attach_item(ea_object_t *grp, void *buf, size_t bufsz, ea_catalog_t catalog)
93 {
94 ea_object_t *item;
95
96 item = ea_alloc_item(catalog, buf, bufsz);
97 (void) ea_attach_to_group(grp, item);
98 return (item);
99 }
100
101 /*
102 * exacct_add_task_mstate() and exacct_sub_task_mstate() add and subtract
103 * microstate accounting data and resource usage counters from one task_usage_t
104 * from those supplied in another. These functions do not operate on *all*
105 * members of a task_usage_t: for some (e.g. tu_anctaskid) it would not make
106 * sense.
107 */
108 static void
exacct_add_task_mstate(task_usage_t * tu,task_usage_t * delta)109 exacct_add_task_mstate(task_usage_t *tu, task_usage_t *delta)
110 {
111 tu->tu_utime += delta->tu_utime;
112 tu->tu_stime += delta->tu_stime;
113 tu->tu_minflt += delta->tu_minflt;
114 tu->tu_majflt += delta->tu_majflt;
115 tu->tu_sndmsg += delta->tu_sndmsg;
116 tu->tu_rcvmsg += delta->tu_rcvmsg;
117 tu->tu_ioch += delta->tu_ioch;
118 tu->tu_iblk += delta->tu_iblk;
119 tu->tu_oblk += delta->tu_oblk;
120 tu->tu_vcsw += delta->tu_vcsw;
121 tu->tu_icsw += delta->tu_icsw;
122 tu->tu_nsig += delta->tu_nsig;
123 tu->tu_nswp += delta->tu_nswp;
124 tu->tu_nscl += delta->tu_nscl;
125 }
126
127 /*
128 * See the comments for exacct_add_task_mstate(), above.
129 */
130 static void
exacct_sub_task_mstate(task_usage_t * tu,task_usage_t * delta)131 exacct_sub_task_mstate(task_usage_t *tu, task_usage_t *delta)
132 {
133 tu->tu_utime -= delta->tu_utime;
134 tu->tu_stime -= delta->tu_stime;
135 tu->tu_minflt -= delta->tu_minflt;
136 tu->tu_majflt -= delta->tu_majflt;
137 tu->tu_sndmsg -= delta->tu_sndmsg;
138 tu->tu_rcvmsg -= delta->tu_rcvmsg;
139 tu->tu_ioch -= delta->tu_ioch;
140 tu->tu_iblk -= delta->tu_iblk;
141 tu->tu_oblk -= delta->tu_oblk;
142 tu->tu_vcsw -= delta->tu_vcsw;
143 tu->tu_icsw -= delta->tu_icsw;
144 tu->tu_nsig -= delta->tu_nsig;
145 tu->tu_nswp -= delta->tu_nswp;
146 tu->tu_nscl -= delta->tu_nscl;
147 }
148
149 /*
150 * Wrapper for vn_rdwr() used by exacct_vn_write() and exacct_write_header()
151 * to write to the accounting file without corrupting it in case of an I/O or
152 * filesystem error.
153 */
154 static int
exacct_vn_write_impl(ac_info_t * info,void * buf,ssize_t bufsize)155 exacct_vn_write_impl(ac_info_t *info, void *buf, ssize_t bufsize)
156 {
157 int error;
158 ssize_t resid;
159 struct vattr va;
160
161 ASSERT(info != NULL);
162 ASSERT(info->ac_vnode != NULL);
163 ASSERT(MUTEX_HELD(&info->ac_lock));
164
165 /*
166 * Save the size. If vn_rdwr fails, reset the size to avoid corrupting
167 * the present accounting file.
168 */
169 va.va_mask = AT_SIZE;
170 error = VOP_GETATTR(info->ac_vnode, &va, 0, kcred, NULL);
171 if (error == 0) {
172 error = vn_rdwr(UIO_WRITE, info->ac_vnode, (caddr_t)buf,
173 bufsize, 0LL, UIO_SYSSPACE, FAPPEND, (rlim64_t)MAXOFFSET_T,
174 kcred, &resid);
175 if (error) {
176 (void) VOP_SETATTR(info->ac_vnode, &va, 0, kcred, NULL);
177 } else if (resid != 0) {
178 (void) VOP_SETATTR(info->ac_vnode, &va, 0, kcred, NULL);
179 error = ENOSPC;
180 }
181 }
182 return (error);
183 }
184
185 /*
186 * exacct_vn_write() safely writes to an accounting file. acctctl() prevents
187 * the two accounting vnodes from being equal, and the appropriate ac_lock is
188 * held across the call, so we're single threaded through this code for each
189 * file.
190 */
191 static int
exacct_vn_write(ac_info_t * info,void * buf,ssize_t bufsize)192 exacct_vn_write(ac_info_t *info, void *buf, ssize_t bufsize)
193 {
194 int error;
195
196 if (info == NULL)
197 return (0);
198
199 mutex_enter(&info->ac_lock);
200
201 /*
202 * Don't do anything unless accounting file is set.
203 */
204 if (info->ac_vnode == NULL) {
205 mutex_exit(&info->ac_lock);
206 return (0);
207 }
208 error = exacct_vn_write_impl(info, buf, bufsize);
209 mutex_exit(&info->ac_lock);
210
211 return (error);
212 }
213
214 /*
215 * void *exacct_create_header(size_t *)
216 *
217 * Overview
218 * exacct_create_header() constructs an exacct file header identifying the
219 * accounting file as the output of the kernel. exacct_create_header() and
220 * the static write_header() and verify_header() routines in libexacct must
221 * remain synchronized.
222 *
223 * Return values
224 * A pointer to a packed exacct buffer containing the appropriate header is
225 * returned; the size of the buffer is placed in the location indicated by
226 * sizep.
227 *
228 * Caller's context
229 * Suitable for KM_SLEEP allocations.
230 */
231 void *
exacct_create_header(size_t * sizep)232 exacct_create_header(size_t *sizep)
233 {
234 ea_object_t *hdr_grp;
235 uint32_t bskip;
236 void *buf;
237 size_t bufsize;
238
239 hdr_grp = ea_alloc_group(EXT_GROUP | EXC_DEFAULT | EXD_GROUP_HEADER);
240 (void) ea_attach_item(hdr_grp, (void *)&exacct_version, 0,
241 EXT_UINT32 | EXC_DEFAULT | EXD_VERSION);
242 (void) ea_attach_item(hdr_grp, (void *)exacct_header, 0,
243 EXT_STRING | EXC_DEFAULT | EXD_FILETYPE);
244 (void) ea_attach_item(hdr_grp, (void *)exacct_creator, 0,
245 EXT_STRING | EXC_DEFAULT | EXD_CREATOR);
246 (void) ea_attach_item(hdr_grp, uts_nodename(), 0,
247 EXT_STRING | EXC_DEFAULT | EXD_HOSTNAME);
248
249 bufsize = ea_pack_object(hdr_grp, NULL, 0);
250 buf = kmem_alloc(bufsize, KM_SLEEP);
251 (void) ea_pack_object(hdr_grp, buf, bufsize);
252 ea_free_object(hdr_grp, EUP_ALLOC);
253
254 /*
255 * To prevent reading the header when reading the file backwards,
256 * set the large backskip of the header group to 0 (last 4 bytes).
257 */
258 bskip = 0;
259 exacct_order32(&bskip);
260 bcopy(&bskip, (char *)buf + bufsize - sizeof (bskip),
261 sizeof (bskip));
262
263 *sizep = bufsize;
264 return (buf);
265 }
266
267 /*
268 * int exacct_write_header(ac_info_t *, void *, size_t)
269 *
270 * Overview
271 * exacct_write_header() writes the given header buffer to the indicated
272 * vnode.
273 *
274 * Return values
275 * The result of the write operation is returned.
276 *
277 * Caller's context
278 * Caller must hold the ac_lock of the appropriate accounting file
279 * information block (ac_info_t).
280 */
281 int
exacct_write_header(ac_info_t * info,void * hdr,size_t hdrsize)282 exacct_write_header(ac_info_t *info, void *hdr, size_t hdrsize)
283 {
284 if (info != NULL && info->ac_vnode != NULL)
285 return (exacct_vn_write_impl(info, hdr, hdrsize));
286
287 return (0);
288 }
289
290 static void
exacct_get_interval_task_usage(task_t * tk,task_usage_t * tu,task_usage_t ** tu_buf)291 exacct_get_interval_task_usage(task_t *tk, task_usage_t *tu,
292 task_usage_t **tu_buf)
293 {
294 task_usage_t *oldtu, *newtu;
295 task_usage_t **prevusage;
296
297 ASSERT(MUTEX_HELD(&tk->tk_usage_lock));
298 if (getzoneid() != GLOBAL_ZONEID) {
299 prevusage = &tk->tk_zoneusage;
300 } else {
301 prevusage = &tk->tk_prevusage;
302 }
303 if ((oldtu = *prevusage) != NULL) {
304 /*
305 * In case we have any accounting information
306 * saved from the previous interval record.
307 */
308 newtu = *tu_buf;
309 bcopy(tu, newtu, sizeof (task_usage_t));
310 tu->tu_minflt -= oldtu->tu_minflt;
311 tu->tu_majflt -= oldtu->tu_majflt;
312 tu->tu_sndmsg -= oldtu->tu_sndmsg;
313 tu->tu_rcvmsg -= oldtu->tu_rcvmsg;
314 tu->tu_ioch -= oldtu->tu_ioch;
315 tu->tu_iblk -= oldtu->tu_iblk;
316 tu->tu_oblk -= oldtu->tu_oblk;
317 tu->tu_vcsw -= oldtu->tu_vcsw;
318 tu->tu_icsw -= oldtu->tu_icsw;
319 tu->tu_nsig -= oldtu->tu_nsig;
320 tu->tu_nswp -= oldtu->tu_nswp;
321 tu->tu_nscl -= oldtu->tu_nscl;
322 tu->tu_utime -= oldtu->tu_utime;
323 tu->tu_stime -= oldtu->tu_stime;
324
325 tu->tu_startsec = oldtu->tu_finishsec;
326 tu->tu_startnsec = oldtu->tu_finishnsec;
327 /*
328 * Copy the data from our temporary storage to the task's
329 * previous interval usage structure for future reference.
330 */
331 bcopy(newtu, oldtu, sizeof (task_usage_t));
332 } else {
333 /*
334 * Store current statistics in the task's previous interval
335 * usage structure for future references.
336 */
337 *prevusage = *tu_buf;
338 bcopy(tu, *prevusage, sizeof (task_usage_t));
339 *tu_buf = NULL;
340 }
341 }
342
343 static void
exacct_snapshot_task_usage(task_t * tk,task_usage_t * tu)344 exacct_snapshot_task_usage(task_t *tk, task_usage_t *tu)
345 {
346 timestruc_t ts;
347 proc_t *p;
348
349 ASSERT(MUTEX_HELD(&pidlock));
350
351 if ((p = tk->tk_memb_list) == NULL)
352 return;
353
354 /*
355 * exacct_snapshot_task_usage() provides an approximate snapshot of the
356 * usage of the potentially many members of the task. Since we don't
357 * guarantee exactness, we don't acquire the p_lock of any of the member
358 * processes.
359 */
360 do {
361 mutex_enter(&p->p_lock);
362 tu->tu_utime += mstate_aggr_state(p, LMS_USER);
363 tu->tu_stime += mstate_aggr_state(p, LMS_SYSTEM);
364 mutex_exit(&p->p_lock);
365 tu->tu_minflt += p->p_ru.minflt;
366 tu->tu_majflt += p->p_ru.majflt;
367 tu->tu_sndmsg += p->p_ru.msgsnd;
368 tu->tu_rcvmsg += p->p_ru.msgrcv;
369 tu->tu_ioch += p->p_ru.ioch;
370 tu->tu_iblk += p->p_ru.inblock;
371 tu->tu_oblk += p->p_ru.oublock;
372 tu->tu_vcsw += p->p_ru.nvcsw;
373 tu->tu_icsw += p->p_ru.nivcsw;
374 tu->tu_nsig += p->p_ru.nsignals;
375 tu->tu_nswp += p->p_ru.nswap;
376 tu->tu_nscl += p->p_ru.sysc;
377 } while ((p = p->p_tasknext) != tk->tk_memb_list);
378
379 /*
380 * The resource usage accounted for so far will include that
381 * contributed by the task's first process. If this process
382 * came from another task, then its accumulated resource usage
383 * will include a contribution from work performed there.
384 * We must therefore subtract any resource usage that was
385 * inherited with the first process.
386 */
387 exacct_sub_task_mstate(tu, tk->tk_inherited);
388
389 gethrestime(&ts);
390 tu->tu_finishsec = (uint64_t)(ulong_t)ts.tv_sec;
391 tu->tu_finishnsec = (uint64_t)(ulong_t)ts.tv_nsec;
392 }
393
394 /*
395 * void exacct_update_task_mstate(proc_t *)
396 *
397 * Overview
398 * exacct_update_task_mstate() updates the task usage; it is intended
399 * to be called from proc_exit().
400 *
401 * Return values
402 * None.
403 *
404 * Caller's context
405 * p_lock must be held at entry.
406 */
407 void
exacct_update_task_mstate(proc_t * p)408 exacct_update_task_mstate(proc_t *p)
409 {
410 task_usage_t *tu;
411
412 mutex_enter(&p->p_task->tk_usage_lock);
413 tu = p->p_task->tk_usage;
414 tu->tu_utime += mstate_aggr_state(p, LMS_USER);
415 tu->tu_stime += mstate_aggr_state(p, LMS_SYSTEM);
416 tu->tu_minflt += p->p_ru.minflt;
417 tu->tu_majflt += p->p_ru.majflt;
418 tu->tu_sndmsg += p->p_ru.msgsnd;
419 tu->tu_rcvmsg += p->p_ru.msgrcv;
420 tu->tu_ioch += p->p_ru.ioch;
421 tu->tu_iblk += p->p_ru.inblock;
422 tu->tu_oblk += p->p_ru.oublock;
423 tu->tu_vcsw += p->p_ru.nvcsw;
424 tu->tu_icsw += p->p_ru.nivcsw;
425 tu->tu_nsig += p->p_ru.nsignals;
426 tu->tu_nswp += p->p_ru.nswap;
427 tu->tu_nscl += p->p_ru.sysc;
428 mutex_exit(&p->p_task->tk_usage_lock);
429 }
430
431 static void
exacct_calculate_task_usage(task_t * tk,task_usage_t * tu,int flag)432 exacct_calculate_task_usage(task_t *tk, task_usage_t *tu, int flag)
433 {
434 timestruc_t ts;
435 task_usage_t *tu_buf;
436
437 switch (flag) {
438 case EW_PARTIAL:
439 /*
440 * For partial records we must report the sum of current
441 * accounting statistics with previously accumulated
442 * statistics.
443 */
444 mutex_enter(&pidlock);
445 mutex_enter(&tk->tk_usage_lock);
446
447 (void) bcopy(tk->tk_usage, tu, sizeof (task_usage_t));
448 exacct_snapshot_task_usage(tk, tu);
449
450 mutex_exit(&tk->tk_usage_lock);
451 mutex_exit(&pidlock);
452 break;
453 case EW_INTERVAL:
454 /*
455 * We need to allocate spare task_usage_t buffer before
456 * grabbing pidlock because we might need it later in
457 * exacct_get_interval_task_usage().
458 */
459 tu_buf = kmem_zalloc(sizeof (task_usage_t), KM_SLEEP);
460 mutex_enter(&pidlock);
461 mutex_enter(&tk->tk_usage_lock);
462
463 /*
464 * For interval records, we deduct the previous microstate
465 * accounting data and cpu usage times from previously saved
466 * results and update the previous task usage structure.
467 */
468 (void) bcopy(tk->tk_usage, tu, sizeof (task_usage_t));
469 exacct_snapshot_task_usage(tk, tu);
470 exacct_get_interval_task_usage(tk, tu, &tu_buf);
471
472 mutex_exit(&tk->tk_usage_lock);
473 mutex_exit(&pidlock);
474
475 if (tu_buf != NULL)
476 kmem_free(tu_buf, sizeof (task_usage_t));
477 break;
478 case EW_FINAL:
479 /*
480 * For final records, we deduct, from the task's current
481 * usage, any usage that was inherited with the arrival
482 * of a process from a previous task. We then record
483 * the task's finish time.
484 */
485 mutex_enter(&tk->tk_usage_lock);
486 (void) bcopy(tk->tk_usage, tu, sizeof (task_usage_t));
487 exacct_sub_task_mstate(tu, tk->tk_inherited);
488 mutex_exit(&tk->tk_usage_lock);
489
490 gethrestime(&ts);
491 tu->tu_finishsec = (uint64_t)(ulong_t)ts.tv_sec;
492 tu->tu_finishnsec = (uint64_t)(ulong_t)ts.tv_nsec;
493
494 break;
495 }
496 }
497
498 static int
exacct_attach_task_item(task_t * tk,task_usage_t * tu,ea_object_t * record,int res)499 exacct_attach_task_item(task_t *tk, task_usage_t *tu, ea_object_t *record,
500 int res)
501 {
502 int attached = 1;
503
504 switch (res) {
505 case AC_TASK_TASKID:
506 (void) ea_attach_item(record, &tk->tk_tkid,
507 sizeof (uint32_t), EXT_UINT32 | EXD_TASK_TASKID);
508 break;
509 case AC_TASK_PROJID:
510 (void) ea_attach_item(record, &tk->tk_proj->kpj_id,
511 sizeof (uint32_t), EXT_UINT32 | EXD_TASK_PROJID);
512 break;
513 case AC_TASK_CPU: {
514 timestruc_t ts;
515 uint64_t ui;
516
517 hrt2ts(tu->tu_stime, &ts);
518 ui = ts.tv_sec;
519 (void) ea_attach_item(record, &ui, sizeof (uint64_t),
520 EXT_UINT64 | EXD_TASK_CPU_SYS_SEC);
521 ui = ts.tv_nsec;
522 (void) ea_attach_item(record, &ui, sizeof (uint64_t),
523 EXT_UINT64 | EXD_TASK_CPU_SYS_NSEC);
524
525 hrt2ts(tu->tu_utime, &ts);
526 ui = ts.tv_sec;
527 (void) ea_attach_item(record, &ui, sizeof (uint64_t),
528 EXT_UINT64 | EXD_TASK_CPU_USER_SEC);
529 ui = ts.tv_nsec;
530 (void) ea_attach_item(record, &ui, sizeof (uint64_t),
531 EXT_UINT64 | EXD_TASK_CPU_USER_NSEC);
532 }
533 break;
534 case AC_TASK_TIME:
535 (void) ea_attach_item(record, &tu->tu_startsec,
536 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_START_SEC);
537 (void) ea_attach_item(record, &tu->tu_startnsec,
538 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_START_NSEC);
539 (void) ea_attach_item(record, &tu->tu_finishsec,
540 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_FINISH_SEC);
541 (void) ea_attach_item(record, &tu->tu_finishnsec,
542 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_FINISH_NSEC);
543 break;
544 case AC_TASK_HOSTNAME:
545 (void) ea_attach_item(record, tk->tk_zone->zone_nodename,
546 strlen(tk->tk_zone->zone_nodename) + 1,
547 EXT_STRING | EXD_TASK_HOSTNAME);
548 break;
549 case AC_TASK_MICROSTATE:
550 (void) ea_attach_item(record, &tu->tu_majflt,
551 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_FAULTS_MAJOR);
552 (void) ea_attach_item(record, &tu->tu_minflt,
553 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_FAULTS_MINOR);
554 (void) ea_attach_item(record, &tu->tu_sndmsg,
555 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_MESSAGES_SND);
556 (void) ea_attach_item(record, &tu->tu_rcvmsg,
557 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_MESSAGES_RCV);
558 (void) ea_attach_item(record, &tu->tu_iblk,
559 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_BLOCKS_IN);
560 (void) ea_attach_item(record, &tu->tu_oblk,
561 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_BLOCKS_OUT);
562 (void) ea_attach_item(record, &tu->tu_ioch,
563 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_CHARS_RDWR);
564 (void) ea_attach_item(record, &tu->tu_vcsw,
565 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_CONTEXT_VOL);
566 (void) ea_attach_item(record, &tu->tu_icsw,
567 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_CONTEXT_INV);
568 (void) ea_attach_item(record, &tu->tu_nsig,
569 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_SIGNALS);
570 (void) ea_attach_item(record, &tu->tu_nswp,
571 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_SWAPS);
572 (void) ea_attach_item(record, &tu->tu_nscl,
573 sizeof (uint64_t), EXT_UINT64 | EXD_TASK_SYSCALLS);
574 break;
575 case AC_TASK_ANCTASKID:
576 (void) ea_attach_item(record, &tu->tu_anctaskid,
577 sizeof (uint32_t), EXT_UINT32 | EXD_TASK_ANCTASKID);
578 break;
579 case AC_TASK_ZONENAME:
580 (void) ea_attach_item(record, tk->tk_zone->zone_name,
581 strlen(tk->tk_zone->zone_name) + 1,
582 EXT_STRING | EXD_TASK_ZONENAME);
583 break;
584 default:
585 attached = 0;
586 }
587 return (attached);
588 }
589
590 static ea_object_t *
exacct_assemble_task_record(task_t * tk,task_usage_t * tu,ulong_t * mask,ea_catalog_t record_type)591 exacct_assemble_task_record(task_t *tk, task_usage_t *tu, ulong_t *mask,
592 ea_catalog_t record_type)
593 {
594 int res, count;
595 ea_object_t *record;
596
597 /*
598 * Assemble usage values into group.
599 */
600 record = ea_alloc_group(EXT_GROUP | EXC_DEFAULT | record_type);
601 for (res = 1, count = 0; res <= AC_TASK_MAX_RES; res++)
602 if (BT_TEST(mask, res))
603 count += exacct_attach_task_item(tk, tu, record, res);
604 if (count == 0) {
605 ea_free_object(record, EUP_ALLOC);
606 record = NULL;
607 }
608 return (record);
609 }
610
611 /*
612 * int exacct_assemble_task_usage(task_t *, int (*)(void *, size_t, void *,
613 * size_t, size_t *), void *, size_t, size_t *, int)
614 *
615 * Overview
616 * exacct_assemble_task_usage() builds the packed exacct buffer for the
617 * indicated task, executes the given callback function, and free the packed
618 * buffer.
619 *
620 * Return values
621 * Returns 0 on success; otherwise the appropriate error code is returned.
622 *
623 * Caller's context
624 * Suitable for KM_SLEEP allocations.
625 */
626 int
exacct_assemble_task_usage(ac_info_t * ac_task,task_t * tk,int (* callback)(ac_info_t *,void *,size_t,void *,size_t,size_t *),void * ubuf,size_t ubufsize,size_t * actual,int flag)627 exacct_assemble_task_usage(ac_info_t *ac_task, task_t *tk,
628 int (*callback)(ac_info_t *, void *, size_t, void *, size_t, size_t *),
629 void *ubuf, size_t ubufsize, size_t *actual, int flag)
630 {
631 ulong_t mask[AC_MASK_SZ];
632 ea_object_t *task_record;
633 ea_catalog_t record_type;
634 task_usage_t *tu;
635 void *buf;
636 size_t bufsize;
637 int ret;
638
639 ASSERT(flag == EW_FINAL || flag == EW_PARTIAL || flag == EW_INTERVAL);
640
641 mutex_enter(&ac_task->ac_lock);
642 if (ac_task->ac_state == AC_OFF) {
643 mutex_exit(&ac_task->ac_lock);
644 return (ENOTACTIVE);
645 }
646 bt_copy(ac_task->ac_mask, mask, AC_MASK_SZ);
647 mutex_exit(&ac_task->ac_lock);
648
649 switch (flag) {
650 case EW_FINAL:
651 record_type = EXD_GROUP_TASK;
652 break;
653 case EW_PARTIAL:
654 record_type = EXD_GROUP_TASK_PARTIAL;
655 break;
656 case EW_INTERVAL:
657 record_type = EXD_GROUP_TASK_INTERVAL;
658 break;
659 default:
660 return (0);
661 }
662
663 /*
664 * Calculate task usage and assemble it into the task record.
665 */
666 tu = kmem_zalloc(sizeof (task_usage_t), KM_SLEEP);
667 exacct_calculate_task_usage(tk, tu, flag);
668 task_record = exacct_assemble_task_record(tk, tu, mask, record_type);
669 if (task_record == NULL) {
670 /*
671 * The current configuration of the accounting system has
672 * resulted in records with no data; accordingly, we don't write
673 * these, but we return success.
674 */
675 kmem_free(tu, sizeof (task_usage_t));
676 return (0);
677 }
678
679 /*
680 * Pack object into buffer and run callback on it.
681 */
682 bufsize = ea_pack_object(task_record, NULL, 0);
683 buf = kmem_alloc(bufsize, KM_SLEEP);
684 (void) ea_pack_object(task_record, buf, bufsize);
685 ret = callback(ac_task, ubuf, ubufsize, buf, bufsize, actual);
686
687 /*
688 * Free all previously allocated structures.
689 */
690 kmem_free(buf, bufsize);
691 ea_free_object(task_record, EUP_ALLOC);
692 kmem_free(tu, sizeof (task_usage_t));
693 return (ret);
694 }
695
696 /*
697 * void exacct_commit_task(void *)
698 *
699 * Overview
700 * exacct_commit_task() calculates the final usage for a task, updating the
701 * task usage if task accounting is active, and writing a task record if task
702 * accounting is active. exacct_commit_task() is intended for being called
703 * from a task queue (taskq_t).
704 *
705 * Return values
706 * None.
707 *
708 * Caller's context
709 * Suitable for KM_SLEEP allocations.
710 */
711
712 void
exacct_commit_task(void * arg)713 exacct_commit_task(void *arg)
714 {
715 task_t *tk = (task_t *)arg;
716 size_t size;
717 zone_t *zone = tk->tk_zone;
718 struct exacct_globals *acg;
719
720 ASSERT(tk != task0p);
721 ASSERT(tk->tk_memb_list == NULL);
722
723 /*
724 * Don't do any extra work if the acctctl module isn't loaded.
725 * If acctctl module is loaded when zone is in down state then
726 * zone_getspecific can return NULL for that zone.
727 */
728 if (exacct_zone_key != ZONE_KEY_UNINITIALIZED) {
729 acg = zone_getspecific(exacct_zone_key, zone);
730 if (acg == NULL)
731 goto err;
732 (void) exacct_assemble_task_usage(&acg->ac_task, tk,
733 exacct_commit_callback, NULL, 0, &size, EW_FINAL);
734 if (tk->tk_zone != global_zone) {
735 acg = zone_getspecific(exacct_zone_key, global_zone);
736 (void) exacct_assemble_task_usage(&acg->ac_task, tk,
737 exacct_commit_callback, NULL, 0, &size, EW_FINAL);
738 }
739 }
740 /*
741 * Release associated project and finalize task.
742 */
743 err:
744 task_end(tk);
745 }
746
747 static int
exacct_attach_proc_item(proc_usage_t * pu,ea_object_t * record,int res)748 exacct_attach_proc_item(proc_usage_t *pu, ea_object_t *record, int res)
749 {
750 int attached = 1;
751
752 switch (res) {
753 case AC_PROC_PID:
754 (void) ea_attach_item(record, &pu->pu_pid,
755 sizeof (uint32_t), EXT_UINT32 | EXD_PROC_PID);
756 break;
757 case AC_PROC_UID:
758 (void) ea_attach_item(record, &pu->pu_ruid,
759 sizeof (uint32_t), EXT_UINT32 | EXD_PROC_UID);
760 break;
761 case AC_PROC_FLAG:
762 (void) ea_attach_item(record, &pu->pu_acflag,
763 sizeof (uint32_t), EXT_UINT32 | EXD_PROC_ACCT_FLAGS);
764 break;
765 case AC_PROC_GID:
766 (void) ea_attach_item(record, &pu->pu_rgid,
767 sizeof (uint32_t), EXT_UINT32 | EXD_PROC_GID);
768 break;
769 case AC_PROC_PROJID:
770 (void) ea_attach_item(record, &pu->pu_projid,
771 sizeof (uint32_t), EXT_UINT32 | EXD_PROC_PROJID);
772 break;
773 case AC_PROC_TASKID:
774 (void) ea_attach_item(record, &pu->pu_taskid,
775 sizeof (uint32_t), EXT_UINT32 | EXD_PROC_TASKID);
776 break;
777 case AC_PROC_CPU:
778 (void) ea_attach_item(record, &pu->pu_utimesec,
779 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_CPU_USER_SEC);
780 (void) ea_attach_item(record, &pu->pu_utimensec,
781 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_CPU_USER_NSEC);
782 (void) ea_attach_item(record, &pu->pu_stimesec,
783 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_CPU_SYS_SEC);
784 (void) ea_attach_item(record, &pu->pu_stimensec,
785 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_CPU_SYS_NSEC);
786 break;
787 case AC_PROC_TIME:
788 (void) ea_attach_item(record, &pu->pu_startsec,
789 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_START_SEC);
790 (void) ea_attach_item(record, &pu->pu_startnsec,
791 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_START_NSEC);
792 (void) ea_attach_item(record, &pu->pu_finishsec,
793 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_FINISH_SEC);
794 (void) ea_attach_item(record, &pu->pu_finishnsec,
795 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_FINISH_NSEC);
796 break;
797 case AC_PROC_COMMAND:
798 (void) ea_attach_item(record, pu->pu_command,
799 strlen(pu->pu_command) + 1, EXT_STRING | EXD_PROC_COMMAND);
800 break;
801 case AC_PROC_HOSTNAME:
802 (void) ea_attach_item(record, pu->pu_nodename,
803 strlen(pu->pu_nodename) + 1,
804 EXT_STRING | EXD_PROC_HOSTNAME);
805 break;
806 case AC_PROC_TTY:
807 (void) ea_attach_item(record, &pu->pu_major,
808 sizeof (uint32_t), EXT_UINT32 | EXD_PROC_TTY_MAJOR);
809 (void) ea_attach_item(record, &pu->pu_minor,
810 sizeof (uint32_t), EXT_UINT32 | EXD_PROC_TTY_MINOR);
811 break;
812 case AC_PROC_MICROSTATE:
813 (void) ea_attach_item(record, &pu->pu_majflt,
814 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_FAULTS_MAJOR);
815 (void) ea_attach_item(record, &pu->pu_minflt,
816 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_FAULTS_MINOR);
817 (void) ea_attach_item(record, &pu->pu_sndmsg,
818 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_MESSAGES_SND);
819 (void) ea_attach_item(record, &pu->pu_rcvmsg,
820 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_MESSAGES_RCV);
821 (void) ea_attach_item(record, &pu->pu_iblk,
822 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_BLOCKS_IN);
823 (void) ea_attach_item(record, &pu->pu_oblk,
824 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_BLOCKS_OUT);
825 (void) ea_attach_item(record, &pu->pu_ioch,
826 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_CHARS_RDWR);
827 (void) ea_attach_item(record, &pu->pu_vcsw,
828 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_CONTEXT_VOL);
829 (void) ea_attach_item(record, &pu->pu_icsw,
830 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_CONTEXT_INV);
831 (void) ea_attach_item(record, &pu->pu_nsig,
832 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_SIGNALS);
833 (void) ea_attach_item(record, &pu->pu_nswp,
834 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_SWAPS);
835 (void) ea_attach_item(record, &pu->pu_nscl,
836 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_SYSCALLS);
837 break;
838 case AC_PROC_ANCPID:
839 (void) ea_attach_item(record, &pu->pu_ancpid,
840 sizeof (uint32_t), EXT_UINT32 | EXD_PROC_ANCPID);
841 break;
842 case AC_PROC_WAIT_STATUS:
843 (void) ea_attach_item(record, &pu->pu_wstat,
844 sizeof (uint32_t), EXT_UINT32 | EXD_PROC_WAIT_STATUS);
845 break;
846 case AC_PROC_ZONENAME:
847 (void) ea_attach_item(record, pu->pu_zonename,
848 strlen(pu->pu_zonename) + 1,
849 EXT_STRING | EXD_PROC_ZONENAME);
850 break;
851 case AC_PROC_MEM:
852 (void) ea_attach_item(record, &pu->pu_mem_rss_avg,
853 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_MEM_RSS_AVG_K);
854 (void) ea_attach_item(record, &pu->pu_mem_rss_max,
855 sizeof (uint64_t), EXT_UINT64 | EXD_PROC_MEM_RSS_MAX_K);
856 break;
857 default:
858 attached = 0;
859 }
860 return (attached);
861 }
862
863 static ea_object_t *
exacct_assemble_proc_record(proc_usage_t * pu,ulong_t * mask,ea_catalog_t record_type)864 exacct_assemble_proc_record(proc_usage_t *pu, ulong_t *mask,
865 ea_catalog_t record_type)
866 {
867 int res, count;
868 ea_object_t *record;
869
870 /*
871 * Assemble usage values into group.
872 */
873 record = ea_alloc_group(EXT_GROUP | EXC_DEFAULT | record_type);
874 for (res = 1, count = 0; res <= AC_PROC_MAX_RES; res++)
875 if (BT_TEST(mask, res))
876 count += exacct_attach_proc_item(pu, record, res);
877 if (count == 0) {
878 ea_free_object(record, EUP_ALLOC);
879 record = NULL;
880 }
881 return (record);
882 }
883
884 /*
885 * The following two routines assume that process's p_lock is held or
886 * exacct_commit_proc has been called from exit() when all lwps are stopped.
887 */
888 static void
exacct_calculate_proc_mstate(proc_t * p,proc_usage_t * pu)889 exacct_calculate_proc_mstate(proc_t *p, proc_usage_t *pu)
890 {
891 kthread_t *t;
892
893 ASSERT(MUTEX_HELD(&p->p_lock));
894 if ((t = p->p_tlist) == NULL)
895 return;
896
897 do {
898 pu->pu_minflt += t->t_lwp->lwp_ru.minflt;
899 pu->pu_majflt += t->t_lwp->lwp_ru.majflt;
900 pu->pu_sndmsg += t->t_lwp->lwp_ru.msgsnd;
901 pu->pu_rcvmsg += t->t_lwp->lwp_ru.msgrcv;
902 pu->pu_ioch += t->t_lwp->lwp_ru.ioch;
903 pu->pu_iblk += t->t_lwp->lwp_ru.inblock;
904 pu->pu_oblk += t->t_lwp->lwp_ru.oublock;
905 pu->pu_vcsw += t->t_lwp->lwp_ru.nvcsw;
906 pu->pu_icsw += t->t_lwp->lwp_ru.nivcsw;
907 pu->pu_nsig += t->t_lwp->lwp_ru.nsignals;
908 pu->pu_nswp += t->t_lwp->lwp_ru.nswap;
909 pu->pu_nscl += t->t_lwp->lwp_ru.sysc;
910 } while ((t = t->t_forw) != p->p_tlist);
911 }
912
913 static void
exacct_copy_proc_mstate(proc_t * p,proc_usage_t * pu)914 exacct_copy_proc_mstate(proc_t *p, proc_usage_t *pu)
915 {
916 pu->pu_minflt = p->p_ru.minflt;
917 pu->pu_majflt = p->p_ru.majflt;
918 pu->pu_sndmsg = p->p_ru.msgsnd;
919 pu->pu_rcvmsg = p->p_ru.msgrcv;
920 pu->pu_ioch = p->p_ru.ioch;
921 pu->pu_iblk = p->p_ru.inblock;
922 pu->pu_oblk = p->p_ru.oublock;
923 pu->pu_vcsw = p->p_ru.nvcsw;
924 pu->pu_icsw = p->p_ru.nivcsw;
925 pu->pu_nsig = p->p_ru.nsignals;
926 pu->pu_nswp = p->p_ru.nswap;
927 pu->pu_nscl = p->p_ru.sysc;
928 }
929
/*
 * Populate a proc_usage_t with accounting data for process p.  "mask"
 * selects which resource groups need computation; "flag" is EW_PARTIAL
 * for a snapshot of a running process or EW_FINAL for a process at exit;
 * "wstat" is the wait(2) status recorded in the usage record.
 *
 * Caller must hold p->p_lock and must have allocated pu->pu_command
 * large enough for p_user.u_comm (see exacct_do_commit_proc()).
 */
void
exacct_calculate_proc_usage(proc_t *p, proc_usage_t *pu, ulong_t *mask,
    int flag, int wstat)
{
	timestruc_t ts, ts_run;

	ASSERT(MUTEX_HELD(&p->p_lock));

	/*
	 * Convert CPU and execution times to sec/nsec format.
	 */
	if (BT_TEST(mask, AC_PROC_CPU)) {
		hrt2ts(mstate_aggr_state(p, LMS_USER), &ts);
		pu->pu_utimesec = (uint64_t)(ulong_t)ts.tv_sec;
		pu->pu_utimensec = (uint64_t)(ulong_t)ts.tv_nsec;
		hrt2ts(mstate_aggr_state(p, LMS_SYSTEM), &ts);
		pu->pu_stimesec = (uint64_t)(ulong_t)ts.tv_sec;
		pu->pu_stimensec = (uint64_t)(ulong_t)ts.tv_nsec;
	}
	if (BT_TEST(mask, AC_PROC_TIME)) {
		gethrestime(&ts);
		pu->pu_finishsec = (uint64_t)(ulong_t)ts.tv_sec;
		pu->pu_finishnsec = (uint64_t)(ulong_t)ts.tv_nsec;
		/*
		 * Derive the start time by subtracting the process's
		 * elapsed run time (now - p_mstart) from the current
		 * wall-clock time, normalizing tv_nsec into [0, NANOSEC).
		 */
		hrt2ts(gethrtime() - p->p_mstart, &ts_run);
		ts.tv_sec -= ts_run.tv_sec;
		ts.tv_nsec -= ts_run.tv_nsec;
		if (ts.tv_nsec < 0) {
			ts.tv_sec--;
			if ((ts.tv_nsec = ts.tv_nsec + NANOSEC) >= NANOSEC) {
				ts.tv_sec++;
				ts.tv_nsec -= NANOSEC;
			}
		}
		pu->pu_startsec = (uint64_t)(ulong_t)ts.tv_sec;
		pu->pu_startnsec = (uint64_t)(ulong_t)ts.tv_nsec;
	}

	/* Identity: pid, accounting flags, project/task, controlling tty. */
	pu->pu_pid = p->p_pidp->pid_id;
	pu->pu_acflag = p->p_user.u_acflag;
	pu->pu_projid = p->p_task->tk_proj->kpj_id;
	pu->pu_taskid = p->p_task->tk_tkid;
	pu->pu_major = getmajor(p->p_sessp->s_dev);
	pu->pu_minor = getminor(p->p_sessp->s_dev);
	pu->pu_ancpid = p->p_ancpid;
	pu->pu_wstat = wstat;
	/*
	 * Compute average RSS in K. The denominator is the number of
	 * samples: the number of clock ticks plus the initial value.
	 */
	pu->pu_mem_rss_avg = (PTOU(p)->u_mem / (p->p_stime + p->p_utime + 1)) *
	    (PAGESIZE / 1024);
	pu->pu_mem_rss_max = PTOU(p)->u_mem_max * (PAGESIZE / 1024);

	/* p_crlock protects p_cred against concurrent credential swaps. */
	mutex_enter(&p->p_crlock);
	pu->pu_ruid = crgetruid(p->p_cred);
	pu->pu_rgid = crgetrgid(p->p_cred);
	mutex_exit(&p->p_crlock);

	bcopy(p->p_user.u_comm, pu->pu_command, strlen(p->p_user.u_comm) + 1);
	bcopy(p->p_zone->zone_name, pu->pu_zonename,
	    strlen(p->p_zone->zone_name) + 1);
	bcopy(p->p_zone->zone_nodename, pu->pu_nodename,
	    strlen(p->p_zone->zone_nodename) + 1);

	/*
	 * Calculate microstate accounting data for a process that is still
	 * running. Presently, we explicitly collect all of the LWP usage into
	 * the proc usage structure here.
	 */
	if (flag & EW_PARTIAL)
		exacct_calculate_proc_mstate(p, pu);
	if (flag & EW_FINAL)
		exacct_copy_proc_mstate(p, pu);
}
1004
1005 /*
1006 * int exacct_assemble_proc_usage(proc_usage_t *, int (*)(void *, size_t, void
1007 * *, size_t, size_t *), void *, size_t, size_t *)
1008 *
1009 * Overview
1010 * Assemble record with miscellaneous accounting information about the process
1011 * and execute the callback on it. It is the callback's job to set "actual" to
1012 * the size of record.
1013 *
1014 * Return values
1015 * The result of the callback function, unless the extended process accounting
1016 * feature is not active, in which case ENOTACTIVE is returned.
1017 *
1018 * Caller's context
1019 * Suitable for KM_SLEEP allocations.
1020 */
int
exacct_assemble_proc_usage(ac_info_t *ac_proc, proc_usage_t *pu,
    int (*callback)(ac_info_t *, void *, size_t, void *, size_t, size_t *),
    void *ubuf, size_t ubufsize, size_t *actual, int flag)
{
	ulong_t mask[AC_MASK_SZ];
	ea_object_t *proc_record;
	ea_catalog_t record_type;
	void *buf;
	size_t bufsize;
	int ret;

	ASSERT(flag == EW_FINAL || flag == EW_PARTIAL);

	/* Snapshot the resource mask under the lock; bail if accounting off. */
	mutex_enter(&ac_proc->ac_lock);
	if (ac_proc->ac_state == AC_OFF) {
		mutex_exit(&ac_proc->ac_lock);
		return (ENOTACTIVE);
	}
	bt_copy(&ac_proc->ac_mask[0], mask, AC_MASK_SZ);
	mutex_exit(&ac_proc->ac_lock);

	switch (flag) {
	case EW_FINAL:
		record_type = EXD_GROUP_PROC;
		break;
	case EW_PARTIAL:
		record_type = EXD_GROUP_PROC_PARTIAL;
		break;
	default:
		/* Unreachable given the ASSERT above; placates lint. */
		record_type = EXD_NONE;
		break;
	}

	/* An empty mask yields no record; treat that as trivially done. */
	proc_record = exacct_assemble_proc_record(pu, mask, record_type);
	if (proc_record == NULL)
		return (0);

	/*
	 * Pack object into buffer and pass to callback.
	 */
	bufsize = ea_pack_object(proc_record, NULL, 0);
	buf = kmem_alloc(bufsize, KM_SLEEP);
	(void) ea_pack_object(proc_record, buf, bufsize);

	ret = callback(ac_proc, ubuf, ubufsize, buf, bufsize, actual);

	/*
	 * Free all previously allocations.
	 */
	kmem_free(buf, bufsize);
	ea_free_object(proc_record, EUP_ALLOC);
	return (ret);
}
1075
1076 /*
1077 * int exacct_commit_callback(ac_info_t *, void *, size_t, void *, size_t,
1078 * size_t *)
1079 *
1080 * Overview
1081 * exacct_commit_callback() writes the indicated buffer to the indicated
1082 * extended accounting file.
1083 *
1084 * Return values
1085 * The result of the write operation is returned. "actual" is updated to
1086 * contain the number of bytes actually written.
1087 *
1088 * Caller's context
1089 * Suitable for a vn_rdwr() operation.
1090 */
1091 /*ARGSUSED*/
1092 int
exacct_commit_callback(ac_info_t * info,void * ubuf,size_t ubufsize,void * buf,size_t bufsize,size_t * actual)1093 exacct_commit_callback(ac_info_t *info, void *ubuf, size_t ubufsize,
1094 void *buf, size_t bufsize, size_t *actual)
1095 {
1096 int error = 0;
1097
1098 *actual = 0;
1099 if ((error = exacct_vn_write(info, buf, bufsize)) == 0)
1100 *actual = bufsize;
1101 return (error);
1102 }
1103
1104 static void
exacct_do_commit_proc(ac_info_t * ac_proc,proc_t * p,int wstat)1105 exacct_do_commit_proc(ac_info_t *ac_proc, proc_t *p, int wstat)
1106 {
1107 size_t size;
1108 proc_usage_t *pu;
1109 ulong_t mask[AC_MASK_SZ];
1110
1111 mutex_enter(&ac_proc->ac_lock);
1112 if (ac_proc->ac_state == AC_ON) {
1113 bt_copy(&ac_proc->ac_mask[0], mask, AC_MASK_SZ);
1114 mutex_exit(&ac_proc->ac_lock);
1115 } else {
1116 mutex_exit(&ac_proc->ac_lock);
1117 return;
1118 }
1119
1120 mutex_enter(&p->p_lock);
1121 size = strlen(p->p_user.u_comm) + 1;
1122 mutex_exit(&p->p_lock);
1123
1124 pu = kmem_alloc(sizeof (proc_usage_t), KM_SLEEP);
1125 pu->pu_command = kmem_alloc(size, KM_SLEEP);
1126 mutex_enter(&p->p_lock);
1127 exacct_calculate_proc_usage(p, pu, mask, EW_FINAL, wstat);
1128 mutex_exit(&p->p_lock);
1129
1130 (void) exacct_assemble_proc_usage(ac_proc, pu,
1131 exacct_commit_callback, NULL, 0, &size, EW_FINAL);
1132
1133 kmem_free(pu->pu_command, strlen(pu->pu_command) + 1);
1134 kmem_free(pu, sizeof (proc_usage_t));
1135 }
1136
1137 /*
1138 * void exacct_commit_proc(proc_t *, int)
1139 *
1140 * Overview
1141 * exacct_commit_proc() calculates the final usage for a process, updating the
1142 * task usage if task accounting is active, and writing a process record if
1143 * process accounting is active. exacct_commit_proc() is intended for being
1144 * called from proc_exit().
1145 *
1146 * Return values
1147 * None.
1148 *
1149 * Caller's context
1150 * Suitable for KM_SLEEP allocations. p_lock must not be held at entry.
1151 */
void
exacct_commit_proc(proc_t *p, int wstat)
{
	zone_t *zone = p->p_zone;
	struct exacct_globals *acg, *gacg = NULL;

	if (exacct_zone_key == ZONE_KEY_UNINITIALIZED) {
		/*
		 * acctctl module not loaded. Nothing to do.
		 */
		return;
	}

	/*
	 * If acctctl module is loaded when zone is in down state then
	 * zone_getspecific can return NULL for that zone.
	 */
	acg = zone_getspecific(exacct_zone_key, zone);
	if (acg == NULL)
		return;
	exacct_do_commit_proc(&acg->ac_proc, p, wstat);
	/*
	 * A non-global-zone process is also committed to the global zone's
	 * process accounting file.
	 * NOTE(review): gacg is not NULL-checked here, unlike acg above —
	 * presumably the global zone's specific data always exists once the
	 * module is loaded; confirm against zone shutdown ordering.
	 */
	if (zone != global_zone) {
		gacg = zone_getspecific(exacct_zone_key, global_zone);
		exacct_do_commit_proc(&gacg->ac_proc, p, wstat);
	}
}
1178
/*
 * Attach the single network-statistics item identified by "res" to the
 * given exacct record, drawing its value from ns.  Returns 1 if an item
 * was attached, 0 if "res" is not a statistics resource (so the caller
 * can count attached items).
 */
static int
exacct_attach_netstat_item(net_stat_t *ns, ea_object_t *record, int res)
{
	int attached = 1;

	switch (res) {
	case AC_NET_NAME:
		(void) ea_attach_item(record, ns->ns_name,
		    strlen(ns->ns_name) + 1, EXT_STRING | EXD_NET_STATS_NAME);
		break;
	case AC_NET_CURTIME:
		{
			uint64_t	now;
			timestruc_t ts;

			/* Record the current wall-clock time in seconds. */
			gethrestime(&ts);
			now = (uint64_t)(ulong_t)ts.tv_sec;
			(void) ea_attach_item(record,  &now, sizeof (uint64_t),
			    EXT_UINT64 | EXD_NET_STATS_CURTIME);
		}
		break;
	case AC_NET_IBYTES:
		(void) ea_attach_item(record, &ns->ns_ibytes,
		    sizeof (uint64_t), EXT_UINT64 | EXD_NET_STATS_IBYTES);
		break;
	case AC_NET_OBYTES:
		(void) ea_attach_item(record, &ns->ns_obytes,
		    sizeof (uint64_t), EXT_UINT64 | EXD_NET_STATS_OBYTES);
		break;
	case AC_NET_IPKTS:
		(void) ea_attach_item(record, &ns->ns_ipackets,
		    sizeof (uint64_t), EXT_UINT64 | EXD_NET_STATS_IPKTS);
		break;
	case AC_NET_OPKTS:
		(void) ea_attach_item(record, &ns->ns_opackets,
		    sizeof (uint64_t), EXT_UINT64 | EXD_NET_STATS_OPKTS);
		break;
	case AC_NET_IERRPKTS:
		(void) ea_attach_item(record, &ns->ns_ierrors,
		    sizeof (uint64_t), EXT_UINT64 | EXD_NET_STATS_IERRPKTS);
		break;
	case AC_NET_OERRPKTS:
		(void) ea_attach_item(record, &ns->ns_oerrors,
		    sizeof (uint64_t), EXT_UINT64 | EXD_NET_STATS_OERRPKTS);
		break;
	default:
		attached = 0;
	}
	return (attached);
}
1229
/*
 * Attach the single network-descriptor item identified by "res" to the
 * given exacct record, drawing its value from nd.  Returns 1 if an item
 * was attached, 0 if "res" is not a descriptor resource.
 */
static int
exacct_attach_netdesc_item(net_desc_t *nd, ea_object_t *record, int res)
{
	int attached = 1;

	switch (res) {
	case AC_NET_NAME:
		(void) ea_attach_item(record, nd->nd_name,
		    strlen(nd->nd_name) + 1, EXT_STRING | EXD_NET_DESC_NAME);
		break;
	case AC_NET_DEVNAME:
		(void) ea_attach_item(record, nd->nd_devname,
		    strlen(nd->nd_devname) + 1, EXT_STRING |
		    EXD_NET_DESC_DEVNAME);
		break;
	case AC_NET_EHOST:
		(void) ea_attach_item(record, &nd->nd_ehost,
		    sizeof (nd->nd_ehost), EXT_RAW | EXD_NET_DESC_EHOST);
		break;
	case AC_NET_EDEST:
		(void) ea_attach_item(record, &nd->nd_edest,
		    sizeof (nd->nd_edest), EXT_RAW | EXD_NET_DESC_EDEST);
		break;
	case AC_NET_VLAN_TPID:
		(void) ea_attach_item(record, &nd->nd_vlan_tpid,
		    sizeof (ushort_t), EXT_UINT16 | EXD_NET_DESC_VLAN_TPID);
		break;
	case AC_NET_VLAN_TCI:
		(void) ea_attach_item(record, &nd->nd_vlan_tci,
		    sizeof (ushort_t), EXT_UINT16 | EXD_NET_DESC_VLAN_TCI);
		break;
	case AC_NET_SAP:
		(void) ea_attach_item(record, &nd->nd_sap,
		    sizeof (ushort_t), EXT_UINT16 | EXD_NET_DESC_SAP);
		break;
	case AC_NET_PRIORITY:
		(void) ea_attach_item(record, &nd->nd_priority,
		    sizeof (ushort_t), EXT_UINT16 | EXD_NET_DESC_PRIORITY);
		break;
	case AC_NET_BWLIMIT:
		(void) ea_attach_item(record, &nd->nd_bw_limit,
		    sizeof (uint64_t), EXT_UINT64 | EXD_NET_DESC_BWLIMIT);
		break;
	case AC_NET_SADDR:
		/*
		 * IPv4 addresses are stored in the last word of the
		 * 4-word address array; IPv6 uses the full array as raw
		 * bytes.  Same layout for the destination address below.
		 */
		if (nd->nd_isv4) {
			(void) ea_attach_item(record, &nd->nd_saddr[3],
			    sizeof (uint32_t), EXT_UINT32 |
			    EXD_NET_DESC_V4SADDR);
		} else {
			(void) ea_attach_item(record, &nd->nd_saddr,
			    sizeof (nd->nd_saddr), EXT_RAW |
			    EXD_NET_DESC_V6SADDR);
		}
		break;
	case AC_NET_DADDR:
		if (nd->nd_isv4) {
			(void) ea_attach_item(record, &nd->nd_daddr[3],
			    sizeof (uint32_t), EXT_UINT32 |
			    EXD_NET_DESC_V4DADDR);
		} else {
			(void) ea_attach_item(record, &nd->nd_daddr,
			    sizeof (nd->nd_daddr), EXT_RAW |
			    EXD_NET_DESC_V6DADDR);
		}
		break;
	case AC_NET_SPORT:
		(void) ea_attach_item(record, &nd->nd_sport,
		    sizeof (uint16_t), EXT_UINT16 | EXD_NET_DESC_SPORT);
		break;
	case AC_NET_DPORT:
		(void) ea_attach_item(record, &nd->nd_dport,
		    sizeof (uint16_t), EXT_UINT16 | EXD_NET_DESC_DPORT);
		break;
	case AC_NET_PROTOCOL:
		(void) ea_attach_item(record, &nd->nd_protocol,
		    sizeof (uint8_t), EXT_UINT8 | EXD_NET_DESC_PROTOCOL);
		break;
	case AC_NET_DSFIELD:
		(void) ea_attach_item(record, &nd->nd_dsfield,
		    sizeof (uint8_t), EXT_UINT8 | EXD_NET_DESC_DSFIELD);
		break;
	default:
		attached = 0;
	}
	return (attached);
}
1316
1317 static ea_object_t *
exacct_assemble_net_record(void * ninfo,ulong_t * mask,ea_catalog_t record_type,int what)1318 exacct_assemble_net_record(void *ninfo, ulong_t *mask, ea_catalog_t record_type,
1319 int what)
1320 {
1321 int res;
1322 int count;
1323 ea_object_t *record;
1324
1325 /*
1326 * Assemble usage values into group.
1327 */
1328 record = ea_alloc_group(EXT_GROUP | EXC_DEFAULT | record_type);
1329 for (res = 1, count = 0; res <= AC_NET_MAX_RES; res++)
1330 if (BT_TEST(mask, res)) {
1331 if (what == EX_NET_LNDESC_REC ||
1332 what == EX_NET_FLDESC_REC) {
1333 count += exacct_attach_netdesc_item(
1334 (net_desc_t *)ninfo, record, res);
1335 } else {
1336 count += exacct_attach_netstat_item(
1337 (net_stat_t *)ninfo, record, res);
1338 }
1339 }
1340 if (count == 0) {
1341 ea_free_object(record, EUP_ALLOC);
1342 record = NULL;
1343 }
1344 return (record);
1345 }
1346
1347 int
exacct_assemble_net_usage(ac_info_t * ac_net,void * ninfo,int (* callback)(ac_info_t *,void *,size_t,void *,size_t,size_t *),void * ubuf,size_t ubufsize,size_t * actual,int what)1348 exacct_assemble_net_usage(ac_info_t *ac_net, void *ninfo,
1349 int (*callback)(ac_info_t *, void *, size_t, void *, size_t, size_t *),
1350 void *ubuf, size_t ubufsize, size_t *actual, int what)
1351 {
1352 ulong_t mask[AC_MASK_SZ];
1353 ea_object_t *net_desc;
1354 ea_catalog_t record_type;
1355 void *buf;
1356 size_t bufsize;
1357 int ret;
1358
1359 mutex_enter(&ac_net->ac_lock);
1360 if (ac_net->ac_state == AC_OFF) {
1361 mutex_exit(&ac_net->ac_lock);
1362 return (ENOTACTIVE);
1363 }
1364 bt_copy(&ac_net->ac_mask[0], mask, AC_MASK_SZ);
1365 mutex_exit(&ac_net->ac_lock);
1366
1367 switch (what) {
1368 case EX_NET_LNDESC_REC:
1369 record_type = EXD_GROUP_NET_LINK_DESC;
1370 break;
1371 case EX_NET_LNSTAT_REC:
1372 record_type = EXD_GROUP_NET_LINK_STATS;
1373 break;
1374 case EX_NET_FLDESC_REC:
1375 record_type = EXD_GROUP_NET_FLOW_DESC;
1376 break;
1377 case EX_NET_FLSTAT_REC:
1378 record_type = EXD_GROUP_NET_FLOW_STATS;
1379 break;
1380 default:
1381 return (0);
1382 }
1383
1384 net_desc = exacct_assemble_net_record(ninfo, mask, record_type, what);
1385 if (net_desc == NULL)
1386 return (0);
1387
1388 /*
1389 * Pack object into buffer and pass to callback.
1390 */
1391 bufsize = ea_pack_object(net_desc, NULL, 0);
1392 buf = kmem_alloc(bufsize, KM_NOSLEEP);
1393 if (buf == NULL)
1394 return (ENOMEM);
1395
1396 (void) ea_pack_object(net_desc, buf, bufsize);
1397
1398 ret = callback(ac_net, ubuf, ubufsize, buf, bufsize, actual);
1399
1400 /*
1401 * Free all previously allocations.
1402 */
1403 kmem_free(buf, bufsize);
1404 ea_free_object(net_desc, EUP_ALLOC);
1405 return (ret);
1406 }
1407
/*
 * Commit a network accounting record of kind "what" for the link or flow
 * described by arg, writing it via exacct_commit_callback().  Returns
 * ENOTACTIVE when the acctctl module is not loaded or net accounting is
 * off; otherwise the result of the assemble/write path.
 */
int
exacct_commit_netinfo(void *arg, int what)
{
	size_t	size;
	ulong_t	mask[AC_MASK_SZ];
	struct exacct_globals *acg;
	ac_info_t *ac_net;

	if (exacct_zone_key == ZONE_KEY_UNINITIALIZED) {
		/*
		 * acctctl module not loaded.  Nothing to do.
		 */
		return (ENOTACTIVE);
	}

	/*
	 * Even though each zone nominally has its own flow accounting settings
	 * (ac_flow), these are only maintained by and for the global zone.
	 *
	 * If this were to change in the future, this function should grow a
	 * second zoneid (or zone) argument, and use the corresponding zone's
	 * settings rather than always using those of the global zone.
	 */
	acg = zone_getspecific(exacct_zone_key, global_zone);
	ac_net = &acg->ac_net;

	/*
	 * Check state under the lock; the mask copied here is unused (the
	 * assemble function re-copies it), so this is purely an early-out
	 * before assembling the record.
	 */
	mutex_enter(&ac_net->ac_lock);
	if (ac_net->ac_state == AC_OFF) {
		mutex_exit(&ac_net->ac_lock);
		return (ENOTACTIVE);
	}
	bt_copy(&ac_net->ac_mask[0], mask, AC_MASK_SZ);
	mutex_exit(&ac_net->ac_lock);

	return (exacct_assemble_net_usage(ac_net, arg, exacct_commit_callback,
	    NULL, 0, &size, what));
}
1445
/*
 * Attach the single flow-accounting item identified by "res" to the given
 * exacct record, drawing its value from fu.  Returns 1 if an item was
 * attached, 0 if "res" is not a flow resource.
 */
static int
exacct_attach_flow_item(flow_usage_t *fu, ea_object_t *record, int res)
{
	int attached = 1;

	switch (res) {
	case AC_FLOW_SADDR:
		/*
		 * IPv4 addresses occupy the last word of the 4-word address
		 * array; IPv6 addresses use the whole array as raw bytes.
		 */
		if (fu->fu_isv4) {
			(void) ea_attach_item(record, &fu->fu_saddr[3],
			    sizeof (uint32_t), EXT_UINT32 | EXD_FLOW_V4SADDR);
		} else {
			(void) ea_attach_item(record, &fu->fu_saddr,
			    sizeof (fu->fu_saddr), EXT_RAW |
			    EXD_FLOW_V6SADDR);
		}
		break;
	case AC_FLOW_DADDR:
		if (fu->fu_isv4) {
			(void) ea_attach_item(record, &fu->fu_daddr[3],
			    sizeof (uint32_t), EXT_UINT32 | EXD_FLOW_V4DADDR);
		} else {
			(void) ea_attach_item(record, &fu->fu_daddr,
			    sizeof (fu->fu_daddr), EXT_RAW |
			    EXD_FLOW_V6DADDR);
		}
		break;
	case AC_FLOW_SPORT:
		(void) ea_attach_item(record, &fu->fu_sport,
		    sizeof (uint16_t), EXT_UINT16 | EXD_FLOW_SPORT);
		break;
	case AC_FLOW_DPORT:
		(void) ea_attach_item(record, &fu->fu_dport,
		    sizeof (uint16_t), EXT_UINT16 | EXD_FLOW_DPORT);
		break;
	case AC_FLOW_PROTOCOL:
		(void) ea_attach_item(record, &fu->fu_protocol,
		    sizeof (uint8_t), EXT_UINT8 | EXD_FLOW_PROTOCOL);
		break;
	case AC_FLOW_DSFIELD:
		(void) ea_attach_item(record, &fu->fu_dsfield,
		    sizeof (uint8_t), EXT_UINT8 | EXD_FLOW_DSFIELD);
		break;
	case AC_FLOW_CTIME:
		(void) ea_attach_item(record, &fu->fu_ctime,
		    sizeof (uint64_t), EXT_UINT64 | EXD_FLOW_CTIME);
		break;
	case AC_FLOW_LSEEN:
		(void) ea_attach_item(record, &fu->fu_lseen,
		    sizeof (uint64_t), EXT_UINT64 | EXD_FLOW_LSEEN);
		break;
	case AC_FLOW_NBYTES:
		/*
		 * NOTE(review): a uint64_t counter is attached here (and for
		 * NPKTS below) with an EXT_UINT32 catalog type — size and
		 * type disagree.  If ea_attach_item copies by catalog type,
		 * only one 32-bit word of the counter is recorded (the wrong
		 * word on big-endian).  Confirm against ea_attach_item()
		 * semantics and the on-disk record format before changing.
		 */
		(void) ea_attach_item(record, &fu->fu_nbytes,
		    sizeof (uint64_t), EXT_UINT32 | EXD_FLOW_NBYTES);
		break;
	case AC_FLOW_NPKTS:
		(void) ea_attach_item(record, &fu->fu_npackets,
		    sizeof (uint64_t), EXT_UINT32 | EXD_FLOW_NPKTS);
		break;
	case AC_FLOW_PROJID:
		/* Only attached when the flow has a valid project id. */
		if (fu->fu_projid >= 0) {
			(void) ea_attach_item(record, &fu->fu_projid,
			    sizeof (uint32_t), EXT_UINT32 | EXD_FLOW_PROJID);
		}
		break;
	case AC_FLOW_UID:
		(void) ea_attach_item(record, &fu->fu_userid,
		    sizeof (uint32_t), EXT_UINT32 | EXD_FLOW_UID);
		break;
	case AC_FLOW_ANAME:
		(void) ea_attach_item(record, fu->fu_aname,
		    strlen(fu->fu_aname) + 1, EXT_STRING | EXD_FLOW_ANAME);
		break;
	default:
		attached = 0;
	}
	return (attached);
}
1523
1524 static ea_object_t *
exacct_assemble_flow_record(flow_usage_t * fu,ulong_t * mask,ea_catalog_t record_type)1525 exacct_assemble_flow_record(flow_usage_t *fu, ulong_t *mask,
1526 ea_catalog_t record_type)
1527 {
1528 int res, count;
1529 ea_object_t *record;
1530
1531 /*
1532 * Assemble usage values into group.
1533 */
1534 record = ea_alloc_group(EXT_GROUP | EXC_DEFAULT | record_type);
1535 for (res = 1, count = 0; res <= AC_FLOW_MAX_RES; res++)
1536 if (BT_TEST(mask, res))
1537 count += exacct_attach_flow_item(fu, record, res);
1538 if (count == 0) {
1539 ea_free_object(record, EUP_ALLOC);
1540 record = NULL;
1541 }
1542 return (record);
1543 }
1544
1545 int
exacct_assemble_flow_usage(ac_info_t * ac_flow,flow_usage_t * fu,int (* callback)(ac_info_t *,void *,size_t,void *,size_t,size_t *),void * ubuf,size_t ubufsize,size_t * actual)1546 exacct_assemble_flow_usage(ac_info_t *ac_flow, flow_usage_t *fu,
1547 int (*callback)(ac_info_t *, void *, size_t, void *, size_t, size_t *),
1548 void *ubuf, size_t ubufsize, size_t *actual)
1549 {
1550 ulong_t mask[AC_MASK_SZ];
1551 ea_object_t *flow_usage;
1552 ea_catalog_t record_type;
1553 void *buf;
1554 size_t bufsize;
1555 int ret;
1556
1557 mutex_enter(&ac_flow->ac_lock);
1558 if (ac_flow->ac_state == AC_OFF) {
1559 mutex_exit(&ac_flow->ac_lock);
1560 return (ENOTACTIVE);
1561 }
1562 bt_copy(&ac_flow->ac_mask[0], mask, AC_MASK_SZ);
1563 mutex_exit(&ac_flow->ac_lock);
1564
1565 record_type = EXD_GROUP_FLOW;
1566
1567 flow_usage = exacct_assemble_flow_record(fu, mask, record_type);
1568 if (flow_usage == NULL) {
1569 return (0);
1570 }
1571
1572 /*
1573 * Pack object into buffer and pass to callback.
1574 */
1575 bufsize = ea_pack_object(flow_usage, NULL, 0);
1576 buf = kmem_alloc(bufsize, KM_NOSLEEP);
1577 if (buf == NULL) {
1578 return (ENOMEM);
1579 }
1580
1581 (void) ea_pack_object(flow_usage, buf, bufsize);
1582
1583 ret = callback(ac_flow, ubuf, ubufsize, buf, bufsize, actual);
1584
1585 /*
1586 * Free all previously allocations.
1587 */
1588 kmem_free(buf, bufsize);
1589 ea_free_object(flow_usage, EUP_ALLOC);
1590 return (ret);
1591 }
1592
/*
 * Commit an accounting record for the flow described by arg to the
 * global zone's flow accounting file, if flow accounting is active.
 * Errors from the assemble/write path are deliberately discarded, as
 * flow record commits are best-effort.
 */
void
exacct_commit_flow(void *arg)
{
	flow_usage_t *f = (flow_usage_t *)arg;
	size_t size;
	ulong_t mask[AC_MASK_SZ];
	struct exacct_globals *acg;
	ac_info_t *ac_flow;

	if (exacct_zone_key == ZONE_KEY_UNINITIALIZED) {
		/*
		 * acctctl module not loaded. Nothing to do.
		 */
		return;
	}

	/*
	 * Even though each zone nominally has its own flow accounting settings
	 * (ac_flow), these are only maintained by and for the global zone.
	 *
	 * If this were to change in the future, this function should grow a
	 * second zoneid (or zone) argument, and use the corresponding zone's
	 * settings rather than always using those of the global zone.
	 */
	acg = zone_getspecific(exacct_zone_key, global_zone);
	ac_flow = &acg->ac_flow;

	/*
	 * Early-out if flow accounting is off; the mask copied here is
	 * unused (the assemble function re-copies it under the lock).
	 */
	mutex_enter(&ac_flow->ac_lock);
	if (ac_flow->ac_state == AC_OFF) {
		mutex_exit(&ac_flow->ac_lock);
		return;
	}
	bt_copy(&ac_flow->ac_mask[0], mask, AC_MASK_SZ);
	mutex_exit(&ac_flow->ac_lock);

	(void) exacct_assemble_flow_usage(ac_flow, f, exacct_commit_callback,
	    NULL, 0, &size);
}
1631
1632 /*
1633 * int exacct_tag_task(task_t *, void *, size_t, int)
1634 *
1635 * Overview
1636 * exacct_tag_task() provides the exacct record construction and writing
1637 * support required by putacct(2) for task entities.
1638 *
1639 * Return values
1640 * The result of the write operation is returned, unless the extended
1641 * accounting facility is not active, in which case ENOTACTIVE is returned.
1642 *
1643 * Caller's context
1644 * Suitable for KM_SLEEP allocations.
1645 */
int
exacct_tag_task(ac_info_t *ac_task, task_t *tk, void *ubuf, size_t ubufsz,
    int flags)
{
	int error = 0;
	void *buf;
	size_t bufsize;
	ea_catalog_t cat;
	ea_object_t *tag;

	/*
	 * Require task accounting to be on with an open file.
	 * NOTE(review): the lock is dropped before the write below, so
	 * state could change in between — presumably exacct_vn_write()
	 * revalidates under the lock; confirm.
	 */
	mutex_enter(&ac_task->ac_lock);
	if (ac_task->ac_state == AC_OFF || ac_task->ac_vnode == NULL) {
		mutex_exit(&ac_task->ac_lock);
		return (ENOTACTIVE);
	}
	mutex_exit(&ac_task->ac_lock);

	/*
	 * Build the tag group: task id, node name, and the user-supplied
	 * payload, attached raw or as an embedded exacct object per flags.
	 * (A size of 0 lets ea_attach_item derive it from the catalog type.)
	 */
	tag = ea_alloc_group(EXT_GROUP | EXC_DEFAULT | EXD_GROUP_TASK_TAG);
	(void) ea_attach_item(tag, &tk->tk_tkid, 0,
	    EXT_UINT32 | EXC_DEFAULT | EXD_TASK_TASKID);
	(void) ea_attach_item(tag, tk->tk_zone->zone_nodename, 0,
	    EXT_STRING | EXC_DEFAULT | EXD_TASK_HOSTNAME);
	if (flags == EP_RAW)
		cat = EXT_RAW | EXC_DEFAULT | EXD_TASK_TAG;
	else
		cat = EXT_EXACCT_OBJECT | EXC_DEFAULT | EXD_TASK_TAG;
	(void) ea_attach_item(tag, ubuf, ubufsz, cat);

	/* Pack and write the record, then release all allocations. */
	bufsize = ea_pack_object(tag, NULL, 0);
	buf = kmem_alloc(bufsize, KM_SLEEP);
	(void) ea_pack_object(tag, buf, bufsize);
	error = exacct_vn_write(ac_task, buf, bufsize);
	kmem_free(buf, bufsize);
	ea_free_object(tag, EUP_ALLOC);
	return (error);
}
1682
1683 /*
1684 * exacct_tag_proc(pid_t, taskid_t, void *, size_t, int, char *)
1685 *
1686 * Overview
1687 * exacct_tag_proc() provides the exacct record construction and writing
1688 * support required by putacct(2) for processes.
1689 *
1690 * Return values
1691 * The result of the write operation is returned, unless the extended
1692 * accounting facility is not active, in which case ENOTACTIVE is returned.
1693 *
1694 * Caller's context
1695 * Suitable for KM_SLEEP allocations.
1696 */
int
exacct_tag_proc(ac_info_t *ac_proc, pid_t pid, taskid_t tkid, void *ubuf,
    size_t ubufsz, int flags, const char *hostname)
{
	int error = 0;
	void *buf;
	size_t bufsize;
	ea_catalog_t cat;
	ea_object_t *tag;

	/* Require process accounting to be on with an open file. */
	mutex_enter(&ac_proc->ac_lock);
	if (ac_proc->ac_state == AC_OFF || ac_proc->ac_vnode == NULL) {
		mutex_exit(&ac_proc->ac_lock);
		return (ENOTACTIVE);
	}
	mutex_exit(&ac_proc->ac_lock);

	/*
	 * Build the tag group: pid, task id, host name, and the
	 * user-supplied payload, attached raw or as an embedded exacct
	 * object per flags.  (A size of 0 lets ea_attach_item derive it
	 * from the catalog type.)
	 */
	tag = ea_alloc_group(EXT_GROUP | EXC_DEFAULT | EXD_GROUP_PROC_TAG);
	(void) ea_attach_item(tag, &pid, sizeof (uint32_t),
	    EXT_UINT32 | EXC_DEFAULT | EXD_PROC_PID);
	(void) ea_attach_item(tag, &tkid, 0,
	    EXT_UINT32 | EXC_DEFAULT | EXD_TASK_TASKID);
	(void) ea_attach_item(tag, (void *)hostname, 0,
	    EXT_STRING | EXC_DEFAULT | EXD_TASK_HOSTNAME);
	if (flags == EP_RAW)
		cat = EXT_RAW | EXC_DEFAULT | EXD_PROC_TAG;
	else
		cat = EXT_EXACCT_OBJECT | EXC_DEFAULT | EXD_PROC_TAG;
	(void) ea_attach_item(tag, ubuf, ubufsz, cat);

	/* Pack and write the record, then release all allocations. */
	bufsize = ea_pack_object(tag, NULL, 0);
	buf = kmem_alloc(bufsize, KM_SLEEP);
	(void) ea_pack_object(tag, buf, bufsize);
	error = exacct_vn_write(ac_proc, buf, bufsize);
	kmem_free(buf, bufsize);
	ea_free_object(tag, EUP_ALLOC);
	return (error);
}
1735
1736 /*
1737 * void exacct_init(void)
1738 *
1739 * Overview
1740 * Initialized the extended accounting subsystem.
1741 *
1742 * Return values
1743 * None.
1744 *
1745 * Caller's context
1746 * Suitable for KM_SLEEP allocations.
1747 */
void
exacct_init()
{
	/* Commit work is dispatched on the shared system task queue. */
	exacct_queue = system_taskq;
	/* Object cache for ea_object_t allocations used by libexacct-in-kernel. */
	exacct_object_cache = kmem_cache_create("exacct_object_cache",
	    sizeof (ea_object_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
	/* Start the thread that commits task accounting records. */
	task_commit_thread_init();
}
1756
1757 /*
1758 * exacct_snapshot_proc_mstate() copies a process's microstate accounting data
1759 * and resource usage counters into a given task_usage_t. It differs from
1760 * exacct_copy_proc_mstate() in that here a) we are copying to a task_usage_t,
1761 * b) p_lock will have been acquired earlier in the call path and c) we
1762 * are here including the process's user and system times.
1763 */
static void
exacct_snapshot_proc_mstate(proc_t *p, task_usage_t *tu)
{
	/* Aggregate user and system CPU time across the process. */
	tu->tu_utime  = mstate_aggr_state(p, LMS_USER);
	tu->tu_stime  = mstate_aggr_state(p, LMS_SYSTEM);
	/* Process-level rusage counters, copied (not added) into tu. */
	tu->tu_minflt = p->p_ru.minflt;
	tu->tu_majflt = p->p_ru.majflt;
	tu->tu_sndmsg = p->p_ru.msgsnd;
	tu->tu_rcvmsg = p->p_ru.msgrcv;
	tu->tu_ioch   = p->p_ru.ioch;
	tu->tu_iblk   = p->p_ru.inblock;
	tu->tu_oblk   = p->p_ru.oublock;
	tu->tu_vcsw   = p->p_ru.nvcsw;
	tu->tu_icsw   = p->p_ru.nivcsw;
	tu->tu_nsig   = p->p_ru.nsignals;
	tu->tu_nswp   = p->p_ru.nswap;
	tu->tu_nscl   = p->p_ru.sysc;
}
1782
1783 /*
1784 * void exacct_move_mstate(proc_t *, task_t *, task_t *)
1785 *
1786 * Overview
1787 * exacct_move_mstate() is called by task_change() and accounts for
1788 * a process's resource usage when it is moved from one task to another.
1789 *
1790 * The process's usage at this point is recorded in the new task so
1791 * that it can be excluded from the calculation of resources consumed
1792 * by that task.
1793 *
1794 * The resource usage inherited by the new task is also added to the
1795 * aggregate maintained by the old task for processes that have exited.
1796 *
1797 * Return values
1798 * None.
1799 *
1800 * Caller's context
1801 * pidlock and p_lock held across exacct_move_mstate().
1802 */
void
exacct_move_mstate(proc_t *p, task_t *oldtk, task_t *newtk)
{
	task_usage_t tu;

	/* Take a snapshot of this process's mstate and RU counters */
	exacct_snapshot_proc_mstate(p, &tu);

	/*
	 * Use the snapshot to increment the aggregate usage of the old
	 * task, and the inherited usage of the new one.  Each task's
	 * usage structures are protected by its own tk_usage_lock.
	 */
	mutex_enter(&oldtk->tk_usage_lock);
	exacct_add_task_mstate(oldtk->tk_usage, &tu);
	mutex_exit(&oldtk->tk_usage_lock);
	mutex_enter(&newtk->tk_usage_lock);
	exacct_add_task_mstate(newtk->tk_inherited, &tu);
	mutex_exit(&newtk->tk_usage_lock);
}
1822