1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 #include <sys/proc.h>
27 #include <sys/systm.h>
28 #include <sys/param.h>
29 #include <sys/kmem.h>
30 #include <sys/sysmacros.h>
31 #include <sys/types.h>
32 #include <sys/cmn_err.h>
33 #include <sys/user.h>
34 #include <sys/cred.h>
35 #include <sys/vnode.h>
36 #include <sys/file.h>
37 #include <sys/pathname.h>
38 #include <sys/modctl.h>
39 #include <sys/acctctl.h>
40 #include <sys/bitmap.h>
41 #include <sys/exacct.h>
42 #include <sys/policy.h>
43
44 /*
45 * acctctl(2)
46 *
47 * acctctl() provides the administrative interface to the extended accounting
48 * subsystem. The process and task accounting facilities are configurable:
49 * resources can be individually specified for recording in the appropriate
50 * accounting file.
51 *
 * The current implementation of acctctl() requires that the process, task,
 * flow, and net accounting files be distinct across all zones.
54 *
55 * Locking
56 * Each accounting species has an ac_info_t which contains a mutex,
57 * used to protect the ac_info_t's contents, and to serialize access to the
58 * appropriate file.
59 */
60
/*
 * List of every zone's exacct_globals.  Entries are appended by
 * exacct_zone_init() and removed by exacct_zone_fini().
 */
static list_t exacct_globals_list;
/* Held while inserting into, removing from, or walking exacct_globals_list. */
static kmutex_t exacct_globals_list_lock;
63
64 static int
ac_state_set(ac_info_t * info,void * buf,size_t bufsz)65 ac_state_set(ac_info_t *info, void *buf, size_t bufsz)
66 {
67 int state;
68
69 if (buf == NULL || (bufsz != sizeof (int)))
70 return (EINVAL);
71
72 if (copyin(buf, &state, bufsz) != 0)
73 return (EFAULT);
74
75 if (state != AC_ON && state != AC_OFF)
76 return (EINVAL);
77
78 mutex_enter(&info->ac_lock);
79 info->ac_state = state;
80 mutex_exit(&info->ac_lock);
81 return (0);
82 }
83
84 static int
ac_state_get(ac_info_t * info,void * buf,size_t bufsz)85 ac_state_get(ac_info_t *info, void *buf, size_t bufsz)
86 {
87 if (buf == NULL || (bufsz != sizeof (int)))
88 return (EINVAL);
89
90 mutex_enter(&info->ac_lock);
91 if (copyout(&info->ac_state, buf, bufsz) != 0) {
92 mutex_exit(&info->ac_lock);
93 return (EFAULT);
94 }
95 mutex_exit(&info->ac_lock);
96 return (0);
97 }
98
99 static boolean_t
ac_file_in_use(vnode_t * vp)100 ac_file_in_use(vnode_t *vp)
101 {
102 boolean_t in_use = B_FALSE;
103 struct exacct_globals *acg;
104
105 if (vp == NULL)
106 return (B_FALSE);
107 mutex_enter(&exacct_globals_list_lock);
108 /*
109 * Start off by grabbing all locks.
110 */
111 for (acg = list_head(&exacct_globals_list); acg != NULL;
112 acg = list_next(&exacct_globals_list, acg)) {
113 mutex_enter(&acg->ac_proc.ac_lock);
114 mutex_enter(&acg->ac_task.ac_lock);
115 mutex_enter(&acg->ac_flow.ac_lock);
116 mutex_enter(&acg->ac_net.ac_lock);
117 }
118
119 for (acg = list_head(&exacct_globals_list); !in_use && acg != NULL;
120 acg = list_next(&exacct_globals_list, acg)) {
121 /*
122 * We need to verify that we aren't already using this file for
123 * accounting in any zone.
124 */
125 if (vn_compare(acg->ac_proc.ac_vnode, vp) ||
126 vn_compare(acg->ac_task.ac_vnode, vp) ||
127 vn_compare(acg->ac_flow.ac_vnode, vp) ||
128 vn_compare(acg->ac_net.ac_vnode, vp))
129 in_use = B_TRUE;
130 }
131
132 /*
133 * Drop all locks.
134 */
135 for (acg = list_head(&exacct_globals_list); acg != NULL;
136 acg = list_next(&exacct_globals_list, acg)) {
137 mutex_exit(&acg->ac_proc.ac_lock);
138 mutex_exit(&acg->ac_task.ac_lock);
139 mutex_exit(&acg->ac_flow.ac_lock);
140 mutex_exit(&acg->ac_net.ac_lock);
141 }
142 mutex_exit(&exacct_globals_list_lock);
143 return (in_use);
144 }
145
146 static int
ac_file_set(ac_info_t * info,void * ubuf,size_t bufsz)147 ac_file_set(ac_info_t *info, void *ubuf, size_t bufsz)
148 {
149 int error = 0;
150 void *kbuf;
151 void *namebuf;
152 int namelen;
153 vnode_t *vp;
154 void *hdr;
155 size_t hdrsize;
156 vattr_t va;
157
158 if (ubuf == NULL) {
159 mutex_enter(&info->ac_lock);
160
161 /*
162 * Closing accounting file
163 */
164 if (info->ac_vnode != NULL) {
165 error = VOP_CLOSE(info->ac_vnode, FWRITE, 1, 0,
166 CRED(), NULL);
167 if (error) {
168 mutex_exit(&info->ac_lock);
169 return (error);
170 }
171 VN_RELE(info->ac_vnode);
172 info->ac_vnode = NULL;
173 }
174 if (info->ac_file != NULL) {
175 kmem_free(info->ac_file, strlen(info->ac_file) + 1);
176 info->ac_file = NULL;
177 }
178
179 mutex_exit(&info->ac_lock);
180 return (error);
181 }
182
183 if (bufsz < 2 || bufsz > MAXPATHLEN)
184 return (EINVAL);
185
186 /*
187 * We have to copy in the whole buffer since we can't tell the length
188 * of the string in user's address space.
189 */
190 kbuf = kmem_zalloc(bufsz, KM_SLEEP);
191 if ((error = copyinstr((char *)ubuf, (char *)kbuf, bufsz, NULL)) != 0) {
192 kmem_free(kbuf, bufsz);
193 return (error);
194 }
195 if (*((char *)kbuf) != '/') {
196 kmem_free(kbuf, bufsz);
197 return (EINVAL);
198 }
199
200 /*
201 * Now, allocate the space where we are going to save the
202 * name of the accounting file and kmem_free kbuf. We have to do this
203 * now because it is not good to sleep in kmem_alloc() while
204 * holding ac_info's lock.
205 */
206 namelen = strlen(kbuf) + 1;
207 namebuf = kmem_alloc(namelen, KM_SLEEP);
208 (void) strcpy(namebuf, kbuf);
209 kmem_free(kbuf, bufsz);
210
211 /*
212 * Check if this file already exists.
213 */
214 error = lookupname(namebuf, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp);
215
216 /*
217 * Check if the file is already in use.
218 */
219 if (!error) {
220 if (ac_file_in_use(vp)) {
221 /*
222 * If we're already using it then return EBUSY
223 */
224 kmem_free(namebuf, namelen);
225 VN_RELE(vp);
226 return (EBUSY);
227 }
228 VN_RELE(vp);
229 }
230
231 /*
232 * Create an exacct header here because exacct_create_header() may
233 * sleep so we should not be holding ac_lock. At this point we cannot
234 * reliably know if we need the header or not, so we may end up not
235 * using the header.
236 */
237 hdr = exacct_create_header(&hdrsize);
238
239 /*
240 * Now, grab info's ac_lock and try to set up everything.
241 */
242 mutex_enter(&info->ac_lock);
243
244 if ((error = vn_open(namebuf, UIO_SYSSPACE,
245 FCREAT | FWRITE | FOFFMAX, 0600, &vp, CRCREAT, 0)) != 0) {
246 mutex_exit(&info->ac_lock);
247 kmem_free(namebuf, namelen);
248 kmem_free(hdr, hdrsize);
249 return (error);
250 }
251
252 if (vp->v_type != VREG) {
253 VN_RELE(vp);
254 mutex_exit(&info->ac_lock);
255 kmem_free(namebuf, namelen);
256 kmem_free(hdr, hdrsize);
257 return (EACCES);
258 }
259
260 if (info->ac_vnode != NULL) {
261 /*
262 * Switch from an old file to a new file by swapping
263 * their vnode pointers.
264 */
265 vnode_t *oldvp;
266 oldvp = info->ac_vnode;
267 info->ac_vnode = vp;
268 vp = oldvp;
269 } else {
270 /*
271 * Start writing accounting records to a new file.
272 */
273 info->ac_vnode = vp;
274 vp = NULL;
275 }
276 if (vp) {
277 /*
278 * We still need to close the old file.
279 */
280 if ((error = VOP_CLOSE(vp, FWRITE, 1, 0, CRED(), NULL)) != 0) {
281 VN_RELE(vp);
282 mutex_exit(&info->ac_lock);
283 kmem_free(namebuf, namelen);
284 kmem_free(hdr, hdrsize);
285 return (error);
286 }
287 VN_RELE(vp);
288 if (info->ac_file != NULL) {
289 kmem_free(info->ac_file,
290 strlen(info->ac_file) + 1);
291 info->ac_file = NULL;
292 }
293 }
294 info->ac_file = namebuf;
295
296 /*
297 * Write the exacct header only if the file is empty.
298 */
299 error = VOP_GETATTR(info->ac_vnode, &va, AT_SIZE, CRED(), NULL);
300 if (error == 0 && va.va_size == 0)
301 error = exacct_write_header(info, hdr, hdrsize);
302
303 mutex_exit(&info->ac_lock);
304 kmem_free(hdr, hdrsize);
305 return (error);
306 }
307
308 static int
ac_file_get(ac_info_t * info,void * buf,size_t bufsz)309 ac_file_get(ac_info_t *info, void *buf, size_t bufsz)
310 {
311 int error = 0;
312 vnode_t *vnode;
313 char *file;
314
315 mutex_enter(&info->ac_lock);
316 file = info->ac_file;
317 vnode = info->ac_vnode;
318
319 if (file == NULL || vnode == NULL) {
320 mutex_exit(&info->ac_lock);
321 return (ENOTACTIVE);
322 }
323
324 if (strlen(file) >= bufsz)
325 error = ENOMEM;
326 else
327 error = copyoutstr(file, buf, MAXPATHLEN, NULL);
328
329 mutex_exit(&info->ac_lock);
330 return (error);
331 }
332
333 static int
ac_res_set(ac_info_t * info,void * buf,size_t bufsz,int maxres)334 ac_res_set(ac_info_t *info, void *buf, size_t bufsz, int maxres)
335 {
336 ac_res_t *res;
337 ac_res_t *tmp;
338 ulong_t *maskp;
339 int id;
340 uint_t counter = 0;
341
342 /*
343 * Validate that a non-zero buffer, sized within limits and to an
344 * integral number of ac_res_t's has been specified.
345 */
346 if (bufsz == 0 ||
347 bufsz > sizeof (ac_res_t) * (AC_MAX_RES + 1) ||
348 (bufsz / sizeof (ac_res_t)) * sizeof (ac_res_t) != bufsz)
349 return (EINVAL);
350
351 tmp = res = kmem_alloc(bufsz, KM_SLEEP);
352 if (copyin(buf, res, bufsz) != 0) {
353 kmem_free(res, bufsz);
354 return (EFAULT);
355 }
356
357 maskp = (ulong_t *)&info->ac_mask;
358
359 mutex_enter(&info->ac_lock);
360 while ((id = tmp->ar_id) != AC_NONE && counter < maxres + 1) {
361 if (id > maxres || id < 0) {
362 mutex_exit(&info->ac_lock);
363 kmem_free(res, bufsz);
364 return (EINVAL);
365 }
366 if (tmp->ar_state == AC_ON) {
367 BT_SET(maskp, id);
368 } else if (tmp->ar_state == AC_OFF) {
369 BT_CLEAR(maskp, id);
370 } else {
371 mutex_exit(&info->ac_lock);
372 kmem_free(res, bufsz);
373 return (EINVAL);
374 }
375 tmp++;
376 counter++;
377 }
378 mutex_exit(&info->ac_lock);
379 kmem_free(res, bufsz);
380 return (0);
381 }
382
383 static int
ac_res_get(ac_info_t * info,void * buf,size_t bufsz,int maxres)384 ac_res_get(ac_info_t *info, void *buf, size_t bufsz, int maxres)
385 {
386 int error = 0;
387 ac_res_t *res;
388 ac_res_t *tmp;
389 size_t ressz = sizeof (ac_res_t) * (maxres + 1);
390 ulong_t *maskp;
391 int id;
392
393 if (bufsz < ressz)
394 return (EINVAL);
395 tmp = res = kmem_alloc(ressz, KM_SLEEP);
396
397 mutex_enter(&info->ac_lock);
398 maskp = (ulong_t *)&info->ac_mask;
399 for (id = 1; id <= maxres; id++) {
400 tmp->ar_id = id;
401 tmp->ar_state = BT_TEST(maskp, id);
402 tmp++;
403 }
404 tmp->ar_id = AC_NONE;
405 tmp->ar_state = AC_OFF;
406 mutex_exit(&info->ac_lock);
407 error = copyout(res, buf, ressz);
408 kmem_free(res, ressz);
409 return (error);
410 }
411
412 /*
413 * acctctl()
414 *
415 * Overview
416 * acctctl() is the entry point for the acctctl(2) system call.
417 *
418 * Return values
419 * On successful completion, return 0; otherwise -1 is returned and errno is
420 * set appropriately.
421 *
422 * Caller's context
423 * Called from the system call path.
424 */
425 int
acctctl(int cmd,void * buf,size_t bufsz)426 acctctl(int cmd, void *buf, size_t bufsz)
427 {
428 int error = 0;
429 int mode = AC_MODE(cmd);
430 int option = AC_OPTION(cmd);
431 int maxres;
432 ac_info_t *info;
433 zone_t *zone = curproc->p_zone;
434 struct exacct_globals *acg;
435
436 acg = zone_getspecific(exacct_zone_key, zone);
437 /*
438 * exacct_zone_key and associated per-zone state were initialized when
439 * the module was loaded.
440 */
441 ASSERT(exacct_zone_key != ZONE_KEY_UNINITIALIZED);
442 ASSERT(acg != NULL);
443
444 switch (mode) { /* sanity check */
445 case AC_TASK:
446 info = &acg->ac_task;
447 maxres = AC_TASK_MAX_RES;
448 break;
449 case AC_PROC:
450 info = &acg->ac_proc;
451 maxres = AC_PROC_MAX_RES;
452 break;
453 /*
454 * Flow/net accounting isn't configurable in non-global
455 * zones, but we have this field on a per-zone basis for future
456 * expansion as well as the ability to return default "unset"
457 * values for the various AC_*_GET queries. AC_*_SET commands
458 * fail with EPERM for AC_FLOW and AC_NET in non-global zones.
459 */
460 case AC_FLOW:
461 info = &acg->ac_flow;
462 maxres = AC_FLOW_MAX_RES;
463 break;
464 case AC_NET:
465 info = &acg->ac_net;
466 maxres = AC_NET_MAX_RES;
467 break;
468 default:
469 return (set_errno(EINVAL));
470 }
471
472 switch (option) {
473 case AC_STATE_SET:
474 if ((error = secpolicy_acct(CRED())) != 0)
475 break;
476 if ((mode == AC_FLOW || mode == AC_NET) &&
477 getzoneid() != GLOBAL_ZONEID) {
478 error = EPERM;
479 break;
480 }
481 error = ac_state_set(info, buf, bufsz);
482 break;
483 case AC_STATE_GET:
484 error = ac_state_get(info, buf, bufsz);
485 break;
486 case AC_FILE_SET:
487 if ((error = secpolicy_acct(CRED())) != 0)
488 break;
489 if ((mode == AC_FLOW || mode == AC_NET) &&
490 getzoneid() != GLOBAL_ZONEID) {
491 error = EPERM;
492 break;
493 }
494 error = ac_file_set(info, buf, bufsz);
495 break;
496 case AC_FILE_GET:
497 error = ac_file_get(info, buf, bufsz);
498 break;
499 case AC_RES_SET:
500 if ((error = secpolicy_acct(CRED())) != 0)
501 break;
502 if ((mode == AC_FLOW || mode == AC_NET) &&
503 getzoneid() != GLOBAL_ZONEID) {
504 error = EPERM;
505 break;
506 }
507 error = ac_res_set(info, buf, bufsz, maxres);
508 break;
509 case AC_RES_GET:
510 error = ac_res_get(info, buf, bufsz, maxres);
511 break;
512 default:
513 return (set_errno(EINVAL));
514 }
515 if (error)
516 return (set_errno(error));
517 return (0);
518 }
519
/*
 * System call table entry for acctctl().  The leading 3 matches the number
 * of parameters acctctl() takes.
 * NOTE(review): SE_NOUNLOAD presumably marks the system call as not
 * unloadable, which would be consistent with _fini() always returning
 * EBUSY -- confirm against the sysent flag definitions.
 */
static struct sysent ac_sysent = {
	3,
	SE_NOUNLOAD | SE_ARGC | SE_32RVAL1,
	acctctl
};

/* Module linkage element describing the native system call. */
static struct modlsys modlsys = {
	&mod_syscallops,
	"acctctl system call",
	&ac_sysent
};

#ifdef _SYSCALL32_IMPL
/* Same entry point registered with the 32-bit system call operations. */
static struct modlsys modlsys32 = {
	&mod_syscallops32,
	"32-bit acctctl system call",
	&ac_sysent
};
#endif

/* NULL-terminated linkage list used by _init() and _info(). */
static struct modlinkage modlinkage = {
	MODREV_1,
	&modlsys,
#ifdef _SYSCALL32_IMPL
	&modlsys32,
#endif
	NULL
};
548
549 /* ARGSUSED */
550 static void *
exacct_zone_init(zoneid_t zoneid)551 exacct_zone_init(zoneid_t zoneid)
552 {
553 struct exacct_globals *acg;
554
555 acg = kmem_zalloc(sizeof (*acg), KM_SLEEP);
556 mutex_enter(&exacct_globals_list_lock);
557 list_insert_tail(&exacct_globals_list, acg);
558 mutex_exit(&exacct_globals_list_lock);
559 return (acg);
560 }
561
562 static void
exacct_free_info(ac_info_t * info)563 exacct_free_info(ac_info_t *info)
564 {
565 mutex_enter(&info->ac_lock);
566 if (info->ac_vnode) {
567 (void) VOP_CLOSE(info->ac_vnode, FWRITE, 1, 0, kcred, NULL);
568 VN_RELE(info->ac_vnode);
569 kmem_free(info->ac_file, strlen(info->ac_file) + 1);
570 }
571 info->ac_state = AC_OFF;
572 info->ac_vnode = NULL;
573 info->ac_file = NULL;
574 mutex_exit(&info->ac_lock);
575 }
576
577 /* ARGSUSED */
578 static void
exacct_zone_shutdown(zoneid_t zoneid,void * data)579 exacct_zone_shutdown(zoneid_t zoneid, void *data)
580 {
581 struct exacct_globals *acg = data;
582
583 /*
584 * The accounting files need to be closed during shutdown rather than
585 * destroy, since otherwise the filesystem they reside on may fail to
586 * unmount, thus causing the entire zone halt/reboot to fail.
587 */
588 exacct_free_info(&acg->ac_proc);
589 exacct_free_info(&acg->ac_task);
590 exacct_free_info(&acg->ac_flow);
591 exacct_free_info(&acg->ac_net);
592 }
593
594 /* ARGSUSED */
595 static void
exacct_zone_fini(zoneid_t zoneid,void * data)596 exacct_zone_fini(zoneid_t zoneid, void *data)
597 {
598 struct exacct_globals *acg = data;
599
600 mutex_enter(&exacct_globals_list_lock);
601 list_remove(&exacct_globals_list, acg);
602 mutex_exit(&exacct_globals_list_lock);
603
604 mutex_destroy(&acg->ac_proc.ac_lock);
605 mutex_destroy(&acg->ac_task.ac_lock);
606 mutex_destroy(&acg->ac_flow.ac_lock);
607 mutex_destroy(&acg->ac_net.ac_lock);
608 kmem_free(acg, sizeof (*acg));
609 }
610
611 int
_init()612 _init()
613 {
614 int error;
615
616 mutex_init(&exacct_globals_list_lock, NULL, MUTEX_DEFAULT, NULL);
617 list_create(&exacct_globals_list, sizeof (struct exacct_globals),
618 offsetof(struct exacct_globals, ac_link));
619 zone_key_create(&exacct_zone_key, exacct_zone_init,
620 exacct_zone_shutdown, exacct_zone_fini);
621
622 if ((error = mod_install(&modlinkage)) != 0) {
623 (void) zone_key_delete(exacct_zone_key);
624 exacct_zone_key = ZONE_KEY_UNINITIALIZED;
625 mutex_destroy(&exacct_globals_list_lock);
626 list_destroy(&exacct_globals_list);
627 }
628 return (error);
629 }
630
631 int
_info(struct modinfo * modinfop)632 _info(struct modinfo *modinfop)
633 {
634 return (mod_info(&modlinkage, modinfop));
635 }
636
/*
 * _fini()
 *
 * Module unload entry point.  Unconditionally returns EBUSY.
 * NOTE(review): presumably this keeps the module permanently loaded, in
 * line with the SE_NOUNLOAD flag on ac_sysent -- confirm.
 */
int
_fini()
{
	return (EBUSY);
}
642