xref: /illumos-gate/usr/src/uts/common/syscall/acctctl.c (revision 3cf6f95f0e20ed31de99608fdb0a120190d5438f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/proc.h>
29 #include <sys/systm.h>
30 #include <sys/param.h>
31 #include <sys/kmem.h>
32 #include <sys/sysmacros.h>
33 #include <sys/types.h>
34 #include <sys/cmn_err.h>
35 #include <sys/user.h>
36 #include <sys/cred.h>
37 #include <sys/vnode.h>
38 #include <sys/file.h>
39 #include <sys/pathname.h>
40 #include <sys/modctl.h>
41 #include <sys/acctctl.h>
42 #include <sys/bitmap.h>
43 #include <sys/exacct.h>
44 #include <sys/policy.h>
45 
46 /*
47  * acctctl(2)
48  *
49  *   acctctl() provides the administrative interface to the extended accounting
50  *   subsystem.  The process and task accounting facilities are configurable:
51  *   resources can be individually specified for recording in the appropriate
52  *   accounting file.
53  *
54  *   The current implementation of acctctl() requires that the process and task
55  *   and flow files be distinct across all zones.
56  *
57  * Locking
58  *   Each accounting species has an ac_info_t which contains a mutex,
59  *   used to protect the ac_info_t's contents, and to serialize access to the
60  *   appropriate file.
61  */
62 
/*
 * List of every zone's struct exacct_globals.  Entries are added in
 * exacct_zone_init() and removed in exacct_zone_fini(); ac_file_in_use()
 * walks the list to check a vnode against every zone's accounting files.
 */
static list_t exacct_globals_list;
/* Protects exacct_globals_list; held across every insert, remove and walk. */
static kmutex_t exacct_globals_list_lock;
65 
66 static int
67 ac_state_set(ac_info_t *info, void *buf, size_t bufsz)
68 {
69 	int state;
70 
71 	if (buf == NULL || (bufsz != sizeof (int)))
72 		return (EINVAL);
73 
74 	if (copyin(buf, &state, bufsz) != 0)
75 		return (EFAULT);
76 
77 	if (state != AC_ON && state != AC_OFF)
78 		return (EINVAL);
79 
80 	mutex_enter(&info->ac_lock);
81 	info->ac_state = state;
82 	mutex_exit(&info->ac_lock);
83 	return (0);
84 }
85 
86 static int
87 ac_state_get(ac_info_t *info, void *buf, size_t bufsz)
88 {
89 	if (buf == NULL || (bufsz != sizeof (int)))
90 		return (EINVAL);
91 
92 	mutex_enter(&info->ac_lock);
93 	if (copyout(&info->ac_state, buf, bufsz) != 0) {
94 		mutex_exit(&info->ac_lock);
95 		return (EFAULT);
96 	}
97 	mutex_exit(&info->ac_lock);
98 	return (0);
99 }
100 
101 static boolean_t
102 ac_file_in_use(vnode_t *vp)
103 {
104 	boolean_t in_use = B_FALSE;
105 	struct exacct_globals *acg;
106 
107 	if (vp == NULL)
108 		return (B_FALSE);
109 	mutex_enter(&exacct_globals_list_lock);
110 	/*
111 	 * Start off by grabbing all locks.
112 	 */
113 	for (acg = list_head(&exacct_globals_list); acg != NULL;
114 	    acg = list_next(&exacct_globals_list, acg)) {
115 		mutex_enter(&acg->ac_proc.ac_lock);
116 		mutex_enter(&acg->ac_task.ac_lock);
117 		mutex_enter(&acg->ac_flow.ac_lock);
118 	}
119 
120 	for (acg = list_head(&exacct_globals_list); !in_use && acg != NULL;
121 	    acg = list_next(&exacct_globals_list, acg)) {
122 		/*
123 		 * We need to verify that we aren't already using this file for
124 		 * accounting in any zone.
125 		 */
126 		if (vn_compare(acg->ac_proc.ac_vnode, vp) ||
127 		    vn_compare(acg->ac_task.ac_vnode, vp) ||
128 		    vn_compare(acg->ac_flow.ac_vnode, vp))
129 			in_use = B_TRUE;
130 	}
131 
132 	/*
133 	 * Drop all locks.
134 	 */
135 	for (acg = list_head(&exacct_globals_list); acg != NULL;
136 	    acg = list_next(&exacct_globals_list, acg)) {
137 		mutex_exit(&acg->ac_proc.ac_lock);
138 		mutex_exit(&acg->ac_task.ac_lock);
139 		mutex_exit(&acg->ac_flow.ac_lock);
140 	}
141 	mutex_exit(&exacct_globals_list_lock);
142 	return (in_use);
143 }
144 
145 static int
146 ac_file_set(ac_info_t *info, void *ubuf, size_t bufsz)
147 {
148 	int error = 0;
149 	void *kbuf;
150 	void *namebuf;
151 	int namelen;
152 	vnode_t *vp;
153 	void *hdr;
154 	size_t hdrsize;
155 	vattr_t va;
156 
157 	if (ubuf == NULL) {
158 		mutex_enter(&info->ac_lock);
159 
160 		/*
161 		 * Closing accounting file
162 		 */
163 		if (info->ac_vnode != NULL) {
164 			error = VOP_CLOSE(info->ac_vnode, FWRITE, 1, 0,
165 			    CRED(), NULL);
166 			if (error) {
167 				mutex_exit(&info->ac_lock);
168 				return (error);
169 			}
170 			VN_RELE(info->ac_vnode);
171 			info->ac_vnode = NULL;
172 		}
173 		if (info->ac_file != NULL) {
174 			kmem_free(info->ac_file, strlen(info->ac_file) + 1);
175 			info->ac_file = NULL;
176 		}
177 
178 		mutex_exit(&info->ac_lock);
179 		return (error);
180 	}
181 
182 	if (bufsz < 2 || bufsz > MAXPATHLEN)
183 		return (EINVAL);
184 
185 	/*
186 	 * We have to copy in the whole buffer since we can't tell the length
187 	 * of the string in user's address space.
188 	 */
189 	kbuf = kmem_zalloc(bufsz, KM_SLEEP);
190 	if ((error = copyinstr((char *)ubuf, (char *)kbuf, bufsz, NULL)) != 0) {
191 		kmem_free(kbuf, bufsz);
192 		return (error);
193 	}
194 	if (*((char *)kbuf) != '/') {
195 		kmem_free(kbuf, bufsz);
196 		return (EINVAL);
197 	}
198 
199 	/*
200 	 * Now, allocate the space where we are going to save the
201 	 * name of the accounting file and kmem_free kbuf. We have to do this
202 	 * now because it is not good to sleep in kmem_alloc() while
203 	 * holding ac_info's lock.
204 	 */
205 	namelen = strlen(kbuf) + 1;
206 	namebuf = kmem_alloc(namelen, KM_SLEEP);
207 	(void) strcpy(namebuf, kbuf);
208 	kmem_free(kbuf, bufsz);
209 
210 	/*
211 	 * Check if this file already exists.
212 	 */
213 	error = lookupname(namebuf, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp);
214 
215 	/*
216 	 * Check if the file is already in use.
217 	 */
218 	if (!error) {
219 		if (ac_file_in_use(vp)) {
220 			/*
221 			 * If we're already using it then return EBUSY
222 			 */
223 			kmem_free(namebuf, namelen);
224 			VN_RELE(vp);
225 			return (EBUSY);
226 		}
227 		VN_RELE(vp);
228 	}
229 
230 	/*
231 	 * Create an exacct header here because exacct_create_header() may
232 	 * sleep so we should not be holding ac_lock. At this point we cannot
233 	 * reliably know if we need the header or not, so we may end up not
234 	 * using the header.
235 	 */
236 	hdr = exacct_create_header(&hdrsize);
237 
238 	/*
239 	 * Now, grab info's ac_lock and try to set up everything.
240 	 */
241 	mutex_enter(&info->ac_lock);
242 
243 	if ((error = vn_open(namebuf, UIO_SYSSPACE,
244 	    FCREAT | FWRITE | FOFFMAX, 0600, &vp, CRCREAT, 0)) != 0) {
245 		mutex_exit(&info->ac_lock);
246 		kmem_free(namebuf, namelen);
247 		kmem_free(hdr, hdrsize);
248 		return (error);
249 	}
250 
251 	if (vp->v_type != VREG) {
252 		VN_RELE(vp);
253 		mutex_exit(&info->ac_lock);
254 		kmem_free(namebuf, namelen);
255 		kmem_free(hdr, hdrsize);
256 		return (EACCES);
257 	}
258 
259 	if (info->ac_vnode != NULL) {
260 		/*
261 		 * Switch from an old file to a new file by swapping
262 		 * their vnode pointers.
263 		 */
264 		vnode_t *oldvp;
265 		oldvp = info->ac_vnode;
266 		info->ac_vnode = vp;
267 		vp = oldvp;
268 	} else {
269 		/*
270 		 * Start writing accounting records to a new file.
271 		 */
272 		info->ac_vnode = vp;
273 		vp = NULL;
274 	}
275 	if (vp) {
276 		/*
277 		 * We still need to close the old file.
278 		 */
279 		if ((error = VOP_CLOSE(vp, FWRITE, 1, 0, CRED(), NULL)) != 0) {
280 			VN_RELE(vp);
281 			mutex_exit(&info->ac_lock);
282 			kmem_free(namebuf, namelen);
283 			kmem_free(hdr, hdrsize);
284 			return (error);
285 		}
286 		VN_RELE(vp);
287 		if (info->ac_file != NULL) {
288 			kmem_free(info->ac_file,
289 			    strlen(info->ac_file) + 1);
290 			info->ac_file = NULL;
291 		}
292 	}
293 	info->ac_file = namebuf;
294 
295 	/*
296 	 * Write the exacct header only if the file is empty.
297 	 */
298 	error = VOP_GETATTR(info->ac_vnode, &va, AT_SIZE, CRED(), NULL);
299 	if (error == 0 && va.va_size == 0)
300 		error = exacct_write_header(info, hdr, hdrsize);
301 
302 	mutex_exit(&info->ac_lock);
303 	kmem_free(hdr, hdrsize);
304 	return (error);
305 }
306 
307 static int
308 ac_file_get(ac_info_t *info, void *buf, size_t bufsz)
309 {
310 	int error = 0;
311 	vnode_t *vnode;
312 	char *file;
313 
314 	mutex_enter(&info->ac_lock);
315 	file = info->ac_file;
316 	vnode = info->ac_vnode;
317 
318 	if (file == NULL || vnode == NULL) {
319 		mutex_exit(&info->ac_lock);
320 		return (ENOTACTIVE);
321 	}
322 
323 	if (strlen(file) >= bufsz)
324 		error = ENOMEM;
325 	else
326 		error = copyoutstr(file, buf, MAXPATHLEN, NULL);
327 
328 	mutex_exit(&info->ac_lock);
329 	return (error);
330 }
331 
332 static int
333 ac_res_set(ac_info_t *info, void *buf, size_t bufsz, int maxres)
334 {
335 	ac_res_t *res;
336 	ac_res_t *tmp;
337 	ulong_t *maskp;
338 	int id;
339 	uint_t counter = 0;
340 
341 	/*
342 	 * Validate that a non-zero buffer, sized within limits and to an
343 	 * integral number of ac_res_t's has been specified.
344 	 */
345 	if (bufsz == 0 ||
346 	    bufsz > sizeof (ac_res_t) * (AC_MAX_RES + 1) ||
347 	    (bufsz / sizeof (ac_res_t)) * sizeof (ac_res_t) != bufsz)
348 		return (EINVAL);
349 
350 	tmp = res = kmem_alloc(bufsz, KM_SLEEP);
351 	if (copyin(buf, res, bufsz) != 0) {
352 		kmem_free(res, bufsz);
353 		return (EFAULT);
354 	}
355 
356 	maskp = (ulong_t *)&info->ac_mask;
357 
358 	mutex_enter(&info->ac_lock);
359 	while ((id = tmp->ar_id) != AC_NONE && counter < maxres + 1) {
360 		if (id > maxres || id < 0) {
361 			mutex_exit(&info->ac_lock);
362 			kmem_free(res, bufsz);
363 			return (EINVAL);
364 		}
365 		if (tmp->ar_state == AC_ON) {
366 			BT_SET(maskp, id);
367 		} else if (tmp->ar_state == AC_OFF) {
368 			BT_CLEAR(maskp, id);
369 		} else {
370 			mutex_exit(&info->ac_lock);
371 			kmem_free(res, bufsz);
372 			return (EINVAL);
373 		}
374 		tmp++;
375 		counter++;
376 	}
377 	mutex_exit(&info->ac_lock);
378 	kmem_free(res, bufsz);
379 	return (0);
380 }
381 
382 static int
383 ac_res_get(ac_info_t *info, void *buf, size_t bufsz, int maxres)
384 {
385 	int error = 0;
386 	ac_res_t *res;
387 	ac_res_t *tmp;
388 	size_t ressz = sizeof (ac_res_t) * (maxres + 1);
389 	ulong_t *maskp;
390 	int id;
391 
392 	if (bufsz < ressz)
393 		return (EINVAL);
394 	tmp = res = kmem_alloc(ressz, KM_SLEEP);
395 
396 	mutex_enter(&info->ac_lock);
397 	maskp = (ulong_t *)&info->ac_mask;
398 	for (id = 1; id <= maxres; id++) {
399 		tmp->ar_id = id;
400 		tmp->ar_state = BT_TEST(maskp, id);
401 		tmp++;
402 	}
403 	tmp->ar_id = AC_NONE;
404 	tmp->ar_state = AC_OFF;
405 	mutex_exit(&info->ac_lock);
406 	error = copyout(res, buf, ressz);
407 	kmem_free(res, ressz);
408 	return (error);
409 }
410 
411 /*
412  * acctctl()
413  *
414  * Overview
415  *   acctctl() is the entry point for the acctctl(2) system call.
416  *
417  * Return values
418  *   On successful completion, return 0; otherwise -1 is returned and errno is
419  *   set appropriately.
420  *
421  * Caller's context
422  *   Called from the system call path.
423  */
424 int
425 acctctl(int cmd, void *buf, size_t bufsz)
426 {
427 	int error = 0;
428 	int mode = AC_MODE(cmd);
429 	int option = AC_OPTION(cmd);
430 	int maxres;
431 	ac_info_t *info;
432 	zone_t *zone = curproc->p_zone;
433 	struct exacct_globals *acg;
434 
435 	acg = zone_getspecific(exacct_zone_key, zone);
436 	/*
437 	 * exacct_zone_key and associated per-zone state were initialized when
438 	 * the module was loaded.
439 	 */
440 	ASSERT(exacct_zone_key != ZONE_KEY_UNINITIALIZED);
441 	ASSERT(acg != NULL);
442 
443 	switch (mode) {	/* sanity check */
444 	case AC_TASK:
445 		info = &acg->ac_task;
446 		maxres = AC_TASK_MAX_RES;
447 		break;
448 	case AC_PROC:
449 		info = &acg->ac_proc;
450 		maxres = AC_PROC_MAX_RES;
451 		break;
452 	case AC_FLOW:
453 		/*
454 		 * Flow accounting isn't currently configurable in non-global
455 		 * zones, but we have this field on a per-zone basis for future
456 		 * expansion as well as the ability to return default "unset"
457 		 * values for the various AC_*_GET queries.  AC_*_SET commands
458 		 * fail with EPERM for AC_FLOW in non-global zones.
459 		 */
460 		info = &acg->ac_flow;
461 		maxres = AC_FLOW_MAX_RES;
462 		break;
463 	default:
464 		return (set_errno(EINVAL));
465 	}
466 
467 	switch (option) {
468 	case AC_STATE_SET:
469 		if ((error = secpolicy_acct(CRED())) != 0)
470 			break;
471 		if (mode == AC_FLOW && getzoneid() != GLOBAL_ZONEID) {
472 			error = EPERM;
473 			break;
474 		}
475 		error = ac_state_set(info, buf, bufsz);
476 		break;
477 	case AC_STATE_GET:
478 		error = ac_state_get(info, buf, bufsz);
479 		break;
480 	case AC_FILE_SET:
481 		if ((error = secpolicy_acct(CRED())) != 0)
482 			break;
483 		if (mode == AC_FLOW && getzoneid() != GLOBAL_ZONEID) {
484 			error = EPERM;
485 			break;
486 		}
487 		error = ac_file_set(info, buf, bufsz);
488 		break;
489 	case AC_FILE_GET:
490 		error = ac_file_get(info, buf, bufsz);
491 		break;
492 	case AC_RES_SET:
493 		if ((error = secpolicy_acct(CRED())) != 0)
494 			break;
495 		if (mode == AC_FLOW && getzoneid() != GLOBAL_ZONEID) {
496 			error = EPERM;
497 			break;
498 		}
499 		error = ac_res_set(info, buf, bufsz, maxres);
500 		break;
501 	case AC_RES_GET:
502 		error = ac_res_get(info, buf, bufsz, maxres);
503 		break;
504 	default:
505 		return (set_errno(EINVAL));
506 	}
507 	if (error)
508 		return (set_errno(error));
509 	return (0);
510 }
511 
/*
 * System call table entry for acctctl(): three arguments (cmd, buf, bufsz),
 * the entry flags, and the handler itself.
 */
static struct sysent ac_sysent = {
	3,
	SE_NOUNLOAD | SE_ARGC | SE_32RVAL1,
	acctctl
};

/*
 * Module linkage for the native system call.
 */
static struct modlsys modlsys = {
	&mod_syscallops,
	"acctctl system call",
	&ac_sysent
};

#ifdef _SYSCALL32_IMPL
/*
 * Module linkage for the 32-bit system call; it shares ac_sysent with the
 * native entry.
 */
static struct modlsys modlsys32 = {
	&mod_syscallops32,
	"32-bit acctctl system call",
	&ac_sysent
};
#endif

/*
 * NULL-terminated list of the linkage structures this module provides,
 * passed to mod_install() and mod_info().
 */
static struct modlinkage modlinkage = {
	MODREV_1,
	&modlsys,
#ifdef _SYSCALL32_IMPL
	&modlsys32,
#endif
	NULL
};
540 
541 /* ARGSUSED */
542 static void *
543 exacct_zone_init(zoneid_t zoneid)
544 {
545 	struct exacct_globals *acg;
546 
547 	acg = kmem_zalloc(sizeof (*acg), KM_SLEEP);
548 	mutex_enter(&exacct_globals_list_lock);
549 	list_insert_tail(&exacct_globals_list, acg);
550 	mutex_exit(&exacct_globals_list_lock);
551 	return (acg);
552 }
553 
554 static void
555 exacct_free_info(ac_info_t *info)
556 {
557 	mutex_enter(&info->ac_lock);
558 	if (info->ac_vnode) {
559 		(void) VOP_CLOSE(info->ac_vnode, FWRITE, 1, 0, kcred, NULL);
560 		VN_RELE(info->ac_vnode);
561 		kmem_free(info->ac_file, strlen(info->ac_file) + 1);
562 	}
563 	info->ac_state = AC_OFF;
564 	info->ac_vnode = NULL;
565 	info->ac_file = NULL;
566 	mutex_exit(&info->ac_lock);
567 }
568 
569 /* ARGSUSED */
570 static void
571 exacct_zone_shutdown(zoneid_t zoneid, void *data)
572 {
573 	struct exacct_globals *acg = data;
574 
575 	/*
576 	 * The accounting files need to be closed during shutdown rather than
577 	 * destroy, since otherwise the filesystem they reside on may fail to
578 	 * unmount, thus causing the entire zone halt/reboot to fail.
579 	 */
580 	exacct_free_info(&acg->ac_proc);
581 	exacct_free_info(&acg->ac_task);
582 	exacct_free_info(&acg->ac_flow);
583 }
584 
585 /* ARGSUSED */
586 static void
587 exacct_zone_fini(zoneid_t zoneid, void *data)
588 {
589 	struct exacct_globals *acg = data;
590 
591 	mutex_enter(&exacct_globals_list_lock);
592 	list_remove(&exacct_globals_list, acg);
593 	mutex_exit(&exacct_globals_list_lock);
594 
595 	mutex_destroy(&acg->ac_proc.ac_lock);
596 	mutex_destroy(&acg->ac_task.ac_lock);
597 	mutex_destroy(&acg->ac_flow.ac_lock);
598 	kmem_free(acg, sizeof (*acg));
599 }
600 
601 int
602 _init()
603 {
604 	int error;
605 
606 	mutex_init(&exacct_globals_list_lock, NULL, MUTEX_DEFAULT, NULL);
607 	list_create(&exacct_globals_list, sizeof (struct exacct_globals),
608 	    offsetof(struct exacct_globals, ac_link));
609 	zone_key_create(&exacct_zone_key, exacct_zone_init,
610 	    exacct_zone_shutdown, exacct_zone_fini);
611 
612 	if ((error = mod_install(&modlinkage)) != 0) {
613 		(void) zone_key_delete(exacct_zone_key);
614 		exacct_zone_key = ZONE_KEY_UNINITIALIZED;
615 		mutex_destroy(&exacct_globals_list_lock);
616 		list_destroy(&exacct_globals_list);
617 	}
618 	return (error);
619 }
620 
621 int
622 _info(struct modinfo *modinfop)
623 {
624 	return (mod_info(&modlinkage, modinfop));
625 }
626 
/*
 * _fini()
 *
 * Module unload entry point.  Every unload request is refused with EBUSY.
 */
int
_fini()
{
	int rv = EBUSY;

	return (rv);
}
632