xref: /titanic_52/usr/src/uts/common/syscall/acctctl.c (revision d5862a2559fc326793dc3e6823d449fc4c5c4fda)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/proc.h>
27 #include <sys/systm.h>
28 #include <sys/param.h>
29 #include <sys/kmem.h>
30 #include <sys/sysmacros.h>
31 #include <sys/types.h>
32 #include <sys/cmn_err.h>
33 #include <sys/user.h>
34 #include <sys/cred.h>
35 #include <sys/vnode.h>
36 #include <sys/file.h>
37 #include <sys/pathname.h>
38 #include <sys/modctl.h>
39 #include <sys/acctctl.h>
40 #include <sys/bitmap.h>
41 #include <sys/exacct.h>
42 #include <sys/policy.h>
43 
44 /*
45  * acctctl(2)
46  *
47  *   acctctl() provides the administrative interface to the extended accounting
48  *   subsystem.  The process and task accounting facilities are configurable:
49  *   resources can be individually specified for recording in the appropriate
50  *   accounting file.
51  *
52  *   The current implementation of acctctl() requires that the process, task,
53  *   flow and net accounting files be distinct across all zones.
54  *
55  * Locking
56  *   Each accounting species has an ac_info_t which contains a mutex,
57  *   used to protect the ac_info_t's contents, and to serialize access to the
58  *   appropriate file.
59  */
60 
/*
 * List of the struct exacct_globals of every zone.  Entries are appended by
 * exacct_zone_init() and removed by exacct_zone_fini(); ac_file_in_use()
 * walks the list.  Every access to the list in this file is made with
 * exacct_globals_list_lock held.
 */
61 static list_t exacct_globals_list;
62 static kmutex_t exacct_globals_list_lock;
63 
64 static int
65 ac_state_set(ac_info_t *info, void *buf, size_t bufsz)
66 {
67 	int state;
68 
69 	if (buf == NULL || (bufsz != sizeof (int)))
70 		return (EINVAL);
71 
72 	if (copyin(buf, &state, bufsz) != 0)
73 		return (EFAULT);
74 
75 	if (state != AC_ON && state != AC_OFF)
76 		return (EINVAL);
77 
78 	mutex_enter(&info->ac_lock);
79 	info->ac_state = state;
80 	mutex_exit(&info->ac_lock);
81 	return (0);
82 }
83 
84 static int
85 ac_state_get(ac_info_t *info, void *buf, size_t bufsz)
86 {
87 	if (buf == NULL || (bufsz != sizeof (int)))
88 		return (EINVAL);
89 
90 	mutex_enter(&info->ac_lock);
91 	if (copyout(&info->ac_state, buf, bufsz) != 0) {
92 		mutex_exit(&info->ac_lock);
93 		return (EFAULT);
94 	}
95 	mutex_exit(&info->ac_lock);
96 	return (0);
97 }
98 
99 static boolean_t
100 ac_file_in_use(vnode_t *vp)
101 {
102 	boolean_t in_use = B_FALSE;
103 	struct exacct_globals *acg;
104 
105 	if (vp == NULL)
106 		return (B_FALSE);
107 	mutex_enter(&exacct_globals_list_lock);
108 	/*
109 	 * Start off by grabbing all locks.
110 	 */
111 	for (acg = list_head(&exacct_globals_list); acg != NULL;
112 	    acg = list_next(&exacct_globals_list, acg)) {
113 		mutex_enter(&acg->ac_proc.ac_lock);
114 		mutex_enter(&acg->ac_task.ac_lock);
115 		mutex_enter(&acg->ac_flow.ac_lock);
116 		mutex_enter(&acg->ac_net.ac_lock);
117 	}
118 
119 	for (acg = list_head(&exacct_globals_list); !in_use && acg != NULL;
120 	    acg = list_next(&exacct_globals_list, acg)) {
121 		/*
122 		 * We need to verify that we aren't already using this file for
123 		 * accounting in any zone.
124 		 */
125 		if (vn_compare(acg->ac_proc.ac_vnode, vp) ||
126 		    vn_compare(acg->ac_task.ac_vnode, vp) ||
127 		    vn_compare(acg->ac_flow.ac_vnode, vp) ||
128 		    vn_compare(acg->ac_net.ac_vnode, vp))
129 			in_use = B_TRUE;
130 	}
131 
132 	/*
133 	 * Drop all locks.
134 	 */
135 	for (acg = list_head(&exacct_globals_list); acg != NULL;
136 	    acg = list_next(&exacct_globals_list, acg)) {
137 		mutex_exit(&acg->ac_proc.ac_lock);
138 		mutex_exit(&acg->ac_task.ac_lock);
139 		mutex_exit(&acg->ac_flow.ac_lock);
140 		mutex_exit(&acg->ac_net.ac_lock);
141 	}
142 	mutex_exit(&exacct_globals_list_lock);
143 	return (in_use);
144 }
145 
146 static int
147 ac_file_set(ac_info_t *info, void *ubuf, size_t bufsz)
148 {
149 	int error = 0;
150 	void *kbuf;
151 	void *namebuf;
152 	int namelen;
153 	vnode_t *vp;
154 	void *hdr;
155 	size_t hdrsize;
156 	vattr_t va;
157 
158 	if (ubuf == NULL) {
159 		mutex_enter(&info->ac_lock);
160 
161 		/*
162 		 * Closing accounting file
163 		 */
164 		if (info->ac_vnode != NULL) {
165 			error = VOP_CLOSE(info->ac_vnode, FWRITE, 1, 0,
166 			    CRED(), NULL);
167 			if (error) {
168 				mutex_exit(&info->ac_lock);
169 				return (error);
170 			}
171 			VN_RELE(info->ac_vnode);
172 			info->ac_vnode = NULL;
173 		}
174 		if (info->ac_file != NULL) {
175 			kmem_free(info->ac_file, strlen(info->ac_file) + 1);
176 			info->ac_file = NULL;
177 		}
178 
179 		mutex_exit(&info->ac_lock);
180 		return (error);
181 	}
182 
183 	if (bufsz < 2 || bufsz > MAXPATHLEN)
184 		return (EINVAL);
185 
186 	/*
187 	 * We have to copy in the whole buffer since we can't tell the length
188 	 * of the string in user's address space.
189 	 */
190 	kbuf = kmem_zalloc(bufsz, KM_SLEEP);
191 	if ((error = copyinstr((char *)ubuf, (char *)kbuf, bufsz, NULL)) != 0) {
192 		kmem_free(kbuf, bufsz);
193 		return (error);
194 	}
195 	if (*((char *)kbuf) != '/') {
196 		kmem_free(kbuf, bufsz);
197 		return (EINVAL);
198 	}
199 
200 	/*
201 	 * Now, allocate the space where we are going to save the
202 	 * name of the accounting file and kmem_free kbuf. We have to do this
203 	 * now because it is not good to sleep in kmem_alloc() while
204 	 * holding ac_info's lock.
205 	 */
206 	namelen = strlen(kbuf) + 1;
207 	namebuf = kmem_alloc(namelen, KM_SLEEP);
208 	(void) strcpy(namebuf, kbuf);
209 	kmem_free(kbuf, bufsz);
210 
211 	/*
212 	 * Check if this file already exists.
213 	 */
214 	error = lookupname(namebuf, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp);
215 
216 	/*
217 	 * Check if the file is already in use.
218 	 */
219 	if (!error) {
220 		if (ac_file_in_use(vp)) {
221 			/*
222 			 * If we're already using it then return EBUSY
223 			 */
224 			kmem_free(namebuf, namelen);
225 			VN_RELE(vp);
226 			return (EBUSY);
227 		}
228 		VN_RELE(vp);
229 	}
230 
231 	/*
232 	 * Create an exacct header here because exacct_create_header() may
233 	 * sleep so we should not be holding ac_lock. At this point we cannot
234 	 * reliably know if we need the header or not, so we may end up not
235 	 * using the header.
236 	 */
237 	hdr = exacct_create_header(&hdrsize);
238 
239 	/*
240 	 * Now, grab info's ac_lock and try to set up everything.
241 	 */
242 	mutex_enter(&info->ac_lock);
243 
244 	if ((error = vn_open(namebuf, UIO_SYSSPACE,
245 	    FCREAT | FWRITE | FOFFMAX, 0600, &vp, CRCREAT, 0)) != 0) {
246 		mutex_exit(&info->ac_lock);
247 		kmem_free(namebuf, namelen);
248 		kmem_free(hdr, hdrsize);
249 		return (error);
250 	}
251 
252 	if (vp->v_type != VREG) {
253 		VN_RELE(vp);
254 		mutex_exit(&info->ac_lock);
255 		kmem_free(namebuf, namelen);
256 		kmem_free(hdr, hdrsize);
257 		return (EACCES);
258 	}
259 
260 	if (info->ac_vnode != NULL) {
261 		/*
262 		 * Switch from an old file to a new file by swapping
263 		 * their vnode pointers.
264 		 */
265 		vnode_t *oldvp;
266 		oldvp = info->ac_vnode;
267 		info->ac_vnode = vp;
268 		vp = oldvp;
269 	} else {
270 		/*
271 		 * Start writing accounting records to a new file.
272 		 */
273 		info->ac_vnode = vp;
274 		vp = NULL;
275 	}
276 	if (vp) {
277 		/*
278 		 * We still need to close the old file.
279 		 */
280 		if ((error = VOP_CLOSE(vp, FWRITE, 1, 0, CRED(), NULL)) != 0) {
281 			VN_RELE(vp);
282 			mutex_exit(&info->ac_lock);
283 			kmem_free(namebuf, namelen);
284 			kmem_free(hdr, hdrsize);
285 			return (error);
286 		}
287 		VN_RELE(vp);
288 		if (info->ac_file != NULL) {
289 			kmem_free(info->ac_file,
290 			    strlen(info->ac_file) + 1);
291 			info->ac_file = NULL;
292 		}
293 	}
294 	info->ac_file = namebuf;
295 
296 	/*
297 	 * Write the exacct header only if the file is empty.
298 	 */
299 	error = VOP_GETATTR(info->ac_vnode, &va, AT_SIZE, CRED(), NULL);
300 	if (error == 0 && va.va_size == 0)
301 		error = exacct_write_header(info, hdr, hdrsize);
302 
303 	mutex_exit(&info->ac_lock);
304 	kmem_free(hdr, hdrsize);
305 	return (error);
306 }
307 
308 static int
309 ac_file_get(ac_info_t *info, void *buf, size_t bufsz)
310 {
311 	int error = 0;
312 	vnode_t *vnode;
313 	char *file;
314 
315 	mutex_enter(&info->ac_lock);
316 	file = info->ac_file;
317 	vnode = info->ac_vnode;
318 
319 	if (file == NULL || vnode == NULL) {
320 		mutex_exit(&info->ac_lock);
321 		return (ENOTACTIVE);
322 	}
323 
324 	if (strlen(file) >= bufsz)
325 		error = ENOMEM;
326 	else
327 		error = copyoutstr(file, buf, MAXPATHLEN, NULL);
328 
329 	mutex_exit(&info->ac_lock);
330 	return (error);
331 }
332 
333 static int
334 ac_res_set(ac_info_t *info, void *buf, size_t bufsz, int maxres)
335 {
336 	ac_res_t *res;
337 	ac_res_t *tmp;
338 	ulong_t *maskp;
339 	int id;
340 	uint_t counter = 0;
341 
342 	/*
343 	 * Validate that a non-zero buffer, sized within limits and to an
344 	 * integral number of ac_res_t's has been specified.
345 	 */
346 	if (bufsz == 0 ||
347 	    bufsz > sizeof (ac_res_t) * (AC_MAX_RES + 1) ||
348 	    (bufsz / sizeof (ac_res_t)) * sizeof (ac_res_t) != bufsz)
349 		return (EINVAL);
350 
351 	tmp = res = kmem_alloc(bufsz, KM_SLEEP);
352 	if (copyin(buf, res, bufsz) != 0) {
353 		kmem_free(res, bufsz);
354 		return (EFAULT);
355 	}
356 
357 	maskp = (ulong_t *)&info->ac_mask;
358 
359 	mutex_enter(&info->ac_lock);
360 	while ((id = tmp->ar_id) != AC_NONE && counter < maxres + 1) {
361 		if (id > maxres || id < 0) {
362 			mutex_exit(&info->ac_lock);
363 			kmem_free(res, bufsz);
364 			return (EINVAL);
365 		}
366 		if (tmp->ar_state == AC_ON) {
367 			BT_SET(maskp, id);
368 		} else if (tmp->ar_state == AC_OFF) {
369 			BT_CLEAR(maskp, id);
370 		} else {
371 			mutex_exit(&info->ac_lock);
372 			kmem_free(res, bufsz);
373 			return (EINVAL);
374 		}
375 		tmp++;
376 		counter++;
377 	}
378 	mutex_exit(&info->ac_lock);
379 	kmem_free(res, bufsz);
380 	return (0);
381 }
382 
383 static int
384 ac_res_get(ac_info_t *info, void *buf, size_t bufsz, int maxres)
385 {
386 	int error = 0;
387 	ac_res_t *res;
388 	ac_res_t *tmp;
389 	size_t ressz = sizeof (ac_res_t) * (maxres + 1);
390 	ulong_t *maskp;
391 	int id;
392 
393 	if (bufsz < ressz)
394 		return (EINVAL);
395 	tmp = res = kmem_alloc(ressz, KM_SLEEP);
396 
397 	mutex_enter(&info->ac_lock);
398 	maskp = (ulong_t *)&info->ac_mask;
399 	for (id = 1; id <= maxres; id++) {
400 		tmp->ar_id = id;
401 		tmp->ar_state = BT_TEST(maskp, id);
402 		tmp++;
403 	}
404 	tmp->ar_id = AC_NONE;
405 	tmp->ar_state = AC_OFF;
406 	mutex_exit(&info->ac_lock);
407 	error = copyout(res, buf, ressz);
408 	kmem_free(res, ressz);
409 	return (error);
410 }
411 
412 /*
413  * acctctl()
414  *
415  * Overview
416  *   acctctl() is the entry point for the acctctl(2) system call.
417  *
418  * Return values
419  *   On successful completion, return 0; otherwise -1 is returned and errno is
420  *   set appropriately.
421  *
422  * Caller's context
423  *   Called from the system call path.
424  */
425 int
426 acctctl(int cmd, void *buf, size_t bufsz)
427 {
428 	int error = 0;
429 	int mode = AC_MODE(cmd);
430 	int option = AC_OPTION(cmd);
431 	int maxres;
432 	ac_info_t *info;
433 	zone_t *zone = curproc->p_zone;
434 	struct exacct_globals *acg;
435 
436 	acg = zone_getspecific(exacct_zone_key, zone);
437 	/*
438 	 * exacct_zone_key and associated per-zone state were initialized when
439 	 * the module was loaded.
440 	 */
441 	ASSERT(exacct_zone_key != ZONE_KEY_UNINITIALIZED);
442 	ASSERT(acg != NULL);
443 
444 	switch (mode) {	/* sanity check */
445 	case AC_TASK:
446 		info = &acg->ac_task;
447 		maxres = AC_TASK_MAX_RES;
448 		break;
449 	case AC_PROC:
450 		info = &acg->ac_proc;
451 		maxres = AC_PROC_MAX_RES;
452 		break;
453 	/*
454 	 * Flow/net accounting isn't configurable in non-global
455 	 * zones, but we have this field on a per-zone basis for future
456 	 * expansion as well as the ability to return default "unset"
457 	 * values for the various AC_*_GET queries.  AC_*_SET commands
458 	 * fail with EPERM for AC_FLOW and AC_NET in non-global zones.
459 	 */
460 	case AC_FLOW:
461 		info = &acg->ac_flow;
462 		maxres = AC_FLOW_MAX_RES;
463 		break;
464 	case AC_NET:
465 		info = &acg->ac_net;
466 		maxres = AC_NET_MAX_RES;
467 		break;
468 	default:
469 		return (set_errno(EINVAL));
470 	}
471 
472 	switch (option) {
473 	case AC_STATE_SET:
474 		if ((error = secpolicy_acct(CRED())) != 0)
475 			break;
476 		if ((mode == AC_FLOW || mode == AC_NET) &&
477 		    getzoneid() != GLOBAL_ZONEID) {
478 			error = EPERM;
479 			break;
480 		}
481 		error = ac_state_set(info, buf, bufsz);
482 		break;
483 	case AC_STATE_GET:
484 		error = ac_state_get(info, buf, bufsz);
485 		break;
486 	case AC_FILE_SET:
487 		if ((error = secpolicy_acct(CRED())) != 0)
488 			break;
489 		if ((mode == AC_FLOW || mode == AC_NET) &&
490 		    getzoneid() != GLOBAL_ZONEID) {
491 			error = EPERM;
492 			break;
493 		}
494 		error = ac_file_set(info, buf, bufsz);
495 		break;
496 	case AC_FILE_GET:
497 		error = ac_file_get(info, buf, bufsz);
498 		break;
499 	case AC_RES_SET:
500 		if ((error = secpolicy_acct(CRED())) != 0)
501 			break;
502 		if ((mode == AC_FLOW || mode == AC_NET) &&
503 		    getzoneid() != GLOBAL_ZONEID) {
504 			error = EPERM;
505 			break;
506 		}
507 		error = ac_res_set(info, buf, bufsz, maxres);
508 		break;
509 	case AC_RES_GET:
510 		error = ac_res_get(info, buf, bufsz, maxres);
511 		break;
512 	default:
513 		return (set_errno(EINVAL));
514 	}
515 	if (error)
516 		return (set_errno(error));
517 	return (0);
518 }
519 
/*
 * System call entry for acctctl().  The leading 3 is presumably the argument
 * count (acctctl() takes three arguments) -- confirm against struct sysent.
 * The flags include SE_NOUNLOAD; _fini() likewise always refuses to unload.
 */
520 static struct sysent ac_sysent = {
521 	3,
522 	SE_NOUNLOAD | SE_ARGC | SE_32RVAL1,
523 	acctctl
524 };
525 
/*
 * Module linkage element registering ac_sysent through mod_syscallops.
 */
526 static struct modlsys modlsys = {
527 	&mod_syscallops,
528 	"acctctl system call",
529 	&ac_sysent
530 };
531 
/*
 * When _SYSCALL32_IMPL is defined, the same sysent is also registered
 * through mod_syscallops32.
 */
532 #ifdef _SYSCALL32_IMPL
533 static struct modlsys modlsys32 = {
534 	&mod_syscallops32,
535 	"32-bit acctctl system call",
536 	&ac_sysent
537 };
538 #endif
539 
/*
 * Linkage handed to mod_install() in _init() and mod_info() in _info();
 * a NULL-terminated list of the elements above.
 */
540 static struct modlinkage modlinkage = {
541 	MODREV_1,
542 	&modlsys,
543 #ifdef _SYSCALL32_IMPL
544 	&modlsys32,
545 #endif
546 	NULL
547 };
548 
549 /* ARGSUSED */
550 static void *
551 exacct_zone_init(zoneid_t zoneid)
552 {
553 	struct exacct_globals *acg;
554 
555 	acg = kmem_zalloc(sizeof (*acg), KM_SLEEP);
556 	mutex_enter(&exacct_globals_list_lock);
557 	list_insert_tail(&exacct_globals_list, acg);
558 	mutex_exit(&exacct_globals_list_lock);
559 	return (acg);
560 }
561 
562 static void
563 exacct_free_info(ac_info_t *info)
564 {
565 	mutex_enter(&info->ac_lock);
566 	if (info->ac_vnode) {
567 		(void) VOP_CLOSE(info->ac_vnode, FWRITE, 1, 0, kcred, NULL);
568 		VN_RELE(info->ac_vnode);
569 		kmem_free(info->ac_file, strlen(info->ac_file) + 1);
570 	}
571 	info->ac_state = AC_OFF;
572 	info->ac_vnode = NULL;
573 	info->ac_file = NULL;
574 	mutex_exit(&info->ac_lock);
575 }
576 
577 /* ARGSUSED */
578 static void
579 exacct_zone_shutdown(zoneid_t zoneid, void *data)
580 {
581 	struct exacct_globals *acg = data;
582 
583 	/*
584 	 * The accounting files need to be closed during shutdown rather than
585 	 * destroy, since otherwise the filesystem they reside on may fail to
586 	 * unmount, thus causing the entire zone halt/reboot to fail.
587 	 */
588 	exacct_free_info(&acg->ac_proc);
589 	exacct_free_info(&acg->ac_task);
590 	exacct_free_info(&acg->ac_flow);
591 	exacct_free_info(&acg->ac_net);
592 }
593 
594 /* ARGSUSED */
595 static void
596 exacct_zone_fini(zoneid_t zoneid, void *data)
597 {
598 	struct exacct_globals *acg = data;
599 
600 	mutex_enter(&exacct_globals_list_lock);
601 	list_remove(&exacct_globals_list, acg);
602 	mutex_exit(&exacct_globals_list_lock);
603 
604 	mutex_destroy(&acg->ac_proc.ac_lock);
605 	mutex_destroy(&acg->ac_task.ac_lock);
606 	mutex_destroy(&acg->ac_flow.ac_lock);
607 	mutex_destroy(&acg->ac_net.ac_lock);
608 	kmem_free(acg, sizeof (*acg));
609 }
610 
611 int
612 _init()
613 {
614 	int error;
615 
616 	mutex_init(&exacct_globals_list_lock, NULL, MUTEX_DEFAULT, NULL);
617 	list_create(&exacct_globals_list, sizeof (struct exacct_globals),
618 	    offsetof(struct exacct_globals, ac_link));
619 	zone_key_create(&exacct_zone_key, exacct_zone_init,
620 	    exacct_zone_shutdown, exacct_zone_fini);
621 
622 	if ((error = mod_install(&modlinkage)) != 0) {
623 		(void) zone_key_delete(exacct_zone_key);
624 		exacct_zone_key = ZONE_KEY_UNINITIALIZED;
625 		mutex_destroy(&exacct_globals_list_lock);
626 		list_destroy(&exacct_globals_list);
627 	}
628 	return (error);
629 }
630 
631 int
632 _info(struct modinfo *modinfop)
633 {
634 	return (mod_info(&modlinkage, modinfop));
635 }
636 
/*
 * _fini()
 *
 * Overview
 *   Module unload entry point.  Unloading is never permitted: the function
 *   unconditionally returns EBUSY.
 */
637 int
638 _fini()
639 {
640 	return (EBUSY);
641 }
642