xref: /freebsd/sys/kern/kern_rctl.c (revision 884a2a699669ec61e2366e3e358342dbc94be24a)
1 /*-
2  * Copyright (c) 2010 The FreeBSD Foundation
3  * All rights reserved.
4  *
5  * This software was developed by Edward Tomasz Napierala under sponsorship
6  * from the FreeBSD Foundation.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  * $FreeBSD$
30  */
31 
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34 
35 #include <sys/param.h>
36 #include <sys/bus.h>
37 #include <sys/malloc.h>
38 #include <sys/queue.h>
39 #include <sys/refcount.h>
40 #include <sys/jail.h>
41 #include <sys/kernel.h>
42 #include <sys/limits.h>
43 #include <sys/loginclass.h>
44 #include <sys/priv.h>
45 #include <sys/proc.h>
46 #include <sys/racct.h>
47 #include <sys/rctl.h>
48 #include <sys/resourcevar.h>
49 #include <sys/sx.h>
50 #include <sys/sysent.h>
51 #include <sys/sysproto.h>
52 #include <sys/systm.h>
53 #include <sys/types.h>
54 #include <sys/eventhandler.h>
55 #include <sys/lock.h>
56 #include <sys/mutex.h>
57 #include <sys/rwlock.h>
58 #include <sys/sbuf.h>
59 #include <sys/taskqueue.h>
60 #include <sys/tree.h>
61 #include <vm/uma.h>
62 
63 #ifdef RCTL
64 #ifndef RACCT
65 #error "The RCTL option requires the RACCT option"
66 #endif
67 
68 FEATURE(rctl, "Resource Limits");
69 
/*
 * HRF_* values.
 * NOTE(review): none of these are referenced in the part of the file
 * reviewed here; their meaning is presumably what the names suggest -
 * confirm against the users.
 */
#define	HRF_DEFAULT		0
#define	HRF_DONT_INHERIT	1
#define	HRF_DONT_ACCUMULATE	2

/* Default buffer size for rctl_get_rules(2). */
#define	RCTL_DEFAULT_BUFSIZE	4096
/* Size of the fixed buffer rctl_enforce() formats log/devctl text into. */
#define	RCTL_LOG_BUFSIZE	128
77 
/*
 * 'rctl_rule_link' connects a rule with every racct it's related to.
 * For example, rule 'user:X:openfiles:deny=N/process' is linked
 * with uidinfo for user X, and to each process of that user.
 */
struct rctl_rule_link {
	/* Linkage on the owning racct's r_rule_links list. */
	LIST_ENTRY(rctl_rule_link)	rrl_next;
	/* The rule this link refers to; the link holds a rule reference. */
	struct rctl_rule		*rrl_rule;
	/*
	 * Set by rctl_enforce() once a log/devctl/signal action has fired,
	 * cleared again when the rule is no longer exceeded.
	 */
	int				rrl_exceeded;
};
88 
/*
 * One entry of a name <-> integer translation table.  Tables are arrays
 * of these terminated by an entry whose d_name is NULL.
 */
struct dict {
	const char	*d_name;	/* textual name */
	int		d_value;	/* corresponding numeric value */
};
93 
94 static struct dict subjectnames[] = {
95 	{ "process", RCTL_SUBJECT_TYPE_PROCESS },
96 	{ "user", RCTL_SUBJECT_TYPE_USER },
97 	{ "loginclass", RCTL_SUBJECT_TYPE_LOGINCLASS },
98 	{ "jail", RCTL_SUBJECT_TYPE_JAIL },
99 	{ NULL, -1 }};
100 
101 static struct dict resourcenames[] = {
102 	{ "cpu", RACCT_CPU },
103 	{ "data", RACCT_DATA },
104 	{ "stack", RACCT_STACK },
105 	{ "core", RACCT_CORE },
106 	{ "rss", RACCT_RSS },
107 	{ "memlock", RACCT_MEMLOCK },
108 	{ "nproc", RACCT_NPROC },
109 	{ "nofile", RACCT_NOFILE },
110 	{ "vmem", RACCT_VMEM },
111 	{ "npts", RACCT_NPTS },
112 	{ "swap", RACCT_SWAP },
113 	{ "nthr", RACCT_NTHR },
114 	{ "msgqqueued", RACCT_MSGQQUEUED },
115 	{ "msgqsize", RACCT_MSGQSIZE },
116 	{ "nmsgq", RACCT_NMSGQ },
117 	{ "nsem", RACCT_NSEM },
118 	{ "nsemop", RACCT_NSEMOP },
119 	{ "nshm", RACCT_NSHM },
120 	{ "shmsize", RACCT_SHMSIZE },
121 	{ "wallclock", RACCT_WALLCLOCK },
122 	{ NULL, -1 }};
123 
124 static struct dict actionnames[] = {
125 	{ "sighup", RCTL_ACTION_SIGHUP },
126 	{ "sigint", RCTL_ACTION_SIGINT },
127 	{ "sigquit", RCTL_ACTION_SIGQUIT },
128 	{ "sigill", RCTL_ACTION_SIGILL },
129 	{ "sigtrap", RCTL_ACTION_SIGTRAP },
130 	{ "sigabrt", RCTL_ACTION_SIGABRT },
131 	{ "sigemt", RCTL_ACTION_SIGEMT },
132 	{ "sigfpe", RCTL_ACTION_SIGFPE },
133 	{ "sigkill", RCTL_ACTION_SIGKILL },
134 	{ "sigbus", RCTL_ACTION_SIGBUS },
135 	{ "sigsegv", RCTL_ACTION_SIGSEGV },
136 	{ "sigsys", RCTL_ACTION_SIGSYS },
137 	{ "sigpipe", RCTL_ACTION_SIGPIPE },
138 	{ "sigalrm", RCTL_ACTION_SIGALRM },
139 	{ "sigterm", RCTL_ACTION_SIGTERM },
140 	{ "sigurg", RCTL_ACTION_SIGURG },
141 	{ "sigstop", RCTL_ACTION_SIGSTOP },
142 	{ "sigtstp", RCTL_ACTION_SIGTSTP },
143 	{ "sigchld", RCTL_ACTION_SIGCHLD },
144 	{ "sigttin", RCTL_ACTION_SIGTTIN },
145 	{ "sigttou", RCTL_ACTION_SIGTTOU },
146 	{ "sigio", RCTL_ACTION_SIGIO },
147 	{ "sigxcpu", RCTL_ACTION_SIGXCPU },
148 	{ "sigxfsz", RCTL_ACTION_SIGXFSZ },
149 	{ "sigvtalrm", RCTL_ACTION_SIGVTALRM },
150 	{ "sigprof", RCTL_ACTION_SIGPROF },
151 	{ "sigwinch", RCTL_ACTION_SIGWINCH },
152 	{ "siginfo", RCTL_ACTION_SIGINFO },
153 	{ "sigusr1", RCTL_ACTION_SIGUSR1 },
154 	{ "sigusr2", RCTL_ACTION_SIGUSR2 },
155 	{ "sigthr", RCTL_ACTION_SIGTHR },
156 	{ "deny", RCTL_ACTION_DENY },
157 	{ "log", RCTL_ACTION_LOG },
158 	{ "devctl", RCTL_ACTION_DEVCTL },
159 	{ NULL, -1 }};
160 
/* Subsystem initialisation, registered to run via SYSINIT. */
static void rctl_init(void);
SYSINIT(rctl, SI_SUB_RACCT, SI_ORDER_FIRST, rctl_init, NULL);

/* UMA zones that 'struct rctl_rule_link' and 'struct rctl_rule' come from. */
static uma_zone_t rctl_rule_link_zone;
static uma_zone_t rctl_rule_zone;
/* Held while walking or modifying any racct's r_rule_links list. */
static struct rwlock rctl_lock;
RW_SYSINIT(rctl_lock, &rctl_lock, "RCTL lock");

/* Forward declarations for helpers used before their definition. */
static int rctl_rule_fully_specified(const struct rctl_rule *rule);
static void rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule);

MALLOC_DEFINE(M_RCTL, "rctl", "Resource Limits");
173 
174 static const char *
175 rctl_subject_type_name(int subject)
176 {
177 	int i;
178 
179 	for (i = 0; subjectnames[i].d_name != NULL; i++) {
180 		if (subjectnames[i].d_value == subject)
181 			return (subjectnames[i].d_name);
182 	}
183 
184 	panic("rctl_subject_type_name: unknown subject type %d", subject);
185 }
186 
187 static const char *
188 rctl_action_name(int action)
189 {
190 	int i;
191 
192 	for (i = 0; actionnames[i].d_name != NULL; i++) {
193 		if (actionnames[i].d_value == action)
194 			return (actionnames[i].d_name);
195 	}
196 
197 	panic("rctl_action_name: unknown action %d", action);
198 }
199 
200 const char *
201 rctl_resource_name(int resource)
202 {
203 	int i;
204 
205 	for (i = 0; resourcenames[i].d_name != NULL; i++) {
206 		if (resourcenames[i].d_value == resource)
207 			return (resourcenames[i].d_name);
208 	}
209 
210 	panic("rctl_resource_name: unknown resource %d", resource);
211 }
212 
213 /*
214  * Return the amount of resource that can be allocated by 'p' before
215  * hitting 'rule'.
216  */
217 static int64_t
218 rctl_available_resource(const struct proc *p, const struct rctl_rule *rule)
219 {
220 	int resource;
221 	int64_t available = INT64_MAX;
222 	struct ucred *cred = p->p_ucred;
223 
224 	rw_assert(&rctl_lock, RA_LOCKED);
225 
226 	resource = rule->rr_resource;
227 	switch (rule->rr_per) {
228 	case RCTL_SUBJECT_TYPE_PROCESS:
229 		available = rule->rr_amount -
230 		    p->p_racct->r_resources[resource];
231 		break;
232 	case RCTL_SUBJECT_TYPE_USER:
233 		available = rule->rr_amount -
234 		    cred->cr_ruidinfo->ui_racct->r_resources[resource];
235 		break;
236 	case RCTL_SUBJECT_TYPE_LOGINCLASS:
237 		available = rule->rr_amount -
238 		    cred->cr_loginclass->lc_racct->r_resources[resource];
239 		break;
240 	case RCTL_SUBJECT_TYPE_JAIL:
241 		available = rule->rr_amount -
242 		    cred->cr_prison->pr_prison_racct->prr_racct->
243 		        r_resources[resource];
244 		break;
245 	default:
246 		panic("rctl_compute_available: unknown per %d",
247 		    rule->rr_per);
248 	}
249 
250 	return (available);
251 }
252 
253 /*
254  * Return non-zero if allocating 'amount' by proc 'p' would exceed
255  * resource limit specified by 'rule'.
256  */
257 static int
258 rctl_would_exceed(const struct proc *p, const struct rctl_rule *rule,
259     int64_t amount)
260 {
261 	int64_t available;
262 
263 	rw_assert(&rctl_lock, RA_LOCKED);
264 
265 	available = rctl_available_resource(p, rule);
266 	if (available >= amount)
267 		return (0);
268 
269 	return (1);
270 }
271 
272 /*
273  * Check whether the proc 'p' can allocate 'amount' of 'resource' in addition
274  * to what it keeps allocated now.  Returns non-zero if the allocation should
275  * be denied, 0 otherwise.
276  */
277 int
278 rctl_enforce(struct proc *p, int resource, uint64_t amount)
279 {
280 	struct rctl_rule *rule;
281 	struct rctl_rule_link *link;
282 	struct sbuf sb;
283 	int should_deny = 0;
284 	char *buf;
285 	static int curtime = 0;
286 	static struct timeval lasttime;
287 
288 	rw_rlock(&rctl_lock);
289 
290 	/*
291 	 * There may be more than one matching rule; go through all of them.
292 	 * Denial should be done last, after logging and sending signals.
293 	 */
294 	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
295 		rule = link->rrl_rule;
296 		if (rule->rr_resource != resource)
297 			continue;
298 		if (!rctl_would_exceed(p, rule, amount)) {
299 			link->rrl_exceeded = 0;
300 			continue;
301 		}
302 
303 		switch (rule->rr_action) {
304 		case RCTL_ACTION_DENY:
305 			should_deny = 1;
306 			continue;
307 		case RCTL_ACTION_LOG:
308 			/*
309 			 * If rrl_exceeded != 0, it means we've already
310 			 * logged a warning for this process.
311 			 */
312 			if (link->rrl_exceeded != 0)
313 				continue;
314 
315 			if (!ppsratecheck(&lasttime, &curtime, 10))
316 				continue;
317 
318 			buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
319 			if (buf == NULL) {
320 				printf("rctl_enforce: out of memory\n");
321 				continue;
322 			}
323 			sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
324 			rctl_rule_to_sbuf(&sb, rule);
325 			sbuf_finish(&sb);
326 			printf("rctl: rule \"%s\" matched by pid %d "
327 			    "(%s), uid %d, jail %s\n", sbuf_data(&sb),
328 			    p->p_pid, p->p_comm, p->p_ucred->cr_uid,
329 			    p->p_ucred->cr_prison->pr_prison_racct->prr_name);
330 			sbuf_delete(&sb);
331 			free(buf, M_RCTL);
332 			link->rrl_exceeded = 1;
333 			continue;
334 		case RCTL_ACTION_DEVCTL:
335 			if (link->rrl_exceeded != 0)
336 				continue;
337 
338 			buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
339 			if (buf == NULL) {
340 				printf("rctl_enforce: out of memory\n");
341 				continue;
342 			}
343 			sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
344 			sbuf_printf(&sb, "rule=");
345 			rctl_rule_to_sbuf(&sb, rule);
346 			sbuf_printf(&sb, " pid=%d ruid=%d jail=%s",
347 			    p->p_pid, p->p_ucred->cr_ruid,
348 			    p->p_ucred->cr_prison->pr_prison_racct->prr_name);
349 			sbuf_finish(&sb);
350 			devctl_notify_f("RCTL", "rule", "matched",
351 			    sbuf_data(&sb), M_NOWAIT);
352 			sbuf_delete(&sb);
353 			free(buf, M_RCTL);
354 			link->rrl_exceeded = 1;
355 			continue;
356 		default:
357 			if (link->rrl_exceeded != 0)
358 				continue;
359 
360 			KASSERT(rule->rr_action > 0 &&
361 			    rule->rr_action <= RCTL_ACTION_SIGNAL_MAX,
362 			    ("rctl_enforce: unknown action %d",
363 			     rule->rr_action));
364 
365 			/*
366 			 * We're using the fact that RCTL_ACTION_SIG* values
367 			 * are equal to their counterparts from sys/signal.h.
368 			 */
369 			psignal(p, rule->rr_action);
370 			link->rrl_exceeded = 1;
371 			continue;
372 		}
373 	}
374 
375 	rw_runlock(&rctl_lock);
376 
377 	if (should_deny) {
378 		/*
379 		 * Return fake error code; the caller should change it
380 		 * into one proper for the situation - EFSIZ, ENOMEM etc.
381 		 */
382 		return (EDOOFUS);
383 	}
384 
385 	return (0);
386 }
387 
388 uint64_t
389 rctl_get_limit(struct proc *p, int resource)
390 {
391 	struct rctl_rule *rule;
392 	struct rctl_rule_link *link;
393 	uint64_t amount = UINT64_MAX;
394 
395 	rw_rlock(&rctl_lock);
396 
397 	/*
398 	 * There may be more than one matching rule; go through all of them.
399 	 * Denial should be done last, after logging and sending signals.
400 	 */
401 	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
402 		rule = link->rrl_rule;
403 		if (rule->rr_resource != resource)
404 			continue;
405 		if (rule->rr_action != RCTL_ACTION_DENY)
406 			continue;
407 		if (rule->rr_amount < amount)
408 			amount = rule->rr_amount;
409 	}
410 
411 	rw_runlock(&rctl_lock);
412 
413 	return (amount);
414 }
415 
416 uint64_t
417 rctl_get_available(struct proc *p, int resource)
418 {
419 	struct rctl_rule *rule;
420 	struct rctl_rule_link *link;
421 	int64_t available, minavailable, allocated;
422 
423 	minavailable = INT64_MAX;
424 
425 	rw_rlock(&rctl_lock);
426 
427 	/*
428 	 * There may be more than one matching rule; go through all of them.
429 	 * Denial should be done last, after logging and sending signals.
430 	 */
431 	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
432 		rule = link->rrl_rule;
433 		if (rule->rr_resource != resource)
434 			continue;
435 		if (rule->rr_action != RCTL_ACTION_DENY)
436 			continue;
437 		available = rctl_available_resource(p, rule);
438 		if (available < minavailable)
439 			minavailable = available;
440 	}
441 
442 	rw_runlock(&rctl_lock);
443 
444 	/*
445 	 * XXX: Think about this _hard_.
446 	 */
447 	allocated = p->p_racct->r_resources[resource];
448 	if (minavailable < INT64_MAX - allocated)
449 		minavailable += allocated;
450 	if (minavailable < 0)
451 		minavailable = 0;
452 	return (minavailable);
453 }
454 
455 static int
456 rctl_rule_matches(const struct rctl_rule *rule, const struct rctl_rule *filter)
457 {
458 
459 	if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED) {
460 		if (rule->rr_subject_type != filter->rr_subject_type)
461 			return (0);
462 
463 		switch (filter->rr_subject_type) {
464 		case RCTL_SUBJECT_TYPE_PROCESS:
465 			if (filter->rr_subject.rs_proc != NULL &&
466 			    rule->rr_subject.rs_proc !=
467 			    filter->rr_subject.rs_proc)
468 				return (0);
469 			break;
470 		case RCTL_SUBJECT_TYPE_USER:
471 			if (filter->rr_subject.rs_uip != NULL &&
472 			    rule->rr_subject.rs_uip !=
473 			    filter->rr_subject.rs_uip)
474 				return (0);
475 			break;
476 		case RCTL_SUBJECT_TYPE_LOGINCLASS:
477 			if (filter->rr_subject.rs_loginclass != NULL &&
478 			    rule->rr_subject.rs_loginclass !=
479 			    filter->rr_subject.rs_loginclass)
480 				return (0);
481 			break;
482 		case RCTL_SUBJECT_TYPE_JAIL:
483 			if (filter->rr_subject.rs_prison_racct != NULL &&
484 			    rule->rr_subject.rs_prison_racct !=
485 			    filter->rr_subject.rs_prison_racct)
486 				return (0);
487 			break;
488 		default:
489 			panic("rctl_rule_matches: unknown subject type %d",
490 			    filter->rr_subject_type);
491 		}
492 	}
493 
494 	if (filter->rr_resource != RACCT_UNDEFINED) {
495 		if (rule->rr_resource != filter->rr_resource)
496 			return (0);
497 	}
498 
499 	if (filter->rr_action != RCTL_ACTION_UNDEFINED) {
500 		if (rule->rr_action != filter->rr_action)
501 			return (0);
502 	}
503 
504 	if (filter->rr_amount != RCTL_AMOUNT_UNDEFINED) {
505 		if (rule->rr_amount != filter->rr_amount)
506 			return (0);
507 	}
508 
509 	if (filter->rr_per != RCTL_SUBJECT_TYPE_UNDEFINED) {
510 		if (rule->rr_per != filter->rr_per)
511 			return (0);
512 	}
513 
514 	return (1);
515 }
516 
517 static int
518 str2value(const char *str, int *value, struct dict *table)
519 {
520 	int i;
521 
522 	if (value == NULL)
523 		return (EINVAL);
524 
525 	for (i = 0; table[i].d_name != NULL; i++) {
526 		if (strcasecmp(table[i].d_name, str) == 0) {
527 			*value =  table[i].d_value;
528 			return (0);
529 		}
530 	}
531 
532 	return (EINVAL);
533 }
534 
535 static int
536 str2id(const char *str, id_t *value)
537 {
538 	char *end;
539 
540 	if (str == NULL)
541 		return (EINVAL);
542 
543 	*value = strtoul(str, &end, 10);
544 	if ((size_t)(end - str) != strlen(str))
545 		return (EINVAL);
546 
547 	return (0);
548 }
549 
/*
 * Convert the decimal string 'str' into a non-negative 64-bit amount.
 *
 * Returns 0 and stores the result in '*value' on success.  Returns EINVAL
 * when 'str' is NULL or is not consumed entirely by the conversion, and
 * ERANGE when the converted number does not fit into a non-negative
 * int64_t.  '*value' is left untouched on failure.
 *
 * strtoul() accepts a leading minus sign and returns the negated value as
 * an unsigned number; storing that straight into an int64_t used to turn
 * "-1" (or anything above INT64_MAX) into a negative amount without any
 * error being reported.
 */
static int
str2int64(const char *str, int64_t *value)
{
	unsigned long parsed;
	char *end;

	if (str == NULL)
		return (EINVAL);

	parsed = strtoul(str, &end, 10);
	if ((size_t)(end - str) != strlen(str))
		return (EINVAL);

	if ((uint64_t)parsed > (uint64_t)INT64_MAX)
		return (ERANGE);

	*value = (int64_t)parsed;
	return (0);
}
564 
565 /*
566  * Connect the rule to the racct, increasing refcount for the rule.
567  */
568 static void
569 rctl_racct_add_rule(struct racct *racct, struct rctl_rule *rule)
570 {
571 	struct rctl_rule_link *link;
572 
573 	KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
574 
575 	rctl_rule_acquire(rule);
576 	link = uma_zalloc(rctl_rule_link_zone, M_WAITOK);
577 	link->rrl_rule = rule;
578 	link->rrl_exceeded = 0;
579 
580 	rw_wlock(&rctl_lock);
581 	LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next);
582 	rw_wunlock(&rctl_lock);
583 }
584 
585 static int
586 rctl_racct_add_rule_locked(struct racct *racct, struct rctl_rule *rule)
587 {
588 	struct rctl_rule_link *link;
589 
590 	KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
591 	rw_assert(&rctl_lock, RA_WLOCKED);
592 
593 	link = uma_zalloc(rctl_rule_link_zone, M_NOWAIT);
594 	if (link == NULL)
595 		return (ENOMEM);
596 	rctl_rule_acquire(rule);
597 	link->rrl_rule = rule;
598 	link->rrl_exceeded = 0;
599 
600 	LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next);
601 	return (0);
602 }
603 
604 /*
605  * Remove limits for a rules matching the filter and release
606  * the refcounts for the rules, possibly freeing them.  Returns
607  * the number of limit structures removed.
608  */
609 static int
610 rctl_racct_remove_rules(struct racct *racct,
611     const struct rctl_rule *filter)
612 {
613 	int removed = 0;
614 	struct rctl_rule_link *link, *linktmp;
615 
616 	rw_assert(&rctl_lock, RA_WLOCKED);
617 
618 	LIST_FOREACH_SAFE(link, &racct->r_rule_links, rrl_next, linktmp) {
619 		if (!rctl_rule_matches(link->rrl_rule, filter))
620 			continue;
621 
622 		LIST_REMOVE(link, rrl_next);
623 		rctl_rule_release(link->rrl_rule);
624 		uma_zfree(rctl_rule_link_zone, link);
625 		removed++;
626 	}
627 	return (removed);
628 }
629 
630 static void
631 rctl_rule_acquire_subject(struct rctl_rule *rule)
632 {
633 
634 	switch (rule->rr_subject_type) {
635 	case RCTL_SUBJECT_TYPE_UNDEFINED:
636 	case RCTL_SUBJECT_TYPE_PROCESS:
637 		break;
638 	case RCTL_SUBJECT_TYPE_JAIL:
639 		if (rule->rr_subject.rs_prison_racct != NULL)
640 			prison_racct_hold(rule->rr_subject.rs_prison_racct);
641 		break;
642 	case RCTL_SUBJECT_TYPE_USER:
643 		if (rule->rr_subject.rs_uip != NULL)
644 			uihold(rule->rr_subject.rs_uip);
645 		break;
646 	case RCTL_SUBJECT_TYPE_LOGINCLASS:
647 		if (rule->rr_subject.rs_loginclass != NULL)
648 			loginclass_hold(rule->rr_subject.rs_loginclass);
649 		break;
650 	default:
651 		panic("rctl_rule_acquire_subject: unknown subject type %d",
652 		    rule->rr_subject_type);
653 	}
654 }
655 
656 static void
657 rctl_rule_release_subject(struct rctl_rule *rule)
658 {
659 
660 	switch (rule->rr_subject_type) {
661 	case RCTL_SUBJECT_TYPE_UNDEFINED:
662 	case RCTL_SUBJECT_TYPE_PROCESS:
663 		break;
664 	case RCTL_SUBJECT_TYPE_JAIL:
665 		if (rule->rr_subject.rs_prison_racct != NULL)
666 			prison_racct_free(rule->rr_subject.rs_prison_racct);
667 		break;
668 	case RCTL_SUBJECT_TYPE_USER:
669 		if (rule->rr_subject.rs_uip != NULL)
670 			uifree(rule->rr_subject.rs_uip);
671 		break;
672 	case RCTL_SUBJECT_TYPE_LOGINCLASS:
673 		if (rule->rr_subject.rs_loginclass != NULL)
674 			loginclass_free(rule->rr_subject.rs_loginclass);
675 		break;
676 	default:
677 		panic("rctl_rule_release_subject: unknown subject type %d",
678 		    rule->rr_subject_type);
679 	}
680 }
681 
682 struct rctl_rule *
683 rctl_rule_alloc(int flags)
684 {
685 	struct rctl_rule *rule;
686 
687 	rule = uma_zalloc(rctl_rule_zone, flags);
688 	if (rule == NULL)
689 		return (NULL);
690 	rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
691 	rule->rr_subject.rs_proc = NULL;
692 	rule->rr_subject.rs_uip = NULL;
693 	rule->rr_subject.rs_loginclass = NULL;
694 	rule->rr_subject.rs_prison_racct = NULL;
695 	rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED;
696 	rule->rr_resource = RACCT_UNDEFINED;
697 	rule->rr_action = RCTL_ACTION_UNDEFINED;
698 	rule->rr_amount = RCTL_AMOUNT_UNDEFINED;
699 	refcount_init(&rule->rr_refcount, 1);
700 
701 	return (rule);
702 }
703 
704 struct rctl_rule *
705 rctl_rule_duplicate(const struct rctl_rule *rule, int flags)
706 {
707 	struct rctl_rule *copy;
708 
709 	copy = uma_zalloc(rctl_rule_zone, flags);
710 	if (copy == NULL)
711 		return (NULL);
712 	copy->rr_subject_type = rule->rr_subject_type;
713 	copy->rr_subject.rs_proc = rule->rr_subject.rs_proc;
714 	copy->rr_subject.rs_uip = rule->rr_subject.rs_uip;
715 	copy->rr_subject.rs_loginclass = rule->rr_subject.rs_loginclass;
716 	copy->rr_subject.rs_prison_racct = rule->rr_subject.rs_prison_racct;
717 	copy->rr_per = rule->rr_per;
718 	copy->rr_resource = rule->rr_resource;
719 	copy->rr_action = rule->rr_action;
720 	copy->rr_amount = rule->rr_amount;
721 	refcount_init(&copy->rr_refcount, 1);
722 	rctl_rule_acquire_subject(copy);
723 
724 	return (copy);
725 }
726 
727 void
728 rctl_rule_acquire(struct rctl_rule *rule)
729 {
730 
731 	KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
732 
733 	refcount_acquire(&rule->rr_refcount);
734 }
735 
736 static void
737 rctl_rule_free(void *context, int pending)
738 {
739 	struct rctl_rule *rule;
740 
741 	rule = (struct rctl_rule *)context;
742 
743 	KASSERT(rule->rr_refcount == 0, ("rule->rr_refcount != 0"));
744 
745 	/*
746 	 * We don't need locking here; rule is guaranteed to be inaccessible.
747 	 */
748 
749 	rctl_rule_release_subject(rule);
750 	uma_zfree(rctl_rule_zone, rule);
751 }
752 
753 void
754 rctl_rule_release(struct rctl_rule *rule)
755 {
756 
757 	KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
758 
759 	if (refcount_release(&rule->rr_refcount)) {
760 		/*
761 		 * rctl_rule_release() is often called when iterating
762 		 * over all the uidinfo structures in the system,
763 		 * holding uihashtbl_lock.  Since rctl_rule_free()
764 		 * might end up calling uifree(), this would lead
765 		 * to lock recursion.  Use taskqueue to avoid this.
766 		 */
767 		TASK_INIT(&rule->rr_task, 0, rctl_rule_free, rule);
768 		taskqueue_enqueue(taskqueue_thread, &rule->rr_task);
769 	}
770 }
771 
772 static int
773 rctl_rule_fully_specified(const struct rctl_rule *rule)
774 {
775 
776 	switch (rule->rr_subject_type) {
777 	case RCTL_SUBJECT_TYPE_UNDEFINED:
778 		return (0);
779 	case RCTL_SUBJECT_TYPE_PROCESS:
780 		if (rule->rr_subject.rs_proc == NULL)
781 			return (0);
782 		break;
783 	case RCTL_SUBJECT_TYPE_USER:
784 		if (rule->rr_subject.rs_uip == NULL)
785 			return (0);
786 		break;
787 	case RCTL_SUBJECT_TYPE_LOGINCLASS:
788 		if (rule->rr_subject.rs_loginclass == NULL)
789 			return (0);
790 		break;
791 	case RCTL_SUBJECT_TYPE_JAIL:
792 		if (rule->rr_subject.rs_prison_racct == NULL)
793 			return (0);
794 		break;
795 	default:
796 		panic("rctl_rule_fully_specified: unknown subject type %d",
797 		    rule->rr_subject_type);
798 	}
799 	if (rule->rr_resource == RACCT_UNDEFINED)
800 		return (0);
801 	if (rule->rr_action == RCTL_ACTION_UNDEFINED)
802 		return (0);
803 	if (rule->rr_amount == RCTL_AMOUNT_UNDEFINED)
804 		return (0);
805 	if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED)
806 		return (0);
807 
808 	return (1);
809 }
810 
811 static int
812 rctl_string_to_rule(char *rulestr, struct rctl_rule **rulep)
813 {
814 	int error = 0;
815 	char *subjectstr, *subject_idstr, *resourcestr, *actionstr,
816 	     *amountstr, *perstr;
817 	struct rctl_rule *rule;
818 	id_t id;
819 
820 	rule = rctl_rule_alloc(M_WAITOK);
821 
822 	subjectstr = strsep(&rulestr, ":");
823 	subject_idstr = strsep(&rulestr, ":");
824 	resourcestr = strsep(&rulestr, ":");
825 	actionstr = strsep(&rulestr, "=/");
826 	amountstr = strsep(&rulestr, "/");
827 	perstr = rulestr;
828 
829 	if (subjectstr == NULL || subjectstr[0] == '\0')
830 		rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
831 	else {
832 		error = str2value(subjectstr, &rule->rr_subject_type, subjectnames);
833 		if (error != 0)
834 			goto out;
835 	}
836 
837 	if (subject_idstr == NULL || subject_idstr[0] == '\0') {
838 		rule->rr_subject.rs_proc = NULL;
839 		rule->rr_subject.rs_uip = NULL;
840 		rule->rr_subject.rs_loginclass = NULL;
841 		rule->rr_subject.rs_prison_racct = NULL;
842 	} else {
843 		switch (rule->rr_subject_type) {
844 		case RCTL_SUBJECT_TYPE_UNDEFINED:
845 			error = EINVAL;
846 			goto out;
847 		case RCTL_SUBJECT_TYPE_PROCESS:
848 			error = str2id(subject_idstr, &id);
849 			if (error != 0)
850 				goto out;
851 			sx_assert(&allproc_lock, SA_LOCKED);
852 			rule->rr_subject.rs_proc = pfind(id);
853 			if (rule->rr_subject.rs_proc == NULL) {
854 				error = ESRCH;
855 				goto out;
856 			}
857 			PROC_UNLOCK(rule->rr_subject.rs_proc);
858 			break;
859 		case RCTL_SUBJECT_TYPE_USER:
860 			error = str2id(subject_idstr, &id);
861 			if (error != 0)
862 				goto out;
863 			rule->rr_subject.rs_uip = uifind(id);
864 			break;
865 		case RCTL_SUBJECT_TYPE_LOGINCLASS:
866 			rule->rr_subject.rs_loginclass =
867 			    loginclass_find(subject_idstr);
868 			if (rule->rr_subject.rs_loginclass == NULL) {
869 				error = ENAMETOOLONG;
870 				goto out;
871 			}
872 			break;
873 		case RCTL_SUBJECT_TYPE_JAIL:
874 			rule->rr_subject.rs_prison_racct =
875 			    prison_racct_find(subject_idstr);
876 			if (rule->rr_subject.rs_prison_racct == NULL) {
877 				error = ENAMETOOLONG;
878 				goto out;
879 			}
880 			break;
881                default:
882                        panic("rctl_string_to_rule: unknown subject type %d",
883                            rule->rr_subject_type);
884                }
885 	}
886 
887 	if (resourcestr == NULL || resourcestr[0] == '\0')
888 		rule->rr_resource = RACCT_UNDEFINED;
889 	else {
890 		error = str2value(resourcestr, &rule->rr_resource,
891 		    resourcenames);
892 		if (error != 0)
893 			goto out;
894 	}
895 
896 	if (actionstr == NULL || actionstr[0] == '\0')
897 		rule->rr_action = RCTL_ACTION_UNDEFINED;
898 	else {
899 		error = str2value(actionstr, &rule->rr_action, actionnames);
900 		if (error != 0)
901 			goto out;
902 	}
903 
904 	if (amountstr == NULL || amountstr[0] == '\0')
905 		rule->rr_amount = RCTL_AMOUNT_UNDEFINED;
906 	else {
907 		error = str2int64(amountstr, &rule->rr_amount);
908 		if (error != 0)
909 			goto out;
910 		if (racct_is_in_thousands(rule->rr_resource))
911 			rule->rr_amount *= 1000;
912 	}
913 
914 	if (perstr == NULL || perstr[0] == '\0')
915 		rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED;
916 	else {
917 		error = str2value(perstr, &rule->rr_per, subjectnames);
918 		if (error != 0)
919 			goto out;
920 	}
921 
922 out:
923 	if (error == 0)
924 		*rulep = rule;
925 	else
926 		rctl_rule_release(rule);
927 
928 	return (error);
929 }
930 
931 /*
932  * Link a rule with all the subjects it applies to.
933  */
934 int
935 rctl_rule_add(struct rctl_rule *rule)
936 {
937 	struct proc *p;
938 	struct ucred *cred;
939 	struct uidinfo *uip;
940 	struct prison *pr;
941 	struct prison_racct *prr;
942 	struct loginclass *lc;
943 	struct rctl_rule *rule2;
944 	int match;
945 
946 	KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
947 
948 	/*
949 	 * Some rules just don't make sense.  Note that the one below
950 	 * cannot be rewritten using racct_is_deniable(); the RACCT_PCTCPU,
951 	 * for example, is not deniable in the racct sense, but the
952 	 * limit is enforced in a different way, so "deny" rules for %CPU
953 	 * do make sense.
954 	 */
955 	if (rule->rr_action == RCTL_ACTION_DENY &&
956 	    (rule->rr_resource == RACCT_CPU ||
957 	    rule->rr_resource == RACCT_WALLCLOCK))
958 		return (EOPNOTSUPP);
959 
960 	if (rule->rr_per == RCTL_SUBJECT_TYPE_PROCESS &&
961 	    racct_is_sloppy(rule->rr_resource))
962 		return (EOPNOTSUPP);
963 
964 	/*
965 	 * Make sure there are no duplicated rules.  Also, for the "deny"
966 	 * rules, remove ones differing only by "amount".
967 	 */
968 	if (rule->rr_action == RCTL_ACTION_DENY) {
969 		rule2 = rctl_rule_duplicate(rule, M_WAITOK);
970 		rule2->rr_amount = RCTL_AMOUNT_UNDEFINED;
971 		rctl_rule_remove(rule2);
972 		rctl_rule_release(rule2);
973 	} else
974 		rctl_rule_remove(rule);
975 
976 	switch (rule->rr_subject_type) {
977 	case RCTL_SUBJECT_TYPE_PROCESS:
978 		p = rule->rr_subject.rs_proc;
979 		KASSERT(p != NULL, ("rctl_rule_add: NULL proc"));
980 		/*
981 		 * No resource limits for system processes.
982 		 */
983 		if (p->p_flag & P_SYSTEM)
984 			return (EPERM);
985 
986 		rctl_racct_add_rule(p->p_racct, rule);
987 		/*
988 		 * In case of per-process rule, we don't have anything more
989 		 * to do.
990 		 */
991 		return (0);
992 
993 	case RCTL_SUBJECT_TYPE_USER:
994 		uip = rule->rr_subject.rs_uip;
995 		KASSERT(uip != NULL, ("rctl_rule_add: NULL uip"));
996 		rctl_racct_add_rule(uip->ui_racct, rule);
997 		break;
998 
999 	case RCTL_SUBJECT_TYPE_LOGINCLASS:
1000 		lc = rule->rr_subject.rs_loginclass;
1001 		KASSERT(lc != NULL, ("rctl_rule_add: NULL loginclass"));
1002 		rctl_racct_add_rule(lc->lc_racct, rule);
1003 		break;
1004 
1005 	case RCTL_SUBJECT_TYPE_JAIL:
1006 		prr = rule->rr_subject.rs_prison_racct;
1007 		KASSERT(prr != NULL, ("rctl_rule_add: NULL pr"));
1008 		rctl_racct_add_rule(prr->prr_racct, rule);
1009 		break;
1010 
1011 	default:
1012 		panic("rctl_rule_add: unknown subject type %d",
1013 		    rule->rr_subject_type);
1014 	}
1015 
1016 	/*
1017 	 * Now go through all the processes and add the new rule to the ones
1018 	 * it applies to.
1019 	 */
1020 	sx_assert(&allproc_lock, SA_LOCKED);
1021 	FOREACH_PROC_IN_SYSTEM(p) {
1022 		if (p->p_flag & P_SYSTEM)
1023 			continue;
1024 		cred = p->p_ucred;
1025 		switch (rule->rr_subject_type) {
1026 		case RCTL_SUBJECT_TYPE_USER:
1027 			if (cred->cr_uidinfo == rule->rr_subject.rs_uip ||
1028 			    cred->cr_ruidinfo == rule->rr_subject.rs_uip)
1029 				break;
1030 			continue;
1031 		case RCTL_SUBJECT_TYPE_LOGINCLASS:
1032 			if (cred->cr_loginclass == rule->rr_subject.rs_loginclass)
1033 				break;
1034 			continue;
1035 		case RCTL_SUBJECT_TYPE_JAIL:
1036 			match = 0;
1037 			for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) {
1038 				if (pr->pr_prison_racct == rule->rr_subject.rs_prison_racct) {
1039 					match = 1;
1040 					break;
1041 				}
1042 			}
1043 			if (match)
1044 				break;
1045 			continue;
1046 		default:
1047 			panic("rctl_rule_add: unknown subject type %d",
1048 			    rule->rr_subject_type);
1049 		}
1050 
1051 		rctl_racct_add_rule(p->p_racct, rule);
1052 	}
1053 
1054 	return (0);
1055 }
1056 
1057 static void
1058 rctl_rule_remove_callback(struct racct *racct, void *arg2, void *arg3)
1059 {
1060 	struct rctl_rule *filter = (struct rctl_rule *)arg2;
1061 	int found = 0;
1062 
1063 	rw_wlock(&rctl_lock);
1064 	found += rctl_racct_remove_rules(racct, filter);
1065 	rw_wunlock(&rctl_lock);
1066 
1067 	*((int *)arg3) += found;
1068 }
1069 
1070 /*
1071  * Remove all rules that match the filter.
1072  */
1073 int
1074 rctl_rule_remove(struct rctl_rule *filter)
1075 {
1076 	int found = 0;
1077 	struct proc *p;
1078 
1079 	if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_PROCESS &&
1080 	    filter->rr_subject.rs_proc != NULL) {
1081 		p = filter->rr_subject.rs_proc;
1082 		rw_wlock(&rctl_lock);
1083 		found = rctl_racct_remove_rules(p->p_racct, filter);
1084 		rw_wunlock(&rctl_lock);
1085 		if (found)
1086 			return (0);
1087 		return (ESRCH);
1088 	}
1089 
1090 	loginclass_racct_foreach(rctl_rule_remove_callback, filter,
1091 	    (void *)&found);
1092 	ui_racct_foreach(rctl_rule_remove_callback, filter,
1093 	    (void *)&found);
1094 	prison_racct_foreach(rctl_rule_remove_callback, filter,
1095 	    (void *)&found);
1096 
1097 	sx_assert(&allproc_lock, SA_LOCKED);
1098 	rw_wlock(&rctl_lock);
1099 	FOREACH_PROC_IN_SYSTEM(p) {
1100 		found += rctl_racct_remove_rules(p->p_racct, filter);
1101 	}
1102 	rw_wunlock(&rctl_lock);
1103 
1104 	if (found)
1105 		return (0);
1106 	return (ESRCH);
1107 }
1108 
1109 /*
1110  * Appends a rule to the sbuf.
1111  */
1112 static void
1113 rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule)
1114 {
1115 	int64_t amount;
1116 
1117 	sbuf_printf(sb, "%s:", rctl_subject_type_name(rule->rr_subject_type));
1118 
1119 	switch (rule->rr_subject_type) {
1120 	case RCTL_SUBJECT_TYPE_PROCESS:
1121 		if (rule->rr_subject.rs_proc == NULL)
1122 			sbuf_printf(sb, ":");
1123 		else
1124 			sbuf_printf(sb, "%d:",
1125 			    rule->rr_subject.rs_proc->p_pid);
1126 		break;
1127 	case RCTL_SUBJECT_TYPE_USER:
1128 		if (rule->rr_subject.rs_uip == NULL)
1129 			sbuf_printf(sb, ":");
1130 		else
1131 			sbuf_printf(sb, "%d:",
1132 			    rule->rr_subject.rs_uip->ui_uid);
1133 		break;
1134 	case RCTL_SUBJECT_TYPE_LOGINCLASS:
1135 		if (rule->rr_subject.rs_loginclass == NULL)
1136 			sbuf_printf(sb, ":");
1137 		else
1138 			sbuf_printf(sb, "%s:",
1139 			    rule->rr_subject.rs_loginclass->lc_name);
1140 		break;
1141 	case RCTL_SUBJECT_TYPE_JAIL:
1142 		if (rule->rr_subject.rs_prison_racct == NULL)
1143 			sbuf_printf(sb, ":");
1144 		else
1145 			sbuf_printf(sb, "%s:",
1146 			    rule->rr_subject.rs_prison_racct->prr_name);
1147 		break;
1148 	default:
1149 		panic("rctl_rule_to_sbuf: unknown subject type %d",
1150 		    rule->rr_subject_type);
1151 	}
1152 
1153 	amount = rule->rr_amount;
1154 	if (amount != RCTL_AMOUNT_UNDEFINED &&
1155 	    racct_is_in_thousands(rule->rr_resource))
1156 		amount /= 1000;
1157 
1158 	sbuf_printf(sb, "%s:%s=%jd",
1159 	    rctl_resource_name(rule->rr_resource),
1160 	    rctl_action_name(rule->rr_action),
1161 	    amount);
1162 
1163 	if (rule->rr_per != rule->rr_subject_type)
1164 		sbuf_printf(sb, "/%s", rctl_subject_type_name(rule->rr_per));
1165 }
1166 
1167 /*
1168  * Routine used by RCTL syscalls to read in input string.
1169  */
1170 static int
1171 rctl_read_inbuf(char **inputstr, const char *inbufp, size_t inbuflen)
1172 {
1173 	int error;
1174 	char *str;
1175 
1176 	if (inbuflen <= 0)
1177 		return (EINVAL);
1178 
1179 	str = malloc(inbuflen + 1, M_RCTL, M_WAITOK);
1180 	error = copyinstr(inbufp, str, inbuflen, NULL);
1181 	if (error != 0) {
1182 		free(str, M_RCTL);
1183 		return (error);
1184 	}
1185 
1186 	*inputstr = str;
1187 
1188 	return (0);
1189 }
1190 
1191 /*
1192  * Routine used by RCTL syscalls to write out output string.
1193  */
1194 static int
1195 rctl_write_outbuf(struct sbuf *outputsbuf, char *outbufp, size_t outbuflen)
1196 {
1197 	int error;
1198 
1199 	if (outputsbuf == NULL)
1200 		return (0);
1201 
1202 	sbuf_finish(outputsbuf);
1203 	if (outbuflen < sbuf_len(outputsbuf) + 1) {
1204 		sbuf_delete(outputsbuf);
1205 		return (ERANGE);
1206 	}
1207 	error = copyout(sbuf_data(outputsbuf), outbufp,
1208 	    sbuf_len(outputsbuf) + 1);
1209 	sbuf_delete(outputsbuf);
1210 	return (error);
1211 }
1212 
1213 static struct sbuf *
1214 rctl_racct_to_sbuf(struct racct *racct, int sloppy)
1215 {
1216 	int i;
1217 	int64_t amount;
1218 	struct sbuf *sb;
1219 
1220 	sb = sbuf_new_auto();
1221 	for (i = 0; i <= RACCT_MAX; i++) {
1222 		if (sloppy == 0 && racct_is_sloppy(i))
1223 			continue;
1224 		amount = racct->r_resources[i];
1225 		if (racct_is_in_thousands(i))
1226 			amount /= 1000;
1227 		sbuf_printf(sb, "%s=%jd,", rctl_resource_name(i), amount);
1228 	}
1229 	sbuf_setpos(sb, sbuf_len(sb) - 1);
1230 	return (sb);
1231 }
1232 
1233 int
1234 rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap)
1235 {
1236 	int error;
1237 	char *inputstr;
1238 	struct rctl_rule *filter;
1239 	struct sbuf *outputsbuf = NULL;
1240 	struct proc *p;
1241 	struct uidinfo *uip;
1242 	struct loginclass *lc;
1243 	struct prison_racct *prr;
1244 
1245 	error = priv_check(td, PRIV_RCTL_GET_RACCT);
1246 	if (error != 0)
1247 		return (error);
1248 
1249 	error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1250 	if (error != 0)
1251 		return (error);
1252 
1253 	sx_slock(&allproc_lock);
1254 	error = rctl_string_to_rule(inputstr, &filter);
1255 	free(inputstr, M_RCTL);
1256 	if (error != 0) {
1257 		sx_sunlock(&allproc_lock);
1258 		return (error);
1259 	}
1260 
1261 	switch (filter->rr_subject_type) {
1262 	case RCTL_SUBJECT_TYPE_PROCESS:
1263 		p = filter->rr_subject.rs_proc;
1264 		if (p == NULL) {
1265 			error = EINVAL;
1266 			goto out;
1267 		}
1268 		if (p->p_flag & P_SYSTEM) {
1269 			error = EINVAL;
1270 			goto out;
1271 		}
1272 		outputsbuf = rctl_racct_to_sbuf(p->p_racct, 0);
1273 		break;
1274 	case RCTL_SUBJECT_TYPE_USER:
1275 		uip = filter->rr_subject.rs_uip;
1276 		if (uip == NULL) {
1277 			error = EINVAL;
1278 			goto out;
1279 		}
1280 		outputsbuf = rctl_racct_to_sbuf(uip->ui_racct, 1);
1281 		break;
1282 	case RCTL_SUBJECT_TYPE_LOGINCLASS:
1283 		lc = filter->rr_subject.rs_loginclass;
1284 		if (lc == NULL) {
1285 			error = EINVAL;
1286 			goto out;
1287 		}
1288 		outputsbuf = rctl_racct_to_sbuf(lc->lc_racct, 1);
1289 		break;
1290 	case RCTL_SUBJECT_TYPE_JAIL:
1291 		prr = filter->rr_subject.rs_prison_racct;
1292 		if (prr == NULL) {
1293 			error = EINVAL;
1294 			goto out;
1295 		}
1296 		outputsbuf = rctl_racct_to_sbuf(prr->prr_racct, 1);
1297 		break;
1298 	default:
1299 		error = EINVAL;
1300 	}
1301 out:
1302 	rctl_rule_release(filter);
1303 	sx_sunlock(&allproc_lock);
1304 	if (error != 0)
1305 		return (error);
1306 
1307 	error = rctl_write_outbuf(outputsbuf, uap->outbufp, uap->outbuflen);
1308 
1309 	return (error);
1310 }
1311 
1312 static void
1313 rctl_get_rules_callback(struct racct *racct, void *arg2, void *arg3)
1314 {
1315 	struct rctl_rule *filter = (struct rctl_rule *)arg2;
1316 	struct rctl_rule_link *link;
1317 	struct sbuf *sb = (struct sbuf *)arg3;
1318 
1319 	rw_rlock(&rctl_lock);
1320 	LIST_FOREACH(link, &racct->r_rule_links, rrl_next) {
1321 		if (!rctl_rule_matches(link->rrl_rule, filter))
1322 			continue;
1323 		rctl_rule_to_sbuf(sb, link->rrl_rule);
1324 		sbuf_printf(sb, ",");
1325 	}
1326 	rw_runlock(&rctl_lock);
1327 }
1328 
1329 int
1330 rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
1331 {
1332 	int error;
1333 	size_t bufsize = RCTL_DEFAULT_BUFSIZE;
1334 	char *inputstr, *buf;
1335 	struct sbuf *sb;
1336 	struct rctl_rule *filter;
1337 	struct rctl_rule_link *link;
1338 	struct proc *p;
1339 
1340 	error = priv_check(td, PRIV_RCTL_GET_RULES);
1341 	if (error != 0)
1342 		return (error);
1343 
1344 	error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1345 	if (error != 0)
1346 		return (error);
1347 
1348 	sx_slock(&allproc_lock);
1349 	error = rctl_string_to_rule(inputstr, &filter);
1350 	free(inputstr, M_RCTL);
1351 	if (error != 0) {
1352 		sx_sunlock(&allproc_lock);
1353 		return (error);
1354 	}
1355 
1356 again:
1357 	buf = malloc(bufsize, M_RCTL, M_WAITOK);
1358 	sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN);
1359 	KASSERT(sb != NULL, ("sbuf_new failed"));
1360 
1361 	sx_assert(&allproc_lock, SA_LOCKED);
1362 	FOREACH_PROC_IN_SYSTEM(p) {
1363 		rw_rlock(&rctl_lock);
1364 		LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1365 			/*
1366 			 * Non-process rules will be added to the buffer later.
1367 			 * Adding them here would result in duplicated output.
1368 			 */
1369 			if (link->rrl_rule->rr_subject_type !=
1370 			    RCTL_SUBJECT_TYPE_PROCESS)
1371 				continue;
1372 			if (!rctl_rule_matches(link->rrl_rule, filter))
1373 				continue;
1374 			rctl_rule_to_sbuf(sb, link->rrl_rule);
1375 			sbuf_printf(sb, ",");
1376 		}
1377 		rw_runlock(&rctl_lock);
1378 	}
1379 
1380 	loginclass_racct_foreach(rctl_get_rules_callback, filter, sb);
1381 	ui_racct_foreach(rctl_get_rules_callback, filter, sb);
1382 	prison_racct_foreach(rctl_get_rules_callback, filter, sb);
1383 	if (sbuf_error(sb) == ENOMEM) {
1384 		sbuf_delete(sb);
1385 		free(buf, M_RCTL);
1386 		bufsize *= 4;
1387 		goto again;
1388 	}
1389 
1390 	/*
1391 	 * Remove trailing ",".
1392 	 */
1393 	if (sbuf_len(sb) > 0)
1394 		sbuf_setpos(sb, sbuf_len(sb) - 1);
1395 
1396 	error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
1397 
1398 	rctl_rule_release(filter);
1399 	sx_sunlock(&allproc_lock);
1400 	free(buf, M_RCTL);
1401 	return (error);
1402 }
1403 
1404 int
1405 rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap)
1406 {
1407 	int error;
1408 	size_t bufsize = RCTL_DEFAULT_BUFSIZE;
1409 	char *inputstr, *buf;
1410 	struct sbuf *sb;
1411 	struct rctl_rule *filter;
1412 	struct rctl_rule_link *link;
1413 
1414 	error = priv_check(td, PRIV_RCTL_GET_LIMITS);
1415 	if (error != 0)
1416 		return (error);
1417 
1418 	error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1419 	if (error != 0)
1420 		return (error);
1421 
1422 	sx_slock(&allproc_lock);
1423 	error = rctl_string_to_rule(inputstr, &filter);
1424 	free(inputstr, M_RCTL);
1425 	if (error != 0) {
1426 		sx_sunlock(&allproc_lock);
1427 		return (error);
1428 	}
1429 
1430 	if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_UNDEFINED) {
1431 		rctl_rule_release(filter);
1432 		sx_sunlock(&allproc_lock);
1433 		return (EINVAL);
1434 	}
1435 	if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_PROCESS) {
1436 		rctl_rule_release(filter);
1437 		sx_sunlock(&allproc_lock);
1438 		return (EOPNOTSUPP);
1439 	}
1440 	if (filter->rr_subject.rs_proc == NULL) {
1441 		rctl_rule_release(filter);
1442 		sx_sunlock(&allproc_lock);
1443 		return (EINVAL);
1444 	}
1445 
1446 again:
1447 	buf = malloc(bufsize, M_RCTL, M_WAITOK);
1448 	sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN);
1449 	KASSERT(sb != NULL, ("sbuf_new failed"));
1450 
1451 	rw_rlock(&rctl_lock);
1452 	LIST_FOREACH(link, &filter->rr_subject.rs_proc->p_racct->r_rule_links,
1453 	    rrl_next) {
1454 		rctl_rule_to_sbuf(sb, link->rrl_rule);
1455 		sbuf_printf(sb, ",");
1456 	}
1457 	rw_runlock(&rctl_lock);
1458 	if (sbuf_error(sb) == ENOMEM) {
1459 		sbuf_delete(sb);
1460 		free(buf, M_RCTL);
1461 		bufsize *= 4;
1462 		goto again;
1463 	}
1464 
1465 	/*
1466 	 * Remove trailing ",".
1467 	 */
1468 	if (sbuf_len(sb) > 0)
1469 		sbuf_setpos(sb, sbuf_len(sb) - 1);
1470 
1471 	error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
1472 	rctl_rule_release(filter);
1473 	sx_sunlock(&allproc_lock);
1474 	free(buf, M_RCTL);
1475 	return (error);
1476 }
1477 
1478 int
1479 rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap)
1480 {
1481 	int error;
1482 	struct rctl_rule *rule;
1483 	char *inputstr;
1484 
1485 	error = priv_check(td, PRIV_RCTL_ADD_RULE);
1486 	if (error != 0)
1487 		return (error);
1488 
1489 	error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1490 	if (error != 0)
1491 		return (error);
1492 
1493 	sx_slock(&allproc_lock);
1494 	error = rctl_string_to_rule(inputstr, &rule);
1495 	free(inputstr, M_RCTL);
1496 	if (error != 0) {
1497 		sx_sunlock(&allproc_lock);
1498 		return (error);
1499 	}
1500 	/*
1501 	 * The 'per' part of a rule is optional.
1502 	 */
1503 	if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED &&
1504 	    rule->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED)
1505 		rule->rr_per = rule->rr_subject_type;
1506 
1507 	if (!rctl_rule_fully_specified(rule)) {
1508 		error = EINVAL;
1509 		goto out;
1510 	}
1511 
1512 	error = rctl_rule_add(rule);
1513 
1514 out:
1515 	rctl_rule_release(rule);
1516 	sx_sunlock(&allproc_lock);
1517 	return (error);
1518 }
1519 
1520 int
1521 rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap)
1522 {
1523 	int error;
1524 	struct rctl_rule *filter;
1525 	char *inputstr;
1526 
1527 	error = priv_check(td, PRIV_RCTL_REMOVE_RULE);
1528 	if (error != 0)
1529 		return (error);
1530 
1531 	error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1532 	if (error != 0)
1533 		return (error);
1534 
1535 	sx_slock(&allproc_lock);
1536 	error = rctl_string_to_rule(inputstr, &filter);
1537 	free(inputstr, M_RCTL);
1538 	if (error != 0) {
1539 		sx_sunlock(&allproc_lock);
1540 		return (error);
1541 	}
1542 
1543 	error = rctl_rule_remove(filter);
1544 	rctl_rule_release(filter);
1545 	sx_sunlock(&allproc_lock);
1546 
1547 	return (error);
1548 }
1549 
1550 /*
1551  * Update RCTL rule list after credential change.
1552  */
1553 void
1554 rctl_proc_ucred_changed(struct proc *p, struct ucred *newcred)
1555 {
1556 	int rulecnt, i;
1557 	struct rctl_rule_link *link, *newlink;
1558 	struct uidinfo *newuip;
1559 	struct loginclass *newlc;
1560 	struct prison_racct *newprr;
1561 	LIST_HEAD(, rctl_rule_link) newrules;
1562 
1563 	newuip = newcred->cr_ruidinfo;
1564 	newlc = newcred->cr_loginclass;
1565 	newprr = newcred->cr_prison->pr_prison_racct;
1566 
1567 	LIST_INIT(&newrules);
1568 
1569 again:
1570 	/*
1571 	 * First, count the rules that apply to the process with new
1572 	 * credentials.
1573 	 */
1574 	rulecnt = 0;
1575 	rw_rlock(&rctl_lock);
1576 	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1577 		if (link->rrl_rule->rr_subject_type ==
1578 		    RCTL_SUBJECT_TYPE_PROCESS)
1579 			rulecnt++;
1580 	}
1581 	LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next)
1582 		rulecnt++;
1583 	LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next)
1584 		rulecnt++;
1585 	LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next)
1586 		rulecnt++;
1587 	rw_runlock(&rctl_lock);
1588 
1589 	/*
1590 	 * Create temporary list.  We've dropped the rctl_lock in order
1591 	 * to use M_WAITOK.
1592 	 */
1593 	for (i = 0; i < rulecnt; i++) {
1594 		newlink = uma_zalloc(rctl_rule_link_zone, M_WAITOK);
1595 		newlink->rrl_rule = NULL;
1596 		LIST_INSERT_HEAD(&newrules, newlink, rrl_next);
1597 	}
1598 
1599 	newlink = LIST_FIRST(&newrules);
1600 
1601 	/*
1602 	 * Assign rules to the newly allocated list entries.
1603 	 */
1604 	rw_wlock(&rctl_lock);
1605 	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1606 		if (link->rrl_rule->rr_subject_type ==
1607 		    RCTL_SUBJECT_TYPE_PROCESS) {
1608 			if (newlink == NULL)
1609 				goto goaround;
1610 			rctl_rule_acquire(link->rrl_rule);
1611 			newlink->rrl_rule = link->rrl_rule;
1612 			newlink = LIST_NEXT(newlink, rrl_next);
1613 			rulecnt--;
1614 		}
1615 	}
1616 
1617 	LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next) {
1618 		if (newlink == NULL)
1619 			goto goaround;
1620 		rctl_rule_acquire(link->rrl_rule);
1621 		newlink->rrl_rule = link->rrl_rule;
1622 		newlink = LIST_NEXT(newlink, rrl_next);
1623 		rulecnt--;
1624 	}
1625 
1626 	LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next) {
1627 		if (newlink == NULL)
1628 			goto goaround;
1629 		rctl_rule_acquire(link->rrl_rule);
1630 		newlink->rrl_rule = link->rrl_rule;
1631 		newlink = LIST_NEXT(newlink, rrl_next);
1632 		rulecnt--;
1633 	}
1634 
1635 	LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next) {
1636 		if (newlink == NULL)
1637 			goto goaround;
1638 		rctl_rule_acquire(link->rrl_rule);
1639 		newlink->rrl_rule = link->rrl_rule;
1640 		newlink = LIST_NEXT(newlink, rrl_next);
1641 		rulecnt--;
1642 	}
1643 
1644 	if (rulecnt == 0) {
1645 		/*
1646 		 * Free the old rule list.
1647 		 */
1648 		while (!LIST_EMPTY(&p->p_racct->r_rule_links)) {
1649 			link = LIST_FIRST(&p->p_racct->r_rule_links);
1650 			LIST_REMOVE(link, rrl_next);
1651 			rctl_rule_release(link->rrl_rule);
1652 			uma_zfree(rctl_rule_link_zone, link);
1653 		}
1654 
1655 		/*
1656 		 * Replace lists and we're done.
1657 		 *
1658 		 * XXX: Is there any way to switch list heads instead
1659 		 *      of iterating here?
1660 		 */
1661 		while (!LIST_EMPTY(&newrules)) {
1662 			newlink = LIST_FIRST(&newrules);
1663 			LIST_REMOVE(newlink, rrl_next);
1664 			LIST_INSERT_HEAD(&p->p_racct->r_rule_links,
1665 			    newlink, rrl_next);
1666 		}
1667 
1668 		rw_wunlock(&rctl_lock);
1669 
1670 		return;
1671 	}
1672 
1673 goaround:
1674 	rw_wunlock(&rctl_lock);
1675 
1676 	/*
1677 	 * Rule list changed while we were not holding the rctl_lock.
1678 	 * Free the new list and try again.
1679 	 */
1680 	while (!LIST_EMPTY(&newrules)) {
1681 		newlink = LIST_FIRST(&newrules);
1682 		LIST_REMOVE(newlink, rrl_next);
1683 		if (newlink->rrl_rule != NULL)
1684 			rctl_rule_release(newlink->rrl_rule);
1685 		uma_zfree(rctl_rule_link_zone, newlink);
1686 	}
1687 
1688 	goto again;
1689 }
1690 
1691 /*
1692  * Assign RCTL rules to the newly created process.
1693  */
1694 int
1695 rctl_proc_fork(struct proc *parent, struct proc *child)
1696 {
1697 	int error;
1698 	struct rctl_rule_link *link;
1699 	struct rctl_rule *rule;
1700 
1701 	LIST_INIT(&child->p_racct->r_rule_links);
1702 
1703 	/*
1704 	 * No limits for kernel processes.
1705 	 */
1706 	if (child->p_flag & P_SYSTEM)
1707 		return (0);
1708 
1709 	/*
1710 	 * Nothing to inherit from P_SYSTEM parents.
1711 	 */
1712 	if (parent->p_racct == NULL) {
1713 		KASSERT(parent->p_flag & P_SYSTEM,
1714 		    ("non-system process without racct; p = %p", parent));
1715 		return (0);
1716 	}
1717 
1718 	rw_wlock(&rctl_lock);
1719 
1720 	/*
1721 	 * Go through limits applicable to the parent and assign them
1722 	 * to the child.  Rules with 'process' subject have to be duplicated
1723 	 * in order to make their rr_subject point to the new process.
1724 	 */
1725 	LIST_FOREACH(link, &parent->p_racct->r_rule_links, rrl_next) {
1726 		if (link->rrl_rule->rr_subject_type ==
1727 		    RCTL_SUBJECT_TYPE_PROCESS) {
1728 			rule = rctl_rule_duplicate(link->rrl_rule, M_NOWAIT);
1729 			if (rule == NULL)
1730 				goto fail;
1731 			KASSERT(rule->rr_subject.rs_proc == parent,
1732 			    ("rule->rr_subject.rs_proc != parent"));
1733 			rule->rr_subject.rs_proc = child;
1734 			error = rctl_racct_add_rule_locked(child->p_racct,
1735 			    rule);
1736 			rctl_rule_release(rule);
1737 			if (error != 0)
1738 				goto fail;
1739 		} else {
1740 			error = rctl_racct_add_rule_locked(child->p_racct,
1741 			    link->rrl_rule);
1742 			if (error != 0)
1743 				goto fail;
1744 		}
1745 	}
1746 
1747 	rw_wunlock(&rctl_lock);
1748 	return (0);
1749 
1750 fail:
1751 	while (!LIST_EMPTY(&child->p_racct->r_rule_links)) {
1752 		link = LIST_FIRST(&child->p_racct->r_rule_links);
1753 		LIST_REMOVE(link, rrl_next);
1754 		rctl_rule_release(link->rrl_rule);
1755 		uma_zfree(rctl_rule_link_zone, link);
1756 	}
1757 	rw_wunlock(&rctl_lock);
1758 	return (EAGAIN);
1759 }
1760 
1761 /*
1762  * Release rules attached to the racct.
1763  */
1764 void
1765 rctl_racct_release(struct racct *racct)
1766 {
1767 	struct rctl_rule_link *link;
1768 
1769 	rw_wlock(&rctl_lock);
1770 	while (!LIST_EMPTY(&racct->r_rule_links)) {
1771 		link = LIST_FIRST(&racct->r_rule_links);
1772 		LIST_REMOVE(link, rrl_next);
1773 		rctl_rule_release(link->rrl_rule);
1774 		uma_zfree(rctl_rule_link_zone, link);
1775 	}
1776 	rw_wunlock(&rctl_lock);
1777 }
1778 
1779 static void
1780 rctl_init(void)
1781 {
1782 
1783 	rctl_rule_link_zone = uma_zcreate("rctl_rule_link",
1784 	    sizeof(struct rctl_rule_link), NULL, NULL, NULL, NULL,
1785 	    UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
1786 	rctl_rule_zone = uma_zcreate("rctl_rule", sizeof(struct rctl_rule),
1787 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
1788 }
1789 
1790 #else /* !RCTL */
1791 
/*
 * Stub used when RCTL is not compiled in: always fails with ENOSYS.
 */
int
rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap)
{

	return (ENOSYS);
}
1798 
/*
 * Stub used when RCTL is not compiled in: always fails with ENOSYS.
 */
int
rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
{

	return (ENOSYS);
}
1805 
/*
 * Stub used when RCTL is not compiled in: always fails with ENOSYS.
 */
int
rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap)
{

	return (ENOSYS);
}
1812 
/*
 * Stub used when RCTL is not compiled in: always fails with ENOSYS.
 */
int
rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap)
{

	return (ENOSYS);
}
1819 
/*
 * Stub used when RCTL is not compiled in: always fails with ENOSYS.
 */
int
rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap)
{

	return (ENOSYS);
}
1826 
1827 #endif /* !RCTL */
1828