xref: /freebsd/sys/kern/kern_rctl.c (revision 9a14aa017b21c292740c00ee098195cd46642730)
1 /*-
2  * Copyright (c) 2010 The FreeBSD Foundation
3  * All rights reserved.
4  *
5  * This software was developed by Edward Tomasz Napierala under sponsorship
6  * from the FreeBSD Foundation.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  * $FreeBSD$
30  */
31 
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34 
35 #include <sys/param.h>
36 #include <sys/bus.h>
37 #include <sys/malloc.h>
38 #include <sys/queue.h>
39 #include <sys/refcount.h>
40 #include <sys/jail.h>
41 #include <sys/kernel.h>
42 #include <sys/limits.h>
43 #include <sys/loginclass.h>
44 #include <sys/priv.h>
45 #include <sys/proc.h>
46 #include <sys/racct.h>
47 #include <sys/rctl.h>
48 #include <sys/resourcevar.h>
49 #include <sys/sx.h>
50 #include <sys/sysent.h>
51 #include <sys/sysproto.h>
52 #include <sys/systm.h>
53 #include <sys/types.h>
54 #include <sys/eventhandler.h>
55 #include <sys/lock.h>
56 #include <sys/mutex.h>
57 #include <sys/rwlock.h>
58 #include <sys/sbuf.h>
59 #include <sys/taskqueue.h>
60 #include <sys/tree.h>
61 #include <vm/uma.h>
62 
63 #ifdef RCTL
64 #ifndef RACCT
65 #error "The RCTL option requires the RACCT option"
66 #endif
67 
68 FEATURE(rctl, "Resource Limits");
69 
70 #define	HRF_DEFAULT		0
71 #define	HRF_DONT_INHERIT	1
72 #define	HRF_DONT_ACCUMULATE	2
73 
74 /* Default buffer size for rctl_get_rules(2). */
75 #define	RCTL_DEFAULT_BUFSIZE	4096
76 #define	RCTL_LOG_BUFSIZE	128
77 
78 /*
79  * 'rctl_rule_link' connects a rule with every racct it's related to.
80  * For example, rule 'user:X:openfiles:deny=N/process' is linked
81  * with uidinfo for user X, and to each process of that user.
82  */
struct rctl_rule_link {
	/* Linkage on a racct's r_rule_links list. */
	LIST_ENTRY(rctl_rule_link)	rrl_next;
	/* The rule this link points at; the link holds a rule reference. */
	struct rctl_rule		*rrl_rule;
	/*
	 * Set to 1 by rctl_enforce() once a log, devctl or signal action
	 * has fired through this link, and reset to 0 when the rule is
	 * found not to be exceeded, so the action is not repeated while
	 * the limit stays exceeded.
	 */
	int				rrl_exceeded;
};
88 
/*
 * One entry of a name <-> numeric value lookup table.  The tables in
 * this file are terminated by an entry whose d_name is NULL.
 */
struct dict {
	const char	*d_name;	/* textual name */
	int		d_value;	/* matching numeric constant */
};
93 
94 static struct dict subjectnames[] = {
95 	{ "process", RCTL_SUBJECT_TYPE_PROCESS },
96 	{ "user", RCTL_SUBJECT_TYPE_USER },
97 	{ "loginclass", RCTL_SUBJECT_TYPE_LOGINCLASS },
98 	{ "jail", RCTL_SUBJECT_TYPE_JAIL },
99 	{ NULL, -1 }};
100 
101 static struct dict resourcenames[] = {
102 	{ "cputime", RACCT_CPU },
103 	{ "datasize", RACCT_DATA },
104 	{ "stacksize", RACCT_STACK },
105 	{ "coredumpsize", RACCT_CORE },
106 	{ "memoryuse", RACCT_RSS },
107 	{ "memorylocked", RACCT_MEMLOCK },
108 	{ "maxproc", RACCT_NPROC },
109 	{ "openfiles", RACCT_NOFILE },
110 	{ "vmemoryuse", RACCT_VMEM },
111 	{ "pseudoterminals", RACCT_NPTS },
112 	{ "swapuse", RACCT_SWAP },
113 	{ "nthr", RACCT_NTHR },
114 	{ "msgqqueued", RACCT_MSGQQUEUED },
115 	{ "msgqsize", RACCT_MSGQSIZE },
116 	{ "nmsgq", RACCT_NMSGQ },
117 	{ "nsem", RACCT_NSEM },
118 	{ "nsemop", RACCT_NSEMOP },
119 	{ "nshm", RACCT_NSHM },
120 	{ "shmsize", RACCT_SHMSIZE },
121 	{ "wallclock", RACCT_WALLCLOCK },
122 	{ NULL, -1 }};
123 
124 static struct dict actionnames[] = {
125 	{ "sighup", RCTL_ACTION_SIGHUP },
126 	{ "sigint", RCTL_ACTION_SIGINT },
127 	{ "sigquit", RCTL_ACTION_SIGQUIT },
128 	{ "sigill", RCTL_ACTION_SIGILL },
129 	{ "sigtrap", RCTL_ACTION_SIGTRAP },
130 	{ "sigabrt", RCTL_ACTION_SIGABRT },
131 	{ "sigemt", RCTL_ACTION_SIGEMT },
132 	{ "sigfpe", RCTL_ACTION_SIGFPE },
133 	{ "sigkill", RCTL_ACTION_SIGKILL },
134 	{ "sigbus", RCTL_ACTION_SIGBUS },
135 	{ "sigsegv", RCTL_ACTION_SIGSEGV },
136 	{ "sigsys", RCTL_ACTION_SIGSYS },
137 	{ "sigpipe", RCTL_ACTION_SIGPIPE },
138 	{ "sigalrm", RCTL_ACTION_SIGALRM },
139 	{ "sigterm", RCTL_ACTION_SIGTERM },
140 	{ "sigurg", RCTL_ACTION_SIGURG },
141 	{ "sigstop", RCTL_ACTION_SIGSTOP },
142 	{ "sigtstp", RCTL_ACTION_SIGTSTP },
143 	{ "sigchld", RCTL_ACTION_SIGCHLD },
144 	{ "sigttin", RCTL_ACTION_SIGTTIN },
145 	{ "sigttou", RCTL_ACTION_SIGTTOU },
146 	{ "sigio", RCTL_ACTION_SIGIO },
147 	{ "sigxcpu", RCTL_ACTION_SIGXCPU },
148 	{ "sigxfsz", RCTL_ACTION_SIGXFSZ },
149 	{ "sigvtalrm", RCTL_ACTION_SIGVTALRM },
150 	{ "sigprof", RCTL_ACTION_SIGPROF },
151 	{ "sigwinch", RCTL_ACTION_SIGWINCH },
152 	{ "siginfo", RCTL_ACTION_SIGINFO },
153 	{ "sigusr1", RCTL_ACTION_SIGUSR1 },
154 	{ "sigusr2", RCTL_ACTION_SIGUSR2 },
155 	{ "sigthr", RCTL_ACTION_SIGTHR },
156 	{ "deny", RCTL_ACTION_DENY },
157 	{ "log", RCTL_ACTION_LOG },
158 	{ "devctl", RCTL_ACTION_DEVCTL },
159 	{ NULL, -1 }};
160 
/* Initialization routine, registered to run at SI_SUB_RACCT/SI_ORDER_FIRST. */
static void rctl_init(void);
SYSINIT(rctl, SI_SUB_RACCT, SI_ORDER_FIRST, rctl_init, NULL);

/* UMA zones from which rule links and rules, respectively, are allocated. */
static uma_zone_t rctl_rule_link_zone;
static uma_zone_t rctl_rule_zone;
/*
 * Reader/writer lock held by the code in this file while walking
 * (read) or modifying (write) a racct's r_rule_links list.
 */
static struct rwlock rctl_lock;
RW_SYSINIT(rctl_lock, &rctl_lock, "RCTL lock");

/* Forward declarations for helpers that are used before they are defined. */
static int rctl_rule_fully_specified(const struct rctl_rule *rule);
static void rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule);

/* malloc(9) type; rctl_enforce() uses it for its temporary message buffers. */
static MALLOC_DEFINE(M_RCTL, "rctl", "Resource Limits");
173 
174 static const char *
175 rctl_subject_type_name(int subject)
176 {
177 	int i;
178 
179 	for (i = 0; subjectnames[i].d_name != NULL; i++) {
180 		if (subjectnames[i].d_value == subject)
181 			return (subjectnames[i].d_name);
182 	}
183 
184 	panic("rctl_subject_type_name: unknown subject type %d", subject);
185 }
186 
187 static const char *
188 rctl_action_name(int action)
189 {
190 	int i;
191 
192 	for (i = 0; actionnames[i].d_name != NULL; i++) {
193 		if (actionnames[i].d_value == action)
194 			return (actionnames[i].d_name);
195 	}
196 
197 	panic("rctl_action_name: unknown action %d", action);
198 }
199 
200 const char *
201 rctl_resource_name(int resource)
202 {
203 	int i;
204 
205 	for (i = 0; resourcenames[i].d_name != NULL; i++) {
206 		if (resourcenames[i].d_value == resource)
207 			return (resourcenames[i].d_name);
208 	}
209 
210 	panic("rctl_resource_name: unknown resource %d", resource);
211 }
212 
213 /*
214  * Return the amount of resource that can be allocated by 'p' before
215  * hitting 'rule'.
216  */
217 static int64_t
218 rctl_available_resource(const struct proc *p, const struct rctl_rule *rule)
219 {
220 	int resource;
221 	int64_t available = INT64_MAX;
222 	struct ucred *cred = p->p_ucred;
223 
224 	rw_assert(&rctl_lock, RA_LOCKED);
225 
226 	resource = rule->rr_resource;
227 	switch (rule->rr_per) {
228 	case RCTL_SUBJECT_TYPE_PROCESS:
229 		available = rule->rr_amount -
230 		    p->p_racct->r_resources[resource];
231 		break;
232 	case RCTL_SUBJECT_TYPE_USER:
233 		available = rule->rr_amount -
234 		    cred->cr_ruidinfo->ui_racct->r_resources[resource];
235 		break;
236 	case RCTL_SUBJECT_TYPE_LOGINCLASS:
237 		available = rule->rr_amount -
238 		    cred->cr_loginclass->lc_racct->r_resources[resource];
239 		break;
240 	case RCTL_SUBJECT_TYPE_JAIL:
241 		available = rule->rr_amount -
242 		    cred->cr_prison->pr_prison_racct->prr_racct->
243 		        r_resources[resource];
244 		break;
245 	default:
246 		panic("rctl_compute_available: unknown per %d",
247 		    rule->rr_per);
248 	}
249 
250 	return (available);
251 }
252 
253 /*
254  * Return non-zero if allocating 'amount' by proc 'p' would exceed
255  * resource limit specified by 'rule'.
256  */
257 static int
258 rctl_would_exceed(const struct proc *p, const struct rctl_rule *rule,
259     int64_t amount)
260 {
261 	int64_t available;
262 
263 	rw_assert(&rctl_lock, RA_LOCKED);
264 
265 	available = rctl_available_resource(p, rule);
266 	if (available >= amount)
267 		return (0);
268 
269 	return (1);
270 }
271 
272 /*
273  * Check whether the proc 'p' can allocate 'amount' of 'resource' in addition
274  * to what it keeps allocated now.  Returns non-zero if the allocation should
275  * be denied, 0 otherwise.
276  */
277 int
278 rctl_enforce(struct proc *p, int resource, uint64_t amount)
279 {
280 	struct rctl_rule *rule;
281 	struct rctl_rule_link *link;
282 	struct sbuf sb;
283 	int should_deny = 0;
284 	char *buf;
285 	static int curtime = 0;
286 	static struct timeval lasttime;
287 
288 	rw_rlock(&rctl_lock);
289 
290 	/*
291 	 * There may be more than one matching rule; go through all of them.
292 	 * Denial should be done last, after logging and sending signals.
293 	 */
294 	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
295 		rule = link->rrl_rule;
296 		if (rule->rr_resource != resource)
297 			continue;
298 		if (!rctl_would_exceed(p, rule, amount)) {
299 			link->rrl_exceeded = 0;
300 			continue;
301 		}
302 
303 		switch (rule->rr_action) {
304 		case RCTL_ACTION_DENY:
305 			should_deny = 1;
306 			continue;
307 		case RCTL_ACTION_LOG:
308 			/*
309 			 * If rrl_exceeded != 0, it means we've already
310 			 * logged a warning for this process.
311 			 */
312 			if (link->rrl_exceeded != 0)
313 				continue;
314 
315 			/*
316 			 * If the process state is not fully initialized yet,
317 			 * we can't access most of the required fields, e.g.
318 			 * p->p_comm.  This happens when called from fork1().
319 			 * Ignore this rule for now; it will be processed just
320 			 * after fork, when called from racct_proc_fork_done().
321 			 */
322 			if (p->p_state != PRS_NORMAL)
323 				continue;
324 
325 			if (!ppsratecheck(&lasttime, &curtime, 10))
326 				continue;
327 
328 			buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
329 			if (buf == NULL) {
330 				printf("rctl_enforce: out of memory\n");
331 				continue;
332 			}
333 			sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
334 			rctl_rule_to_sbuf(&sb, rule);
335 			sbuf_finish(&sb);
336 			printf("rctl: rule \"%s\" matched by pid %d "
337 			    "(%s), uid %d, jail %s\n", sbuf_data(&sb),
338 			    p->p_pid, p->p_comm, p->p_ucred->cr_uid,
339 			    p->p_ucred->cr_prison->pr_prison_racct->prr_name);
340 			sbuf_delete(&sb);
341 			free(buf, M_RCTL);
342 			link->rrl_exceeded = 1;
343 			continue;
344 		case RCTL_ACTION_DEVCTL:
345 			if (link->rrl_exceeded != 0)
346 				continue;
347 
348 			if (p->p_state != PRS_NORMAL)
349 				continue;
350 
351 			buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
352 			if (buf == NULL) {
353 				printf("rctl_enforce: out of memory\n");
354 				continue;
355 			}
356 			sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
357 			sbuf_printf(&sb, "rule=");
358 			rctl_rule_to_sbuf(&sb, rule);
359 			sbuf_printf(&sb, " pid=%d ruid=%d jail=%s",
360 			    p->p_pid, p->p_ucred->cr_ruid,
361 			    p->p_ucred->cr_prison->pr_prison_racct->prr_name);
362 			sbuf_finish(&sb);
363 			devctl_notify_f("RCTL", "rule", "matched",
364 			    sbuf_data(&sb), M_NOWAIT);
365 			sbuf_delete(&sb);
366 			free(buf, M_RCTL);
367 			link->rrl_exceeded = 1;
368 			continue;
369 		default:
370 			if (link->rrl_exceeded != 0)
371 				continue;
372 
373 			if (p->p_state != PRS_NORMAL)
374 				continue;
375 
376 			KASSERT(rule->rr_action > 0 &&
377 			    rule->rr_action <= RCTL_ACTION_SIGNAL_MAX,
378 			    ("rctl_enforce: unknown action %d",
379 			     rule->rr_action));
380 
381 			/*
382 			 * We're using the fact that RCTL_ACTION_SIG* values
383 			 * are equal to their counterparts from sys/signal.h.
384 			 */
385 			kern_psignal(p, rule->rr_action);
386 			link->rrl_exceeded = 1;
387 			continue;
388 		}
389 	}
390 
391 	rw_runlock(&rctl_lock);
392 
393 	if (should_deny) {
394 		/*
395 		 * Return fake error code; the caller should change it
396 		 * into one proper for the situation - EFSIZ, ENOMEM etc.
397 		 */
398 		return (EDOOFUS);
399 	}
400 
401 	return (0);
402 }
403 
404 uint64_t
405 rctl_get_limit(struct proc *p, int resource)
406 {
407 	struct rctl_rule *rule;
408 	struct rctl_rule_link *link;
409 	uint64_t amount = UINT64_MAX;
410 
411 	rw_rlock(&rctl_lock);
412 
413 	/*
414 	 * There may be more than one matching rule; go through all of them.
415 	 * Denial should be done last, after logging and sending signals.
416 	 */
417 	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
418 		rule = link->rrl_rule;
419 		if (rule->rr_resource != resource)
420 			continue;
421 		if (rule->rr_action != RCTL_ACTION_DENY)
422 			continue;
423 		if (rule->rr_amount < amount)
424 			amount = rule->rr_amount;
425 	}
426 
427 	rw_runlock(&rctl_lock);
428 
429 	return (amount);
430 }
431 
432 uint64_t
433 rctl_get_available(struct proc *p, int resource)
434 {
435 	struct rctl_rule *rule;
436 	struct rctl_rule_link *link;
437 	int64_t available, minavailable, allocated;
438 
439 	minavailable = INT64_MAX;
440 
441 	rw_rlock(&rctl_lock);
442 
443 	/*
444 	 * There may be more than one matching rule; go through all of them.
445 	 * Denial should be done last, after logging and sending signals.
446 	 */
447 	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
448 		rule = link->rrl_rule;
449 		if (rule->rr_resource != resource)
450 			continue;
451 		if (rule->rr_action != RCTL_ACTION_DENY)
452 			continue;
453 		available = rctl_available_resource(p, rule);
454 		if (available < minavailable)
455 			minavailable = available;
456 	}
457 
458 	rw_runlock(&rctl_lock);
459 
460 	/*
461 	 * XXX: Think about this _hard_.
462 	 */
463 	allocated = p->p_racct->r_resources[resource];
464 	if (minavailable < INT64_MAX - allocated)
465 		minavailable += allocated;
466 	if (minavailable < 0)
467 		minavailable = 0;
468 	return (minavailable);
469 }
470 
471 static int
472 rctl_rule_matches(const struct rctl_rule *rule, const struct rctl_rule *filter)
473 {
474 
475 	if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED) {
476 		if (rule->rr_subject_type != filter->rr_subject_type)
477 			return (0);
478 
479 		switch (filter->rr_subject_type) {
480 		case RCTL_SUBJECT_TYPE_PROCESS:
481 			if (filter->rr_subject.rs_proc != NULL &&
482 			    rule->rr_subject.rs_proc !=
483 			    filter->rr_subject.rs_proc)
484 				return (0);
485 			break;
486 		case RCTL_SUBJECT_TYPE_USER:
487 			if (filter->rr_subject.rs_uip != NULL &&
488 			    rule->rr_subject.rs_uip !=
489 			    filter->rr_subject.rs_uip)
490 				return (0);
491 			break;
492 		case RCTL_SUBJECT_TYPE_LOGINCLASS:
493 			if (filter->rr_subject.rs_loginclass != NULL &&
494 			    rule->rr_subject.rs_loginclass !=
495 			    filter->rr_subject.rs_loginclass)
496 				return (0);
497 			break;
498 		case RCTL_SUBJECT_TYPE_JAIL:
499 			if (filter->rr_subject.rs_prison_racct != NULL &&
500 			    rule->rr_subject.rs_prison_racct !=
501 			    filter->rr_subject.rs_prison_racct)
502 				return (0);
503 			break;
504 		default:
505 			panic("rctl_rule_matches: unknown subject type %d",
506 			    filter->rr_subject_type);
507 		}
508 	}
509 
510 	if (filter->rr_resource != RACCT_UNDEFINED) {
511 		if (rule->rr_resource != filter->rr_resource)
512 			return (0);
513 	}
514 
515 	if (filter->rr_action != RCTL_ACTION_UNDEFINED) {
516 		if (rule->rr_action != filter->rr_action)
517 			return (0);
518 	}
519 
520 	if (filter->rr_amount != RCTL_AMOUNT_UNDEFINED) {
521 		if (rule->rr_amount != filter->rr_amount)
522 			return (0);
523 	}
524 
525 	if (filter->rr_per != RCTL_SUBJECT_TYPE_UNDEFINED) {
526 		if (rule->rr_per != filter->rr_per)
527 			return (0);
528 	}
529 
530 	return (1);
531 }
532 
533 static int
534 str2value(const char *str, int *value, struct dict *table)
535 {
536 	int i;
537 
538 	if (value == NULL)
539 		return (EINVAL);
540 
541 	for (i = 0; table[i].d_name != NULL; i++) {
542 		if (strcasecmp(table[i].d_name, str) == 0) {
543 			*value =  table[i].d_value;
544 			return (0);
545 		}
546 	}
547 
548 	return (EINVAL);
549 }
550 
551 static int
552 str2id(const char *str, id_t *value)
553 {
554 	char *end;
555 
556 	if (str == NULL)
557 		return (EINVAL);
558 
559 	*value = strtoul(str, &end, 10);
560 	if ((size_t)(end - str) != strlen(str))
561 		return (EINVAL);
562 
563 	return (0);
564 }
565 
/*
 * Convert the decimal string 'str' into a non-negative amount, storing
 * it in '*value'.
 *
 * Returns 0 on success, EINVAL if 'str' is NULL or has trailing
 * characters that are not part of the number, and ERANGE if the result
 * is negative.  '*value' is written even when an error is returned.
 */
static int
str2int64(const char *str, int64_t *value)
{
	char *end;

	if (str == NULL)
		return (EINVAL);

	*value = strtoul(str, &end, 10);
	if ((size_t)(end - str) != strlen(str))
		return (EINVAL);

	/*
	 * strtoul() accepts a leading minus sign and values that do not
	 * fit into a signed 64-bit integer; both show up here as a
	 * negative number, which is never a valid amount.
	 */
	if (*value < 0)
		return (ERANGE);

	return (0);
}
580 
581 /*
582  * Connect the rule to the racct, increasing refcount for the rule.
583  */
584 static void
585 rctl_racct_add_rule(struct racct *racct, struct rctl_rule *rule)
586 {
587 	struct rctl_rule_link *link;
588 
589 	KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
590 
591 	rctl_rule_acquire(rule);
592 	link = uma_zalloc(rctl_rule_link_zone, M_WAITOK);
593 	link->rrl_rule = rule;
594 	link->rrl_exceeded = 0;
595 
596 	rw_wlock(&rctl_lock);
597 	LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next);
598 	rw_wunlock(&rctl_lock);
599 }
600 
601 static int
602 rctl_racct_add_rule_locked(struct racct *racct, struct rctl_rule *rule)
603 {
604 	struct rctl_rule_link *link;
605 
606 	KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
607 	rw_assert(&rctl_lock, RA_WLOCKED);
608 
609 	link = uma_zalloc(rctl_rule_link_zone, M_NOWAIT);
610 	if (link == NULL)
611 		return (ENOMEM);
612 	rctl_rule_acquire(rule);
613 	link->rrl_rule = rule;
614 	link->rrl_exceeded = 0;
615 
616 	LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next);
617 	return (0);
618 }
619 
620 /*
621  * Remove limits for a rules matching the filter and release
622  * the refcounts for the rules, possibly freeing them.  Returns
623  * the number of limit structures removed.
624  */
625 static int
626 rctl_racct_remove_rules(struct racct *racct,
627     const struct rctl_rule *filter)
628 {
629 	int removed = 0;
630 	struct rctl_rule_link *link, *linktmp;
631 
632 	rw_assert(&rctl_lock, RA_WLOCKED);
633 
634 	LIST_FOREACH_SAFE(link, &racct->r_rule_links, rrl_next, linktmp) {
635 		if (!rctl_rule_matches(link->rrl_rule, filter))
636 			continue;
637 
638 		LIST_REMOVE(link, rrl_next);
639 		rctl_rule_release(link->rrl_rule);
640 		uma_zfree(rctl_rule_link_zone, link);
641 		removed++;
642 	}
643 	return (removed);
644 }
645 
646 static void
647 rctl_rule_acquire_subject(struct rctl_rule *rule)
648 {
649 
650 	switch (rule->rr_subject_type) {
651 	case RCTL_SUBJECT_TYPE_UNDEFINED:
652 	case RCTL_SUBJECT_TYPE_PROCESS:
653 		break;
654 	case RCTL_SUBJECT_TYPE_JAIL:
655 		if (rule->rr_subject.rs_prison_racct != NULL)
656 			prison_racct_hold(rule->rr_subject.rs_prison_racct);
657 		break;
658 	case RCTL_SUBJECT_TYPE_USER:
659 		if (rule->rr_subject.rs_uip != NULL)
660 			uihold(rule->rr_subject.rs_uip);
661 		break;
662 	case RCTL_SUBJECT_TYPE_LOGINCLASS:
663 		if (rule->rr_subject.rs_loginclass != NULL)
664 			loginclass_hold(rule->rr_subject.rs_loginclass);
665 		break;
666 	default:
667 		panic("rctl_rule_acquire_subject: unknown subject type %d",
668 		    rule->rr_subject_type);
669 	}
670 }
671 
672 static void
673 rctl_rule_release_subject(struct rctl_rule *rule)
674 {
675 
676 	switch (rule->rr_subject_type) {
677 	case RCTL_SUBJECT_TYPE_UNDEFINED:
678 	case RCTL_SUBJECT_TYPE_PROCESS:
679 		break;
680 	case RCTL_SUBJECT_TYPE_JAIL:
681 		if (rule->rr_subject.rs_prison_racct != NULL)
682 			prison_racct_free(rule->rr_subject.rs_prison_racct);
683 		break;
684 	case RCTL_SUBJECT_TYPE_USER:
685 		if (rule->rr_subject.rs_uip != NULL)
686 			uifree(rule->rr_subject.rs_uip);
687 		break;
688 	case RCTL_SUBJECT_TYPE_LOGINCLASS:
689 		if (rule->rr_subject.rs_loginclass != NULL)
690 			loginclass_free(rule->rr_subject.rs_loginclass);
691 		break;
692 	default:
693 		panic("rctl_rule_release_subject: unknown subject type %d",
694 		    rule->rr_subject_type);
695 	}
696 }
697 
698 struct rctl_rule *
699 rctl_rule_alloc(int flags)
700 {
701 	struct rctl_rule *rule;
702 
703 	rule = uma_zalloc(rctl_rule_zone, flags);
704 	if (rule == NULL)
705 		return (NULL);
706 	rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
707 	rule->rr_subject.rs_proc = NULL;
708 	rule->rr_subject.rs_uip = NULL;
709 	rule->rr_subject.rs_loginclass = NULL;
710 	rule->rr_subject.rs_prison_racct = NULL;
711 	rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED;
712 	rule->rr_resource = RACCT_UNDEFINED;
713 	rule->rr_action = RCTL_ACTION_UNDEFINED;
714 	rule->rr_amount = RCTL_AMOUNT_UNDEFINED;
715 	refcount_init(&rule->rr_refcount, 1);
716 
717 	return (rule);
718 }
719 
720 struct rctl_rule *
721 rctl_rule_duplicate(const struct rctl_rule *rule, int flags)
722 {
723 	struct rctl_rule *copy;
724 
725 	copy = uma_zalloc(rctl_rule_zone, flags);
726 	if (copy == NULL)
727 		return (NULL);
728 	copy->rr_subject_type = rule->rr_subject_type;
729 	copy->rr_subject.rs_proc = rule->rr_subject.rs_proc;
730 	copy->rr_subject.rs_uip = rule->rr_subject.rs_uip;
731 	copy->rr_subject.rs_loginclass = rule->rr_subject.rs_loginclass;
732 	copy->rr_subject.rs_prison_racct = rule->rr_subject.rs_prison_racct;
733 	copy->rr_per = rule->rr_per;
734 	copy->rr_resource = rule->rr_resource;
735 	copy->rr_action = rule->rr_action;
736 	copy->rr_amount = rule->rr_amount;
737 	refcount_init(&copy->rr_refcount, 1);
738 	rctl_rule_acquire_subject(copy);
739 
740 	return (copy);
741 }
742 
743 void
744 rctl_rule_acquire(struct rctl_rule *rule)
745 {
746 
747 	KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
748 
749 	refcount_acquire(&rule->rr_refcount);
750 }
751 
752 static void
753 rctl_rule_free(void *context, int pending)
754 {
755 	struct rctl_rule *rule;
756 
757 	rule = (struct rctl_rule *)context;
758 
759 	KASSERT(rule->rr_refcount == 0, ("rule->rr_refcount != 0"));
760 
761 	/*
762 	 * We don't need locking here; rule is guaranteed to be inaccessible.
763 	 */
764 
765 	rctl_rule_release_subject(rule);
766 	uma_zfree(rctl_rule_zone, rule);
767 }
768 
769 void
770 rctl_rule_release(struct rctl_rule *rule)
771 {
772 
773 	KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
774 
775 	if (refcount_release(&rule->rr_refcount)) {
776 		/*
777 		 * rctl_rule_release() is often called when iterating
778 		 * over all the uidinfo structures in the system,
779 		 * holding uihashtbl_lock.  Since rctl_rule_free()
780 		 * might end up calling uifree(), this would lead
781 		 * to lock recursion.  Use taskqueue to avoid this.
782 		 */
783 		TASK_INIT(&rule->rr_task, 0, rctl_rule_free, rule);
784 		taskqueue_enqueue(taskqueue_thread, &rule->rr_task);
785 	}
786 }
787 
788 static int
789 rctl_rule_fully_specified(const struct rctl_rule *rule)
790 {
791 
792 	switch (rule->rr_subject_type) {
793 	case RCTL_SUBJECT_TYPE_UNDEFINED:
794 		return (0);
795 	case RCTL_SUBJECT_TYPE_PROCESS:
796 		if (rule->rr_subject.rs_proc == NULL)
797 			return (0);
798 		break;
799 	case RCTL_SUBJECT_TYPE_USER:
800 		if (rule->rr_subject.rs_uip == NULL)
801 			return (0);
802 		break;
803 	case RCTL_SUBJECT_TYPE_LOGINCLASS:
804 		if (rule->rr_subject.rs_loginclass == NULL)
805 			return (0);
806 		break;
807 	case RCTL_SUBJECT_TYPE_JAIL:
808 		if (rule->rr_subject.rs_prison_racct == NULL)
809 			return (0);
810 		break;
811 	default:
812 		panic("rctl_rule_fully_specified: unknown subject type %d",
813 		    rule->rr_subject_type);
814 	}
815 	if (rule->rr_resource == RACCT_UNDEFINED)
816 		return (0);
817 	if (rule->rr_action == RCTL_ACTION_UNDEFINED)
818 		return (0);
819 	if (rule->rr_amount == RCTL_AMOUNT_UNDEFINED)
820 		return (0);
821 	if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED)
822 		return (0);
823 
824 	return (1);
825 }
826 
827 static int
828 rctl_string_to_rule(char *rulestr, struct rctl_rule **rulep)
829 {
830 	int error = 0;
831 	char *subjectstr, *subject_idstr, *resourcestr, *actionstr,
832 	     *amountstr, *perstr;
833 	struct rctl_rule *rule;
834 	id_t id;
835 
836 	rule = rctl_rule_alloc(M_WAITOK);
837 
838 	subjectstr = strsep(&rulestr, ":");
839 	subject_idstr = strsep(&rulestr, ":");
840 	resourcestr = strsep(&rulestr, ":");
841 	actionstr = strsep(&rulestr, "=/");
842 	amountstr = strsep(&rulestr, "/");
843 	perstr = rulestr;
844 
845 	if (subjectstr == NULL || subjectstr[0] == '\0')
846 		rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
847 	else {
848 		error = str2value(subjectstr, &rule->rr_subject_type, subjectnames);
849 		if (error != 0)
850 			goto out;
851 	}
852 
853 	if (subject_idstr == NULL || subject_idstr[0] == '\0') {
854 		rule->rr_subject.rs_proc = NULL;
855 		rule->rr_subject.rs_uip = NULL;
856 		rule->rr_subject.rs_loginclass = NULL;
857 		rule->rr_subject.rs_prison_racct = NULL;
858 	} else {
859 		switch (rule->rr_subject_type) {
860 		case RCTL_SUBJECT_TYPE_UNDEFINED:
861 			error = EINVAL;
862 			goto out;
863 		case RCTL_SUBJECT_TYPE_PROCESS:
864 			error = str2id(subject_idstr, &id);
865 			if (error != 0)
866 				goto out;
867 			sx_assert(&allproc_lock, SA_LOCKED);
868 			rule->rr_subject.rs_proc = pfind(id);
869 			if (rule->rr_subject.rs_proc == NULL) {
870 				error = ESRCH;
871 				goto out;
872 			}
873 			PROC_UNLOCK(rule->rr_subject.rs_proc);
874 			break;
875 		case RCTL_SUBJECT_TYPE_USER:
876 			error = str2id(subject_idstr, &id);
877 			if (error != 0)
878 				goto out;
879 			rule->rr_subject.rs_uip = uifind(id);
880 			break;
881 		case RCTL_SUBJECT_TYPE_LOGINCLASS:
882 			rule->rr_subject.rs_loginclass =
883 			    loginclass_find(subject_idstr);
884 			if (rule->rr_subject.rs_loginclass == NULL) {
885 				error = ENAMETOOLONG;
886 				goto out;
887 			}
888 			break;
889 		case RCTL_SUBJECT_TYPE_JAIL:
890 			rule->rr_subject.rs_prison_racct =
891 			    prison_racct_find(subject_idstr);
892 			if (rule->rr_subject.rs_prison_racct == NULL) {
893 				error = ENAMETOOLONG;
894 				goto out;
895 			}
896 			break;
897                default:
898                        panic("rctl_string_to_rule: unknown subject type %d",
899                            rule->rr_subject_type);
900                }
901 	}
902 
903 	if (resourcestr == NULL || resourcestr[0] == '\0')
904 		rule->rr_resource = RACCT_UNDEFINED;
905 	else {
906 		error = str2value(resourcestr, &rule->rr_resource,
907 		    resourcenames);
908 		if (error != 0)
909 			goto out;
910 	}
911 
912 	if (actionstr == NULL || actionstr[0] == '\0')
913 		rule->rr_action = RCTL_ACTION_UNDEFINED;
914 	else {
915 		error = str2value(actionstr, &rule->rr_action, actionnames);
916 		if (error != 0)
917 			goto out;
918 	}
919 
920 	if (amountstr == NULL || amountstr[0] == '\0')
921 		rule->rr_amount = RCTL_AMOUNT_UNDEFINED;
922 	else {
923 		error = str2int64(amountstr, &rule->rr_amount);
924 		if (error != 0)
925 			goto out;
926 		if (RACCT_IS_IN_MILLIONS(rule->rr_resource))
927 			rule->rr_amount *= 1000000;
928 	}
929 
930 	if (perstr == NULL || perstr[0] == '\0')
931 		rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED;
932 	else {
933 		error = str2value(perstr, &rule->rr_per, subjectnames);
934 		if (error != 0)
935 			goto out;
936 	}
937 
938 out:
939 	if (error == 0)
940 		*rulep = rule;
941 	else
942 		rctl_rule_release(rule);
943 
944 	return (error);
945 }
946 
947 /*
948  * Link a rule with all the subjects it applies to.
949  */
950 int
951 rctl_rule_add(struct rctl_rule *rule)
952 {
953 	struct proc *p;
954 	struct ucred *cred;
955 	struct uidinfo *uip;
956 	struct prison *pr;
957 	struct prison_racct *prr;
958 	struct loginclass *lc;
959 	struct rctl_rule *rule2;
960 	int match;
961 
962 	KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
963 
964 	/*
965 	 * Some rules just don't make sense.  Note that the one below
966 	 * cannot be rewritten using RACCT_IS_DENIABLE(); the RACCT_PCTCPU,
967 	 * for example, is not deniable in the racct sense, but the
968 	 * limit is enforced in a different way, so "deny" rules for %CPU
969 	 * do make sense.
970 	 */
971 	if (rule->rr_action == RCTL_ACTION_DENY &&
972 	    (rule->rr_resource == RACCT_CPU ||
973 	    rule->rr_resource == RACCT_WALLCLOCK))
974 		return (EOPNOTSUPP);
975 
976 	if (rule->rr_per == RCTL_SUBJECT_TYPE_PROCESS &&
977 	    RACCT_IS_SLOPPY(rule->rr_resource))
978 		return (EOPNOTSUPP);
979 
980 	/*
981 	 * Make sure there are no duplicated rules.  Also, for the "deny"
982 	 * rules, remove ones differing only by "amount".
983 	 */
984 	if (rule->rr_action == RCTL_ACTION_DENY) {
985 		rule2 = rctl_rule_duplicate(rule, M_WAITOK);
986 		rule2->rr_amount = RCTL_AMOUNT_UNDEFINED;
987 		rctl_rule_remove(rule2);
988 		rctl_rule_release(rule2);
989 	} else
990 		rctl_rule_remove(rule);
991 
992 	switch (rule->rr_subject_type) {
993 	case RCTL_SUBJECT_TYPE_PROCESS:
994 		p = rule->rr_subject.rs_proc;
995 		KASSERT(p != NULL, ("rctl_rule_add: NULL proc"));
996 		/*
997 		 * No resource limits for system processes.
998 		 */
999 		if (p->p_flag & P_SYSTEM)
1000 			return (EPERM);
1001 
1002 		rctl_racct_add_rule(p->p_racct, rule);
1003 		/*
1004 		 * In case of per-process rule, we don't have anything more
1005 		 * to do.
1006 		 */
1007 		return (0);
1008 
1009 	case RCTL_SUBJECT_TYPE_USER:
1010 		uip = rule->rr_subject.rs_uip;
1011 		KASSERT(uip != NULL, ("rctl_rule_add: NULL uip"));
1012 		rctl_racct_add_rule(uip->ui_racct, rule);
1013 		break;
1014 
1015 	case RCTL_SUBJECT_TYPE_LOGINCLASS:
1016 		lc = rule->rr_subject.rs_loginclass;
1017 		KASSERT(lc != NULL, ("rctl_rule_add: NULL loginclass"));
1018 		rctl_racct_add_rule(lc->lc_racct, rule);
1019 		break;
1020 
1021 	case RCTL_SUBJECT_TYPE_JAIL:
1022 		prr = rule->rr_subject.rs_prison_racct;
1023 		KASSERT(prr != NULL, ("rctl_rule_add: NULL pr"));
1024 		rctl_racct_add_rule(prr->prr_racct, rule);
1025 		break;
1026 
1027 	default:
1028 		panic("rctl_rule_add: unknown subject type %d",
1029 		    rule->rr_subject_type);
1030 	}
1031 
1032 	/*
1033 	 * Now go through all the processes and add the new rule to the ones
1034 	 * it applies to.
1035 	 */
1036 	sx_assert(&allproc_lock, SA_LOCKED);
1037 	FOREACH_PROC_IN_SYSTEM(p) {
1038 		if (p->p_flag & P_SYSTEM)
1039 			continue;
1040 		cred = p->p_ucred;
1041 		switch (rule->rr_subject_type) {
1042 		case RCTL_SUBJECT_TYPE_USER:
1043 			if (cred->cr_uidinfo == rule->rr_subject.rs_uip ||
1044 			    cred->cr_ruidinfo == rule->rr_subject.rs_uip)
1045 				break;
1046 			continue;
1047 		case RCTL_SUBJECT_TYPE_LOGINCLASS:
1048 			if (cred->cr_loginclass == rule->rr_subject.rs_loginclass)
1049 				break;
1050 			continue;
1051 		case RCTL_SUBJECT_TYPE_JAIL:
1052 			match = 0;
1053 			for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) {
1054 				if (pr->pr_prison_racct == rule->rr_subject.rs_prison_racct) {
1055 					match = 1;
1056 					break;
1057 				}
1058 			}
1059 			if (match)
1060 				break;
1061 			continue;
1062 		default:
1063 			panic("rctl_rule_add: unknown subject type %d",
1064 			    rule->rr_subject_type);
1065 		}
1066 
1067 		rctl_racct_add_rule(p->p_racct, rule);
1068 	}
1069 
1070 	return (0);
1071 }
1072 
1073 static void
1074 rctl_rule_remove_callback(struct racct *racct, void *arg2, void *arg3)
1075 {
1076 	struct rctl_rule *filter = (struct rctl_rule *)arg2;
1077 	int found = 0;
1078 
1079 	rw_wlock(&rctl_lock);
1080 	found += rctl_racct_remove_rules(racct, filter);
1081 	rw_wunlock(&rctl_lock);
1082 
1083 	*((int *)arg3) += found;
1084 }
1085 
1086 /*
1087  * Remove all rules that match the filter.
1088  */
1089 int
1090 rctl_rule_remove(struct rctl_rule *filter)
1091 {
1092 	int found = 0;
1093 	struct proc *p;
1094 
1095 	if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_PROCESS &&
1096 	    filter->rr_subject.rs_proc != NULL) {
1097 		p = filter->rr_subject.rs_proc;
1098 		rw_wlock(&rctl_lock);
1099 		found = rctl_racct_remove_rules(p->p_racct, filter);
1100 		rw_wunlock(&rctl_lock);
1101 		if (found)
1102 			return (0);
1103 		return (ESRCH);
1104 	}
1105 
1106 	loginclass_racct_foreach(rctl_rule_remove_callback, filter,
1107 	    (void *)&found);
1108 	ui_racct_foreach(rctl_rule_remove_callback, filter,
1109 	    (void *)&found);
1110 	prison_racct_foreach(rctl_rule_remove_callback, filter,
1111 	    (void *)&found);
1112 
1113 	sx_assert(&allproc_lock, SA_LOCKED);
1114 	rw_wlock(&rctl_lock);
1115 	FOREACH_PROC_IN_SYSTEM(p) {
1116 		found += rctl_racct_remove_rules(p->p_racct, filter);
1117 	}
1118 	rw_wunlock(&rctl_lock);
1119 
1120 	if (found)
1121 		return (0);
1122 	return (ESRCH);
1123 }
1124 
1125 /*
1126  * Appends a rule to the sbuf.
1127  */
1128 static void
1129 rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule)
1130 {
1131 	int64_t amount;
1132 
1133 	sbuf_printf(sb, "%s:", rctl_subject_type_name(rule->rr_subject_type));
1134 
1135 	switch (rule->rr_subject_type) {
1136 	case RCTL_SUBJECT_TYPE_PROCESS:
1137 		if (rule->rr_subject.rs_proc == NULL)
1138 			sbuf_printf(sb, ":");
1139 		else
1140 			sbuf_printf(sb, "%d:",
1141 			    rule->rr_subject.rs_proc->p_pid);
1142 		break;
1143 	case RCTL_SUBJECT_TYPE_USER:
1144 		if (rule->rr_subject.rs_uip == NULL)
1145 			sbuf_printf(sb, ":");
1146 		else
1147 			sbuf_printf(sb, "%d:",
1148 			    rule->rr_subject.rs_uip->ui_uid);
1149 		break;
1150 	case RCTL_SUBJECT_TYPE_LOGINCLASS:
1151 		if (rule->rr_subject.rs_loginclass == NULL)
1152 			sbuf_printf(sb, ":");
1153 		else
1154 			sbuf_printf(sb, "%s:",
1155 			    rule->rr_subject.rs_loginclass->lc_name);
1156 		break;
1157 	case RCTL_SUBJECT_TYPE_JAIL:
1158 		if (rule->rr_subject.rs_prison_racct == NULL)
1159 			sbuf_printf(sb, ":");
1160 		else
1161 			sbuf_printf(sb, "%s:",
1162 			    rule->rr_subject.rs_prison_racct->prr_name);
1163 		break;
1164 	default:
1165 		panic("rctl_rule_to_sbuf: unknown subject type %d",
1166 		    rule->rr_subject_type);
1167 	}
1168 
1169 	amount = rule->rr_amount;
1170 	if (amount != RCTL_AMOUNT_UNDEFINED &&
1171 	    RACCT_IS_IN_MILLIONS(rule->rr_resource))
1172 		amount /= 1000000;
1173 
1174 	sbuf_printf(sb, "%s:%s=%jd",
1175 	    rctl_resource_name(rule->rr_resource),
1176 	    rctl_action_name(rule->rr_action),
1177 	    amount);
1178 
1179 	if (rule->rr_per != rule->rr_subject_type)
1180 		sbuf_printf(sb, "/%s", rctl_subject_type_name(rule->rr_per));
1181 }
1182 
1183 /*
1184  * Routine used by RCTL syscalls to read in input string.
1185  */
1186 static int
1187 rctl_read_inbuf(char **inputstr, const char *inbufp, size_t inbuflen)
1188 {
1189 	int error;
1190 	char *str;
1191 
1192 	if (inbuflen <= 0)
1193 		return (EINVAL);
1194 
1195 	str = malloc(inbuflen + 1, M_RCTL, M_WAITOK);
1196 	error = copyinstr(inbufp, str, inbuflen, NULL);
1197 	if (error != 0) {
1198 		free(str, M_RCTL);
1199 		return (error);
1200 	}
1201 
1202 	*inputstr = str;
1203 
1204 	return (0);
1205 }
1206 
1207 /*
1208  * Routine used by RCTL syscalls to write out output string.
1209  */
1210 static int
1211 rctl_write_outbuf(struct sbuf *outputsbuf, char *outbufp, size_t outbuflen)
1212 {
1213 	int error;
1214 
1215 	if (outputsbuf == NULL)
1216 		return (0);
1217 
1218 	sbuf_finish(outputsbuf);
1219 	if (outbuflen < sbuf_len(outputsbuf) + 1) {
1220 		sbuf_delete(outputsbuf);
1221 		return (ERANGE);
1222 	}
1223 	error = copyout(sbuf_data(outputsbuf), outbufp,
1224 	    sbuf_len(outputsbuf) + 1);
1225 	sbuf_delete(outputsbuf);
1226 	return (error);
1227 }
1228 
1229 static struct sbuf *
1230 rctl_racct_to_sbuf(struct racct *racct, int sloppy)
1231 {
1232 	int i;
1233 	int64_t amount;
1234 	struct sbuf *sb;
1235 
1236 	sb = sbuf_new_auto();
1237 	for (i = 0; i <= RACCT_MAX; i++) {
1238 		if (sloppy == 0 && RACCT_IS_SLOPPY(i))
1239 			continue;
1240 		amount = racct->r_resources[i];
1241 		if (RACCT_IS_IN_MILLIONS(i))
1242 			amount /= 1000000;
1243 		sbuf_printf(sb, "%s=%jd,", rctl_resource_name(i), amount);
1244 	}
1245 	sbuf_setpos(sb, sbuf_len(sb) - 1);
1246 	return (sb);
1247 }
1248 
1249 int
1250 sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap)
1251 {
1252 	int error;
1253 	char *inputstr;
1254 	struct rctl_rule *filter;
1255 	struct sbuf *outputsbuf = NULL;
1256 	struct proc *p;
1257 	struct uidinfo *uip;
1258 	struct loginclass *lc;
1259 	struct prison_racct *prr;
1260 
1261 	error = priv_check(td, PRIV_RCTL_GET_RACCT);
1262 	if (error != 0)
1263 		return (error);
1264 
1265 	error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1266 	if (error != 0)
1267 		return (error);
1268 
1269 	sx_slock(&allproc_lock);
1270 	error = rctl_string_to_rule(inputstr, &filter);
1271 	free(inputstr, M_RCTL);
1272 	if (error != 0) {
1273 		sx_sunlock(&allproc_lock);
1274 		return (error);
1275 	}
1276 
1277 	switch (filter->rr_subject_type) {
1278 	case RCTL_SUBJECT_TYPE_PROCESS:
1279 		p = filter->rr_subject.rs_proc;
1280 		if (p == NULL) {
1281 			error = EINVAL;
1282 			goto out;
1283 		}
1284 		if (p->p_flag & P_SYSTEM) {
1285 			error = EINVAL;
1286 			goto out;
1287 		}
1288 		outputsbuf = rctl_racct_to_sbuf(p->p_racct, 0);
1289 		break;
1290 	case RCTL_SUBJECT_TYPE_USER:
1291 		uip = filter->rr_subject.rs_uip;
1292 		if (uip == NULL) {
1293 			error = EINVAL;
1294 			goto out;
1295 		}
1296 		outputsbuf = rctl_racct_to_sbuf(uip->ui_racct, 1);
1297 		break;
1298 	case RCTL_SUBJECT_TYPE_LOGINCLASS:
1299 		lc = filter->rr_subject.rs_loginclass;
1300 		if (lc == NULL) {
1301 			error = EINVAL;
1302 			goto out;
1303 		}
1304 		outputsbuf = rctl_racct_to_sbuf(lc->lc_racct, 1);
1305 		break;
1306 	case RCTL_SUBJECT_TYPE_JAIL:
1307 		prr = filter->rr_subject.rs_prison_racct;
1308 		if (prr == NULL) {
1309 			error = EINVAL;
1310 			goto out;
1311 		}
1312 		outputsbuf = rctl_racct_to_sbuf(prr->prr_racct, 1);
1313 		break;
1314 	default:
1315 		error = EINVAL;
1316 	}
1317 out:
1318 	rctl_rule_release(filter);
1319 	sx_sunlock(&allproc_lock);
1320 	if (error != 0)
1321 		return (error);
1322 
1323 	error = rctl_write_outbuf(outputsbuf, uap->outbufp, uap->outbuflen);
1324 
1325 	return (error);
1326 }
1327 
1328 static void
1329 rctl_get_rules_callback(struct racct *racct, void *arg2, void *arg3)
1330 {
1331 	struct rctl_rule *filter = (struct rctl_rule *)arg2;
1332 	struct rctl_rule_link *link;
1333 	struct sbuf *sb = (struct sbuf *)arg3;
1334 
1335 	rw_rlock(&rctl_lock);
1336 	LIST_FOREACH(link, &racct->r_rule_links, rrl_next) {
1337 		if (!rctl_rule_matches(link->rrl_rule, filter))
1338 			continue;
1339 		rctl_rule_to_sbuf(sb, link->rrl_rule);
1340 		sbuf_printf(sb, ",");
1341 	}
1342 	rw_runlock(&rctl_lock);
1343 }
1344 
1345 int
1346 sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
1347 {
1348 	int error;
1349 	size_t bufsize = RCTL_DEFAULT_BUFSIZE;
1350 	char *inputstr, *buf;
1351 	struct sbuf *sb;
1352 	struct rctl_rule *filter;
1353 	struct rctl_rule_link *link;
1354 	struct proc *p;
1355 
1356 	error = priv_check(td, PRIV_RCTL_GET_RULES);
1357 	if (error != 0)
1358 		return (error);
1359 
1360 	error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1361 	if (error != 0)
1362 		return (error);
1363 
1364 	sx_slock(&allproc_lock);
1365 	error = rctl_string_to_rule(inputstr, &filter);
1366 	free(inputstr, M_RCTL);
1367 	if (error != 0) {
1368 		sx_sunlock(&allproc_lock);
1369 		return (error);
1370 	}
1371 
1372 again:
1373 	buf = malloc(bufsize, M_RCTL, M_WAITOK);
1374 	sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN);
1375 	KASSERT(sb != NULL, ("sbuf_new failed"));
1376 
1377 	sx_assert(&allproc_lock, SA_LOCKED);
1378 	FOREACH_PROC_IN_SYSTEM(p) {
1379 		rw_rlock(&rctl_lock);
1380 		LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1381 			/*
1382 			 * Non-process rules will be added to the buffer later.
1383 			 * Adding them here would result in duplicated output.
1384 			 */
1385 			if (link->rrl_rule->rr_subject_type !=
1386 			    RCTL_SUBJECT_TYPE_PROCESS)
1387 				continue;
1388 			if (!rctl_rule_matches(link->rrl_rule, filter))
1389 				continue;
1390 			rctl_rule_to_sbuf(sb, link->rrl_rule);
1391 			sbuf_printf(sb, ",");
1392 		}
1393 		rw_runlock(&rctl_lock);
1394 	}
1395 
1396 	loginclass_racct_foreach(rctl_get_rules_callback, filter, sb);
1397 	ui_racct_foreach(rctl_get_rules_callback, filter, sb);
1398 	prison_racct_foreach(rctl_get_rules_callback, filter, sb);
1399 	if (sbuf_error(sb) == ENOMEM) {
1400 		sbuf_delete(sb);
1401 		free(buf, M_RCTL);
1402 		bufsize *= 4;
1403 		goto again;
1404 	}
1405 
1406 	/*
1407 	 * Remove trailing ",".
1408 	 */
1409 	if (sbuf_len(sb) > 0)
1410 		sbuf_setpos(sb, sbuf_len(sb) - 1);
1411 
1412 	error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
1413 
1414 	rctl_rule_release(filter);
1415 	sx_sunlock(&allproc_lock);
1416 	free(buf, M_RCTL);
1417 	return (error);
1418 }
1419 
1420 int
1421 sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap)
1422 {
1423 	int error;
1424 	size_t bufsize = RCTL_DEFAULT_BUFSIZE;
1425 	char *inputstr, *buf;
1426 	struct sbuf *sb;
1427 	struct rctl_rule *filter;
1428 	struct rctl_rule_link *link;
1429 
1430 	error = priv_check(td, PRIV_RCTL_GET_LIMITS);
1431 	if (error != 0)
1432 		return (error);
1433 
1434 	error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1435 	if (error != 0)
1436 		return (error);
1437 
1438 	sx_slock(&allproc_lock);
1439 	error = rctl_string_to_rule(inputstr, &filter);
1440 	free(inputstr, M_RCTL);
1441 	if (error != 0) {
1442 		sx_sunlock(&allproc_lock);
1443 		return (error);
1444 	}
1445 
1446 	if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_UNDEFINED) {
1447 		rctl_rule_release(filter);
1448 		sx_sunlock(&allproc_lock);
1449 		return (EINVAL);
1450 	}
1451 	if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_PROCESS) {
1452 		rctl_rule_release(filter);
1453 		sx_sunlock(&allproc_lock);
1454 		return (EOPNOTSUPP);
1455 	}
1456 	if (filter->rr_subject.rs_proc == NULL) {
1457 		rctl_rule_release(filter);
1458 		sx_sunlock(&allproc_lock);
1459 		return (EINVAL);
1460 	}
1461 
1462 again:
1463 	buf = malloc(bufsize, M_RCTL, M_WAITOK);
1464 	sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN);
1465 	KASSERT(sb != NULL, ("sbuf_new failed"));
1466 
1467 	rw_rlock(&rctl_lock);
1468 	LIST_FOREACH(link, &filter->rr_subject.rs_proc->p_racct->r_rule_links,
1469 	    rrl_next) {
1470 		rctl_rule_to_sbuf(sb, link->rrl_rule);
1471 		sbuf_printf(sb, ",");
1472 	}
1473 	rw_runlock(&rctl_lock);
1474 	if (sbuf_error(sb) == ENOMEM) {
1475 		sbuf_delete(sb);
1476 		free(buf, M_RCTL);
1477 		bufsize *= 4;
1478 		goto again;
1479 	}
1480 
1481 	/*
1482 	 * Remove trailing ",".
1483 	 */
1484 	if (sbuf_len(sb) > 0)
1485 		sbuf_setpos(sb, sbuf_len(sb) - 1);
1486 
1487 	error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
1488 	rctl_rule_release(filter);
1489 	sx_sunlock(&allproc_lock);
1490 	free(buf, M_RCTL);
1491 	return (error);
1492 }
1493 
1494 int
1495 sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap)
1496 {
1497 	int error;
1498 	struct rctl_rule *rule;
1499 	char *inputstr;
1500 
1501 	error = priv_check(td, PRIV_RCTL_ADD_RULE);
1502 	if (error != 0)
1503 		return (error);
1504 
1505 	error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1506 	if (error != 0)
1507 		return (error);
1508 
1509 	sx_slock(&allproc_lock);
1510 	error = rctl_string_to_rule(inputstr, &rule);
1511 	free(inputstr, M_RCTL);
1512 	if (error != 0) {
1513 		sx_sunlock(&allproc_lock);
1514 		return (error);
1515 	}
1516 	/*
1517 	 * The 'per' part of a rule is optional.
1518 	 */
1519 	if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED &&
1520 	    rule->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED)
1521 		rule->rr_per = rule->rr_subject_type;
1522 
1523 	if (!rctl_rule_fully_specified(rule)) {
1524 		error = EINVAL;
1525 		goto out;
1526 	}
1527 
1528 	error = rctl_rule_add(rule);
1529 
1530 out:
1531 	rctl_rule_release(rule);
1532 	sx_sunlock(&allproc_lock);
1533 	return (error);
1534 }
1535 
1536 int
1537 sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap)
1538 {
1539 	int error;
1540 	struct rctl_rule *filter;
1541 	char *inputstr;
1542 
1543 	error = priv_check(td, PRIV_RCTL_REMOVE_RULE);
1544 	if (error != 0)
1545 		return (error);
1546 
1547 	error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1548 	if (error != 0)
1549 		return (error);
1550 
1551 	sx_slock(&allproc_lock);
1552 	error = rctl_string_to_rule(inputstr, &filter);
1553 	free(inputstr, M_RCTL);
1554 	if (error != 0) {
1555 		sx_sunlock(&allproc_lock);
1556 		return (error);
1557 	}
1558 
1559 	error = rctl_rule_remove(filter);
1560 	rctl_rule_release(filter);
1561 	sx_sunlock(&allproc_lock);
1562 
1563 	return (error);
1564 }
1565 
/*
 * Update RCTL rule list after credential change.
 *
 * Rebuilds the rule list of process p so that it holds p's own
 * per-process rules plus every rule linked to the real-uid uidinfo,
 * login class and prison racct of newcred.
 *
 * Link entries are allocated with M_WAITOK, which is done without
 * holding rctl_lock.  The function therefore counts the rules under the
 * read lock, allocates that many entries unlocked, and then fills them
 * in under the write lock.  If the number of rules no longer matches,
 * everything is freed and the procedure starts over.
 */
void
rctl_proc_ucred_changed(struct proc *p, struct ucred *newcred)
{
	int rulecnt, i;
	struct rctl_rule_link *link, *newlink;
	struct uidinfo *newuip;
	struct loginclass *newlc;
	struct prison_racct *newprr;
	LIST_HEAD(, rctl_rule_link) newrules;

	newuip = newcred->cr_ruidinfo;
	newlc = newcred->cr_loginclass;
	newprr = newcred->cr_prison->pr_prison_racct;

	LIST_INIT(&newrules);

again:
	/*
	 * First, count the rules that apply to the process with new
	 * credentials.
	 */
	rulecnt = 0;
	rw_rlock(&rctl_lock);
	/* Of the process' current rules only the per-process ones carry over. */
	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
		if (link->rrl_rule->rr_subject_type ==
		    RCTL_SUBJECT_TYPE_PROCESS)
			rulecnt++;
	}
	LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next)
		rulecnt++;
	LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next)
		rulecnt++;
	LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next)
		rulecnt++;
	rw_runlock(&rctl_lock);

	/*
	 * Create temporary list.  We've dropped the rctl_lock in order
	 * to use M_WAITOK.
	 */
	for (i = 0; i < rulecnt; i++) {
		newlink = uma_zalloc(rctl_rule_link_zone, M_WAITOK);
		/* NULL marks an entry that has no rule assigned yet. */
		newlink->rrl_rule = NULL;
		LIST_INSERT_HEAD(&newrules, newlink, rrl_next);
	}

	newlink = LIST_FIRST(&newrules);

	/*
	 * Assign rules to the newly allocated list entries.
	 *
	 * Running out of entries (newlink == NULL) means there are now
	 * more rules than were counted above.  rulecnt is decremented per
	 * assigned rule, so a non-zero value at the end means there are
	 * fewer.
	 */
	rw_wlock(&rctl_lock);
	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
		if (link->rrl_rule->rr_subject_type ==
		    RCTL_SUBJECT_TYPE_PROCESS) {
			if (newlink == NULL)
				goto goaround;
			rctl_rule_acquire(link->rrl_rule);
			newlink->rrl_rule = link->rrl_rule;
			newlink = LIST_NEXT(newlink, rrl_next);
			rulecnt--;
		}
	}

	LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next) {
		if (newlink == NULL)
			goto goaround;
		rctl_rule_acquire(link->rrl_rule);
		newlink->rrl_rule = link->rrl_rule;
		newlink = LIST_NEXT(newlink, rrl_next);
		rulecnt--;
	}

	LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next) {
		if (newlink == NULL)
			goto goaround;
		rctl_rule_acquire(link->rrl_rule);
		newlink->rrl_rule = link->rrl_rule;
		newlink = LIST_NEXT(newlink, rrl_next);
		rulecnt--;
	}

	LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next) {
		if (newlink == NULL)
			goto goaround;
		rctl_rule_acquire(link->rrl_rule);
		newlink->rrl_rule = link->rrl_rule;
		newlink = LIST_NEXT(newlink, rrl_next);
		rulecnt--;
	}

	if (rulecnt == 0) {
		/*
		 * Free the old rule list.
		 */
		while (!LIST_EMPTY(&p->p_racct->r_rule_links)) {
			link = LIST_FIRST(&p->p_racct->r_rule_links);
			LIST_REMOVE(link, rrl_next);
			rctl_rule_release(link->rrl_rule);
			uma_zfree(rctl_rule_link_zone, link);
		}

		/*
		 * Replace lists and we're done.
		 *
		 * XXX: Is there any way to switch list heads instead
		 *      of iterating here?
		 */
		while (!LIST_EMPTY(&newrules)) {
			newlink = LIST_FIRST(&newrules);
			LIST_REMOVE(newlink, rrl_next);
			LIST_INSERT_HEAD(&p->p_racct->r_rule_links,
			    newlink, rrl_next);
		}

		rw_wunlock(&rctl_lock);

		return;
	}

goaround:
	rw_wunlock(&rctl_lock);

	/*
	 * Rule list changed while we were not holding the rctl_lock.
	 * Free the new list and try again.
	 */
	while (!LIST_EMPTY(&newrules)) {
		newlink = LIST_FIRST(&newrules);
		LIST_REMOVE(newlink, rrl_next);
		/* Entries past the point of failure never got a rule. */
		if (newlink->rrl_rule != NULL)
			rctl_rule_release(newlink->rrl_rule);
		uma_zfree(rctl_rule_link_zone, newlink);
	}

	goto again;
}
1706 
1707 /*
1708  * Assign RCTL rules to the newly created process.
1709  */
1710 int
1711 rctl_proc_fork(struct proc *parent, struct proc *child)
1712 {
1713 	int error;
1714 	struct rctl_rule_link *link;
1715 	struct rctl_rule *rule;
1716 
1717 	LIST_INIT(&child->p_racct->r_rule_links);
1718 
1719 	/*
1720 	 * No limits for kernel processes.
1721 	 */
1722 	if (child->p_flag & P_SYSTEM)
1723 		return (0);
1724 
1725 	/*
1726 	 * Nothing to inherit from P_SYSTEM parents.
1727 	 */
1728 	if (parent->p_racct == NULL) {
1729 		KASSERT(parent->p_flag & P_SYSTEM,
1730 		    ("non-system process without racct; p = %p", parent));
1731 		return (0);
1732 	}
1733 
1734 	rw_wlock(&rctl_lock);
1735 
1736 	/*
1737 	 * Go through limits applicable to the parent and assign them
1738 	 * to the child.  Rules with 'process' subject have to be duplicated
1739 	 * in order to make their rr_subject point to the new process.
1740 	 */
1741 	LIST_FOREACH(link, &parent->p_racct->r_rule_links, rrl_next) {
1742 		if (link->rrl_rule->rr_subject_type ==
1743 		    RCTL_SUBJECT_TYPE_PROCESS) {
1744 			rule = rctl_rule_duplicate(link->rrl_rule, M_NOWAIT);
1745 			if (rule == NULL)
1746 				goto fail;
1747 			KASSERT(rule->rr_subject.rs_proc == parent,
1748 			    ("rule->rr_subject.rs_proc != parent"));
1749 			rule->rr_subject.rs_proc = child;
1750 			error = rctl_racct_add_rule_locked(child->p_racct,
1751 			    rule);
1752 			rctl_rule_release(rule);
1753 			if (error != 0)
1754 				goto fail;
1755 		} else {
1756 			error = rctl_racct_add_rule_locked(child->p_racct,
1757 			    link->rrl_rule);
1758 			if (error != 0)
1759 				goto fail;
1760 		}
1761 	}
1762 
1763 	rw_wunlock(&rctl_lock);
1764 	return (0);
1765 
1766 fail:
1767 	while (!LIST_EMPTY(&child->p_racct->r_rule_links)) {
1768 		link = LIST_FIRST(&child->p_racct->r_rule_links);
1769 		LIST_REMOVE(link, rrl_next);
1770 		rctl_rule_release(link->rrl_rule);
1771 		uma_zfree(rctl_rule_link_zone, link);
1772 	}
1773 	rw_wunlock(&rctl_lock);
1774 	return (EAGAIN);
1775 }
1776 
1777 /*
1778  * Release rules attached to the racct.
1779  */
1780 void
1781 rctl_racct_release(struct racct *racct)
1782 {
1783 	struct rctl_rule_link *link;
1784 
1785 	rw_wlock(&rctl_lock);
1786 	while (!LIST_EMPTY(&racct->r_rule_links)) {
1787 		link = LIST_FIRST(&racct->r_rule_links);
1788 		LIST_REMOVE(link, rrl_next);
1789 		rctl_rule_release(link->rrl_rule);
1790 		uma_zfree(rctl_rule_link_zone, link);
1791 	}
1792 	rw_wunlock(&rctl_lock);
1793 }
1794 
1795 static void
1796 rctl_init(void)
1797 {
1798 
1799 	rctl_rule_link_zone = uma_zcreate("rctl_rule_link",
1800 	    sizeof(struct rctl_rule_link), NULL, NULL, NULL, NULL,
1801 	    UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
1802 	rctl_rule_zone = uma_zcreate("rctl_rule", sizeof(struct rctl_rule),
1803 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
1804 }
1805 
1806 #else /* !RCTL */
1807 
/*
 * Stub compiled in the !RCTL branch of the surrounding conditional:
 * the system call is not implemented and always fails with ENOSYS.
 */
int
sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap)
{

	return (ENOSYS);
}
1814 
/*
 * Stub compiled in the !RCTL branch of the surrounding conditional:
 * the system call is not implemented and always fails with ENOSYS.
 */
int
sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
{

	return (ENOSYS);
}
1821 
/*
 * Stub compiled in the !RCTL branch of the surrounding conditional:
 * the system call is not implemented and always fails with ENOSYS.
 */
int
sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap)
{

	return (ENOSYS);
}
1828 
/*
 * Stub compiled in the !RCTL branch of the surrounding conditional:
 * the system call is not implemented and always fails with ENOSYS.
 */
int
sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap)
{

	return (ENOSYS);
}
1835 
/*
 * Stub compiled in the !RCTL branch of the surrounding conditional:
 * the system call is not implemented and always fails with ENOSYS.
 */
int
sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap)
{

	return (ENOSYS);
}
1842 
1843 #endif /* !RCTL */
1844