xref: /freebsd/sys/kern/kern_rctl.c (revision d51f8d20247c373ab2c2db8aed596b8ac44e7a34)
1 /*-
2  * Copyright (c) 2010 The FreeBSD Foundation
3  * All rights reserved.
4  *
5  * This software was developed by Edward Tomasz Napierala under sponsorship
6  * from the FreeBSD Foundation.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  * $FreeBSD$
30  */
31 
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34 
35 #include <sys/param.h>
36 #include <sys/bus.h>
37 #include <sys/malloc.h>
38 #include <sys/queue.h>
39 #include <sys/refcount.h>
40 #include <sys/jail.h>
41 #include <sys/kernel.h>
42 #include <sys/limits.h>
43 #include <sys/loginclass.h>
44 #include <sys/priv.h>
45 #include <sys/proc.h>
46 #include <sys/racct.h>
47 #include <sys/rctl.h>
48 #include <sys/resourcevar.h>
49 #include <sys/sx.h>
50 #include <sys/sysent.h>
51 #include <sys/sysproto.h>
52 #include <sys/systm.h>
53 #include <sys/types.h>
54 #include <sys/eventhandler.h>
55 #include <sys/lock.h>
56 #include <sys/mutex.h>
57 #include <sys/rwlock.h>
58 #include <sys/sbuf.h>
59 #include <sys/taskqueue.h>
60 #include <sys/tree.h>
61 #include <vm/uma.h>
62 
63 #ifdef RCTL
64 #ifndef RACCT
65 #error "The RCTL option requires the RACCT option"
66 #endif
67 
68 FEATURE(rctl, "Resource Limits");
69 
/*
 * Flag values; not referenced in the visible part of this file.
 * NOTE(review): presumably per-resource inheritance/accumulation
 * flags -- confirm against their users before relying on this.
 */
#define	HRF_DEFAULT		0
#define	HRF_DONT_INHERIT	1
#define	HRF_DONT_ACCUMULATE	2

/* Default buffer size for rctl_get_rules(2). */
#define	RCTL_DEFAULT_BUFSIZE	4096
/* Size of the fixed buffer a rule is formatted into for log/devctl output. */
#define	RCTL_LOG_BUFSIZE	128
77 
78 /*
79  * 'rctl_rule_link' connects a rule with every racct it's related to.
80  * For example, rule 'user:X:openfiles:deny=N/process' is linked
81  * with uidinfo for user X, and to each process of that user.
82  */
83 struct rctl_rule_link {
84 	LIST_ENTRY(rctl_rule_link)	rrl_next;
85 	struct rctl_rule		*rrl_rule;
86 	int				rrl_exceeded;
87 };
88 
/*
 * One entry of a name <-> value translation table.  The tables built from
 * it are terminated by an entry whose d_name is NULL.
 */
struct dict {
	const char	*d_name;	/* textual name */
	int		d_value;	/* corresponding numeric constant */
};
93 
94 static struct dict subjectnames[] = {
95 	{ "process", RCTL_SUBJECT_TYPE_PROCESS },
96 	{ "user", RCTL_SUBJECT_TYPE_USER },
97 	{ "loginclass", RCTL_SUBJECT_TYPE_LOGINCLASS },
98 	{ "jail", RCTL_SUBJECT_TYPE_JAIL },
99 	{ NULL, -1 }};
100 
101 static struct dict resourcenames[] = {
102 	{ "cpu", RACCT_CPU },
103 	{ "fsize", RACCT_FSIZE },
104 	{ "data", RACCT_DATA },
105 	{ "stack", RACCT_STACK },
106 	{ "core", RACCT_CORE },
107 	{ "rss", RACCT_RSS },
108 	{ "memlock", RACCT_MEMLOCK },
109 	{ "nproc", RACCT_NPROC },
110 	{ "nofile", RACCT_NOFILE },
111 	{ "sbsize", RACCT_SBSIZE },
112 	{ "vmem", RACCT_VMEM },
113 	{ "npts", RACCT_NPTS },
114 	{ "swap", RACCT_SWAP },
115 	{ "nthr", RACCT_NTHR },
116 	{ "msgqqueued", RACCT_MSGQQUEUED },
117 	{ "msgqsize", RACCT_MSGQSIZE },
118 	{ "nmsgq", RACCT_NMSGQ },
119 	{ "nsem", RACCT_NSEM },
120 	{ "nsemop", RACCT_NSEMOP },
121 	{ "nshm", RACCT_NSHM },
122 	{ "shmsize", RACCT_SHMSIZE },
123 	{ "wallclock", RACCT_WALLCLOCK },
124 	{ NULL, -1 }};
125 
126 static struct dict actionnames[] = {
127 	{ "sighup", RCTL_ACTION_SIGHUP },
128 	{ "sigint", RCTL_ACTION_SIGINT },
129 	{ "sigquit", RCTL_ACTION_SIGQUIT },
130 	{ "sigill", RCTL_ACTION_SIGILL },
131 	{ "sigtrap", RCTL_ACTION_SIGTRAP },
132 	{ "sigabrt", RCTL_ACTION_SIGABRT },
133 	{ "sigemt", RCTL_ACTION_SIGEMT },
134 	{ "sigfpe", RCTL_ACTION_SIGFPE },
135 	{ "sigkill", RCTL_ACTION_SIGKILL },
136 	{ "sigbus", RCTL_ACTION_SIGBUS },
137 	{ "sigsegv", RCTL_ACTION_SIGSEGV },
138 	{ "sigsys", RCTL_ACTION_SIGSYS },
139 	{ "sigpipe", RCTL_ACTION_SIGPIPE },
140 	{ "sigalrm", RCTL_ACTION_SIGALRM },
141 	{ "sigterm", RCTL_ACTION_SIGTERM },
142 	{ "sigurg", RCTL_ACTION_SIGURG },
143 	{ "sigstop", RCTL_ACTION_SIGSTOP },
144 	{ "sigtstp", RCTL_ACTION_SIGTSTP },
145 	{ "sigchld", RCTL_ACTION_SIGCHLD },
146 	{ "sigttin", RCTL_ACTION_SIGTTIN },
147 	{ "sigttou", RCTL_ACTION_SIGTTOU },
148 	{ "sigio", RCTL_ACTION_SIGIO },
149 	{ "sigxcpu", RCTL_ACTION_SIGXCPU },
150 	{ "sigxfsz", RCTL_ACTION_SIGXFSZ },
151 	{ "sigvtalrm", RCTL_ACTION_SIGVTALRM },
152 	{ "sigprof", RCTL_ACTION_SIGPROF },
153 	{ "sigwinch", RCTL_ACTION_SIGWINCH },
154 	{ "siginfo", RCTL_ACTION_SIGINFO },
155 	{ "sigusr1", RCTL_ACTION_SIGUSR1 },
156 	{ "sigusr2", RCTL_ACTION_SIGUSR2 },
157 	{ "sigthr", RCTL_ACTION_SIGTHR },
158 	{ "deny", RCTL_ACTION_DENY },
159 	{ "log", RCTL_ACTION_LOG },
160 	{ "devctl", RCTL_ACTION_DEVCTL },
161 	{ NULL, -1 }};
162 
163 static void rctl_init(void);
164 SYSINIT(rctl, SI_SUB_RACCT, SI_ORDER_FIRST, rctl_init, NULL);
165 
166 static uma_zone_t rctl_rule_link_zone;
167 static uma_zone_t rctl_rule_zone;
168 static struct rwlock rctl_lock;
169 RW_SYSINIT(rctl_lock, &rctl_lock, "RCTL lock");
170 
171 static int rctl_rule_fully_specified(const struct rctl_rule *rule);
172 static void rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule);
173 
174 MALLOC_DEFINE(M_RCTL, "rctl", "Resource Limits");
175 
176 static const char *
177 rctl_subject_type_name(int subject)
178 {
179 	int i;
180 
181 	for (i = 0; subjectnames[i].d_name != NULL; i++) {
182 		if (subjectnames[i].d_value == subject)
183 			return (subjectnames[i].d_name);
184 	}
185 
186 	panic("rctl_subject_type_name: unknown subject type %d", subject);
187 }
188 
189 static const char *
190 rctl_action_name(int action)
191 {
192 	int i;
193 
194 	for (i = 0; actionnames[i].d_name != NULL; i++) {
195 		if (actionnames[i].d_value == action)
196 			return (actionnames[i].d_name);
197 	}
198 
199 	panic("rctl_action_name: unknown action %d", action);
200 }
201 
202 const char *
203 rctl_resource_name(int resource)
204 {
205 	int i;
206 
207 	for (i = 0; resourcenames[i].d_name != NULL; i++) {
208 		if (resourcenames[i].d_value == resource)
209 			return (resourcenames[i].d_name);
210 	}
211 
212 	panic("rctl_resource_name: unknown resource %d", resource);
213 }
214 
215 /*
216  * Return the amount of resource that can be allocated by 'p' before
217  * hitting 'rule'.
218  */
219 static int64_t
220 rctl_available_resource(const struct proc *p, const struct rctl_rule *rule)
221 {
222 	int resource;
223 	int64_t available = INT64_MAX;
224 	struct ucred *cred = p->p_ucred;
225 
226 	rw_assert(&rctl_lock, RA_LOCKED);
227 
228 	resource = rule->rr_resource;
229 	switch (rule->rr_per) {
230 	case RCTL_SUBJECT_TYPE_PROCESS:
231 		available = rule->rr_amount -
232 		    p->p_racct->r_resources[resource];
233 		break;
234 	case RCTL_SUBJECT_TYPE_USER:
235 		available = rule->rr_amount -
236 		    cred->cr_ruidinfo->ui_racct->r_resources[resource];
237 		break;
238 	case RCTL_SUBJECT_TYPE_LOGINCLASS:
239 		available = rule->rr_amount -
240 		    cred->cr_loginclass->lc_racct->r_resources[resource];
241 		break;
242 	case RCTL_SUBJECT_TYPE_JAIL:
243 		available = rule->rr_amount -
244 		    cred->cr_prison->pr_racct->r_resources[resource];
245 		break;
246 	default:
247 		panic("rctl_compute_available: unknown per %d",
248 		    rule->rr_per);
249 	}
250 
251 	return (available);
252 }
253 
254 /*
255  * Return non-zero if allocating 'amount' by proc 'p' would exceed
256  * resource limit specified by 'rule'.
257  */
258 static int
259 rctl_would_exceed(const struct proc *p, const struct rctl_rule *rule,
260     int64_t amount)
261 {
262 	int64_t available;
263 
264 	rw_assert(&rctl_lock, RA_LOCKED);
265 
266 	available = rctl_available_resource(p, rule);
267 	if (available >= amount)
268 		return (0);
269 
270 	return (1);
271 }
272 
273 /*
274  * Check whether the proc 'p' can allocate 'amount' of 'resource' in addition
275  * to what it keeps allocated now.  Returns non-zero if the allocation should
276  * be denied, 0 otherwise.
277  */
278 int
279 rctl_enforce(struct proc *p, int resource, uint64_t amount)
280 {
281 	struct rctl_rule *rule;
282 	struct rctl_rule_link *link;
283 	struct sbuf sb;
284 	int should_deny = 0;
285 	char *buf;
286 	static int curtime = 0;
287 	static struct timeval lasttime;
288 
289 	rw_rlock(&rctl_lock);
290 
291 	/*
292 	 * There may be more than one matching rule; go through all of them.
293 	 * Denial should be done last, after logging and sending signals.
294 	 */
295 	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
296 		rule = link->rrl_rule;
297 		if (rule->rr_resource != resource)
298 			continue;
299 		if (!rctl_would_exceed(p, rule, amount)) {
300 			link->rrl_exceeded = 0;
301 			continue;
302 		}
303 
304 		switch (rule->rr_action) {
305 		case RCTL_ACTION_DENY:
306 			should_deny = 1;
307 			continue;
308 		case RCTL_ACTION_LOG:
309 			/*
310 			 * If rrl_exceeded != 0, it means we've already
311 			 * logged a warning for this process.
312 			 */
313 			if (link->rrl_exceeded != 0)
314 				continue;
315 
316 			if (!ppsratecheck(&lasttime, &curtime, 10))
317 				continue;
318 
319 			buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
320 			if (buf == NULL) {
321 				printf("rctl_enforce: out of memory\n");
322 				continue;
323 			}
324 			sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
325 			rctl_rule_to_sbuf(&sb, rule);
326 			sbuf_finish(&sb);
327 			printf("rctl: rule \"%s\" matched by pid %d "
328 			    "(%s), uid %d, jail %s\n", sbuf_data(&sb),
329 			    p->p_pid, p->p_comm, p->p_ucred->cr_uid,
330 			    p->p_ucred->cr_prison->pr_name);
331 			sbuf_delete(&sb);
332 			free(buf, M_RCTL);
333 			link->rrl_exceeded = 1;
334 			continue;
335 		case RCTL_ACTION_DEVCTL:
336 			if (link->rrl_exceeded != 0)
337 				continue;
338 
339 			buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
340 			if (buf == NULL) {
341 				printf("rctl_enforce: out of memory\n");
342 				continue;
343 			}
344 			sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
345 			sbuf_printf(&sb, "rule=");
346 			rctl_rule_to_sbuf(&sb, rule);
347 			sbuf_printf(&sb, " pid=%d ruid=%d jail=%s",
348 			    p->p_pid, p->p_ucred->cr_ruid,
349 			    p->p_ucred->cr_prison->pr_name);
350 			sbuf_finish(&sb);
351 			devctl_notify_f("RCTL", "rule", "matched",
352 			    sbuf_data(&sb), M_NOWAIT);
353 			sbuf_delete(&sb);
354 			free(buf, M_RCTL);
355 			link->rrl_exceeded = 1;
356 			continue;
357 		default:
358 			if (link->rrl_exceeded != 0)
359 				continue;
360 
361 			KASSERT(rule->rr_action > 0 &&
362 			    rule->rr_action <= RCTL_ACTION_SIGNAL_MAX,
363 			    ("rctl_enforce: unknown action %d",
364 			     rule->rr_action));
365 
366 			/*
367 			 * We're using the fact that RCTL_ACTION_SIG* values
368 			 * are equal to their counterparts from sys/signal.h.
369 			 */
370 			psignal(p, rule->rr_action);
371 			link->rrl_exceeded = 1;
372 			continue;
373 		}
374 	}
375 
376 	rw_runlock(&rctl_lock);
377 
378 	if (should_deny) {
379 		/*
380 		 * Return fake error code; the caller should change it
381 		 * into one proper for the situation - EFSIZ, ENOMEM etc.
382 		 */
383 		return (EDOOFUS);
384 	}
385 
386 	return (0);
387 }
388 
389 uint64_t
390 rctl_get_limit(struct proc *p, int resource)
391 {
392 	struct rctl_rule *rule;
393 	struct rctl_rule_link *link;
394 	uint64_t amount = UINT64_MAX;
395 
396 	rw_rlock(&rctl_lock);
397 
398 	/*
399 	 * There may be more than one matching rule; go through all of them.
400 	 * Denial should be done last, after logging and sending signals.
401 	 */
402 	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
403 		rule = link->rrl_rule;
404 		if (rule->rr_resource != resource)
405 			continue;
406 		if (rule->rr_action != RCTL_ACTION_DENY)
407 			continue;
408 		if (rule->rr_amount < amount)
409 			amount = rule->rr_amount;
410 	}
411 
412 	rw_runlock(&rctl_lock);
413 
414 	return (amount);
415 }
416 
417 uint64_t
418 rctl_get_available(struct proc *p, int resource)
419 {
420 	struct rctl_rule *rule;
421 	struct rctl_rule_link *link;
422 	int64_t available, minavailable, allocated;
423 
424 	minavailable = INT64_MAX;
425 
426 	rw_rlock(&rctl_lock);
427 
428 	/*
429 	 * There may be more than one matching rule; go through all of them.
430 	 * Denial should be done last, after logging and sending signals.
431 	 */
432 	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
433 		rule = link->rrl_rule;
434 		if (rule->rr_resource != resource)
435 			continue;
436 		if (rule->rr_action != RCTL_ACTION_DENY)
437 			continue;
438 		available = rctl_available_resource(p, rule);
439 		if (available < minavailable)
440 			minavailable = available;
441 	}
442 
443 	rw_runlock(&rctl_lock);
444 
445 	/*
446 	 * XXX: Think about this _hard_.
447 	 */
448 	allocated = p->p_racct->r_resources[resource];
449 	if (minavailable < INT64_MAX - allocated)
450 		minavailable += allocated;
451 	if (minavailable < 0)
452 		minavailable = 0;
453 	return (minavailable);
454 }
455 
456 static int
457 rctl_rule_matches(const struct rctl_rule *rule, const struct rctl_rule *filter)
458 {
459 
460 	if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED) {
461 		if (rule->rr_subject_type != filter->rr_subject_type)
462 			return (0);
463 
464 		switch (filter->rr_subject_type) {
465 		case RCTL_SUBJECT_TYPE_PROCESS:
466 			if (filter->rr_subject.rs_proc != NULL &&
467 			    rule->rr_subject.rs_proc !=
468 			    filter->rr_subject.rs_proc)
469 				return (0);
470 			break;
471 		case RCTL_SUBJECT_TYPE_USER:
472 			if (filter->rr_subject.rs_uip != NULL &&
473 			    rule->rr_subject.rs_uip !=
474 			    filter->rr_subject.rs_uip)
475 				return (0);
476 			break;
477 		case RCTL_SUBJECT_TYPE_LOGINCLASS:
478 			if (filter->rr_subject.rs_loginclass != NULL &&
479 			    rule->rr_subject.rs_loginclass !=
480 			    filter->rr_subject.rs_loginclass)
481 				return (0);
482 			break;
483 		case RCTL_SUBJECT_TYPE_JAIL:
484 			if (filter->rr_subject.rs_prison != NULL &&
485 			    rule->rr_subject.rs_prison !=
486 			    filter->rr_subject.rs_prison)
487 				return (0);
488 			break;
489 		default:
490 			panic("rctl_rule_matches: unknown subject type %d",
491 			    filter->rr_subject_type);
492 		}
493 	}
494 
495 	if (filter->rr_resource != RACCT_UNDEFINED) {
496 		if (rule->rr_resource != filter->rr_resource)
497 			return (0);
498 	}
499 
500 	if (filter->rr_action != RCTL_ACTION_UNDEFINED) {
501 		if (rule->rr_action != filter->rr_action)
502 			return (0);
503 	}
504 
505 	if (filter->rr_amount != RCTL_AMOUNT_UNDEFINED) {
506 		if (rule->rr_amount != filter->rr_amount)
507 			return (0);
508 	}
509 
510 	if (filter->rr_per != RCTL_SUBJECT_TYPE_UNDEFINED) {
511 		if (rule->rr_per != filter->rr_per)
512 			return (0);
513 	}
514 
515 	return (1);
516 }
517 
518 static int
519 str2value(const char *str, int *value, struct dict *table)
520 {
521 	int i;
522 
523 	if (value == NULL)
524 		return (EINVAL);
525 
526 	for (i = 0; table[i].d_name != NULL; i++) {
527 		if (strcasecmp(table[i].d_name, str) == 0) {
528 			*value =  table[i].d_value;
529 			return (0);
530 		}
531 	}
532 
533 	return (EINVAL);
534 }
535 
536 static int
537 str2id(const char *str, id_t *value)
538 {
539 	char *end;
540 
541 	if (str == NULL)
542 		return (EINVAL);
543 
544 	*value = strtoul(str, &end, 10);
545 	if ((size_t)(end - str) != strlen(str))
546 		return (EINVAL);
547 
548 	return (0);
549 }
550 
/*
 * Parse 'str' as a non-negative decimal number and store it in '*value'.
 *
 * The conversion is done with strtoul(), whose unsigned result is then
 * stored into a signed 64-bit variable.  Inputs of 2^63 or more, and
 * inputs with a leading minus sign (which strtoul() accepts and negates),
 * therefore show up as negative values; they are rejected instead of being
 * handed to the caller as bogus amounts.
 *
 * Returns 0 on success, EINVAL if 'str' is NULL or is not consumed
 * completely by the conversion, ERANGE if the result does not fit into a
 * non-negative int64_t.  '*value' is written in the non-NULL cases even
 * when an error is returned.
 */
static int
str2int64(const char *str, int64_t *value)
{
	char *end;

	if (str == NULL)
		return (EINVAL);

	*value = strtoul(str, &end, 10);
	if ((size_t)(end - str) != strlen(str))
		return (EINVAL);

	/* Wrapped around or explicitly negative: not a valid amount. */
	if (*value < 0)
		return (ERANGE);

	return (0);
}
565 
566 /*
567  * Connect the rule to the racct, increasing refcount for the rule.
568  */
569 static void
570 rctl_racct_add_rule(struct racct *racct, struct rctl_rule *rule)
571 {
572 	struct rctl_rule_link *link;
573 
574 	KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
575 
576 	rctl_rule_acquire(rule);
577 	link = uma_zalloc(rctl_rule_link_zone, M_WAITOK);
578 	link->rrl_rule = rule;
579 	link->rrl_exceeded = 0;
580 
581 	rw_wlock(&rctl_lock);
582 	LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next);
583 	rw_wunlock(&rctl_lock);
584 }
585 
586 static int
587 rctl_racct_add_rule_locked(struct racct *racct, struct rctl_rule *rule)
588 {
589 	struct rctl_rule_link *link;
590 
591 	KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
592 	rw_assert(&rctl_lock, RA_WLOCKED);
593 
594 	link = uma_zalloc(rctl_rule_link_zone, M_NOWAIT);
595 	if (link == NULL)
596 		return (ENOMEM);
597 	rctl_rule_acquire(rule);
598 	link->rrl_rule = rule;
599 	link->rrl_exceeded = 0;
600 
601 	LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next);
602 	return (0);
603 }
604 
605 /*
606  * Remove limits for a rules matching the filter and release
607  * the refcounts for the rules, possibly freeing them.  Returns
608  * the number of limit structures removed.
609  */
610 static int
611 rctl_racct_remove_rules(struct racct *racct,
612     const struct rctl_rule *filter)
613 {
614 	int removed = 0;
615 	struct rctl_rule_link *link, *linktmp;
616 
617 	rw_assert(&rctl_lock, RA_WLOCKED);
618 
619 	LIST_FOREACH_SAFE(link, &racct->r_rule_links, rrl_next, linktmp) {
620 		if (!rctl_rule_matches(link->rrl_rule, filter))
621 			continue;
622 
623 		LIST_REMOVE(link, rrl_next);
624 		rctl_rule_release(link->rrl_rule);
625 		uma_zfree(rctl_rule_link_zone, link);
626 		removed++;
627 	}
628 	return (removed);
629 }
630 
631 static void
632 rctl_rule_acquire_subject(struct rctl_rule *rule)
633 {
634 
635 	switch (rule->rr_subject_type) {
636 	case RCTL_SUBJECT_TYPE_UNDEFINED:
637 	case RCTL_SUBJECT_TYPE_PROCESS:
638 	case RCTL_SUBJECT_TYPE_JAIL:
639 		break;
640 	case RCTL_SUBJECT_TYPE_USER:
641 		if (rule->rr_subject.rs_uip != NULL)
642 			uihold(rule->rr_subject.rs_uip);
643 		break;
644 	case RCTL_SUBJECT_TYPE_LOGINCLASS:
645 		if (rule->rr_subject.rs_loginclass != NULL)
646 			loginclass_hold(rule->rr_subject.rs_loginclass);
647 		break;
648 	default:
649 		panic("rctl_rule_acquire_subject: unknown subject type %d",
650 		    rule->rr_subject_type);
651 	}
652 }
653 
654 static void
655 rctl_rule_release_subject(struct rctl_rule *rule)
656 {
657 
658 	switch (rule->rr_subject_type) {
659 	case RCTL_SUBJECT_TYPE_UNDEFINED:
660 	case RCTL_SUBJECT_TYPE_PROCESS:
661 	case RCTL_SUBJECT_TYPE_JAIL:
662 		break;
663 	case RCTL_SUBJECT_TYPE_USER:
664 		if (rule->rr_subject.rs_uip != NULL)
665 			uifree(rule->rr_subject.rs_uip);
666 		break;
667 	case RCTL_SUBJECT_TYPE_LOGINCLASS:
668 		if (rule->rr_subject.rs_loginclass != NULL)
669 			loginclass_free(rule->rr_subject.rs_loginclass);
670 		break;
671 	default:
672 		panic("rctl_rule_release_subject: unknown subject type %d",
673 		    rule->rr_subject_type);
674 	}
675 }
676 
677 struct rctl_rule *
678 rctl_rule_alloc(int flags)
679 {
680 	struct rctl_rule *rule;
681 
682 	rule = uma_zalloc(rctl_rule_zone, flags);
683 	if (rule == NULL)
684 		return (NULL);
685 	rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
686 	rule->rr_subject.rs_proc = NULL;
687 	rule->rr_subject.rs_uip = NULL;
688 	rule->rr_subject.rs_loginclass = NULL;
689 	rule->rr_subject.rs_prison = NULL;
690 	rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED;
691 	rule->rr_resource = RACCT_UNDEFINED;
692 	rule->rr_action = RCTL_ACTION_UNDEFINED;
693 	rule->rr_amount = RCTL_AMOUNT_UNDEFINED;
694 	refcount_init(&rule->rr_refcount, 1);
695 
696 	return (rule);
697 }
698 
699 struct rctl_rule *
700 rctl_rule_duplicate(const struct rctl_rule *rule, int flags)
701 {
702 	struct rctl_rule *copy;
703 
704 	copy = uma_zalloc(rctl_rule_zone, flags);
705 	if (copy == NULL)
706 		return (NULL);
707 	copy->rr_subject_type = rule->rr_subject_type;
708 	copy->rr_subject.rs_proc = rule->rr_subject.rs_proc;
709 	copy->rr_subject.rs_uip = rule->rr_subject.rs_uip;
710 	copy->rr_subject.rs_loginclass = rule->rr_subject.rs_loginclass;
711 	copy->rr_subject.rs_prison = rule->rr_subject.rs_prison;
712 	copy->rr_per = rule->rr_per;
713 	copy->rr_resource = rule->rr_resource;
714 	copy->rr_action = rule->rr_action;
715 	copy->rr_amount = rule->rr_amount;
716 	refcount_init(&copy->rr_refcount, 1);
717 	rctl_rule_acquire_subject(copy);
718 
719 	return (copy);
720 }
721 
722 void
723 rctl_rule_acquire(struct rctl_rule *rule)
724 {
725 
726 	KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
727 
728 	refcount_acquire(&rule->rr_refcount);
729 }
730 
731 static void
732 rctl_rule_free(void *context, int pending)
733 {
734 	struct rctl_rule *rule;
735 
736 	rule = (struct rctl_rule *)context;
737 
738 	KASSERT(rule->rr_refcount == 0, ("rule->rr_refcount != 0"));
739 
740 	/*
741 	 * We don't need locking here; rule is guaranteed to be inaccessible.
742 	 */
743 
744 	rctl_rule_release_subject(rule);
745 	uma_zfree(rctl_rule_zone, rule);
746 }
747 
748 void
749 rctl_rule_release(struct rctl_rule *rule)
750 {
751 
752 	KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
753 
754 	if (refcount_release(&rule->rr_refcount)) {
755 		/*
756 		 * rctl_rule_release() is often called when iterating
757 		 * over all the uidinfo structures in the system,
758 		 * holding uihashtbl_lock.  Since rctl_rule_free()
759 		 * might end up calling uifree(), this would lead
760 		 * to lock recursion.  Use taskqueue to avoid this.
761 		 */
762 		TASK_INIT(&rule->rr_task, 0, rctl_rule_free, rule);
763 		taskqueue_enqueue(taskqueue_thread, &rule->rr_task);
764 	}
765 }
766 
767 static int
768 rctl_rule_fully_specified(const struct rctl_rule *rule)
769 {
770 
771 	switch (rule->rr_subject_type) {
772 	case RCTL_SUBJECT_TYPE_UNDEFINED:
773 		return (0);
774 	case RCTL_SUBJECT_TYPE_PROCESS:
775 		if (rule->rr_subject.rs_proc == NULL)
776 			return (0);
777 		break;
778 	case RCTL_SUBJECT_TYPE_USER:
779 		if (rule->rr_subject.rs_uip == NULL)
780 			return (0);
781 		break;
782 	case RCTL_SUBJECT_TYPE_LOGINCLASS:
783 		if (rule->rr_subject.rs_loginclass == NULL)
784 			return (0);
785 		break;
786 	case RCTL_SUBJECT_TYPE_JAIL:
787 		if (rule->rr_subject.rs_prison == NULL)
788 			return (0);
789 		break;
790 	default:
791 		panic("rctl_rule_fully_specified: unknown subject type %d",
792 		    rule->rr_subject_type);
793 	}
794 	if (rule->rr_resource == RACCT_UNDEFINED)
795 		return (0);
796 	if (rule->rr_action == RCTL_ACTION_UNDEFINED)
797 		return (0);
798 	if (rule->rr_amount == RCTL_AMOUNT_UNDEFINED)
799 		return (0);
800 	if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED)
801 		return (0);
802 
803 	return (1);
804 }
805 
806 static int
807 rctl_string_to_rule(char *rulestr, struct rctl_rule **rulep)
808 {
809 	int error = 0;
810 	char *subjectstr, *subject_idstr, *resourcestr, *actionstr,
811 	     *amountstr, *perstr;
812 	struct rctl_rule *rule;
813 	id_t id;
814 
815 	rule = rctl_rule_alloc(M_WAITOK);
816 
817 	subjectstr = strsep(&rulestr, ":");
818 	subject_idstr = strsep(&rulestr, ":");
819 	resourcestr = strsep(&rulestr, ":");
820 	actionstr = strsep(&rulestr, "=/");
821 	amountstr = strsep(&rulestr, "/");
822 	perstr = rulestr;
823 
824 	if (subjectstr == NULL || subjectstr[0] == '\0')
825 		rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
826 	else {
827 		error = str2value(subjectstr, &rule->rr_subject_type, subjectnames);
828 		if (error != 0)
829 			goto out;
830 	}
831 
832 	if (subject_idstr == NULL || subject_idstr[0] == '\0') {
833 		rule->rr_subject.rs_proc = NULL;
834 		rule->rr_subject.rs_uip = NULL;
835 		rule->rr_subject.rs_loginclass = NULL;
836 		rule->rr_subject.rs_prison = NULL;
837 	} else {
838 		switch (rule->rr_subject_type) {
839 		case RCTL_SUBJECT_TYPE_UNDEFINED:
840 			error = EINVAL;
841 			goto out;
842 		case RCTL_SUBJECT_TYPE_PROCESS:
843 			error = str2id(subject_idstr, &id);
844 			if (error != 0)
845 				goto out;
846 			sx_assert(&allproc_lock, SA_LOCKED);
847 			rule->rr_subject.rs_proc = pfind(id);
848 			if (rule->rr_subject.rs_proc == NULL) {
849 				error = ESRCH;
850 				goto out;
851 			}
852 			PROC_UNLOCK(rule->rr_subject.rs_proc);
853 			break;
854 		case RCTL_SUBJECT_TYPE_USER:
855 			error = str2id(subject_idstr, &id);
856 			if (error != 0)
857 				goto out;
858 			rule->rr_subject.rs_uip = uifind(id);
859 			break;
860 		case RCTL_SUBJECT_TYPE_LOGINCLASS:
861 			rule->rr_subject.rs_loginclass =
862 			    loginclass_find(subject_idstr);
863 			if (rule->rr_subject.rs_loginclass == NULL) {
864 				error = ENAMETOOLONG;
865 				goto out;
866 			}
867 			break;
868 		case RCTL_SUBJECT_TYPE_JAIL:
869 			rule->rr_subject.rs_prison =
870 			    prison_find_name(&prison0, subject_idstr);
871 			if (rule->rr_subject.rs_prison == NULL) {
872 				/*
873 				 * No jail with that name; try with the JID.
874 				 */
875 				error = str2id(subject_idstr, &id);
876 				if (error != 0)
877 					goto out;
878 				rule->rr_subject.rs_prison = prison_find(id);
879 				if (rule->rr_subject.rs_prison == NULL) {
880 					error = ESRCH;
881 					goto out;
882 				}
883 			}
884 			/* prison_find() returns with mutex held. */
885 			mtx_unlock(&rule->rr_subject.rs_prison->pr_mtx);
886 			break;
887                default:
888                        panic("rctl_string_to_rule: unknown subject type %d",
889                            rule->rr_subject_type);
890                }
891 	}
892 
893 	if (resourcestr == NULL || resourcestr[0] == '\0')
894 		rule->rr_resource = RACCT_UNDEFINED;
895 	else {
896 		error = str2value(resourcestr, &rule->rr_resource,
897 		    resourcenames);
898 		if (error != 0)
899 			goto out;
900 	}
901 
902 	if (actionstr == NULL || actionstr[0] == '\0')
903 		rule->rr_action = RCTL_ACTION_UNDEFINED;
904 	else {
905 		error = str2value(actionstr, &rule->rr_action, actionnames);
906 		if (error != 0)
907 			goto out;
908 	}
909 
910 	if (amountstr == NULL || amountstr[0] == '\0')
911 		rule->rr_amount = RCTL_AMOUNT_UNDEFINED;
912 	else {
913 		error = str2int64(amountstr, &rule->rr_amount);
914 		if (error != 0)
915 			goto out;
916 		if (racct_is_in_thousands(rule->rr_resource))
917 			rule->rr_amount *= 1000;
918 	}
919 
920 	if (perstr == NULL || perstr[0] == '\0')
921 		rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED;
922 	else {
923 		error = str2value(perstr, &rule->rr_per, subjectnames);
924 		if (error != 0)
925 			goto out;
926 	}
927 
928 out:
929 	if (error == 0)
930 		*rulep = rule;
931 	else
932 		rctl_rule_release(rule);
933 
934 	return (error);
935 }
936 
937 /*
938  * Link a rule with all the subjects it applies to.
939  */
940 int
941 rctl_rule_add(struct rctl_rule *rule)
942 {
943 	struct proc *p;
944 	struct ucred *cred;
945 	struct uidinfo *uip;
946 	struct prison *pr;
947 	struct loginclass *lc;
948 	struct rctl_rule *rule2;
949 	int match;
950 
951 	KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
952 
953 	/*
954 	 * Some rules just don't make sense.  Note that the one below
955 	 * cannot be rewritten using racct_is_deniable(); the RACCT_PCTCPU,
956 	 * for example, is not deniable in the racct sense, but the
957 	 * limit is enforced in a different way, so "deny" rules for %CPU
958 	 * do make sense.
959 	 */
960 	if (rule->rr_action == RCTL_ACTION_DENY &&
961 	    (rule->rr_resource == RACCT_CPU ||
962 	    rule->rr_resource == RACCT_WALLCLOCK))
963 		return (EOPNOTSUPP);
964 
965 	if (rule->rr_per == RCTL_SUBJECT_TYPE_PROCESS &&
966 	    racct_is_sloppy(rule->rr_resource))
967 		return (EOPNOTSUPP);
968 
969 	/*
970 	 * Make sure there are no duplicated rules.  Also, for the "deny"
971 	 * rules, remove ones differing only by "amount".
972 	 */
973 	if (rule->rr_action == RCTL_ACTION_DENY) {
974 		rule2 = rctl_rule_duplicate(rule, M_WAITOK);
975 		rule2->rr_amount = RCTL_AMOUNT_UNDEFINED;
976 		rctl_rule_remove(rule2);
977 		rctl_rule_release(rule2);
978 	} else
979 		rctl_rule_remove(rule);
980 
981 	switch (rule->rr_subject_type) {
982 	case RCTL_SUBJECT_TYPE_PROCESS:
983 		p = rule->rr_subject.rs_proc;
984 		KASSERT(p != NULL, ("rctl_rule_add: NULL proc"));
985 		/*
986 		 * No resource limits for system processes.
987 		 */
988 		if (p->p_flag & P_SYSTEM)
989 			return (EPERM);
990 
991 		rctl_racct_add_rule(p->p_racct, rule);
992 		/*
993 		 * In case of per-process rule, we don't have anything more
994 		 * to do.
995 		 */
996 		return (0);
997 
998 	case RCTL_SUBJECT_TYPE_USER:
999 		uip = rule->rr_subject.rs_uip;
1000 		KASSERT(uip != NULL, ("rctl_rule_add: NULL uip"));
1001 		rctl_racct_add_rule(uip->ui_racct, rule);
1002 		break;
1003 
1004 	case RCTL_SUBJECT_TYPE_LOGINCLASS:
1005 		lc = rule->rr_subject.rs_loginclass;
1006 		KASSERT(lc != NULL, ("rctl_rule_add: NULL loginclass"));
1007 		rctl_racct_add_rule(lc->lc_racct, rule);
1008 		break;
1009 
1010 	case RCTL_SUBJECT_TYPE_JAIL:
1011 		pr = rule->rr_subject.rs_prison;
1012 		KASSERT(pr != NULL, ("rctl_rule_add: NULL pr"));
1013 		rctl_racct_add_rule(pr->pr_racct, rule);
1014 		break;
1015 
1016 	default:
1017 		panic("rctl_rule_add: unknown subject type %d",
1018 		    rule->rr_subject_type);
1019 	}
1020 
1021 	/*
1022 	 * Now go through all the processes and add the new rule to the ones
1023 	 * it applies to.
1024 	 */
1025 	sx_assert(&allproc_lock, SA_LOCKED);
1026 	FOREACH_PROC_IN_SYSTEM(p) {
1027 		if (p->p_flag & P_SYSTEM)
1028 			continue;
1029 		cred = p->p_ucred;
1030 		switch (rule->rr_subject_type) {
1031 		case RCTL_SUBJECT_TYPE_USER:
1032 			if (cred->cr_uidinfo == rule->rr_subject.rs_uip ||
1033 			    cred->cr_ruidinfo == rule->rr_subject.rs_uip)
1034 				break;
1035 			continue;
1036 		case RCTL_SUBJECT_TYPE_LOGINCLASS:
1037 			if (cred->cr_loginclass == rule->rr_subject.rs_loginclass)
1038 				break;
1039 			continue;
1040 		case RCTL_SUBJECT_TYPE_JAIL:
1041 			match = 0;
1042 			for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) {
1043 				if (pr == rule->rr_subject.rs_prison) {
1044 					match = 1;
1045 					break;
1046 				}
1047 			}
1048 			if (match)
1049 				break;
1050 			continue;
1051 		default:
1052 			panic("rctl_rule_add: unknown subject type %d",
1053 			    rule->rr_subject_type);
1054 		}
1055 
1056 		rctl_racct_add_rule(p->p_racct, rule);
1057 	}
1058 
1059 	return (0);
1060 }
1061 
1062 static void
1063 rctl_rule_remove_callback(struct racct *racct, void *arg2, void *arg3)
1064 {
1065 	struct rctl_rule *filter = (struct rctl_rule *)arg2;
1066 	int found = 0;
1067 
1068 	rw_wlock(&rctl_lock);
1069 	found += rctl_racct_remove_rules(racct, filter);
1070 	rw_wunlock(&rctl_lock);
1071 
1072 	*((int *)arg3) += found;
1073 }
1074 
1075 /*
1076  * Remove all rules that match the filter.
1077  */
1078 int
1079 rctl_rule_remove(struct rctl_rule *filter)
1080 {
1081 	int found = 0;
1082 	struct proc *p;
1083 
1084 	if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_PROCESS &&
1085 	    filter->rr_subject.rs_proc != NULL) {
1086 		p = filter->rr_subject.rs_proc;
1087 		rw_wlock(&rctl_lock);
1088 		found = rctl_racct_remove_rules(p->p_racct, filter);
1089 		rw_wunlock(&rctl_lock);
1090 		if (found)
1091 			return (0);
1092 		return (ESRCH);
1093 	}
1094 
1095 	loginclass_racct_foreach(rctl_rule_remove_callback, filter,
1096 	    (void *)&found);
1097 	ui_racct_foreach(rctl_rule_remove_callback, filter,
1098 	    (void *)&found);
1099 	prison_racct_foreach(rctl_rule_remove_callback, filter,
1100 	    (void *)&found);
1101 
1102 	sx_assert(&allproc_lock, SA_LOCKED);
1103 	rw_wlock(&rctl_lock);
1104 	FOREACH_PROC_IN_SYSTEM(p) {
1105 		found += rctl_racct_remove_rules(p->p_racct, filter);
1106 	}
1107 	rw_wunlock(&rctl_lock);
1108 
1109 	if (found)
1110 		return (0);
1111 	return (ESRCH);
1112 }
1113 
1114 /*
1115  * Appends a rule to the sbuf.
1116  */
1117 static void
1118 rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule)
1119 {
1120 	int64_t amount;
1121 
1122 	sbuf_printf(sb, "%s:", rctl_subject_type_name(rule->rr_subject_type));
1123 
1124 	switch (rule->rr_subject_type) {
1125 	case RCTL_SUBJECT_TYPE_PROCESS:
1126 		if (rule->rr_subject.rs_proc == NULL)
1127 			sbuf_printf(sb, ":");
1128 		else
1129 			sbuf_printf(sb, "%d:",
1130 			    rule->rr_subject.rs_proc->p_pid);
1131 		break;
1132 	case RCTL_SUBJECT_TYPE_USER:
1133 		if (rule->rr_subject.rs_uip == NULL)
1134 			sbuf_printf(sb, ":");
1135 		else
1136 			sbuf_printf(sb, "%d:",
1137 			    rule->rr_subject.rs_uip->ui_uid);
1138 		break;
1139 	case RCTL_SUBJECT_TYPE_LOGINCLASS:
1140 		if (rule->rr_subject.rs_loginclass == NULL)
1141 			sbuf_printf(sb, ":");
1142 		else
1143 			sbuf_printf(sb, "%s:",
1144 			    rule->rr_subject.rs_loginclass->lc_name);
1145 		break;
1146 	case RCTL_SUBJECT_TYPE_JAIL:
1147 		if (rule->rr_subject.rs_prison == NULL)
1148 			sbuf_printf(sb, ":");
1149 		else
1150 			sbuf_printf(sb, "%s:",
1151 			    rule->rr_subject.rs_prison->pr_name);
1152 		break;
1153 	default:
1154 		panic("rctl_rule_to_sbuf: unknown subject type %d",
1155 		    rule->rr_subject_type);
1156 	}
1157 
1158 	amount = rule->rr_amount;
1159 	if (amount != RCTL_AMOUNT_UNDEFINED &&
1160 	    racct_is_in_thousands(rule->rr_resource))
1161 		amount /= 1000;
1162 
1163 	sbuf_printf(sb, "%s:%s=%jd",
1164 	    rctl_resource_name(rule->rr_resource),
1165 	    rctl_action_name(rule->rr_action),
1166 	    amount);
1167 
1168 	if (rule->rr_per != rule->rr_subject_type)
1169 		sbuf_printf(sb, "/%s", rctl_subject_type_name(rule->rr_per));
1170 }
1171 
1172 /*
1173  * Routine used by RCTL syscalls to read in input string.
1174  */
1175 static int
1176 rctl_read_inbuf(char **inputstr, const char *inbufp, size_t inbuflen)
1177 {
1178 	int error;
1179 	char *str;
1180 
1181 	if (inbuflen <= 0)
1182 		return (EINVAL);
1183 
1184 	str = malloc(inbuflen + 1, M_RCTL, M_WAITOK);
1185 	error = copyinstr(inbufp, str, inbuflen, NULL);
1186 	if (error != 0) {
1187 		free(str, M_RCTL);
1188 		return (error);
1189 	}
1190 
1191 	*inputstr = str;
1192 
1193 	return (0);
1194 }
1195 
/*
 * Routine used by RCTL syscalls to write out output string.
 *
 * Finishes outputsbuf and copies sbuf_len() + 1 bytes of its data to the
 * user address outbufp (the extra byte is presumably the terminating NUL).
 * A non-NULL outputsbuf is always deleted before returning.
 *
 * Returns 0 when outputsbuf is NULL, ERANGE when outbuflen is too small
 * for the data, and the result of copyout() otherwise.
 */
static int
rctl_write_outbuf(struct sbuf *outputsbuf, char *outbufp, size_t outbuflen)
{
	size_t needed;
	int error;

	if (outputsbuf == NULL)
		return (0);

	sbuf_finish(outputsbuf);
	needed = sbuf_len(outputsbuf) + 1;
	if (outbuflen < needed)
		error = ERANGE;
	else
		error = copyout(sbuf_data(outputsbuf), outbufp, needed);
	sbuf_delete(outputsbuf);

	return (error);
}
1217 
1218 static struct sbuf *
1219 rctl_racct_to_sbuf(struct racct *racct, int sloppy)
1220 {
1221 	int i;
1222 	int64_t amount;
1223 	struct sbuf *sb;
1224 
1225 	sb = sbuf_new_auto();
1226 	for (i = 0; i <= RACCT_MAX; i++) {
1227 		if (sloppy == 0 && racct_is_sloppy(i))
1228 			continue;
1229 		amount = racct->r_resources[i];
1230 		if (racct_is_in_thousands(i))
1231 			amount /= 1000;
1232 		sbuf_printf(sb, "%s=%jd,", rctl_resource_name(i), amount);
1233 	}
1234 	sbuf_setpos(sb, sbuf_len(sb) - 1);
1235 	return (sb);
1236 }
1237 
1238 int
1239 rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap)
1240 {
1241 	int error;
1242 	char *inputstr;
1243 	struct rctl_rule *filter;
1244 	struct sbuf *outputsbuf = NULL;
1245 	struct proc *p;
1246 	struct uidinfo *uip;
1247 	struct loginclass *lc;
1248 	struct prison *pr;
1249 
1250 	error = priv_check(td, PRIV_RCTL_GET_RACCT);
1251 	if (error != 0)
1252 		return (error);
1253 
1254 	error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1255 	if (error != 0)
1256 		return (error);
1257 
1258 	sx_slock(&allproc_lock);
1259 	sx_slock(&allprison_lock);
1260 	error = rctl_string_to_rule(inputstr, &filter);
1261 	free(inputstr, M_RCTL);
1262 	if (error != 0) {
1263 		sx_sunlock(&allprison_lock);
1264 		sx_sunlock(&allproc_lock);
1265 		return (error);
1266 	}
1267 
1268 	switch (filter->rr_subject_type) {
1269 	case RCTL_SUBJECT_TYPE_PROCESS:
1270 		p = filter->rr_subject.rs_proc;
1271 		if (p == NULL) {
1272 			error = EINVAL;
1273 			goto out;
1274 		}
1275 		if (p->p_flag & P_SYSTEM) {
1276 			error = EINVAL;
1277 			goto out;
1278 		}
1279 		outputsbuf = rctl_racct_to_sbuf(p->p_racct, 0);
1280 		break;
1281 	case RCTL_SUBJECT_TYPE_USER:
1282 		uip = filter->rr_subject.rs_uip;
1283 		if (uip == NULL) {
1284 			error = EINVAL;
1285 			goto out;
1286 		}
1287 		outputsbuf = rctl_racct_to_sbuf(uip->ui_racct, 1);
1288 		break;
1289 	case RCTL_SUBJECT_TYPE_LOGINCLASS:
1290 		lc = filter->rr_subject.rs_loginclass;
1291 		if (lc == NULL) {
1292 			error = EINVAL;
1293 			goto out;
1294 		}
1295 		outputsbuf = rctl_racct_to_sbuf(lc->lc_racct, 1);
1296 		break;
1297 	case RCTL_SUBJECT_TYPE_JAIL:
1298 		pr = filter->rr_subject.rs_prison;
1299 		if (pr == NULL) {
1300 			error = EINVAL;
1301 			goto out;
1302 		}
1303 		outputsbuf = rctl_racct_to_sbuf(pr->pr_racct, 1);
1304 		break;
1305 	default:
1306 		error = EINVAL;
1307 	}
1308 out:
1309 	rctl_rule_release(filter);
1310 	sx_sunlock(&allprison_lock);
1311 	sx_sunlock(&allproc_lock);
1312 	if (error != 0)
1313 		return (error);
1314 
1315 	error = rctl_write_outbuf(outputsbuf, uap->outbufp, uap->outbuflen);
1316 
1317 	return (error);
1318 }
1319 
1320 static void
1321 rctl_get_rules_callback(struct racct *racct, void *arg2, void *arg3)
1322 {
1323 	struct rctl_rule *filter = (struct rctl_rule *)arg2;
1324 	struct rctl_rule_link *link;
1325 	struct sbuf *sb = (struct sbuf *)arg3;
1326 
1327 	rw_rlock(&rctl_lock);
1328 	LIST_FOREACH(link, &racct->r_rule_links, rrl_next) {
1329 		if (!rctl_rule_matches(link->rrl_rule, filter))
1330 			continue;
1331 		rctl_rule_to_sbuf(sb, link->rrl_rule);
1332 		sbuf_printf(sb, ",");
1333 	}
1334 	rw_runlock(&rctl_lock);
1335 }
1336 
1337 int
1338 rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
1339 {
1340 	int error;
1341 	size_t bufsize = RCTL_DEFAULT_BUFSIZE;
1342 	char *inputstr, *buf;
1343 	struct sbuf *sb;
1344 	struct rctl_rule *filter;
1345 	struct rctl_rule_link *link;
1346 	struct proc *p;
1347 
1348 	error = priv_check(td, PRIV_RCTL_GET_RULES);
1349 	if (error != 0)
1350 		return (error);
1351 
1352 	error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1353 	if (error != 0)
1354 		return (error);
1355 
1356 	sx_slock(&allproc_lock);
1357 	sx_slock(&allprison_lock);
1358 	error = rctl_string_to_rule(inputstr, &filter);
1359 	free(inputstr, M_RCTL);
1360 	if (error != 0) {
1361 		sx_sunlock(&allprison_lock);
1362 		sx_sunlock(&allproc_lock);
1363 		return (error);
1364 	}
1365 
1366 again:
1367 	buf = malloc(bufsize, M_RCTL, M_WAITOK);
1368 	sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN);
1369 	KASSERT(sb != NULL, ("sbuf_new failed"));
1370 
1371 	sx_assert(&allproc_lock, SA_LOCKED);
1372 	FOREACH_PROC_IN_SYSTEM(p) {
1373 		rw_rlock(&rctl_lock);
1374 		LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1375 			/*
1376 			 * Non-process rules will be added to the buffer later.
1377 			 * Adding them here would result in duplicated output.
1378 			 */
1379 			if (link->rrl_rule->rr_subject_type !=
1380 			    RCTL_SUBJECT_TYPE_PROCESS)
1381 				continue;
1382 			if (!rctl_rule_matches(link->rrl_rule, filter))
1383 				continue;
1384 			rctl_rule_to_sbuf(sb, link->rrl_rule);
1385 			sbuf_printf(sb, ",");
1386 		}
1387 		rw_runlock(&rctl_lock);
1388 	}
1389 
1390 	loginclass_racct_foreach(rctl_get_rules_callback, filter, sb);
1391 	ui_racct_foreach(rctl_get_rules_callback, filter, sb);
1392 	prison_racct_foreach(rctl_get_rules_callback, filter, sb);
1393 	if (sbuf_error(sb) == ENOMEM) {
1394 		sbuf_delete(sb);
1395 		free(buf, M_RCTL);
1396 		bufsize *= 4;
1397 		goto again;
1398 	}
1399 
1400 	/*
1401 	 * Remove trailing ",".
1402 	 */
1403 	if (sbuf_len(sb) > 0)
1404 		sbuf_setpos(sb, sbuf_len(sb) - 1);
1405 
1406 	error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
1407 
1408 	rctl_rule_release(filter);
1409 	sx_sunlock(&allprison_lock);
1410 	sx_sunlock(&allproc_lock);
1411 	free(buf, M_RCTL);
1412 	return (error);
1413 }
1414 
1415 int
1416 rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap)
1417 {
1418 	int error;
1419 	size_t bufsize = RCTL_DEFAULT_BUFSIZE;
1420 	char *inputstr, *buf;
1421 	struct sbuf *sb;
1422 	struct rctl_rule *filter;
1423 	struct rctl_rule_link *link;
1424 
1425 	error = priv_check(td, PRIV_RCTL_GET_LIMITS);
1426 	if (error != 0)
1427 		return (error);
1428 
1429 	error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1430 	if (error != 0)
1431 		return (error);
1432 
1433 	sx_slock(&allproc_lock);
1434 	sx_slock(&allprison_lock);
1435 	error = rctl_string_to_rule(inputstr, &filter);
1436 	free(inputstr, M_RCTL);
1437 	if (error != 0) {
1438 		sx_sunlock(&allprison_lock);
1439 		sx_sunlock(&allproc_lock);
1440 		return (error);
1441 	}
1442 
1443 	if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_UNDEFINED) {
1444 		rctl_rule_release(filter);
1445 		sx_sunlock(&allprison_lock);
1446 		sx_sunlock(&allproc_lock);
1447 		return (EINVAL);
1448 	}
1449 	if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_PROCESS) {
1450 		rctl_rule_release(filter);
1451 		sx_sunlock(&allprison_lock);
1452 		sx_sunlock(&allproc_lock);
1453 		return (EOPNOTSUPP);
1454 	}
1455 	if (filter->rr_subject.rs_proc == NULL) {
1456 		rctl_rule_release(filter);
1457 		sx_sunlock(&allprison_lock);
1458 		sx_sunlock(&allproc_lock);
1459 		return (EINVAL);
1460 	}
1461 
1462 again:
1463 	buf = malloc(bufsize, M_RCTL, M_WAITOK);
1464 	sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN);
1465 	KASSERT(sb != NULL, ("sbuf_new failed"));
1466 
1467 	rw_rlock(&rctl_lock);
1468 	LIST_FOREACH(link, &filter->rr_subject.rs_proc->p_racct->r_rule_links,
1469 	    rrl_next) {
1470 		rctl_rule_to_sbuf(sb, link->rrl_rule);
1471 		sbuf_printf(sb, ",");
1472 	}
1473 	rw_runlock(&rctl_lock);
1474 	if (sbuf_error(sb) == ENOMEM) {
1475 		sbuf_delete(sb);
1476 		free(buf, M_RCTL);
1477 		bufsize *= 4;
1478 		goto again;
1479 	}
1480 
1481 	/*
1482 	 * Remove trailing ",".
1483 	 */
1484 	if (sbuf_len(sb) > 0)
1485 		sbuf_setpos(sb, sbuf_len(sb) - 1);
1486 
1487 	error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
1488 	rctl_rule_release(filter);
1489 	sx_sunlock(&allprison_lock);
1490 	sx_sunlock(&allproc_lock);
1491 	free(buf, M_RCTL);
1492 	return (error);
1493 }
1494 
1495 int
1496 rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap)
1497 {
1498 	int error;
1499 	struct rctl_rule *rule;
1500 	char *inputstr;
1501 
1502 	error = priv_check(td, PRIV_RCTL_ADD_RULE);
1503 	if (error != 0)
1504 		return (error);
1505 
1506 	error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1507 	if (error != 0)
1508 		return (error);
1509 
1510 	sx_slock(&allproc_lock);
1511 	sx_slock(&allprison_lock);
1512 	error = rctl_string_to_rule(inputstr, &rule);
1513 	free(inputstr, M_RCTL);
1514 	if (error != 0) {
1515 		sx_sunlock(&allprison_lock);
1516 		sx_sunlock(&allproc_lock);
1517 		return (error);
1518 	}
1519 	/*
1520 	 * The 'per' part of a rule is optional.
1521 	 */
1522 	if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED &&
1523 	    rule->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED)
1524 		rule->rr_per = rule->rr_subject_type;
1525 
1526 	if (!rctl_rule_fully_specified(rule)) {
1527 		error = EINVAL;
1528 		goto out;
1529 	}
1530 
1531 	error = rctl_rule_add(rule);
1532 
1533 out:
1534 	rctl_rule_release(rule);
1535 	sx_sunlock(&allprison_lock);
1536 	sx_sunlock(&allproc_lock);
1537 	return (error);
1538 }
1539 
1540 int
1541 rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap)
1542 {
1543 	int error;
1544 	struct rctl_rule *filter;
1545 	char *inputstr;
1546 
1547 	error = priv_check(td, PRIV_RCTL_REMOVE_RULE);
1548 	if (error != 0)
1549 		return (error);
1550 
1551 	error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1552 	if (error != 0)
1553 		return (error);
1554 
1555 	sx_slock(&allproc_lock);
1556 	sx_slock(&allprison_lock);
1557 	error = rctl_string_to_rule(inputstr, &filter);
1558 	free(inputstr, M_RCTL);
1559 	if (error != 0) {
1560 		sx_sunlock(&allprison_lock);
1561 		sx_sunlock(&allproc_lock);
1562 		return (error);
1563 	}
1564 
1565 	error = rctl_rule_remove(filter);
1566 	rctl_rule_release(filter);
1567 	sx_sunlock(&allprison_lock);
1568 	sx_sunlock(&allproc_lock);
1569 
1570 	return (error);
1571 }
1572 
1573 /*
1574  * Update RCTL rule list after credential change.
1575  */
1576 void
1577 rctl_proc_ucred_changed(struct proc *p, struct ucred *newcred)
1578 {
1579 	int rulecnt, i;
1580 	struct rctl_rule_link *link, *newlink;
1581 	struct uidinfo *newuip;
1582 	struct loginclass *newlc;
1583 	struct prison *newpr;
1584 	LIST_HEAD(, rctl_rule_link) newrules;
1585 
1586 	newuip = newcred->cr_ruidinfo;
1587 	newlc = newcred->cr_loginclass;
1588 	newpr = newcred->cr_prison;
1589 
1590 	LIST_INIT(&newrules);
1591 
1592 again:
1593 	/*
1594 	 * First, count the rules that apply to the process with new
1595 	 * credentials.
1596 	 */
1597 	rulecnt = 0;
1598 	rw_rlock(&rctl_lock);
1599 	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1600 		if (link->rrl_rule->rr_subject_type ==
1601 		    RCTL_SUBJECT_TYPE_PROCESS)
1602 			rulecnt++;
1603 	}
1604 	LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next)
1605 		rulecnt++;
1606 	LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next)
1607 		rulecnt++;
1608 	LIST_FOREACH(link, &newpr->pr_racct->r_rule_links, rrl_next)
1609 		rulecnt++;
1610 	rw_runlock(&rctl_lock);
1611 
1612 	/*
1613 	 * Create temporary list.  We've dropped the rctl_lock in order
1614 	 * to use M_WAITOK.
1615 	 */
1616 	for (i = 0; i < rulecnt; i++) {
1617 		newlink = uma_zalloc(rctl_rule_link_zone, M_WAITOK);
1618 		newlink->rrl_rule = NULL;
1619 		LIST_INSERT_HEAD(&newrules, newlink, rrl_next);
1620 	}
1621 
1622 	newlink = LIST_FIRST(&newrules);
1623 
1624 	/*
1625 	 * Assign rules to the newly allocated list entries.
1626 	 */
1627 	rw_wlock(&rctl_lock);
1628 	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1629 		if (link->rrl_rule->rr_subject_type ==
1630 		    RCTL_SUBJECT_TYPE_PROCESS) {
1631 			if (newlink == NULL)
1632 				goto goaround;
1633 			rctl_rule_acquire(link->rrl_rule);
1634 			newlink->rrl_rule = link->rrl_rule;
1635 			newlink = LIST_NEXT(newlink, rrl_next);
1636 			rulecnt--;
1637 		}
1638 	}
1639 
1640 	LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next) {
1641 		if (newlink == NULL)
1642 			goto goaround;
1643 		rctl_rule_acquire(link->rrl_rule);
1644 		newlink->rrl_rule = link->rrl_rule;
1645 		newlink = LIST_NEXT(newlink, rrl_next);
1646 		rulecnt--;
1647 	}
1648 
1649 	LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next) {
1650 		if (newlink == NULL)
1651 			goto goaround;
1652 		rctl_rule_acquire(link->rrl_rule);
1653 		newlink->rrl_rule = link->rrl_rule;
1654 		newlink = LIST_NEXT(newlink, rrl_next);
1655 		rulecnt--;
1656 	}
1657 
1658 	LIST_FOREACH(link, &newpr->pr_racct->r_rule_links, rrl_next) {
1659 		if (newlink == NULL)
1660 			goto goaround;
1661 		rctl_rule_acquire(link->rrl_rule);
1662 		newlink->rrl_rule = link->rrl_rule;
1663 		newlink = LIST_NEXT(newlink, rrl_next);
1664 		rulecnt--;
1665 	}
1666 
1667 	if (rulecnt == 0) {
1668 		/*
1669 		 * Free the old rule list.
1670 		 */
1671 		while (!LIST_EMPTY(&p->p_racct->r_rule_links)) {
1672 			link = LIST_FIRST(&p->p_racct->r_rule_links);
1673 			LIST_REMOVE(link, rrl_next);
1674 			rctl_rule_release(link->rrl_rule);
1675 			uma_zfree(rctl_rule_link_zone, link);
1676 		}
1677 
1678 		/*
1679 		 * Replace lists and we're done.
1680 		 *
1681 		 * XXX: Is there any way to switch list heads instead
1682 		 *      of iterating here?
1683 		 */
1684 		while (!LIST_EMPTY(&newrules)) {
1685 			newlink = LIST_FIRST(&newrules);
1686 			LIST_REMOVE(newlink, rrl_next);
1687 			LIST_INSERT_HEAD(&p->p_racct->r_rule_links,
1688 			    newlink, rrl_next);
1689 		}
1690 
1691 		rw_wunlock(&rctl_lock);
1692 
1693 		return;
1694 	}
1695 
1696 goaround:
1697 	rw_wunlock(&rctl_lock);
1698 
1699 	/*
1700 	 * Rule list changed while we were not holding the rctl_lock.
1701 	 * Free the new list and try again.
1702 	 */
1703 	while (!LIST_EMPTY(&newrules)) {
1704 		newlink = LIST_FIRST(&newrules);
1705 		LIST_REMOVE(newlink, rrl_next);
1706 		if (newlink->rrl_rule != NULL)
1707 			rctl_rule_release(newlink->rrl_rule);
1708 		uma_zfree(rctl_rule_link_zone, newlink);
1709 	}
1710 
1711 	goto again;
1712 }
1713 
1714 /*
1715  * Assign RCTL rules to the newly created process.
1716  */
1717 int
1718 rctl_proc_fork(struct proc *parent, struct proc *child)
1719 {
1720 	int error;
1721 	struct rctl_rule_link *link;
1722 	struct rctl_rule *rule;
1723 
1724 	LIST_INIT(&child->p_racct->r_rule_links);
1725 
1726 	/*
1727 	 * No limits for kernel processes.
1728 	 */
1729 	if (child->p_flag & P_SYSTEM)
1730 		return (0);
1731 
1732 	/*
1733 	 * Nothing to inherit from P_SYSTEM parents.
1734 	 */
1735 	if (parent->p_racct == NULL) {
1736 		KASSERT(parent->p_flag & P_SYSTEM,
1737 		    ("non-system process without racct; p = %p", parent));
1738 		return (0);
1739 	}
1740 
1741 	rw_wlock(&rctl_lock);
1742 
1743 	/*
1744 	 * Go through limits applicable to the parent and assign them
1745 	 * to the child.  Rules with 'process' subject have to be duplicated
1746 	 * in order to make their rr_subject point to the new process.
1747 	 */
1748 	LIST_FOREACH(link, &parent->p_racct->r_rule_links, rrl_next) {
1749 		if (link->rrl_rule->rr_subject_type ==
1750 		    RCTL_SUBJECT_TYPE_PROCESS) {
1751 			rule = rctl_rule_duplicate(link->rrl_rule, M_NOWAIT);
1752 			if (rule == NULL)
1753 				goto fail;
1754 			KASSERT(rule->rr_subject.rs_proc == parent,
1755 			    ("rule->rr_subject.rs_proc != parent"));
1756 			rule->rr_subject.rs_proc = child;
1757 			error = rctl_racct_add_rule_locked(child->p_racct,
1758 			    rule);
1759 			rctl_rule_release(rule);
1760 			if (error != 0)
1761 				goto fail;
1762 		} else {
1763 			error = rctl_racct_add_rule_locked(child->p_racct,
1764 			    link->rrl_rule);
1765 			if (error != 0)
1766 				goto fail;
1767 		}
1768 	}
1769 
1770 	rw_wunlock(&rctl_lock);
1771 	return (0);
1772 
1773 fail:
1774 	while (!LIST_EMPTY(&child->p_racct->r_rule_links)) {
1775 		link = LIST_FIRST(&child->p_racct->r_rule_links);
1776 		LIST_REMOVE(link, rrl_next);
1777 		rctl_rule_release(link->rrl_rule);
1778 		uma_zfree(rctl_rule_link_zone, link);
1779 	}
1780 	rw_wunlock(&rctl_lock);
1781 	return (EAGAIN);
1782 }
1783 
1784 /*
1785  * Release rules attached to the racct.
1786  */
1787 void
1788 rctl_racct_release(struct racct *racct)
1789 {
1790 	struct rctl_rule_link *link;
1791 
1792 	rw_wlock(&rctl_lock);
1793 	while (!LIST_EMPTY(&racct->r_rule_links)) {
1794 		link = LIST_FIRST(&racct->r_rule_links);
1795 		LIST_REMOVE(link, rrl_next);
1796 		rctl_rule_release(link->rrl_rule);
1797 		uma_zfree(rctl_rule_link_zone, link);
1798 	}
1799 	rw_wunlock(&rctl_lock);
1800 }
1801 
1802 static void
1803 rctl_init(void)
1804 {
1805 
1806 	rctl_rule_link_zone = uma_zcreate("rctl_rule_link",
1807 	    sizeof(struct rctl_rule_link), NULL, NULL, NULL, NULL,
1808 	    UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
1809 	rctl_rule_zone = uma_zcreate("rctl_rule", sizeof(struct rctl_rule),
1810 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
1811 }
1812 
1813 #else /* !RCTL */
1814 
/*
 * Stub used when RCTL is not compiled in: always fails with ENOSYS.
 */
int
rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap)
{

	(void)td;
	(void)uap;

	return (ENOSYS);
}
1821 
/*
 * Stub used when RCTL is not compiled in: always fails with ENOSYS.
 */
int
rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
{

	(void)td;
	(void)uap;

	return (ENOSYS);
}
1828 
/*
 * Stub used when RCTL is not compiled in: always fails with ENOSYS.
 */
int
rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap)
{

	(void)td;
	(void)uap;

	return (ENOSYS);
}
1835 
/*
 * Stub used when RCTL is not compiled in: always fails with ENOSYS.
 */
int
rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap)
{

	(void)td;
	(void)uap;

	return (ENOSYS);
}
1842 
/*
 * Stub used when RCTL is not compiled in: always fails with ENOSYS.
 */
int
rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap)
{

	(void)td;
	(void)uap;

	return (ENOSYS);
}
1849 
1850 #endif /* !RCTL */
1851