xref: /freebsd/sys/kern/kern_rctl.c (revision a10cee30c94cf5944826d2a495e9cdf339dfbcc8)
1 /*-
2  * Copyright (c) 2010 The FreeBSD Foundation
3  * All rights reserved.
4  *
5  * This software was developed by Edward Tomasz Napierala under sponsorship
6  * from the FreeBSD Foundation.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  * $FreeBSD$
30  */
31 
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34 
35 #include <sys/param.h>
36 #include <sys/bus.h>
37 #include <sys/malloc.h>
38 #include <sys/queue.h>
39 #include <sys/refcount.h>
40 #include <sys/jail.h>
41 #include <sys/kernel.h>
42 #include <sys/limits.h>
43 #include <sys/loginclass.h>
44 #include <sys/priv.h>
45 #include <sys/proc.h>
46 #include <sys/racct.h>
47 #include <sys/rctl.h>
48 #include <sys/resourcevar.h>
49 #include <sys/sx.h>
50 #include <sys/sysent.h>
51 #include <sys/sysproto.h>
52 #include <sys/systm.h>
53 #include <sys/types.h>
54 #include <sys/eventhandler.h>
55 #include <sys/lock.h>
56 #include <sys/mutex.h>
57 #include <sys/rwlock.h>
58 #include <sys/sbuf.h>
59 #include <sys/taskqueue.h>
60 #include <sys/tree.h>
61 #include <vm/uma.h>
62 
63 #ifdef RCTL
64 #ifndef RACCT
65 #error "The RCTL option requires the RACCT option"
66 #endif
67 
68 FEATURE(rctl, "Resource Limits");
69 
70 #define	HRF_DEFAULT		0
71 #define	HRF_DONT_INHERIT	1
72 #define	HRF_DONT_ACCUMULATE	2
73 
74 /* Default buffer size for rctl_get_rules(2). */
75 #define	RCTL_DEFAULT_BUFSIZE	4096
76 #define	RCTL_MAX_INBUFLEN	4096
77 #define	RCTL_LOG_BUFSIZE	128
78 
79 /*
80  * 'rctl_rule_link' connects a rule with every racct it's related to.
81  * For example, rule 'user:X:openfiles:deny=N/process' is linked
82  * with uidinfo for user X, and to each process of that user.
83  */
84 struct rctl_rule_link {
85 	LIST_ENTRY(rctl_rule_link)	rrl_next;
86 	struct rctl_rule		*rrl_rule;
87 	int				rrl_exceeded;
88 };
89 
/*
 * One entry of a name <-> integer lookup table.  Tables built from it
 * are terminated by an entry whose d_name is NULL.
 */
struct dict {
	const char	*d_name;	/* textual name */
	int		d_value;	/* matching numeric value */
};
94 
95 static struct dict subjectnames[] = {
96 	{ "process", RCTL_SUBJECT_TYPE_PROCESS },
97 	{ "user", RCTL_SUBJECT_TYPE_USER },
98 	{ "loginclass", RCTL_SUBJECT_TYPE_LOGINCLASS },
99 	{ "jail", RCTL_SUBJECT_TYPE_JAIL },
100 	{ NULL, -1 }};
101 
102 static struct dict resourcenames[] = {
103 	{ "cputime", RACCT_CPU },
104 	{ "datasize", RACCT_DATA },
105 	{ "stacksize", RACCT_STACK },
106 	{ "coredumpsize", RACCT_CORE },
107 	{ "memoryuse", RACCT_RSS },
108 	{ "memorylocked", RACCT_MEMLOCK },
109 	{ "maxproc", RACCT_NPROC },
110 	{ "openfiles", RACCT_NOFILE },
111 	{ "vmemoryuse", RACCT_VMEM },
112 	{ "pseudoterminals", RACCT_NPTS },
113 	{ "swapuse", RACCT_SWAP },
114 	{ "nthr", RACCT_NTHR },
115 	{ "msgqqueued", RACCT_MSGQQUEUED },
116 	{ "msgqsize", RACCT_MSGQSIZE },
117 	{ "nmsgq", RACCT_NMSGQ },
118 	{ "nsem", RACCT_NSEM },
119 	{ "nsemop", RACCT_NSEMOP },
120 	{ "nshm", RACCT_NSHM },
121 	{ "shmsize", RACCT_SHMSIZE },
122 	{ "wallclock", RACCT_WALLCLOCK },
123 	{ NULL, -1 }};
124 
125 static struct dict actionnames[] = {
126 	{ "sighup", RCTL_ACTION_SIGHUP },
127 	{ "sigint", RCTL_ACTION_SIGINT },
128 	{ "sigquit", RCTL_ACTION_SIGQUIT },
129 	{ "sigill", RCTL_ACTION_SIGILL },
130 	{ "sigtrap", RCTL_ACTION_SIGTRAP },
131 	{ "sigabrt", RCTL_ACTION_SIGABRT },
132 	{ "sigemt", RCTL_ACTION_SIGEMT },
133 	{ "sigfpe", RCTL_ACTION_SIGFPE },
134 	{ "sigkill", RCTL_ACTION_SIGKILL },
135 	{ "sigbus", RCTL_ACTION_SIGBUS },
136 	{ "sigsegv", RCTL_ACTION_SIGSEGV },
137 	{ "sigsys", RCTL_ACTION_SIGSYS },
138 	{ "sigpipe", RCTL_ACTION_SIGPIPE },
139 	{ "sigalrm", RCTL_ACTION_SIGALRM },
140 	{ "sigterm", RCTL_ACTION_SIGTERM },
141 	{ "sigurg", RCTL_ACTION_SIGURG },
142 	{ "sigstop", RCTL_ACTION_SIGSTOP },
143 	{ "sigtstp", RCTL_ACTION_SIGTSTP },
144 	{ "sigchld", RCTL_ACTION_SIGCHLD },
145 	{ "sigttin", RCTL_ACTION_SIGTTIN },
146 	{ "sigttou", RCTL_ACTION_SIGTTOU },
147 	{ "sigio", RCTL_ACTION_SIGIO },
148 	{ "sigxcpu", RCTL_ACTION_SIGXCPU },
149 	{ "sigxfsz", RCTL_ACTION_SIGXFSZ },
150 	{ "sigvtalrm", RCTL_ACTION_SIGVTALRM },
151 	{ "sigprof", RCTL_ACTION_SIGPROF },
152 	{ "sigwinch", RCTL_ACTION_SIGWINCH },
153 	{ "siginfo", RCTL_ACTION_SIGINFO },
154 	{ "sigusr1", RCTL_ACTION_SIGUSR1 },
155 	{ "sigusr2", RCTL_ACTION_SIGUSR2 },
156 	{ "sigthr", RCTL_ACTION_SIGTHR },
157 	{ "deny", RCTL_ACTION_DENY },
158 	{ "log", RCTL_ACTION_LOG },
159 	{ "devctl", RCTL_ACTION_DEVCTL },
160 	{ NULL, -1 }};
161 
162 static void rctl_init(void);
163 SYSINIT(rctl, SI_SUB_RACCT, SI_ORDER_FIRST, rctl_init, NULL);
164 
165 static uma_zone_t rctl_rule_link_zone;
166 static uma_zone_t rctl_rule_zone;
167 static struct rwlock rctl_lock;
168 RW_SYSINIT(rctl_lock, &rctl_lock, "RCTL lock");
169 
170 static int rctl_rule_fully_specified(const struct rctl_rule *rule);
171 static void rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule);
172 
173 static MALLOC_DEFINE(M_RCTL, "rctl", "Resource Limits");
174 
175 static const char *
176 rctl_subject_type_name(int subject)
177 {
178 	int i;
179 
180 	for (i = 0; subjectnames[i].d_name != NULL; i++) {
181 		if (subjectnames[i].d_value == subject)
182 			return (subjectnames[i].d_name);
183 	}
184 
185 	panic("rctl_subject_type_name: unknown subject type %d", subject);
186 }
187 
188 static const char *
189 rctl_action_name(int action)
190 {
191 	int i;
192 
193 	for (i = 0; actionnames[i].d_name != NULL; i++) {
194 		if (actionnames[i].d_value == action)
195 			return (actionnames[i].d_name);
196 	}
197 
198 	panic("rctl_action_name: unknown action %d", action);
199 }
200 
201 const char *
202 rctl_resource_name(int resource)
203 {
204 	int i;
205 
206 	for (i = 0; resourcenames[i].d_name != NULL; i++) {
207 		if (resourcenames[i].d_value == resource)
208 			return (resourcenames[i].d_name);
209 	}
210 
211 	panic("rctl_resource_name: unknown resource %d", resource);
212 }
213 
214 /*
215  * Return the amount of resource that can be allocated by 'p' before
216  * hitting 'rule'.
217  */
218 static int64_t
219 rctl_available_resource(const struct proc *p, const struct rctl_rule *rule)
220 {
221 	int resource;
222 	int64_t available = INT64_MAX;
223 	struct ucred *cred = p->p_ucred;
224 
225 	rw_assert(&rctl_lock, RA_LOCKED);
226 
227 	resource = rule->rr_resource;
228 	switch (rule->rr_per) {
229 	case RCTL_SUBJECT_TYPE_PROCESS:
230 		available = rule->rr_amount -
231 		    p->p_racct->r_resources[resource];
232 		break;
233 	case RCTL_SUBJECT_TYPE_USER:
234 		available = rule->rr_amount -
235 		    cred->cr_ruidinfo->ui_racct->r_resources[resource];
236 		break;
237 	case RCTL_SUBJECT_TYPE_LOGINCLASS:
238 		available = rule->rr_amount -
239 		    cred->cr_loginclass->lc_racct->r_resources[resource];
240 		break;
241 	case RCTL_SUBJECT_TYPE_JAIL:
242 		available = rule->rr_amount -
243 		    cred->cr_prison->pr_prison_racct->prr_racct->
244 		        r_resources[resource];
245 		break;
246 	default:
247 		panic("rctl_compute_available: unknown per %d",
248 		    rule->rr_per);
249 	}
250 
251 	return (available);
252 }
253 
254 /*
255  * Return non-zero if allocating 'amount' by proc 'p' would exceed
256  * resource limit specified by 'rule'.
257  */
258 static int
259 rctl_would_exceed(const struct proc *p, const struct rctl_rule *rule,
260     int64_t amount)
261 {
262 	int64_t available;
263 
264 	rw_assert(&rctl_lock, RA_LOCKED);
265 
266 	available = rctl_available_resource(p, rule);
267 	if (available >= amount)
268 		return (0);
269 
270 	return (1);
271 }
272 
273 /*
274  * Check whether the proc 'p' can allocate 'amount' of 'resource' in addition
275  * to what it keeps allocated now.  Returns non-zero if the allocation should
276  * be denied, 0 otherwise.
277  */
278 int
279 rctl_enforce(struct proc *p, int resource, uint64_t amount)
280 {
281 	struct rctl_rule *rule;
282 	struct rctl_rule_link *link;
283 	struct sbuf sb;
284 	int should_deny = 0;
285 	char *buf;
286 	static int curtime = 0;
287 	static struct timeval lasttime;
288 
289 	rw_rlock(&rctl_lock);
290 
291 	/*
292 	 * There may be more than one matching rule; go through all of them.
293 	 * Denial should be done last, after logging and sending signals.
294 	 */
295 	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
296 		rule = link->rrl_rule;
297 		if (rule->rr_resource != resource)
298 			continue;
299 		if (!rctl_would_exceed(p, rule, amount)) {
300 			link->rrl_exceeded = 0;
301 			continue;
302 		}
303 
304 		switch (rule->rr_action) {
305 		case RCTL_ACTION_DENY:
306 			should_deny = 1;
307 			continue;
308 		case RCTL_ACTION_LOG:
309 			/*
310 			 * If rrl_exceeded != 0, it means we've already
311 			 * logged a warning for this process.
312 			 */
313 			if (link->rrl_exceeded != 0)
314 				continue;
315 
316 			/*
317 			 * If the process state is not fully initialized yet,
318 			 * we can't access most of the required fields, e.g.
319 			 * p->p_comm.  This happens when called from fork1().
320 			 * Ignore this rule for now; it will be processed just
321 			 * after fork, when called from racct_proc_fork_done().
322 			 */
323 			if (p->p_state != PRS_NORMAL)
324 				continue;
325 
326 			if (!ppsratecheck(&lasttime, &curtime, 10))
327 				continue;
328 
329 			buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
330 			if (buf == NULL) {
331 				printf("rctl_enforce: out of memory\n");
332 				continue;
333 			}
334 			sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
335 			rctl_rule_to_sbuf(&sb, rule);
336 			sbuf_finish(&sb);
337 			printf("rctl: rule \"%s\" matched by pid %d "
338 			    "(%s), uid %d, jail %s\n", sbuf_data(&sb),
339 			    p->p_pid, p->p_comm, p->p_ucred->cr_uid,
340 			    p->p_ucred->cr_prison->pr_prison_racct->prr_name);
341 			sbuf_delete(&sb);
342 			free(buf, M_RCTL);
343 			link->rrl_exceeded = 1;
344 			continue;
345 		case RCTL_ACTION_DEVCTL:
346 			if (link->rrl_exceeded != 0)
347 				continue;
348 
349 			if (p->p_state != PRS_NORMAL)
350 				continue;
351 
352 			buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
353 			if (buf == NULL) {
354 				printf("rctl_enforce: out of memory\n");
355 				continue;
356 			}
357 			sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
358 			sbuf_printf(&sb, "rule=");
359 			rctl_rule_to_sbuf(&sb, rule);
360 			sbuf_printf(&sb, " pid=%d ruid=%d jail=%s",
361 			    p->p_pid, p->p_ucred->cr_ruid,
362 			    p->p_ucred->cr_prison->pr_prison_racct->prr_name);
363 			sbuf_finish(&sb);
364 			devctl_notify_f("RCTL", "rule", "matched",
365 			    sbuf_data(&sb), M_NOWAIT);
366 			sbuf_delete(&sb);
367 			free(buf, M_RCTL);
368 			link->rrl_exceeded = 1;
369 			continue;
370 		default:
371 			if (link->rrl_exceeded != 0)
372 				continue;
373 
374 			if (p->p_state != PRS_NORMAL)
375 				continue;
376 
377 			KASSERT(rule->rr_action > 0 &&
378 			    rule->rr_action <= RCTL_ACTION_SIGNAL_MAX,
379 			    ("rctl_enforce: unknown action %d",
380 			     rule->rr_action));
381 
382 			/*
383 			 * We're using the fact that RCTL_ACTION_SIG* values
384 			 * are equal to their counterparts from sys/signal.h.
385 			 */
386 			kern_psignal(p, rule->rr_action);
387 			link->rrl_exceeded = 1;
388 			continue;
389 		}
390 	}
391 
392 	rw_runlock(&rctl_lock);
393 
394 	if (should_deny) {
395 		/*
396 		 * Return fake error code; the caller should change it
397 		 * into one proper for the situation - EFSIZ, ENOMEM etc.
398 		 */
399 		return (EDOOFUS);
400 	}
401 
402 	return (0);
403 }
404 
405 uint64_t
406 rctl_get_limit(struct proc *p, int resource)
407 {
408 	struct rctl_rule *rule;
409 	struct rctl_rule_link *link;
410 	uint64_t amount = UINT64_MAX;
411 
412 	rw_rlock(&rctl_lock);
413 
414 	/*
415 	 * There may be more than one matching rule; go through all of them.
416 	 * Denial should be done last, after logging and sending signals.
417 	 */
418 	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
419 		rule = link->rrl_rule;
420 		if (rule->rr_resource != resource)
421 			continue;
422 		if (rule->rr_action != RCTL_ACTION_DENY)
423 			continue;
424 		if (rule->rr_amount < amount)
425 			amount = rule->rr_amount;
426 	}
427 
428 	rw_runlock(&rctl_lock);
429 
430 	return (amount);
431 }
432 
433 uint64_t
434 rctl_get_available(struct proc *p, int resource)
435 {
436 	struct rctl_rule *rule;
437 	struct rctl_rule_link *link;
438 	int64_t available, minavailable, allocated;
439 
440 	minavailable = INT64_MAX;
441 
442 	rw_rlock(&rctl_lock);
443 
444 	/*
445 	 * There may be more than one matching rule; go through all of them.
446 	 * Denial should be done last, after logging and sending signals.
447 	 */
448 	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
449 		rule = link->rrl_rule;
450 		if (rule->rr_resource != resource)
451 			continue;
452 		if (rule->rr_action != RCTL_ACTION_DENY)
453 			continue;
454 		available = rctl_available_resource(p, rule);
455 		if (available < minavailable)
456 			minavailable = available;
457 	}
458 
459 	rw_runlock(&rctl_lock);
460 
461 	/*
462 	 * XXX: Think about this _hard_.
463 	 */
464 	allocated = p->p_racct->r_resources[resource];
465 	if (minavailable < INT64_MAX - allocated)
466 		minavailable += allocated;
467 	if (minavailable < 0)
468 		minavailable = 0;
469 	return (minavailable);
470 }
471 
472 static int
473 rctl_rule_matches(const struct rctl_rule *rule, const struct rctl_rule *filter)
474 {
475 
476 	if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED) {
477 		if (rule->rr_subject_type != filter->rr_subject_type)
478 			return (0);
479 
480 		switch (filter->rr_subject_type) {
481 		case RCTL_SUBJECT_TYPE_PROCESS:
482 			if (filter->rr_subject.rs_proc != NULL &&
483 			    rule->rr_subject.rs_proc !=
484 			    filter->rr_subject.rs_proc)
485 				return (0);
486 			break;
487 		case RCTL_SUBJECT_TYPE_USER:
488 			if (filter->rr_subject.rs_uip != NULL &&
489 			    rule->rr_subject.rs_uip !=
490 			    filter->rr_subject.rs_uip)
491 				return (0);
492 			break;
493 		case RCTL_SUBJECT_TYPE_LOGINCLASS:
494 			if (filter->rr_subject.rs_loginclass != NULL &&
495 			    rule->rr_subject.rs_loginclass !=
496 			    filter->rr_subject.rs_loginclass)
497 				return (0);
498 			break;
499 		case RCTL_SUBJECT_TYPE_JAIL:
500 			if (filter->rr_subject.rs_prison_racct != NULL &&
501 			    rule->rr_subject.rs_prison_racct !=
502 			    filter->rr_subject.rs_prison_racct)
503 				return (0);
504 			break;
505 		default:
506 			panic("rctl_rule_matches: unknown subject type %d",
507 			    filter->rr_subject_type);
508 		}
509 	}
510 
511 	if (filter->rr_resource != RACCT_UNDEFINED) {
512 		if (rule->rr_resource != filter->rr_resource)
513 			return (0);
514 	}
515 
516 	if (filter->rr_action != RCTL_ACTION_UNDEFINED) {
517 		if (rule->rr_action != filter->rr_action)
518 			return (0);
519 	}
520 
521 	if (filter->rr_amount != RCTL_AMOUNT_UNDEFINED) {
522 		if (rule->rr_amount != filter->rr_amount)
523 			return (0);
524 	}
525 
526 	if (filter->rr_per != RCTL_SUBJECT_TYPE_UNDEFINED) {
527 		if (rule->rr_per != filter->rr_per)
528 			return (0);
529 	}
530 
531 	return (1);
532 }
533 
534 static int
535 str2value(const char *str, int *value, struct dict *table)
536 {
537 	int i;
538 
539 	if (value == NULL)
540 		return (EINVAL);
541 
542 	for (i = 0; table[i].d_name != NULL; i++) {
543 		if (strcasecmp(table[i].d_name, str) == 0) {
544 			*value =  table[i].d_value;
545 			return (0);
546 		}
547 	}
548 
549 	return (EINVAL);
550 }
551 
552 static int
553 str2id(const char *str, id_t *value)
554 {
555 	char *end;
556 
557 	if (str == NULL)
558 		return (EINVAL);
559 
560 	*value = strtoul(str, &end, 10);
561 	if ((size_t)(end - str) != strlen(str))
562 		return (EINVAL);
563 
564 	return (0);
565 }
566 
567 static int
568 str2int64(const char *str, int64_t *value)
569 {
570 	char *end;
571 
572 	if (str == NULL)
573 		return (EINVAL);
574 
575 	*value = strtoul(str, &end, 10);
576 	if ((size_t)(end - str) != strlen(str))
577 		return (EINVAL);
578 
579 	return (0);
580 }
581 
582 /*
583  * Connect the rule to the racct, increasing refcount for the rule.
584  */
585 static void
586 rctl_racct_add_rule(struct racct *racct, struct rctl_rule *rule)
587 {
588 	struct rctl_rule_link *link;
589 
590 	KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
591 
592 	rctl_rule_acquire(rule);
593 	link = uma_zalloc(rctl_rule_link_zone, M_WAITOK);
594 	link->rrl_rule = rule;
595 	link->rrl_exceeded = 0;
596 
597 	rw_wlock(&rctl_lock);
598 	LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next);
599 	rw_wunlock(&rctl_lock);
600 }
601 
602 static int
603 rctl_racct_add_rule_locked(struct racct *racct, struct rctl_rule *rule)
604 {
605 	struct rctl_rule_link *link;
606 
607 	KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
608 	rw_assert(&rctl_lock, RA_WLOCKED);
609 
610 	link = uma_zalloc(rctl_rule_link_zone, M_NOWAIT);
611 	if (link == NULL)
612 		return (ENOMEM);
613 	rctl_rule_acquire(rule);
614 	link->rrl_rule = rule;
615 	link->rrl_exceeded = 0;
616 
617 	LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next);
618 	return (0);
619 }
620 
621 /*
622  * Remove limits for a rules matching the filter and release
623  * the refcounts for the rules, possibly freeing them.  Returns
624  * the number of limit structures removed.
625  */
626 static int
627 rctl_racct_remove_rules(struct racct *racct,
628     const struct rctl_rule *filter)
629 {
630 	int removed = 0;
631 	struct rctl_rule_link *link, *linktmp;
632 
633 	rw_assert(&rctl_lock, RA_WLOCKED);
634 
635 	LIST_FOREACH_SAFE(link, &racct->r_rule_links, rrl_next, linktmp) {
636 		if (!rctl_rule_matches(link->rrl_rule, filter))
637 			continue;
638 
639 		LIST_REMOVE(link, rrl_next);
640 		rctl_rule_release(link->rrl_rule);
641 		uma_zfree(rctl_rule_link_zone, link);
642 		removed++;
643 	}
644 	return (removed);
645 }
646 
647 static void
648 rctl_rule_acquire_subject(struct rctl_rule *rule)
649 {
650 
651 	switch (rule->rr_subject_type) {
652 	case RCTL_SUBJECT_TYPE_UNDEFINED:
653 	case RCTL_SUBJECT_TYPE_PROCESS:
654 		break;
655 	case RCTL_SUBJECT_TYPE_JAIL:
656 		if (rule->rr_subject.rs_prison_racct != NULL)
657 			prison_racct_hold(rule->rr_subject.rs_prison_racct);
658 		break;
659 	case RCTL_SUBJECT_TYPE_USER:
660 		if (rule->rr_subject.rs_uip != NULL)
661 			uihold(rule->rr_subject.rs_uip);
662 		break;
663 	case RCTL_SUBJECT_TYPE_LOGINCLASS:
664 		if (rule->rr_subject.rs_loginclass != NULL)
665 			loginclass_hold(rule->rr_subject.rs_loginclass);
666 		break;
667 	default:
668 		panic("rctl_rule_acquire_subject: unknown subject type %d",
669 		    rule->rr_subject_type);
670 	}
671 }
672 
673 static void
674 rctl_rule_release_subject(struct rctl_rule *rule)
675 {
676 
677 	switch (rule->rr_subject_type) {
678 	case RCTL_SUBJECT_TYPE_UNDEFINED:
679 	case RCTL_SUBJECT_TYPE_PROCESS:
680 		break;
681 	case RCTL_SUBJECT_TYPE_JAIL:
682 		if (rule->rr_subject.rs_prison_racct != NULL)
683 			prison_racct_free(rule->rr_subject.rs_prison_racct);
684 		break;
685 	case RCTL_SUBJECT_TYPE_USER:
686 		if (rule->rr_subject.rs_uip != NULL)
687 			uifree(rule->rr_subject.rs_uip);
688 		break;
689 	case RCTL_SUBJECT_TYPE_LOGINCLASS:
690 		if (rule->rr_subject.rs_loginclass != NULL)
691 			loginclass_free(rule->rr_subject.rs_loginclass);
692 		break;
693 	default:
694 		panic("rctl_rule_release_subject: unknown subject type %d",
695 		    rule->rr_subject_type);
696 	}
697 }
698 
699 struct rctl_rule *
700 rctl_rule_alloc(int flags)
701 {
702 	struct rctl_rule *rule;
703 
704 	rule = uma_zalloc(rctl_rule_zone, flags);
705 	if (rule == NULL)
706 		return (NULL);
707 	rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
708 	rule->rr_subject.rs_proc = NULL;
709 	rule->rr_subject.rs_uip = NULL;
710 	rule->rr_subject.rs_loginclass = NULL;
711 	rule->rr_subject.rs_prison_racct = NULL;
712 	rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED;
713 	rule->rr_resource = RACCT_UNDEFINED;
714 	rule->rr_action = RCTL_ACTION_UNDEFINED;
715 	rule->rr_amount = RCTL_AMOUNT_UNDEFINED;
716 	refcount_init(&rule->rr_refcount, 1);
717 
718 	return (rule);
719 }
720 
721 struct rctl_rule *
722 rctl_rule_duplicate(const struct rctl_rule *rule, int flags)
723 {
724 	struct rctl_rule *copy;
725 
726 	copy = uma_zalloc(rctl_rule_zone, flags);
727 	if (copy == NULL)
728 		return (NULL);
729 	copy->rr_subject_type = rule->rr_subject_type;
730 	copy->rr_subject.rs_proc = rule->rr_subject.rs_proc;
731 	copy->rr_subject.rs_uip = rule->rr_subject.rs_uip;
732 	copy->rr_subject.rs_loginclass = rule->rr_subject.rs_loginclass;
733 	copy->rr_subject.rs_prison_racct = rule->rr_subject.rs_prison_racct;
734 	copy->rr_per = rule->rr_per;
735 	copy->rr_resource = rule->rr_resource;
736 	copy->rr_action = rule->rr_action;
737 	copy->rr_amount = rule->rr_amount;
738 	refcount_init(&copy->rr_refcount, 1);
739 	rctl_rule_acquire_subject(copy);
740 
741 	return (copy);
742 }
743 
744 void
745 rctl_rule_acquire(struct rctl_rule *rule)
746 {
747 
748 	KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
749 
750 	refcount_acquire(&rule->rr_refcount);
751 }
752 
753 static void
754 rctl_rule_free(void *context, int pending)
755 {
756 	struct rctl_rule *rule;
757 
758 	rule = (struct rctl_rule *)context;
759 
760 	KASSERT(rule->rr_refcount == 0, ("rule->rr_refcount != 0"));
761 
762 	/*
763 	 * We don't need locking here; rule is guaranteed to be inaccessible.
764 	 */
765 
766 	rctl_rule_release_subject(rule);
767 	uma_zfree(rctl_rule_zone, rule);
768 }
769 
770 void
771 rctl_rule_release(struct rctl_rule *rule)
772 {
773 
774 	KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
775 
776 	if (refcount_release(&rule->rr_refcount)) {
777 		/*
778 		 * rctl_rule_release() is often called when iterating
779 		 * over all the uidinfo structures in the system,
780 		 * holding uihashtbl_lock.  Since rctl_rule_free()
781 		 * might end up calling uifree(), this would lead
782 		 * to lock recursion.  Use taskqueue to avoid this.
783 		 */
784 		TASK_INIT(&rule->rr_task, 0, rctl_rule_free, rule);
785 		taskqueue_enqueue(taskqueue_thread, &rule->rr_task);
786 	}
787 }
788 
789 static int
790 rctl_rule_fully_specified(const struct rctl_rule *rule)
791 {
792 
793 	switch (rule->rr_subject_type) {
794 	case RCTL_SUBJECT_TYPE_UNDEFINED:
795 		return (0);
796 	case RCTL_SUBJECT_TYPE_PROCESS:
797 		if (rule->rr_subject.rs_proc == NULL)
798 			return (0);
799 		break;
800 	case RCTL_SUBJECT_TYPE_USER:
801 		if (rule->rr_subject.rs_uip == NULL)
802 			return (0);
803 		break;
804 	case RCTL_SUBJECT_TYPE_LOGINCLASS:
805 		if (rule->rr_subject.rs_loginclass == NULL)
806 			return (0);
807 		break;
808 	case RCTL_SUBJECT_TYPE_JAIL:
809 		if (rule->rr_subject.rs_prison_racct == NULL)
810 			return (0);
811 		break;
812 	default:
813 		panic("rctl_rule_fully_specified: unknown subject type %d",
814 		    rule->rr_subject_type);
815 	}
816 	if (rule->rr_resource == RACCT_UNDEFINED)
817 		return (0);
818 	if (rule->rr_action == RCTL_ACTION_UNDEFINED)
819 		return (0);
820 	if (rule->rr_amount == RCTL_AMOUNT_UNDEFINED)
821 		return (0);
822 	if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED)
823 		return (0);
824 
825 	return (1);
826 }
827 
828 static int
829 rctl_string_to_rule(char *rulestr, struct rctl_rule **rulep)
830 {
831 	int error = 0;
832 	char *subjectstr, *subject_idstr, *resourcestr, *actionstr,
833 	     *amountstr, *perstr;
834 	struct rctl_rule *rule;
835 	id_t id;
836 
837 	rule = rctl_rule_alloc(M_WAITOK);
838 
839 	subjectstr = strsep(&rulestr, ":");
840 	subject_idstr = strsep(&rulestr, ":");
841 	resourcestr = strsep(&rulestr, ":");
842 	actionstr = strsep(&rulestr, "=/");
843 	amountstr = strsep(&rulestr, "/");
844 	perstr = rulestr;
845 
846 	if (subjectstr == NULL || subjectstr[0] == '\0')
847 		rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
848 	else {
849 		error = str2value(subjectstr, &rule->rr_subject_type, subjectnames);
850 		if (error != 0)
851 			goto out;
852 	}
853 
854 	if (subject_idstr == NULL || subject_idstr[0] == '\0') {
855 		rule->rr_subject.rs_proc = NULL;
856 		rule->rr_subject.rs_uip = NULL;
857 		rule->rr_subject.rs_loginclass = NULL;
858 		rule->rr_subject.rs_prison_racct = NULL;
859 	} else {
860 		switch (rule->rr_subject_type) {
861 		case RCTL_SUBJECT_TYPE_UNDEFINED:
862 			error = EINVAL;
863 			goto out;
864 		case RCTL_SUBJECT_TYPE_PROCESS:
865 			error = str2id(subject_idstr, &id);
866 			if (error != 0)
867 				goto out;
868 			sx_assert(&allproc_lock, SA_LOCKED);
869 			rule->rr_subject.rs_proc = pfind(id);
870 			if (rule->rr_subject.rs_proc == NULL) {
871 				error = ESRCH;
872 				goto out;
873 			}
874 			PROC_UNLOCK(rule->rr_subject.rs_proc);
875 			break;
876 		case RCTL_SUBJECT_TYPE_USER:
877 			error = str2id(subject_idstr, &id);
878 			if (error != 0)
879 				goto out;
880 			rule->rr_subject.rs_uip = uifind(id);
881 			break;
882 		case RCTL_SUBJECT_TYPE_LOGINCLASS:
883 			rule->rr_subject.rs_loginclass =
884 			    loginclass_find(subject_idstr);
885 			if (rule->rr_subject.rs_loginclass == NULL) {
886 				error = ENAMETOOLONG;
887 				goto out;
888 			}
889 			break;
890 		case RCTL_SUBJECT_TYPE_JAIL:
891 			rule->rr_subject.rs_prison_racct =
892 			    prison_racct_find(subject_idstr);
893 			if (rule->rr_subject.rs_prison_racct == NULL) {
894 				error = ENAMETOOLONG;
895 				goto out;
896 			}
897 			break;
898                default:
899                        panic("rctl_string_to_rule: unknown subject type %d",
900                            rule->rr_subject_type);
901                }
902 	}
903 
904 	if (resourcestr == NULL || resourcestr[0] == '\0')
905 		rule->rr_resource = RACCT_UNDEFINED;
906 	else {
907 		error = str2value(resourcestr, &rule->rr_resource,
908 		    resourcenames);
909 		if (error != 0)
910 			goto out;
911 	}
912 
913 	if (actionstr == NULL || actionstr[0] == '\0')
914 		rule->rr_action = RCTL_ACTION_UNDEFINED;
915 	else {
916 		error = str2value(actionstr, &rule->rr_action, actionnames);
917 		if (error != 0)
918 			goto out;
919 	}
920 
921 	if (amountstr == NULL || amountstr[0] == '\0')
922 		rule->rr_amount = RCTL_AMOUNT_UNDEFINED;
923 	else {
924 		error = str2int64(amountstr, &rule->rr_amount);
925 		if (error != 0)
926 			goto out;
927 		if (RACCT_IS_IN_MILLIONS(rule->rr_resource))
928 			rule->rr_amount *= 1000000;
929 	}
930 
931 	if (perstr == NULL || perstr[0] == '\0')
932 		rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED;
933 	else {
934 		error = str2value(perstr, &rule->rr_per, subjectnames);
935 		if (error != 0)
936 			goto out;
937 	}
938 
939 out:
940 	if (error == 0)
941 		*rulep = rule;
942 	else
943 		rctl_rule_release(rule);
944 
945 	return (error);
946 }
947 
948 /*
949  * Link a rule with all the subjects it applies to.
950  */
951 int
952 rctl_rule_add(struct rctl_rule *rule)
953 {
954 	struct proc *p;
955 	struct ucred *cred;
956 	struct uidinfo *uip;
957 	struct prison *pr;
958 	struct prison_racct *prr;
959 	struct loginclass *lc;
960 	struct rctl_rule *rule2;
961 	int match;
962 
963 	KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
964 
965 	/*
966 	 * Some rules just don't make sense.  Note that the one below
967 	 * cannot be rewritten using RACCT_IS_DENIABLE(); the RACCT_PCTCPU,
968 	 * for example, is not deniable in the racct sense, but the
969 	 * limit is enforced in a different way, so "deny" rules for %CPU
970 	 * do make sense.
971 	 */
972 	if (rule->rr_action == RCTL_ACTION_DENY &&
973 	    (rule->rr_resource == RACCT_CPU ||
974 	    rule->rr_resource == RACCT_WALLCLOCK))
975 		return (EOPNOTSUPP);
976 
977 	if (rule->rr_per == RCTL_SUBJECT_TYPE_PROCESS &&
978 	    RACCT_IS_SLOPPY(rule->rr_resource))
979 		return (EOPNOTSUPP);
980 
981 	/*
982 	 * Make sure there are no duplicated rules.  Also, for the "deny"
983 	 * rules, remove ones differing only by "amount".
984 	 */
985 	if (rule->rr_action == RCTL_ACTION_DENY) {
986 		rule2 = rctl_rule_duplicate(rule, M_WAITOK);
987 		rule2->rr_amount = RCTL_AMOUNT_UNDEFINED;
988 		rctl_rule_remove(rule2);
989 		rctl_rule_release(rule2);
990 	} else
991 		rctl_rule_remove(rule);
992 
993 	switch (rule->rr_subject_type) {
994 	case RCTL_SUBJECT_TYPE_PROCESS:
995 		p = rule->rr_subject.rs_proc;
996 		KASSERT(p != NULL, ("rctl_rule_add: NULL proc"));
997 
998 		rctl_racct_add_rule(p->p_racct, rule);
999 		/*
1000 		 * In case of per-process rule, we don't have anything more
1001 		 * to do.
1002 		 */
1003 		return (0);
1004 
1005 	case RCTL_SUBJECT_TYPE_USER:
1006 		uip = rule->rr_subject.rs_uip;
1007 		KASSERT(uip != NULL, ("rctl_rule_add: NULL uip"));
1008 		rctl_racct_add_rule(uip->ui_racct, rule);
1009 		break;
1010 
1011 	case RCTL_SUBJECT_TYPE_LOGINCLASS:
1012 		lc = rule->rr_subject.rs_loginclass;
1013 		KASSERT(lc != NULL, ("rctl_rule_add: NULL loginclass"));
1014 		rctl_racct_add_rule(lc->lc_racct, rule);
1015 		break;
1016 
1017 	case RCTL_SUBJECT_TYPE_JAIL:
1018 		prr = rule->rr_subject.rs_prison_racct;
1019 		KASSERT(prr != NULL, ("rctl_rule_add: NULL pr"));
1020 		rctl_racct_add_rule(prr->prr_racct, rule);
1021 		break;
1022 
1023 	default:
1024 		panic("rctl_rule_add: unknown subject type %d",
1025 		    rule->rr_subject_type);
1026 	}
1027 
1028 	/*
1029 	 * Now go through all the processes and add the new rule to the ones
1030 	 * it applies to.
1031 	 */
1032 	sx_assert(&allproc_lock, SA_LOCKED);
1033 	FOREACH_PROC_IN_SYSTEM(p) {
1034 		cred = p->p_ucred;
1035 		switch (rule->rr_subject_type) {
1036 		case RCTL_SUBJECT_TYPE_USER:
1037 			if (cred->cr_uidinfo == rule->rr_subject.rs_uip ||
1038 			    cred->cr_ruidinfo == rule->rr_subject.rs_uip)
1039 				break;
1040 			continue;
1041 		case RCTL_SUBJECT_TYPE_LOGINCLASS:
1042 			if (cred->cr_loginclass == rule->rr_subject.rs_loginclass)
1043 				break;
1044 			continue;
1045 		case RCTL_SUBJECT_TYPE_JAIL:
1046 			match = 0;
1047 			for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) {
1048 				if (pr->pr_prison_racct == rule->rr_subject.rs_prison_racct) {
1049 					match = 1;
1050 					break;
1051 				}
1052 			}
1053 			if (match)
1054 				break;
1055 			continue;
1056 		default:
1057 			panic("rctl_rule_add: unknown subject type %d",
1058 			    rule->rr_subject_type);
1059 		}
1060 
1061 		rctl_racct_add_rule(p->p_racct, rule);
1062 	}
1063 
1064 	return (0);
1065 }
1066 
1067 static void
1068 rctl_rule_remove_callback(struct racct *racct, void *arg2, void *arg3)
1069 {
1070 	struct rctl_rule *filter = (struct rctl_rule *)arg2;
1071 	int found = 0;
1072 
1073 	rw_wlock(&rctl_lock);
1074 	found += rctl_racct_remove_rules(racct, filter);
1075 	rw_wunlock(&rctl_lock);
1076 
1077 	*((int *)arg3) += found;
1078 }
1079 
1080 /*
1081  * Remove all rules that match the filter.
1082  */
1083 int
1084 rctl_rule_remove(struct rctl_rule *filter)
1085 {
1086 	int found = 0;
1087 	struct proc *p;
1088 
1089 	if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_PROCESS &&
1090 	    filter->rr_subject.rs_proc != NULL) {
1091 		p = filter->rr_subject.rs_proc;
1092 		rw_wlock(&rctl_lock);
1093 		found = rctl_racct_remove_rules(p->p_racct, filter);
1094 		rw_wunlock(&rctl_lock);
1095 		if (found)
1096 			return (0);
1097 		return (ESRCH);
1098 	}
1099 
1100 	loginclass_racct_foreach(rctl_rule_remove_callback, filter,
1101 	    (void *)&found);
1102 	ui_racct_foreach(rctl_rule_remove_callback, filter,
1103 	    (void *)&found);
1104 	prison_racct_foreach(rctl_rule_remove_callback, filter,
1105 	    (void *)&found);
1106 
1107 	sx_assert(&allproc_lock, SA_LOCKED);
1108 	rw_wlock(&rctl_lock);
1109 	FOREACH_PROC_IN_SYSTEM(p) {
1110 		found += rctl_racct_remove_rules(p->p_racct, filter);
1111 	}
1112 	rw_wunlock(&rctl_lock);
1113 
1114 	if (found)
1115 		return (0);
1116 	return (ESRCH);
1117 }
1118 
1119 /*
1120  * Appends a rule to the sbuf.
1121  */
1122 static void
1123 rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule)
1124 {
1125 	int64_t amount;
1126 
1127 	sbuf_printf(sb, "%s:", rctl_subject_type_name(rule->rr_subject_type));
1128 
1129 	switch (rule->rr_subject_type) {
1130 	case RCTL_SUBJECT_TYPE_PROCESS:
1131 		if (rule->rr_subject.rs_proc == NULL)
1132 			sbuf_printf(sb, ":");
1133 		else
1134 			sbuf_printf(sb, "%d:",
1135 			    rule->rr_subject.rs_proc->p_pid);
1136 		break;
1137 	case RCTL_SUBJECT_TYPE_USER:
1138 		if (rule->rr_subject.rs_uip == NULL)
1139 			sbuf_printf(sb, ":");
1140 		else
1141 			sbuf_printf(sb, "%d:",
1142 			    rule->rr_subject.rs_uip->ui_uid);
1143 		break;
1144 	case RCTL_SUBJECT_TYPE_LOGINCLASS:
1145 		if (rule->rr_subject.rs_loginclass == NULL)
1146 			sbuf_printf(sb, ":");
1147 		else
1148 			sbuf_printf(sb, "%s:",
1149 			    rule->rr_subject.rs_loginclass->lc_name);
1150 		break;
1151 	case RCTL_SUBJECT_TYPE_JAIL:
1152 		if (rule->rr_subject.rs_prison_racct == NULL)
1153 			sbuf_printf(sb, ":");
1154 		else
1155 			sbuf_printf(sb, "%s:",
1156 			    rule->rr_subject.rs_prison_racct->prr_name);
1157 		break;
1158 	default:
1159 		panic("rctl_rule_to_sbuf: unknown subject type %d",
1160 		    rule->rr_subject_type);
1161 	}
1162 
1163 	amount = rule->rr_amount;
1164 	if (amount != RCTL_AMOUNT_UNDEFINED &&
1165 	    RACCT_IS_IN_MILLIONS(rule->rr_resource))
1166 		amount /= 1000000;
1167 
1168 	sbuf_printf(sb, "%s:%s=%jd",
1169 	    rctl_resource_name(rule->rr_resource),
1170 	    rctl_action_name(rule->rr_action),
1171 	    amount);
1172 
1173 	if (rule->rr_per != rule->rr_subject_type)
1174 		sbuf_printf(sb, "/%s", rctl_subject_type_name(rule->rr_per));
1175 }
1176 
1177 /*
1178  * Routine used by RCTL syscalls to read in input string.
1179  */
1180 static int
1181 rctl_read_inbuf(char **inputstr, const char *inbufp, size_t inbuflen)
1182 {
1183 	int error;
1184 	char *str;
1185 
1186 	if (inbuflen <= 0)
1187 		return (EINVAL);
1188 	if (inbuflen > RCTL_MAX_INBUFLEN)
1189 		return (E2BIG);
1190 
1191 	str = malloc(inbuflen + 1, M_RCTL, M_WAITOK);
1192 	error = copyinstr(inbufp, str, inbuflen, NULL);
1193 	if (error != 0) {
1194 		free(str, M_RCTL);
1195 		return (error);
1196 	}
1197 
1198 	*inputstr = str;
1199 
1200 	return (0);
1201 }
1202 
1203 /*
1204  * Routine used by RCTL syscalls to write out output string.
1205  */
1206 static int
1207 rctl_write_outbuf(struct sbuf *outputsbuf, char *outbufp, size_t outbuflen)
1208 {
1209 	int error;
1210 
1211 	if (outputsbuf == NULL)
1212 		return (0);
1213 
1214 	sbuf_finish(outputsbuf);
1215 	if (outbuflen < sbuf_len(outputsbuf) + 1) {
1216 		sbuf_delete(outputsbuf);
1217 		return (ERANGE);
1218 	}
1219 	error = copyout(sbuf_data(outputsbuf), outbufp,
1220 	    sbuf_len(outputsbuf) + 1);
1221 	sbuf_delete(outputsbuf);
1222 	return (error);
1223 }
1224 
1225 static struct sbuf *
1226 rctl_racct_to_sbuf(struct racct *racct, int sloppy)
1227 {
1228 	int i;
1229 	int64_t amount;
1230 	struct sbuf *sb;
1231 
1232 	sb = sbuf_new_auto();
1233 	for (i = 0; i <= RACCT_MAX; i++) {
1234 		if (sloppy == 0 && RACCT_IS_SLOPPY(i))
1235 			continue;
1236 		amount = racct->r_resources[i];
1237 		if (RACCT_IS_IN_MILLIONS(i))
1238 			amount /= 1000000;
1239 		sbuf_printf(sb, "%s=%jd,", rctl_resource_name(i), amount);
1240 	}
1241 	sbuf_setpos(sb, sbuf_len(sb) - 1);
1242 	return (sb);
1243 }
1244 
1245 int
1246 sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap)
1247 {
1248 	int error;
1249 	char *inputstr;
1250 	struct rctl_rule *filter;
1251 	struct sbuf *outputsbuf = NULL;
1252 	struct proc *p;
1253 	struct uidinfo *uip;
1254 	struct loginclass *lc;
1255 	struct prison_racct *prr;
1256 
1257 	error = priv_check(td, PRIV_RCTL_GET_RACCT);
1258 	if (error != 0)
1259 		return (error);
1260 
1261 	error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1262 	if (error != 0)
1263 		return (error);
1264 
1265 	sx_slock(&allproc_lock);
1266 	error = rctl_string_to_rule(inputstr, &filter);
1267 	free(inputstr, M_RCTL);
1268 	if (error != 0) {
1269 		sx_sunlock(&allproc_lock);
1270 		return (error);
1271 	}
1272 
1273 	switch (filter->rr_subject_type) {
1274 	case RCTL_SUBJECT_TYPE_PROCESS:
1275 		p = filter->rr_subject.rs_proc;
1276 		if (p == NULL) {
1277 			error = EINVAL;
1278 			goto out;
1279 		}
1280 		outputsbuf = rctl_racct_to_sbuf(p->p_racct, 0);
1281 		break;
1282 	case RCTL_SUBJECT_TYPE_USER:
1283 		uip = filter->rr_subject.rs_uip;
1284 		if (uip == NULL) {
1285 			error = EINVAL;
1286 			goto out;
1287 		}
1288 		outputsbuf = rctl_racct_to_sbuf(uip->ui_racct, 1);
1289 		break;
1290 	case RCTL_SUBJECT_TYPE_LOGINCLASS:
1291 		lc = filter->rr_subject.rs_loginclass;
1292 		if (lc == NULL) {
1293 			error = EINVAL;
1294 			goto out;
1295 		}
1296 		outputsbuf = rctl_racct_to_sbuf(lc->lc_racct, 1);
1297 		break;
1298 	case RCTL_SUBJECT_TYPE_JAIL:
1299 		prr = filter->rr_subject.rs_prison_racct;
1300 		if (prr == NULL) {
1301 			error = EINVAL;
1302 			goto out;
1303 		}
1304 		outputsbuf = rctl_racct_to_sbuf(prr->prr_racct, 1);
1305 		break;
1306 	default:
1307 		error = EINVAL;
1308 	}
1309 out:
1310 	rctl_rule_release(filter);
1311 	sx_sunlock(&allproc_lock);
1312 	if (error != 0)
1313 		return (error);
1314 
1315 	error = rctl_write_outbuf(outputsbuf, uap->outbufp, uap->outbuflen);
1316 
1317 	return (error);
1318 }
1319 
1320 static void
1321 rctl_get_rules_callback(struct racct *racct, void *arg2, void *arg3)
1322 {
1323 	struct rctl_rule *filter = (struct rctl_rule *)arg2;
1324 	struct rctl_rule_link *link;
1325 	struct sbuf *sb = (struct sbuf *)arg3;
1326 
1327 	rw_rlock(&rctl_lock);
1328 	LIST_FOREACH(link, &racct->r_rule_links, rrl_next) {
1329 		if (!rctl_rule_matches(link->rrl_rule, filter))
1330 			continue;
1331 		rctl_rule_to_sbuf(sb, link->rrl_rule);
1332 		sbuf_printf(sb, ",");
1333 	}
1334 	rw_runlock(&rctl_lock);
1335 }
1336 
1337 int
1338 sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
1339 {
1340 	int error;
1341 	size_t bufsize = RCTL_DEFAULT_BUFSIZE;
1342 	char *inputstr, *buf;
1343 	struct sbuf *sb;
1344 	struct rctl_rule *filter;
1345 	struct rctl_rule_link *link;
1346 	struct proc *p;
1347 
1348 	error = priv_check(td, PRIV_RCTL_GET_RULES);
1349 	if (error != 0)
1350 		return (error);
1351 
1352 	error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1353 	if (error != 0)
1354 		return (error);
1355 
1356 	sx_slock(&allproc_lock);
1357 	error = rctl_string_to_rule(inputstr, &filter);
1358 	free(inputstr, M_RCTL);
1359 	if (error != 0) {
1360 		sx_sunlock(&allproc_lock);
1361 		return (error);
1362 	}
1363 
1364 again:
1365 	buf = malloc(bufsize, M_RCTL, M_WAITOK);
1366 	sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN);
1367 	KASSERT(sb != NULL, ("sbuf_new failed"));
1368 
1369 	sx_assert(&allproc_lock, SA_LOCKED);
1370 	FOREACH_PROC_IN_SYSTEM(p) {
1371 		rw_rlock(&rctl_lock);
1372 		LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1373 			/*
1374 			 * Non-process rules will be added to the buffer later.
1375 			 * Adding them here would result in duplicated output.
1376 			 */
1377 			if (link->rrl_rule->rr_subject_type !=
1378 			    RCTL_SUBJECT_TYPE_PROCESS)
1379 				continue;
1380 			if (!rctl_rule_matches(link->rrl_rule, filter))
1381 				continue;
1382 			rctl_rule_to_sbuf(sb, link->rrl_rule);
1383 			sbuf_printf(sb, ",");
1384 		}
1385 		rw_runlock(&rctl_lock);
1386 	}
1387 
1388 	loginclass_racct_foreach(rctl_get_rules_callback, filter, sb);
1389 	ui_racct_foreach(rctl_get_rules_callback, filter, sb);
1390 	prison_racct_foreach(rctl_get_rules_callback, filter, sb);
1391 	if (sbuf_error(sb) == ENOMEM) {
1392 		sbuf_delete(sb);
1393 		free(buf, M_RCTL);
1394 		bufsize *= 4;
1395 		goto again;
1396 	}
1397 
1398 	/*
1399 	 * Remove trailing ",".
1400 	 */
1401 	if (sbuf_len(sb) > 0)
1402 		sbuf_setpos(sb, sbuf_len(sb) - 1);
1403 
1404 	error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
1405 
1406 	rctl_rule_release(filter);
1407 	sx_sunlock(&allproc_lock);
1408 	free(buf, M_RCTL);
1409 	return (error);
1410 }
1411 
1412 int
1413 sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap)
1414 {
1415 	int error;
1416 	size_t bufsize = RCTL_DEFAULT_BUFSIZE;
1417 	char *inputstr, *buf;
1418 	struct sbuf *sb;
1419 	struct rctl_rule *filter;
1420 	struct rctl_rule_link *link;
1421 
1422 	error = priv_check(td, PRIV_RCTL_GET_LIMITS);
1423 	if (error != 0)
1424 		return (error);
1425 
1426 	error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1427 	if (error != 0)
1428 		return (error);
1429 
1430 	sx_slock(&allproc_lock);
1431 	error = rctl_string_to_rule(inputstr, &filter);
1432 	free(inputstr, M_RCTL);
1433 	if (error != 0) {
1434 		sx_sunlock(&allproc_lock);
1435 		return (error);
1436 	}
1437 
1438 	if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_UNDEFINED) {
1439 		rctl_rule_release(filter);
1440 		sx_sunlock(&allproc_lock);
1441 		return (EINVAL);
1442 	}
1443 	if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_PROCESS) {
1444 		rctl_rule_release(filter);
1445 		sx_sunlock(&allproc_lock);
1446 		return (EOPNOTSUPP);
1447 	}
1448 	if (filter->rr_subject.rs_proc == NULL) {
1449 		rctl_rule_release(filter);
1450 		sx_sunlock(&allproc_lock);
1451 		return (EINVAL);
1452 	}
1453 
1454 again:
1455 	buf = malloc(bufsize, M_RCTL, M_WAITOK);
1456 	sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN);
1457 	KASSERT(sb != NULL, ("sbuf_new failed"));
1458 
1459 	rw_rlock(&rctl_lock);
1460 	LIST_FOREACH(link, &filter->rr_subject.rs_proc->p_racct->r_rule_links,
1461 	    rrl_next) {
1462 		rctl_rule_to_sbuf(sb, link->rrl_rule);
1463 		sbuf_printf(sb, ",");
1464 	}
1465 	rw_runlock(&rctl_lock);
1466 	if (sbuf_error(sb) == ENOMEM) {
1467 		sbuf_delete(sb);
1468 		free(buf, M_RCTL);
1469 		bufsize *= 4;
1470 		goto again;
1471 	}
1472 
1473 	/*
1474 	 * Remove trailing ",".
1475 	 */
1476 	if (sbuf_len(sb) > 0)
1477 		sbuf_setpos(sb, sbuf_len(sb) - 1);
1478 
1479 	error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
1480 	rctl_rule_release(filter);
1481 	sx_sunlock(&allproc_lock);
1482 	free(buf, M_RCTL);
1483 	return (error);
1484 }
1485 
1486 int
1487 sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap)
1488 {
1489 	int error;
1490 	struct rctl_rule *rule;
1491 	char *inputstr;
1492 
1493 	error = priv_check(td, PRIV_RCTL_ADD_RULE);
1494 	if (error != 0)
1495 		return (error);
1496 
1497 	error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1498 	if (error != 0)
1499 		return (error);
1500 
1501 	sx_slock(&allproc_lock);
1502 	error = rctl_string_to_rule(inputstr, &rule);
1503 	free(inputstr, M_RCTL);
1504 	if (error != 0) {
1505 		sx_sunlock(&allproc_lock);
1506 		return (error);
1507 	}
1508 	/*
1509 	 * The 'per' part of a rule is optional.
1510 	 */
1511 	if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED &&
1512 	    rule->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED)
1513 		rule->rr_per = rule->rr_subject_type;
1514 
1515 	if (!rctl_rule_fully_specified(rule)) {
1516 		error = EINVAL;
1517 		goto out;
1518 	}
1519 
1520 	error = rctl_rule_add(rule);
1521 
1522 out:
1523 	rctl_rule_release(rule);
1524 	sx_sunlock(&allproc_lock);
1525 	return (error);
1526 }
1527 
1528 int
1529 sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap)
1530 {
1531 	int error;
1532 	struct rctl_rule *filter;
1533 	char *inputstr;
1534 
1535 	error = priv_check(td, PRIV_RCTL_REMOVE_RULE);
1536 	if (error != 0)
1537 		return (error);
1538 
1539 	error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1540 	if (error != 0)
1541 		return (error);
1542 
1543 	sx_slock(&allproc_lock);
1544 	error = rctl_string_to_rule(inputstr, &filter);
1545 	free(inputstr, M_RCTL);
1546 	if (error != 0) {
1547 		sx_sunlock(&allproc_lock);
1548 		return (error);
1549 	}
1550 
1551 	error = rctl_rule_remove(filter);
1552 	rctl_rule_release(filter);
1553 	sx_sunlock(&allproc_lock);
1554 
1555 	return (error);
1556 }
1557 
1558 /*
1559  * Update RCTL rule list after credential change.
1560  */
1561 void
1562 rctl_proc_ucred_changed(struct proc *p, struct ucred *newcred)
1563 {
1564 	int rulecnt, i;
1565 	struct rctl_rule_link *link, *newlink;
1566 	struct uidinfo *newuip;
1567 	struct loginclass *newlc;
1568 	struct prison_racct *newprr;
1569 	LIST_HEAD(, rctl_rule_link) newrules;
1570 
1571 	newuip = newcred->cr_ruidinfo;
1572 	newlc = newcred->cr_loginclass;
1573 	newprr = newcred->cr_prison->pr_prison_racct;
1574 
1575 	LIST_INIT(&newrules);
1576 
1577 again:
1578 	/*
1579 	 * First, count the rules that apply to the process with new
1580 	 * credentials.
1581 	 */
1582 	rulecnt = 0;
1583 	rw_rlock(&rctl_lock);
1584 	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1585 		if (link->rrl_rule->rr_subject_type ==
1586 		    RCTL_SUBJECT_TYPE_PROCESS)
1587 			rulecnt++;
1588 	}
1589 	LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next)
1590 		rulecnt++;
1591 	LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next)
1592 		rulecnt++;
1593 	LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next)
1594 		rulecnt++;
1595 	rw_runlock(&rctl_lock);
1596 
1597 	/*
1598 	 * Create temporary list.  We've dropped the rctl_lock in order
1599 	 * to use M_WAITOK.
1600 	 */
1601 	for (i = 0; i < rulecnt; i++) {
1602 		newlink = uma_zalloc(rctl_rule_link_zone, M_WAITOK);
1603 		newlink->rrl_rule = NULL;
1604 		LIST_INSERT_HEAD(&newrules, newlink, rrl_next);
1605 	}
1606 
1607 	newlink = LIST_FIRST(&newrules);
1608 
1609 	/*
1610 	 * Assign rules to the newly allocated list entries.
1611 	 */
1612 	rw_wlock(&rctl_lock);
1613 	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1614 		if (link->rrl_rule->rr_subject_type ==
1615 		    RCTL_SUBJECT_TYPE_PROCESS) {
1616 			if (newlink == NULL)
1617 				goto goaround;
1618 			rctl_rule_acquire(link->rrl_rule);
1619 			newlink->rrl_rule = link->rrl_rule;
1620 			newlink = LIST_NEXT(newlink, rrl_next);
1621 			rulecnt--;
1622 		}
1623 	}
1624 
1625 	LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next) {
1626 		if (newlink == NULL)
1627 			goto goaround;
1628 		rctl_rule_acquire(link->rrl_rule);
1629 		newlink->rrl_rule = link->rrl_rule;
1630 		newlink = LIST_NEXT(newlink, rrl_next);
1631 		rulecnt--;
1632 	}
1633 
1634 	LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next) {
1635 		if (newlink == NULL)
1636 			goto goaround;
1637 		rctl_rule_acquire(link->rrl_rule);
1638 		newlink->rrl_rule = link->rrl_rule;
1639 		newlink = LIST_NEXT(newlink, rrl_next);
1640 		rulecnt--;
1641 	}
1642 
1643 	LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next) {
1644 		if (newlink == NULL)
1645 			goto goaround;
1646 		rctl_rule_acquire(link->rrl_rule);
1647 		newlink->rrl_rule = link->rrl_rule;
1648 		newlink = LIST_NEXT(newlink, rrl_next);
1649 		rulecnt--;
1650 	}
1651 
1652 	if (rulecnt == 0) {
1653 		/*
1654 		 * Free the old rule list.
1655 		 */
1656 		while (!LIST_EMPTY(&p->p_racct->r_rule_links)) {
1657 			link = LIST_FIRST(&p->p_racct->r_rule_links);
1658 			LIST_REMOVE(link, rrl_next);
1659 			rctl_rule_release(link->rrl_rule);
1660 			uma_zfree(rctl_rule_link_zone, link);
1661 		}
1662 
1663 		/*
1664 		 * Replace lists and we're done.
1665 		 *
1666 		 * XXX: Is there any way to switch list heads instead
1667 		 *      of iterating here?
1668 		 */
1669 		while (!LIST_EMPTY(&newrules)) {
1670 			newlink = LIST_FIRST(&newrules);
1671 			LIST_REMOVE(newlink, rrl_next);
1672 			LIST_INSERT_HEAD(&p->p_racct->r_rule_links,
1673 			    newlink, rrl_next);
1674 		}
1675 
1676 		rw_wunlock(&rctl_lock);
1677 
1678 		return;
1679 	}
1680 
1681 goaround:
1682 	rw_wunlock(&rctl_lock);
1683 
1684 	/*
1685 	 * Rule list changed while we were not holding the rctl_lock.
1686 	 * Free the new list and try again.
1687 	 */
1688 	while (!LIST_EMPTY(&newrules)) {
1689 		newlink = LIST_FIRST(&newrules);
1690 		LIST_REMOVE(newlink, rrl_next);
1691 		if (newlink->rrl_rule != NULL)
1692 			rctl_rule_release(newlink->rrl_rule);
1693 		uma_zfree(rctl_rule_link_zone, newlink);
1694 	}
1695 
1696 	goto again;
1697 }
1698 
1699 /*
1700  * Assign RCTL rules to the newly created process.
1701  */
1702 int
1703 rctl_proc_fork(struct proc *parent, struct proc *child)
1704 {
1705 	int error;
1706 	struct rctl_rule_link *link;
1707 	struct rctl_rule *rule;
1708 
1709 	LIST_INIT(&child->p_racct->r_rule_links);
1710 
1711 	KASSERT(parent->p_racct != NULL, ("process without racct; p = %p", parent));
1712 
1713 	rw_wlock(&rctl_lock);
1714 
1715 	/*
1716 	 * Go through limits applicable to the parent and assign them
1717 	 * to the child.  Rules with 'process' subject have to be duplicated
1718 	 * in order to make their rr_subject point to the new process.
1719 	 */
1720 	LIST_FOREACH(link, &parent->p_racct->r_rule_links, rrl_next) {
1721 		if (link->rrl_rule->rr_subject_type ==
1722 		    RCTL_SUBJECT_TYPE_PROCESS) {
1723 			rule = rctl_rule_duplicate(link->rrl_rule, M_NOWAIT);
1724 			if (rule == NULL)
1725 				goto fail;
1726 			KASSERT(rule->rr_subject.rs_proc == parent,
1727 			    ("rule->rr_subject.rs_proc != parent"));
1728 			rule->rr_subject.rs_proc = child;
1729 			error = rctl_racct_add_rule_locked(child->p_racct,
1730 			    rule);
1731 			rctl_rule_release(rule);
1732 			if (error != 0)
1733 				goto fail;
1734 		} else {
1735 			error = rctl_racct_add_rule_locked(child->p_racct,
1736 			    link->rrl_rule);
1737 			if (error != 0)
1738 				goto fail;
1739 		}
1740 	}
1741 
1742 	rw_wunlock(&rctl_lock);
1743 	return (0);
1744 
1745 fail:
1746 	while (!LIST_EMPTY(&child->p_racct->r_rule_links)) {
1747 		link = LIST_FIRST(&child->p_racct->r_rule_links);
1748 		LIST_REMOVE(link, rrl_next);
1749 		rctl_rule_release(link->rrl_rule);
1750 		uma_zfree(rctl_rule_link_zone, link);
1751 	}
1752 	rw_wunlock(&rctl_lock);
1753 	return (EAGAIN);
1754 }
1755 
1756 /*
1757  * Release rules attached to the racct.
1758  */
1759 void
1760 rctl_racct_release(struct racct *racct)
1761 {
1762 	struct rctl_rule_link *link;
1763 
1764 	rw_wlock(&rctl_lock);
1765 	while (!LIST_EMPTY(&racct->r_rule_links)) {
1766 		link = LIST_FIRST(&racct->r_rule_links);
1767 		LIST_REMOVE(link, rrl_next);
1768 		rctl_rule_release(link->rrl_rule);
1769 		uma_zfree(rctl_rule_link_zone, link);
1770 	}
1771 	rw_wunlock(&rctl_lock);
1772 }
1773 
1774 static void
1775 rctl_init(void)
1776 {
1777 
1778 	rctl_rule_link_zone = uma_zcreate("rctl_rule_link",
1779 	    sizeof(struct rctl_rule_link), NULL, NULL, NULL, NULL,
1780 	    UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
1781 	rctl_rule_zone = uma_zcreate("rctl_rule", sizeof(struct rctl_rule),
1782 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
1783 }
1784 
1785 #else /* !RCTL */
1786 
1787 int
1788 sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap)
1789 {
1790 
1791 	return (ENOSYS);
1792 }
1793 
1794 int
1795 sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
1796 {
1797 
1798 	return (ENOSYS);
1799 }
1800 
1801 int
1802 sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap)
1803 {
1804 
1805 	return (ENOSYS);
1806 }
1807 
1808 int
1809 sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap)
1810 {
1811 
1812 	return (ENOSYS);
1813 }
1814 
1815 int
1816 sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap)
1817 {
1818 
1819 	return (ENOSYS);
1820 }
1821 
1822 #endif /* !RCTL */
1823