xref: /freebsd/sys/kern/kern_rctl.c (revision 7778ab7e0cc22f0824eb1d1047a7ef8b4785267a)
1 /*-
2  * Copyright (c) 2010 The FreeBSD Foundation
3  * All rights reserved.
4  *
5  * This software was developed by Edward Tomasz Napierala under sponsorship
6  * from the FreeBSD Foundation.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  * $FreeBSD$
30  */
31 
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34 
35 #include <sys/param.h>
36 #include <sys/bus.h>
37 #include <sys/malloc.h>
38 #include <sys/queue.h>
39 #include <sys/refcount.h>
40 #include <sys/jail.h>
41 #include <sys/kernel.h>
42 #include <sys/limits.h>
43 #include <sys/loginclass.h>
44 #include <sys/priv.h>
45 #include <sys/proc.h>
46 #include <sys/racct.h>
47 #include <sys/rctl.h>
48 #include <sys/resourcevar.h>
49 #include <sys/sx.h>
50 #include <sys/sysent.h>
51 #include <sys/sysproto.h>
52 #include <sys/systm.h>
53 #include <sys/types.h>
54 #include <sys/eventhandler.h>
55 #include <sys/lock.h>
56 #include <sys/mutex.h>
57 #include <sys/rwlock.h>
58 #include <sys/sbuf.h>
59 #include <sys/taskqueue.h>
60 #include <sys/tree.h>
61 #include <vm/uma.h>
62 
63 #ifdef RCTL
64 #ifndef RACCT
65 #error "The RCTL option requires the RACCT option"
66 #endif
67 
68 FEATURE(rctl, "Resource Limits");
69 
/*
 * HRF_* flag values.
 * NOTE(review): nothing in the visible part of this file uses them;
 * confirm whether they are still needed.
 */
#define	HRF_DEFAULT		0
#define	HRF_DONT_INHERIT	1
#define	HRF_DONT_ACCUMULATE	2

/* Default buffer size for rctl_get_rules(2). */
#define	RCTL_DEFAULT_BUFSIZE	4096

/*
 * Size of the fixed-length buffer rctl_enforce() formats a rule into
 * for its "log" and "devctl" actions.
 */
#define	RCTL_LOG_BUFSIZE	128
77 
/*
 * A 'rctl_rule_link' ties one rule to one racct the rule is relevant to.
 * A rule such as 'user:X:openfiles:deny=N/process', for instance, gets
 * one link on the uidinfo of user X and one more on every process
 * belonging to that user.
 */
struct rctl_rule_link {
	/* Linkage on the owning racct's r_rule_links list. */
	LIST_ENTRY(rctl_rule_link)	rrl_next;
	/* The rule itself; each link holds one reference on it. */
	struct rctl_rule		*rrl_rule;
	/*
	 * Set by rctl_enforce() once a log, devctl or signal action has
	 * fired for this link, and cleared again when the rule is no
	 * longer exceeded, so those actions fire only once per overrun.
	 */
	int				rrl_exceeded;
};
88 
/*
 * One entry of a keyword table: a textual name and the integer constant
 * it stands for.  The tables below end with an entry whose d_name is NULL.
 */
struct dict {
	const char	*d_name;	/* keyword as written in a rule */
	int		d_value;	/* matching numeric constant */
};
93 
94 static struct dict subjectnames[] = {
95 	{ "process", RCTL_SUBJECT_TYPE_PROCESS },
96 	{ "user", RCTL_SUBJECT_TYPE_USER },
97 	{ "loginclass", RCTL_SUBJECT_TYPE_LOGINCLASS },
98 	{ "jail", RCTL_SUBJECT_TYPE_JAIL },
99 	{ NULL, -1 }};
100 
101 static struct dict resourcenames[] = {
102 	{ "cputime", RACCT_CPU },
103 	{ "datasize", RACCT_DATA },
104 	{ "stacksize", RACCT_STACK },
105 	{ "coredumpsize", RACCT_CORE },
106 	{ "memoryuse", RACCT_RSS },
107 	{ "memorylocked", RACCT_MEMLOCK },
108 	{ "maxproc", RACCT_NPROC },
109 	{ "openfiles", RACCT_NOFILE },
110 	{ "vmemoryuse", RACCT_VMEM },
111 	{ "pseudoterminals", RACCT_NPTS },
112 	{ "swapuse", RACCT_SWAP },
113 	{ "nthr", RACCT_NTHR },
114 	{ "msgqqueued", RACCT_MSGQQUEUED },
115 	{ "msgqsize", RACCT_MSGQSIZE },
116 	{ "nmsgq", RACCT_NMSGQ },
117 	{ "nsem", RACCT_NSEM },
118 	{ "nsemop", RACCT_NSEMOP },
119 	{ "nshm", RACCT_NSHM },
120 	{ "shmsize", RACCT_SHMSIZE },
121 	{ "wallclock", RACCT_WALLCLOCK },
122 	{ NULL, -1 }};
123 
124 static struct dict actionnames[] = {
125 	{ "sighup", RCTL_ACTION_SIGHUP },
126 	{ "sigint", RCTL_ACTION_SIGINT },
127 	{ "sigquit", RCTL_ACTION_SIGQUIT },
128 	{ "sigill", RCTL_ACTION_SIGILL },
129 	{ "sigtrap", RCTL_ACTION_SIGTRAP },
130 	{ "sigabrt", RCTL_ACTION_SIGABRT },
131 	{ "sigemt", RCTL_ACTION_SIGEMT },
132 	{ "sigfpe", RCTL_ACTION_SIGFPE },
133 	{ "sigkill", RCTL_ACTION_SIGKILL },
134 	{ "sigbus", RCTL_ACTION_SIGBUS },
135 	{ "sigsegv", RCTL_ACTION_SIGSEGV },
136 	{ "sigsys", RCTL_ACTION_SIGSYS },
137 	{ "sigpipe", RCTL_ACTION_SIGPIPE },
138 	{ "sigalrm", RCTL_ACTION_SIGALRM },
139 	{ "sigterm", RCTL_ACTION_SIGTERM },
140 	{ "sigurg", RCTL_ACTION_SIGURG },
141 	{ "sigstop", RCTL_ACTION_SIGSTOP },
142 	{ "sigtstp", RCTL_ACTION_SIGTSTP },
143 	{ "sigchld", RCTL_ACTION_SIGCHLD },
144 	{ "sigttin", RCTL_ACTION_SIGTTIN },
145 	{ "sigttou", RCTL_ACTION_SIGTTOU },
146 	{ "sigio", RCTL_ACTION_SIGIO },
147 	{ "sigxcpu", RCTL_ACTION_SIGXCPU },
148 	{ "sigxfsz", RCTL_ACTION_SIGXFSZ },
149 	{ "sigvtalrm", RCTL_ACTION_SIGVTALRM },
150 	{ "sigprof", RCTL_ACTION_SIGPROF },
151 	{ "sigwinch", RCTL_ACTION_SIGWINCH },
152 	{ "siginfo", RCTL_ACTION_SIGINFO },
153 	{ "sigusr1", RCTL_ACTION_SIGUSR1 },
154 	{ "sigusr2", RCTL_ACTION_SIGUSR2 },
155 	{ "sigthr", RCTL_ACTION_SIGTHR },
156 	{ "deny", RCTL_ACTION_DENY },
157 	{ "log", RCTL_ACTION_LOG },
158 	{ "devctl", RCTL_ACTION_DEVCTL },
159 	{ NULL, -1 }};
160 
161 static void rctl_init(void);
162 SYSINIT(rctl, SI_SUB_RACCT, SI_ORDER_FIRST, rctl_init, NULL);
163 
164 static uma_zone_t rctl_rule_link_zone;
165 static uma_zone_t rctl_rule_zone;
166 static struct rwlock rctl_lock;
167 RW_SYSINIT(rctl_lock, &rctl_lock, "RCTL lock");
168 
169 static int rctl_rule_fully_specified(const struct rctl_rule *rule);
170 static void rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule);
171 
172 MALLOC_DEFINE(M_RCTL, "rctl", "Resource Limits");
173 
174 static const char *
175 rctl_subject_type_name(int subject)
176 {
177 	int i;
178 
179 	for (i = 0; subjectnames[i].d_name != NULL; i++) {
180 		if (subjectnames[i].d_value == subject)
181 			return (subjectnames[i].d_name);
182 	}
183 
184 	panic("rctl_subject_type_name: unknown subject type %d", subject);
185 }
186 
187 static const char *
188 rctl_action_name(int action)
189 {
190 	int i;
191 
192 	for (i = 0; actionnames[i].d_name != NULL; i++) {
193 		if (actionnames[i].d_value == action)
194 			return (actionnames[i].d_name);
195 	}
196 
197 	panic("rctl_action_name: unknown action %d", action);
198 }
199 
200 const char *
201 rctl_resource_name(int resource)
202 {
203 	int i;
204 
205 	for (i = 0; resourcenames[i].d_name != NULL; i++) {
206 		if (resourcenames[i].d_value == resource)
207 			return (resourcenames[i].d_name);
208 	}
209 
210 	panic("rctl_resource_name: unknown resource %d", resource);
211 }
212 
213 /*
214  * Return the amount of resource that can be allocated by 'p' before
215  * hitting 'rule'.
216  */
217 static int64_t
218 rctl_available_resource(const struct proc *p, const struct rctl_rule *rule)
219 {
220 	int resource;
221 	int64_t available = INT64_MAX;
222 	struct ucred *cred = p->p_ucred;
223 
224 	rw_assert(&rctl_lock, RA_LOCKED);
225 
226 	resource = rule->rr_resource;
227 	switch (rule->rr_per) {
228 	case RCTL_SUBJECT_TYPE_PROCESS:
229 		available = rule->rr_amount -
230 		    p->p_racct->r_resources[resource];
231 		break;
232 	case RCTL_SUBJECT_TYPE_USER:
233 		available = rule->rr_amount -
234 		    cred->cr_ruidinfo->ui_racct->r_resources[resource];
235 		break;
236 	case RCTL_SUBJECT_TYPE_LOGINCLASS:
237 		available = rule->rr_amount -
238 		    cred->cr_loginclass->lc_racct->r_resources[resource];
239 		break;
240 	case RCTL_SUBJECT_TYPE_JAIL:
241 		available = rule->rr_amount -
242 		    cred->cr_prison->pr_prison_racct->prr_racct->
243 		        r_resources[resource];
244 		break;
245 	default:
246 		panic("rctl_compute_available: unknown per %d",
247 		    rule->rr_per);
248 	}
249 
250 	return (available);
251 }
252 
253 /*
254  * Return non-zero if allocating 'amount' by proc 'p' would exceed
255  * resource limit specified by 'rule'.
256  */
257 static int
258 rctl_would_exceed(const struct proc *p, const struct rctl_rule *rule,
259     int64_t amount)
260 {
261 	int64_t available;
262 
263 	rw_assert(&rctl_lock, RA_LOCKED);
264 
265 	available = rctl_available_resource(p, rule);
266 	if (available >= amount)
267 		return (0);
268 
269 	return (1);
270 }
271 
272 /*
273  * Check whether the proc 'p' can allocate 'amount' of 'resource' in addition
274  * to what it keeps allocated now.  Returns non-zero if the allocation should
275  * be denied, 0 otherwise.
276  */
277 int
278 rctl_enforce(struct proc *p, int resource, uint64_t amount)
279 {
280 	struct rctl_rule *rule;
281 	struct rctl_rule_link *link;
282 	struct sbuf sb;
283 	int should_deny = 0;
284 	char *buf;
285 	static int curtime = 0;
286 	static struct timeval lasttime;
287 
288 	rw_rlock(&rctl_lock);
289 
290 	/*
291 	 * There may be more than one matching rule; go through all of them.
292 	 * Denial should be done last, after logging and sending signals.
293 	 */
294 	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
295 		rule = link->rrl_rule;
296 		if (rule->rr_resource != resource)
297 			continue;
298 		if (!rctl_would_exceed(p, rule, amount)) {
299 			link->rrl_exceeded = 0;
300 			continue;
301 		}
302 
303 		switch (rule->rr_action) {
304 		case RCTL_ACTION_DENY:
305 			should_deny = 1;
306 			continue;
307 		case RCTL_ACTION_LOG:
308 			/*
309 			 * If rrl_exceeded != 0, it means we've already
310 			 * logged a warning for this process.
311 			 */
312 			if (link->rrl_exceeded != 0)
313 				continue;
314 
315 			if (!ppsratecheck(&lasttime, &curtime, 10))
316 				continue;
317 
318 			buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
319 			if (buf == NULL) {
320 				printf("rctl_enforce: out of memory\n");
321 				continue;
322 			}
323 			sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
324 			rctl_rule_to_sbuf(&sb, rule);
325 			sbuf_finish(&sb);
326 			printf("rctl: rule \"%s\" matched by pid %d "
327 			    "(%s), uid %d, jail %s\n", sbuf_data(&sb),
328 			    p->p_pid, p->p_comm, p->p_ucred->cr_uid,
329 			    p->p_ucred->cr_prison->pr_prison_racct->prr_name);
330 			sbuf_delete(&sb);
331 			free(buf, M_RCTL);
332 			link->rrl_exceeded = 1;
333 			continue;
334 		case RCTL_ACTION_DEVCTL:
335 			if (link->rrl_exceeded != 0)
336 				continue;
337 
338 			buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
339 			if (buf == NULL) {
340 				printf("rctl_enforce: out of memory\n");
341 				continue;
342 			}
343 			sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
344 			sbuf_printf(&sb, "rule=");
345 			rctl_rule_to_sbuf(&sb, rule);
346 			sbuf_printf(&sb, " pid=%d ruid=%d jail=%s",
347 			    p->p_pid, p->p_ucred->cr_ruid,
348 			    p->p_ucred->cr_prison->pr_prison_racct->prr_name);
349 			sbuf_finish(&sb);
350 			devctl_notify_f("RCTL", "rule", "matched",
351 			    sbuf_data(&sb), M_NOWAIT);
352 			sbuf_delete(&sb);
353 			free(buf, M_RCTL);
354 			link->rrl_exceeded = 1;
355 			continue;
356 		default:
357 			if (link->rrl_exceeded != 0)
358 				continue;
359 
360 			KASSERT(rule->rr_action > 0 &&
361 			    rule->rr_action <= RCTL_ACTION_SIGNAL_MAX,
362 			    ("rctl_enforce: unknown action %d",
363 			     rule->rr_action));
364 
365 			/*
366 			 * We're supposed to send a signal, but the process
367 			 * is not fully initialized yet, probably because we
368 			 * got called from fork1().  For now just deny the
369 			 * allocation instead.
370 			 */
371 			if (p->p_state != PRS_NORMAL) {
372 				should_deny = 1;
373 				continue;
374 			}
375 
376 			/*
377 			 * We're using the fact that RCTL_ACTION_SIG* values
378 			 * are equal to their counterparts from sys/signal.h.
379 			 */
380 			kern_psignal(p, rule->rr_action);
381 			link->rrl_exceeded = 1;
382 			continue;
383 		}
384 	}
385 
386 	rw_runlock(&rctl_lock);
387 
388 	if (should_deny) {
389 		/*
390 		 * Return fake error code; the caller should change it
391 		 * into one proper for the situation - EFSIZ, ENOMEM etc.
392 		 */
393 		return (EDOOFUS);
394 	}
395 
396 	return (0);
397 }
398 
399 uint64_t
400 rctl_get_limit(struct proc *p, int resource)
401 {
402 	struct rctl_rule *rule;
403 	struct rctl_rule_link *link;
404 	uint64_t amount = UINT64_MAX;
405 
406 	rw_rlock(&rctl_lock);
407 
408 	/*
409 	 * There may be more than one matching rule; go through all of them.
410 	 * Denial should be done last, after logging and sending signals.
411 	 */
412 	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
413 		rule = link->rrl_rule;
414 		if (rule->rr_resource != resource)
415 			continue;
416 		if (rule->rr_action != RCTL_ACTION_DENY)
417 			continue;
418 		if (rule->rr_amount < amount)
419 			amount = rule->rr_amount;
420 	}
421 
422 	rw_runlock(&rctl_lock);
423 
424 	return (amount);
425 }
426 
427 uint64_t
428 rctl_get_available(struct proc *p, int resource)
429 {
430 	struct rctl_rule *rule;
431 	struct rctl_rule_link *link;
432 	int64_t available, minavailable, allocated;
433 
434 	minavailable = INT64_MAX;
435 
436 	rw_rlock(&rctl_lock);
437 
438 	/*
439 	 * There may be more than one matching rule; go through all of them.
440 	 * Denial should be done last, after logging and sending signals.
441 	 */
442 	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
443 		rule = link->rrl_rule;
444 		if (rule->rr_resource != resource)
445 			continue;
446 		if (rule->rr_action != RCTL_ACTION_DENY)
447 			continue;
448 		available = rctl_available_resource(p, rule);
449 		if (available < minavailable)
450 			minavailable = available;
451 	}
452 
453 	rw_runlock(&rctl_lock);
454 
455 	/*
456 	 * XXX: Think about this _hard_.
457 	 */
458 	allocated = p->p_racct->r_resources[resource];
459 	if (minavailable < INT64_MAX - allocated)
460 		minavailable += allocated;
461 	if (minavailable < 0)
462 		minavailable = 0;
463 	return (minavailable);
464 }
465 
466 static int
467 rctl_rule_matches(const struct rctl_rule *rule, const struct rctl_rule *filter)
468 {
469 
470 	if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED) {
471 		if (rule->rr_subject_type != filter->rr_subject_type)
472 			return (0);
473 
474 		switch (filter->rr_subject_type) {
475 		case RCTL_SUBJECT_TYPE_PROCESS:
476 			if (filter->rr_subject.rs_proc != NULL &&
477 			    rule->rr_subject.rs_proc !=
478 			    filter->rr_subject.rs_proc)
479 				return (0);
480 			break;
481 		case RCTL_SUBJECT_TYPE_USER:
482 			if (filter->rr_subject.rs_uip != NULL &&
483 			    rule->rr_subject.rs_uip !=
484 			    filter->rr_subject.rs_uip)
485 				return (0);
486 			break;
487 		case RCTL_SUBJECT_TYPE_LOGINCLASS:
488 			if (filter->rr_subject.rs_loginclass != NULL &&
489 			    rule->rr_subject.rs_loginclass !=
490 			    filter->rr_subject.rs_loginclass)
491 				return (0);
492 			break;
493 		case RCTL_SUBJECT_TYPE_JAIL:
494 			if (filter->rr_subject.rs_prison_racct != NULL &&
495 			    rule->rr_subject.rs_prison_racct !=
496 			    filter->rr_subject.rs_prison_racct)
497 				return (0);
498 			break;
499 		default:
500 			panic("rctl_rule_matches: unknown subject type %d",
501 			    filter->rr_subject_type);
502 		}
503 	}
504 
505 	if (filter->rr_resource != RACCT_UNDEFINED) {
506 		if (rule->rr_resource != filter->rr_resource)
507 			return (0);
508 	}
509 
510 	if (filter->rr_action != RCTL_ACTION_UNDEFINED) {
511 		if (rule->rr_action != filter->rr_action)
512 			return (0);
513 	}
514 
515 	if (filter->rr_amount != RCTL_AMOUNT_UNDEFINED) {
516 		if (rule->rr_amount != filter->rr_amount)
517 			return (0);
518 	}
519 
520 	if (filter->rr_per != RCTL_SUBJECT_TYPE_UNDEFINED) {
521 		if (rule->rr_per != filter->rr_per)
522 			return (0);
523 	}
524 
525 	return (1);
526 }
527 
528 static int
529 str2value(const char *str, int *value, struct dict *table)
530 {
531 	int i;
532 
533 	if (value == NULL)
534 		return (EINVAL);
535 
536 	for (i = 0; table[i].d_name != NULL; i++) {
537 		if (strcasecmp(table[i].d_name, str) == 0) {
538 			*value =  table[i].d_value;
539 			return (0);
540 		}
541 	}
542 
543 	return (EINVAL);
544 }
545 
546 static int
547 str2id(const char *str, id_t *value)
548 {
549 	char *end;
550 
551 	if (str == NULL)
552 		return (EINVAL);
553 
554 	*value = strtoul(str, &end, 10);
555 	if ((size_t)(end - str) != strlen(str))
556 		return (EINVAL);
557 
558 	return (0);
559 }
560 
561 static int
562 str2int64(const char *str, int64_t *value)
563 {
564 	char *end;
565 
566 	if (str == NULL)
567 		return (EINVAL);
568 
569 	*value = strtoul(str, &end, 10);
570 	if ((size_t)(end - str) != strlen(str))
571 		return (EINVAL);
572 
573 	return (0);
574 }
575 
576 /*
577  * Connect the rule to the racct, increasing refcount for the rule.
578  */
579 static void
580 rctl_racct_add_rule(struct racct *racct, struct rctl_rule *rule)
581 {
582 	struct rctl_rule_link *link;
583 
584 	KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
585 
586 	rctl_rule_acquire(rule);
587 	link = uma_zalloc(rctl_rule_link_zone, M_WAITOK);
588 	link->rrl_rule = rule;
589 	link->rrl_exceeded = 0;
590 
591 	rw_wlock(&rctl_lock);
592 	LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next);
593 	rw_wunlock(&rctl_lock);
594 }
595 
596 static int
597 rctl_racct_add_rule_locked(struct racct *racct, struct rctl_rule *rule)
598 {
599 	struct rctl_rule_link *link;
600 
601 	KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
602 	rw_assert(&rctl_lock, RA_WLOCKED);
603 
604 	link = uma_zalloc(rctl_rule_link_zone, M_NOWAIT);
605 	if (link == NULL)
606 		return (ENOMEM);
607 	rctl_rule_acquire(rule);
608 	link->rrl_rule = rule;
609 	link->rrl_exceeded = 0;
610 
611 	LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next);
612 	return (0);
613 }
614 
615 /*
616  * Remove limits for a rules matching the filter and release
617  * the refcounts for the rules, possibly freeing them.  Returns
618  * the number of limit structures removed.
619  */
620 static int
621 rctl_racct_remove_rules(struct racct *racct,
622     const struct rctl_rule *filter)
623 {
624 	int removed = 0;
625 	struct rctl_rule_link *link, *linktmp;
626 
627 	rw_assert(&rctl_lock, RA_WLOCKED);
628 
629 	LIST_FOREACH_SAFE(link, &racct->r_rule_links, rrl_next, linktmp) {
630 		if (!rctl_rule_matches(link->rrl_rule, filter))
631 			continue;
632 
633 		LIST_REMOVE(link, rrl_next);
634 		rctl_rule_release(link->rrl_rule);
635 		uma_zfree(rctl_rule_link_zone, link);
636 		removed++;
637 	}
638 	return (removed);
639 }
640 
641 static void
642 rctl_rule_acquire_subject(struct rctl_rule *rule)
643 {
644 
645 	switch (rule->rr_subject_type) {
646 	case RCTL_SUBJECT_TYPE_UNDEFINED:
647 	case RCTL_SUBJECT_TYPE_PROCESS:
648 		break;
649 	case RCTL_SUBJECT_TYPE_JAIL:
650 		if (rule->rr_subject.rs_prison_racct != NULL)
651 			prison_racct_hold(rule->rr_subject.rs_prison_racct);
652 		break;
653 	case RCTL_SUBJECT_TYPE_USER:
654 		if (rule->rr_subject.rs_uip != NULL)
655 			uihold(rule->rr_subject.rs_uip);
656 		break;
657 	case RCTL_SUBJECT_TYPE_LOGINCLASS:
658 		if (rule->rr_subject.rs_loginclass != NULL)
659 			loginclass_hold(rule->rr_subject.rs_loginclass);
660 		break;
661 	default:
662 		panic("rctl_rule_acquire_subject: unknown subject type %d",
663 		    rule->rr_subject_type);
664 	}
665 }
666 
667 static void
668 rctl_rule_release_subject(struct rctl_rule *rule)
669 {
670 
671 	switch (rule->rr_subject_type) {
672 	case RCTL_SUBJECT_TYPE_UNDEFINED:
673 	case RCTL_SUBJECT_TYPE_PROCESS:
674 		break;
675 	case RCTL_SUBJECT_TYPE_JAIL:
676 		if (rule->rr_subject.rs_prison_racct != NULL)
677 			prison_racct_free(rule->rr_subject.rs_prison_racct);
678 		break;
679 	case RCTL_SUBJECT_TYPE_USER:
680 		if (rule->rr_subject.rs_uip != NULL)
681 			uifree(rule->rr_subject.rs_uip);
682 		break;
683 	case RCTL_SUBJECT_TYPE_LOGINCLASS:
684 		if (rule->rr_subject.rs_loginclass != NULL)
685 			loginclass_free(rule->rr_subject.rs_loginclass);
686 		break;
687 	default:
688 		panic("rctl_rule_release_subject: unknown subject type %d",
689 		    rule->rr_subject_type);
690 	}
691 }
692 
693 struct rctl_rule *
694 rctl_rule_alloc(int flags)
695 {
696 	struct rctl_rule *rule;
697 
698 	rule = uma_zalloc(rctl_rule_zone, flags);
699 	if (rule == NULL)
700 		return (NULL);
701 	rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
702 	rule->rr_subject.rs_proc = NULL;
703 	rule->rr_subject.rs_uip = NULL;
704 	rule->rr_subject.rs_loginclass = NULL;
705 	rule->rr_subject.rs_prison_racct = NULL;
706 	rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED;
707 	rule->rr_resource = RACCT_UNDEFINED;
708 	rule->rr_action = RCTL_ACTION_UNDEFINED;
709 	rule->rr_amount = RCTL_AMOUNT_UNDEFINED;
710 	refcount_init(&rule->rr_refcount, 1);
711 
712 	return (rule);
713 }
714 
715 struct rctl_rule *
716 rctl_rule_duplicate(const struct rctl_rule *rule, int flags)
717 {
718 	struct rctl_rule *copy;
719 
720 	copy = uma_zalloc(rctl_rule_zone, flags);
721 	if (copy == NULL)
722 		return (NULL);
723 	copy->rr_subject_type = rule->rr_subject_type;
724 	copy->rr_subject.rs_proc = rule->rr_subject.rs_proc;
725 	copy->rr_subject.rs_uip = rule->rr_subject.rs_uip;
726 	copy->rr_subject.rs_loginclass = rule->rr_subject.rs_loginclass;
727 	copy->rr_subject.rs_prison_racct = rule->rr_subject.rs_prison_racct;
728 	copy->rr_per = rule->rr_per;
729 	copy->rr_resource = rule->rr_resource;
730 	copy->rr_action = rule->rr_action;
731 	copy->rr_amount = rule->rr_amount;
732 	refcount_init(&copy->rr_refcount, 1);
733 	rctl_rule_acquire_subject(copy);
734 
735 	return (copy);
736 }
737 
738 void
739 rctl_rule_acquire(struct rctl_rule *rule)
740 {
741 
742 	KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
743 
744 	refcount_acquire(&rule->rr_refcount);
745 }
746 
747 static void
748 rctl_rule_free(void *context, int pending)
749 {
750 	struct rctl_rule *rule;
751 
752 	rule = (struct rctl_rule *)context;
753 
754 	KASSERT(rule->rr_refcount == 0, ("rule->rr_refcount != 0"));
755 
756 	/*
757 	 * We don't need locking here; rule is guaranteed to be inaccessible.
758 	 */
759 
760 	rctl_rule_release_subject(rule);
761 	uma_zfree(rctl_rule_zone, rule);
762 }
763 
764 void
765 rctl_rule_release(struct rctl_rule *rule)
766 {
767 
768 	KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
769 
770 	if (refcount_release(&rule->rr_refcount)) {
771 		/*
772 		 * rctl_rule_release() is often called when iterating
773 		 * over all the uidinfo structures in the system,
774 		 * holding uihashtbl_lock.  Since rctl_rule_free()
775 		 * might end up calling uifree(), this would lead
776 		 * to lock recursion.  Use taskqueue to avoid this.
777 		 */
778 		TASK_INIT(&rule->rr_task, 0, rctl_rule_free, rule);
779 		taskqueue_enqueue(taskqueue_thread, &rule->rr_task);
780 	}
781 }
782 
783 static int
784 rctl_rule_fully_specified(const struct rctl_rule *rule)
785 {
786 
787 	switch (rule->rr_subject_type) {
788 	case RCTL_SUBJECT_TYPE_UNDEFINED:
789 		return (0);
790 	case RCTL_SUBJECT_TYPE_PROCESS:
791 		if (rule->rr_subject.rs_proc == NULL)
792 			return (0);
793 		break;
794 	case RCTL_SUBJECT_TYPE_USER:
795 		if (rule->rr_subject.rs_uip == NULL)
796 			return (0);
797 		break;
798 	case RCTL_SUBJECT_TYPE_LOGINCLASS:
799 		if (rule->rr_subject.rs_loginclass == NULL)
800 			return (0);
801 		break;
802 	case RCTL_SUBJECT_TYPE_JAIL:
803 		if (rule->rr_subject.rs_prison_racct == NULL)
804 			return (0);
805 		break;
806 	default:
807 		panic("rctl_rule_fully_specified: unknown subject type %d",
808 		    rule->rr_subject_type);
809 	}
810 	if (rule->rr_resource == RACCT_UNDEFINED)
811 		return (0);
812 	if (rule->rr_action == RCTL_ACTION_UNDEFINED)
813 		return (0);
814 	if (rule->rr_amount == RCTL_AMOUNT_UNDEFINED)
815 		return (0);
816 	if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED)
817 		return (0);
818 
819 	return (1);
820 }
821 
822 static int
823 rctl_string_to_rule(char *rulestr, struct rctl_rule **rulep)
824 {
825 	int error = 0;
826 	char *subjectstr, *subject_idstr, *resourcestr, *actionstr,
827 	     *amountstr, *perstr;
828 	struct rctl_rule *rule;
829 	id_t id;
830 
831 	rule = rctl_rule_alloc(M_WAITOK);
832 
833 	subjectstr = strsep(&rulestr, ":");
834 	subject_idstr = strsep(&rulestr, ":");
835 	resourcestr = strsep(&rulestr, ":");
836 	actionstr = strsep(&rulestr, "=/");
837 	amountstr = strsep(&rulestr, "/");
838 	perstr = rulestr;
839 
840 	if (subjectstr == NULL || subjectstr[0] == '\0')
841 		rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
842 	else {
843 		error = str2value(subjectstr, &rule->rr_subject_type, subjectnames);
844 		if (error != 0)
845 			goto out;
846 	}
847 
848 	if (subject_idstr == NULL || subject_idstr[0] == '\0') {
849 		rule->rr_subject.rs_proc = NULL;
850 		rule->rr_subject.rs_uip = NULL;
851 		rule->rr_subject.rs_loginclass = NULL;
852 		rule->rr_subject.rs_prison_racct = NULL;
853 	} else {
854 		switch (rule->rr_subject_type) {
855 		case RCTL_SUBJECT_TYPE_UNDEFINED:
856 			error = EINVAL;
857 			goto out;
858 		case RCTL_SUBJECT_TYPE_PROCESS:
859 			error = str2id(subject_idstr, &id);
860 			if (error != 0)
861 				goto out;
862 			sx_assert(&allproc_lock, SA_LOCKED);
863 			rule->rr_subject.rs_proc = pfind(id);
864 			if (rule->rr_subject.rs_proc == NULL) {
865 				error = ESRCH;
866 				goto out;
867 			}
868 			PROC_UNLOCK(rule->rr_subject.rs_proc);
869 			break;
870 		case RCTL_SUBJECT_TYPE_USER:
871 			error = str2id(subject_idstr, &id);
872 			if (error != 0)
873 				goto out;
874 			rule->rr_subject.rs_uip = uifind(id);
875 			break;
876 		case RCTL_SUBJECT_TYPE_LOGINCLASS:
877 			rule->rr_subject.rs_loginclass =
878 			    loginclass_find(subject_idstr);
879 			if (rule->rr_subject.rs_loginclass == NULL) {
880 				error = ENAMETOOLONG;
881 				goto out;
882 			}
883 			break;
884 		case RCTL_SUBJECT_TYPE_JAIL:
885 			rule->rr_subject.rs_prison_racct =
886 			    prison_racct_find(subject_idstr);
887 			if (rule->rr_subject.rs_prison_racct == NULL) {
888 				error = ENAMETOOLONG;
889 				goto out;
890 			}
891 			break;
892                default:
893                        panic("rctl_string_to_rule: unknown subject type %d",
894                            rule->rr_subject_type);
895                }
896 	}
897 
898 	if (resourcestr == NULL || resourcestr[0] == '\0')
899 		rule->rr_resource = RACCT_UNDEFINED;
900 	else {
901 		error = str2value(resourcestr, &rule->rr_resource,
902 		    resourcenames);
903 		if (error != 0)
904 			goto out;
905 	}
906 
907 	if (actionstr == NULL || actionstr[0] == '\0')
908 		rule->rr_action = RCTL_ACTION_UNDEFINED;
909 	else {
910 		error = str2value(actionstr, &rule->rr_action, actionnames);
911 		if (error != 0)
912 			goto out;
913 	}
914 
915 	if (amountstr == NULL || amountstr[0] == '\0')
916 		rule->rr_amount = RCTL_AMOUNT_UNDEFINED;
917 	else {
918 		error = str2int64(amountstr, &rule->rr_amount);
919 		if (error != 0)
920 			goto out;
921 		if (RACCT_IS_IN_MILLIONS(rule->rr_resource))
922 			rule->rr_amount *= 1000000;
923 	}
924 
925 	if (perstr == NULL || perstr[0] == '\0')
926 		rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED;
927 	else {
928 		error = str2value(perstr, &rule->rr_per, subjectnames);
929 		if (error != 0)
930 			goto out;
931 	}
932 
933 out:
934 	if (error == 0)
935 		*rulep = rule;
936 	else
937 		rctl_rule_release(rule);
938 
939 	return (error);
940 }
941 
942 /*
943  * Link a rule with all the subjects it applies to.
944  */
945 int
946 rctl_rule_add(struct rctl_rule *rule)
947 {
948 	struct proc *p;
949 	struct ucred *cred;
950 	struct uidinfo *uip;
951 	struct prison *pr;
952 	struct prison_racct *prr;
953 	struct loginclass *lc;
954 	struct rctl_rule *rule2;
955 	int match;
956 
957 	KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
958 
959 	/*
960 	 * Some rules just don't make sense.  Note that the one below
961 	 * cannot be rewritten using RACCT_IS_DENIABLE(); the RACCT_PCTCPU,
962 	 * for example, is not deniable in the racct sense, but the
963 	 * limit is enforced in a different way, so "deny" rules for %CPU
964 	 * do make sense.
965 	 */
966 	if (rule->rr_action == RCTL_ACTION_DENY &&
967 	    (rule->rr_resource == RACCT_CPU ||
968 	    rule->rr_resource == RACCT_WALLCLOCK))
969 		return (EOPNOTSUPP);
970 
971 	if (rule->rr_per == RCTL_SUBJECT_TYPE_PROCESS &&
972 	    RACCT_IS_SLOPPY(rule->rr_resource))
973 		return (EOPNOTSUPP);
974 
975 	/*
976 	 * Make sure there are no duplicated rules.  Also, for the "deny"
977 	 * rules, remove ones differing only by "amount".
978 	 */
979 	if (rule->rr_action == RCTL_ACTION_DENY) {
980 		rule2 = rctl_rule_duplicate(rule, M_WAITOK);
981 		rule2->rr_amount = RCTL_AMOUNT_UNDEFINED;
982 		rctl_rule_remove(rule2);
983 		rctl_rule_release(rule2);
984 	} else
985 		rctl_rule_remove(rule);
986 
987 	switch (rule->rr_subject_type) {
988 	case RCTL_SUBJECT_TYPE_PROCESS:
989 		p = rule->rr_subject.rs_proc;
990 		KASSERT(p != NULL, ("rctl_rule_add: NULL proc"));
991 		/*
992 		 * No resource limits for system processes.
993 		 */
994 		if (p->p_flag & P_SYSTEM)
995 			return (EPERM);
996 
997 		rctl_racct_add_rule(p->p_racct, rule);
998 		/*
999 		 * In case of per-process rule, we don't have anything more
1000 		 * to do.
1001 		 */
1002 		return (0);
1003 
1004 	case RCTL_SUBJECT_TYPE_USER:
1005 		uip = rule->rr_subject.rs_uip;
1006 		KASSERT(uip != NULL, ("rctl_rule_add: NULL uip"));
1007 		rctl_racct_add_rule(uip->ui_racct, rule);
1008 		break;
1009 
1010 	case RCTL_SUBJECT_TYPE_LOGINCLASS:
1011 		lc = rule->rr_subject.rs_loginclass;
1012 		KASSERT(lc != NULL, ("rctl_rule_add: NULL loginclass"));
1013 		rctl_racct_add_rule(lc->lc_racct, rule);
1014 		break;
1015 
1016 	case RCTL_SUBJECT_TYPE_JAIL:
1017 		prr = rule->rr_subject.rs_prison_racct;
1018 		KASSERT(prr != NULL, ("rctl_rule_add: NULL pr"));
1019 		rctl_racct_add_rule(prr->prr_racct, rule);
1020 		break;
1021 
1022 	default:
1023 		panic("rctl_rule_add: unknown subject type %d",
1024 		    rule->rr_subject_type);
1025 	}
1026 
1027 	/*
1028 	 * Now go through all the processes and add the new rule to the ones
1029 	 * it applies to.
1030 	 */
1031 	sx_assert(&allproc_lock, SA_LOCKED);
1032 	FOREACH_PROC_IN_SYSTEM(p) {
1033 		if (p->p_flag & P_SYSTEM)
1034 			continue;
1035 		cred = p->p_ucred;
1036 		switch (rule->rr_subject_type) {
1037 		case RCTL_SUBJECT_TYPE_USER:
1038 			if (cred->cr_uidinfo == rule->rr_subject.rs_uip ||
1039 			    cred->cr_ruidinfo == rule->rr_subject.rs_uip)
1040 				break;
1041 			continue;
1042 		case RCTL_SUBJECT_TYPE_LOGINCLASS:
1043 			if (cred->cr_loginclass == rule->rr_subject.rs_loginclass)
1044 				break;
1045 			continue;
1046 		case RCTL_SUBJECT_TYPE_JAIL:
1047 			match = 0;
1048 			for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) {
1049 				if (pr->pr_prison_racct == rule->rr_subject.rs_prison_racct) {
1050 					match = 1;
1051 					break;
1052 				}
1053 			}
1054 			if (match)
1055 				break;
1056 			continue;
1057 		default:
1058 			panic("rctl_rule_add: unknown subject type %d",
1059 			    rule->rr_subject_type);
1060 		}
1061 
1062 		rctl_racct_add_rule(p->p_racct, rule);
1063 	}
1064 
1065 	return (0);
1066 }
1067 
1068 static void
1069 rctl_rule_remove_callback(struct racct *racct, void *arg2, void *arg3)
1070 {
1071 	struct rctl_rule *filter = (struct rctl_rule *)arg2;
1072 	int found = 0;
1073 
1074 	rw_wlock(&rctl_lock);
1075 	found += rctl_racct_remove_rules(racct, filter);
1076 	rw_wunlock(&rctl_lock);
1077 
1078 	*((int *)arg3) += found;
1079 }
1080 
1081 /*
1082  * Remove all rules that match the filter.
1083  */
1084 int
1085 rctl_rule_remove(struct rctl_rule *filter)
1086 {
1087 	int found = 0;
1088 	struct proc *p;
1089 
1090 	if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_PROCESS &&
1091 	    filter->rr_subject.rs_proc != NULL) {
1092 		p = filter->rr_subject.rs_proc;
1093 		rw_wlock(&rctl_lock);
1094 		found = rctl_racct_remove_rules(p->p_racct, filter);
1095 		rw_wunlock(&rctl_lock);
1096 		if (found)
1097 			return (0);
1098 		return (ESRCH);
1099 	}
1100 
1101 	loginclass_racct_foreach(rctl_rule_remove_callback, filter,
1102 	    (void *)&found);
1103 	ui_racct_foreach(rctl_rule_remove_callback, filter,
1104 	    (void *)&found);
1105 	prison_racct_foreach(rctl_rule_remove_callback, filter,
1106 	    (void *)&found);
1107 
1108 	sx_assert(&allproc_lock, SA_LOCKED);
1109 	rw_wlock(&rctl_lock);
1110 	FOREACH_PROC_IN_SYSTEM(p) {
1111 		found += rctl_racct_remove_rules(p->p_racct, filter);
1112 	}
1113 	rw_wunlock(&rctl_lock);
1114 
1115 	if (found)
1116 		return (0);
1117 	return (ESRCH);
1118 }
1119 
1120 /*
1121  * Appends a rule to the sbuf.
1122  */
1123 static void
1124 rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule)
1125 {
1126 	int64_t amount;
1127 
1128 	sbuf_printf(sb, "%s:", rctl_subject_type_name(rule->rr_subject_type));
1129 
1130 	switch (rule->rr_subject_type) {
1131 	case RCTL_SUBJECT_TYPE_PROCESS:
1132 		if (rule->rr_subject.rs_proc == NULL)
1133 			sbuf_printf(sb, ":");
1134 		else
1135 			sbuf_printf(sb, "%d:",
1136 			    rule->rr_subject.rs_proc->p_pid);
1137 		break;
1138 	case RCTL_SUBJECT_TYPE_USER:
1139 		if (rule->rr_subject.rs_uip == NULL)
1140 			sbuf_printf(sb, ":");
1141 		else
1142 			sbuf_printf(sb, "%d:",
1143 			    rule->rr_subject.rs_uip->ui_uid);
1144 		break;
1145 	case RCTL_SUBJECT_TYPE_LOGINCLASS:
1146 		if (rule->rr_subject.rs_loginclass == NULL)
1147 			sbuf_printf(sb, ":");
1148 		else
1149 			sbuf_printf(sb, "%s:",
1150 			    rule->rr_subject.rs_loginclass->lc_name);
1151 		break;
1152 	case RCTL_SUBJECT_TYPE_JAIL:
1153 		if (rule->rr_subject.rs_prison_racct == NULL)
1154 			sbuf_printf(sb, ":");
1155 		else
1156 			sbuf_printf(sb, "%s:",
1157 			    rule->rr_subject.rs_prison_racct->prr_name);
1158 		break;
1159 	default:
1160 		panic("rctl_rule_to_sbuf: unknown subject type %d",
1161 		    rule->rr_subject_type);
1162 	}
1163 
1164 	amount = rule->rr_amount;
1165 	if (amount != RCTL_AMOUNT_UNDEFINED &&
1166 	    RACCT_IS_IN_MILLIONS(rule->rr_resource))
1167 		amount /= 1000000;
1168 
1169 	sbuf_printf(sb, "%s:%s=%jd",
1170 	    rctl_resource_name(rule->rr_resource),
1171 	    rctl_action_name(rule->rr_action),
1172 	    amount);
1173 
1174 	if (rule->rr_per != rule->rr_subject_type)
1175 		sbuf_printf(sb, "/%s", rctl_subject_type_name(rule->rr_per));
1176 }
1177 
1178 /*
1179  * Routine used by RCTL syscalls to read in input string.
1180  */
1181 static int
1182 rctl_read_inbuf(char **inputstr, const char *inbufp, size_t inbuflen)
1183 {
1184 	int error;
1185 	char *str;
1186 
1187 	if (inbuflen <= 0)
1188 		return (EINVAL);
1189 
1190 	str = malloc(inbuflen + 1, M_RCTL, M_WAITOK);
1191 	error = copyinstr(inbufp, str, inbuflen, NULL);
1192 	if (error != 0) {
1193 		free(str, M_RCTL);
1194 		return (error);
1195 	}
1196 
1197 	*inputstr = str;
1198 
1199 	return (0);
1200 }
1201 
1202 /*
1203  * Routine used by RCTL syscalls to write out output string.
1204  */
1205 static int
1206 rctl_write_outbuf(struct sbuf *outputsbuf, char *outbufp, size_t outbuflen)
1207 {
1208 	int error;
1209 
1210 	if (outputsbuf == NULL)
1211 		return (0);
1212 
1213 	sbuf_finish(outputsbuf);
1214 	if (outbuflen < sbuf_len(outputsbuf) + 1) {
1215 		sbuf_delete(outputsbuf);
1216 		return (ERANGE);
1217 	}
1218 	error = copyout(sbuf_data(outputsbuf), outbufp,
1219 	    sbuf_len(outputsbuf) + 1);
1220 	sbuf_delete(outputsbuf);
1221 	return (error);
1222 }
1223 
1224 static struct sbuf *
1225 rctl_racct_to_sbuf(struct racct *racct, int sloppy)
1226 {
1227 	int i;
1228 	int64_t amount;
1229 	struct sbuf *sb;
1230 
1231 	sb = sbuf_new_auto();
1232 	for (i = 0; i <= RACCT_MAX; i++) {
1233 		if (sloppy == 0 && RACCT_IS_SLOPPY(i))
1234 			continue;
1235 		amount = racct->r_resources[i];
1236 		if (RACCT_IS_IN_MILLIONS(i))
1237 			amount /= 1000000;
1238 		sbuf_printf(sb, "%s=%jd,", rctl_resource_name(i), amount);
1239 	}
1240 	sbuf_setpos(sb, sbuf_len(sb) - 1);
1241 	return (sb);
1242 }
1243 
1244 int
1245 sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap)
1246 {
1247 	int error;
1248 	char *inputstr;
1249 	struct rctl_rule *filter;
1250 	struct sbuf *outputsbuf = NULL;
1251 	struct proc *p;
1252 	struct uidinfo *uip;
1253 	struct loginclass *lc;
1254 	struct prison_racct *prr;
1255 
1256 	error = priv_check(td, PRIV_RCTL_GET_RACCT);
1257 	if (error != 0)
1258 		return (error);
1259 
1260 	error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1261 	if (error != 0)
1262 		return (error);
1263 
1264 	sx_slock(&allproc_lock);
1265 	error = rctl_string_to_rule(inputstr, &filter);
1266 	free(inputstr, M_RCTL);
1267 	if (error != 0) {
1268 		sx_sunlock(&allproc_lock);
1269 		return (error);
1270 	}
1271 
1272 	switch (filter->rr_subject_type) {
1273 	case RCTL_SUBJECT_TYPE_PROCESS:
1274 		p = filter->rr_subject.rs_proc;
1275 		if (p == NULL) {
1276 			error = EINVAL;
1277 			goto out;
1278 		}
1279 		if (p->p_flag & P_SYSTEM) {
1280 			error = EINVAL;
1281 			goto out;
1282 		}
1283 		outputsbuf = rctl_racct_to_sbuf(p->p_racct, 0);
1284 		break;
1285 	case RCTL_SUBJECT_TYPE_USER:
1286 		uip = filter->rr_subject.rs_uip;
1287 		if (uip == NULL) {
1288 			error = EINVAL;
1289 			goto out;
1290 		}
1291 		outputsbuf = rctl_racct_to_sbuf(uip->ui_racct, 1);
1292 		break;
1293 	case RCTL_SUBJECT_TYPE_LOGINCLASS:
1294 		lc = filter->rr_subject.rs_loginclass;
1295 		if (lc == NULL) {
1296 			error = EINVAL;
1297 			goto out;
1298 		}
1299 		outputsbuf = rctl_racct_to_sbuf(lc->lc_racct, 1);
1300 		break;
1301 	case RCTL_SUBJECT_TYPE_JAIL:
1302 		prr = filter->rr_subject.rs_prison_racct;
1303 		if (prr == NULL) {
1304 			error = EINVAL;
1305 			goto out;
1306 		}
1307 		outputsbuf = rctl_racct_to_sbuf(prr->prr_racct, 1);
1308 		break;
1309 	default:
1310 		error = EINVAL;
1311 	}
1312 out:
1313 	rctl_rule_release(filter);
1314 	sx_sunlock(&allproc_lock);
1315 	if (error != 0)
1316 		return (error);
1317 
1318 	error = rctl_write_outbuf(outputsbuf, uap->outbufp, uap->outbuflen);
1319 
1320 	return (error);
1321 }
1322 
1323 static void
1324 rctl_get_rules_callback(struct racct *racct, void *arg2, void *arg3)
1325 {
1326 	struct rctl_rule *filter = (struct rctl_rule *)arg2;
1327 	struct rctl_rule_link *link;
1328 	struct sbuf *sb = (struct sbuf *)arg3;
1329 
1330 	rw_rlock(&rctl_lock);
1331 	LIST_FOREACH(link, &racct->r_rule_links, rrl_next) {
1332 		if (!rctl_rule_matches(link->rrl_rule, filter))
1333 			continue;
1334 		rctl_rule_to_sbuf(sb, link->rrl_rule);
1335 		sbuf_printf(sb, ",");
1336 	}
1337 	rw_runlock(&rctl_lock);
1338 }
1339 
1340 int
1341 sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
1342 {
1343 	int error;
1344 	size_t bufsize = RCTL_DEFAULT_BUFSIZE;
1345 	char *inputstr, *buf;
1346 	struct sbuf *sb;
1347 	struct rctl_rule *filter;
1348 	struct rctl_rule_link *link;
1349 	struct proc *p;
1350 
1351 	error = priv_check(td, PRIV_RCTL_GET_RULES);
1352 	if (error != 0)
1353 		return (error);
1354 
1355 	error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1356 	if (error != 0)
1357 		return (error);
1358 
1359 	sx_slock(&allproc_lock);
1360 	error = rctl_string_to_rule(inputstr, &filter);
1361 	free(inputstr, M_RCTL);
1362 	if (error != 0) {
1363 		sx_sunlock(&allproc_lock);
1364 		return (error);
1365 	}
1366 
1367 again:
1368 	buf = malloc(bufsize, M_RCTL, M_WAITOK);
1369 	sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN);
1370 	KASSERT(sb != NULL, ("sbuf_new failed"));
1371 
1372 	sx_assert(&allproc_lock, SA_LOCKED);
1373 	FOREACH_PROC_IN_SYSTEM(p) {
1374 		rw_rlock(&rctl_lock);
1375 		LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1376 			/*
1377 			 * Non-process rules will be added to the buffer later.
1378 			 * Adding them here would result in duplicated output.
1379 			 */
1380 			if (link->rrl_rule->rr_subject_type !=
1381 			    RCTL_SUBJECT_TYPE_PROCESS)
1382 				continue;
1383 			if (!rctl_rule_matches(link->rrl_rule, filter))
1384 				continue;
1385 			rctl_rule_to_sbuf(sb, link->rrl_rule);
1386 			sbuf_printf(sb, ",");
1387 		}
1388 		rw_runlock(&rctl_lock);
1389 	}
1390 
1391 	loginclass_racct_foreach(rctl_get_rules_callback, filter, sb);
1392 	ui_racct_foreach(rctl_get_rules_callback, filter, sb);
1393 	prison_racct_foreach(rctl_get_rules_callback, filter, sb);
1394 	if (sbuf_error(sb) == ENOMEM) {
1395 		sbuf_delete(sb);
1396 		free(buf, M_RCTL);
1397 		bufsize *= 4;
1398 		goto again;
1399 	}
1400 
1401 	/*
1402 	 * Remove trailing ",".
1403 	 */
1404 	if (sbuf_len(sb) > 0)
1405 		sbuf_setpos(sb, sbuf_len(sb) - 1);
1406 
1407 	error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
1408 
1409 	rctl_rule_release(filter);
1410 	sx_sunlock(&allproc_lock);
1411 	free(buf, M_RCTL);
1412 	return (error);
1413 }
1414 
1415 int
1416 sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap)
1417 {
1418 	int error;
1419 	size_t bufsize = RCTL_DEFAULT_BUFSIZE;
1420 	char *inputstr, *buf;
1421 	struct sbuf *sb;
1422 	struct rctl_rule *filter;
1423 	struct rctl_rule_link *link;
1424 
1425 	error = priv_check(td, PRIV_RCTL_GET_LIMITS);
1426 	if (error != 0)
1427 		return (error);
1428 
1429 	error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1430 	if (error != 0)
1431 		return (error);
1432 
1433 	sx_slock(&allproc_lock);
1434 	error = rctl_string_to_rule(inputstr, &filter);
1435 	free(inputstr, M_RCTL);
1436 	if (error != 0) {
1437 		sx_sunlock(&allproc_lock);
1438 		return (error);
1439 	}
1440 
1441 	if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_UNDEFINED) {
1442 		rctl_rule_release(filter);
1443 		sx_sunlock(&allproc_lock);
1444 		return (EINVAL);
1445 	}
1446 	if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_PROCESS) {
1447 		rctl_rule_release(filter);
1448 		sx_sunlock(&allproc_lock);
1449 		return (EOPNOTSUPP);
1450 	}
1451 	if (filter->rr_subject.rs_proc == NULL) {
1452 		rctl_rule_release(filter);
1453 		sx_sunlock(&allproc_lock);
1454 		return (EINVAL);
1455 	}
1456 
1457 again:
1458 	buf = malloc(bufsize, M_RCTL, M_WAITOK);
1459 	sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN);
1460 	KASSERT(sb != NULL, ("sbuf_new failed"));
1461 
1462 	rw_rlock(&rctl_lock);
1463 	LIST_FOREACH(link, &filter->rr_subject.rs_proc->p_racct->r_rule_links,
1464 	    rrl_next) {
1465 		rctl_rule_to_sbuf(sb, link->rrl_rule);
1466 		sbuf_printf(sb, ",");
1467 	}
1468 	rw_runlock(&rctl_lock);
1469 	if (sbuf_error(sb) == ENOMEM) {
1470 		sbuf_delete(sb);
1471 		free(buf, M_RCTL);
1472 		bufsize *= 4;
1473 		goto again;
1474 	}
1475 
1476 	/*
1477 	 * Remove trailing ",".
1478 	 */
1479 	if (sbuf_len(sb) > 0)
1480 		sbuf_setpos(sb, sbuf_len(sb) - 1);
1481 
1482 	error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
1483 	rctl_rule_release(filter);
1484 	sx_sunlock(&allproc_lock);
1485 	free(buf, M_RCTL);
1486 	return (error);
1487 }
1488 
1489 int
1490 sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap)
1491 {
1492 	int error;
1493 	struct rctl_rule *rule;
1494 	char *inputstr;
1495 
1496 	error = priv_check(td, PRIV_RCTL_ADD_RULE);
1497 	if (error != 0)
1498 		return (error);
1499 
1500 	error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1501 	if (error != 0)
1502 		return (error);
1503 
1504 	sx_slock(&allproc_lock);
1505 	error = rctl_string_to_rule(inputstr, &rule);
1506 	free(inputstr, M_RCTL);
1507 	if (error != 0) {
1508 		sx_sunlock(&allproc_lock);
1509 		return (error);
1510 	}
1511 	/*
1512 	 * The 'per' part of a rule is optional.
1513 	 */
1514 	if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED &&
1515 	    rule->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED)
1516 		rule->rr_per = rule->rr_subject_type;
1517 
1518 	if (!rctl_rule_fully_specified(rule)) {
1519 		error = EINVAL;
1520 		goto out;
1521 	}
1522 
1523 	error = rctl_rule_add(rule);
1524 
1525 out:
1526 	rctl_rule_release(rule);
1527 	sx_sunlock(&allproc_lock);
1528 	return (error);
1529 }
1530 
1531 int
1532 sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap)
1533 {
1534 	int error;
1535 	struct rctl_rule *filter;
1536 	char *inputstr;
1537 
1538 	error = priv_check(td, PRIV_RCTL_REMOVE_RULE);
1539 	if (error != 0)
1540 		return (error);
1541 
1542 	error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1543 	if (error != 0)
1544 		return (error);
1545 
1546 	sx_slock(&allproc_lock);
1547 	error = rctl_string_to_rule(inputstr, &filter);
1548 	free(inputstr, M_RCTL);
1549 	if (error != 0) {
1550 		sx_sunlock(&allproc_lock);
1551 		return (error);
1552 	}
1553 
1554 	error = rctl_rule_remove(filter);
1555 	rctl_rule_release(filter);
1556 	sx_sunlock(&allproc_lock);
1557 
1558 	return (error);
1559 }
1560 
1561 /*
1562  * Update RCTL rule list after credential change.
1563  */
1564 void
1565 rctl_proc_ucred_changed(struct proc *p, struct ucred *newcred)
1566 {
1567 	int rulecnt, i;
1568 	struct rctl_rule_link *link, *newlink;
1569 	struct uidinfo *newuip;
1570 	struct loginclass *newlc;
1571 	struct prison_racct *newprr;
1572 	LIST_HEAD(, rctl_rule_link) newrules;
1573 
1574 	newuip = newcred->cr_ruidinfo;
1575 	newlc = newcred->cr_loginclass;
1576 	newprr = newcred->cr_prison->pr_prison_racct;
1577 
1578 	LIST_INIT(&newrules);
1579 
1580 again:
1581 	/*
1582 	 * First, count the rules that apply to the process with new
1583 	 * credentials.
1584 	 */
1585 	rulecnt = 0;
1586 	rw_rlock(&rctl_lock);
1587 	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1588 		if (link->rrl_rule->rr_subject_type ==
1589 		    RCTL_SUBJECT_TYPE_PROCESS)
1590 			rulecnt++;
1591 	}
1592 	LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next)
1593 		rulecnt++;
1594 	LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next)
1595 		rulecnt++;
1596 	LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next)
1597 		rulecnt++;
1598 	rw_runlock(&rctl_lock);
1599 
1600 	/*
1601 	 * Create temporary list.  We've dropped the rctl_lock in order
1602 	 * to use M_WAITOK.
1603 	 */
1604 	for (i = 0; i < rulecnt; i++) {
1605 		newlink = uma_zalloc(rctl_rule_link_zone, M_WAITOK);
1606 		newlink->rrl_rule = NULL;
1607 		LIST_INSERT_HEAD(&newrules, newlink, rrl_next);
1608 	}
1609 
1610 	newlink = LIST_FIRST(&newrules);
1611 
1612 	/*
1613 	 * Assign rules to the newly allocated list entries.
1614 	 */
1615 	rw_wlock(&rctl_lock);
1616 	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1617 		if (link->rrl_rule->rr_subject_type ==
1618 		    RCTL_SUBJECT_TYPE_PROCESS) {
1619 			if (newlink == NULL)
1620 				goto goaround;
1621 			rctl_rule_acquire(link->rrl_rule);
1622 			newlink->rrl_rule = link->rrl_rule;
1623 			newlink = LIST_NEXT(newlink, rrl_next);
1624 			rulecnt--;
1625 		}
1626 	}
1627 
1628 	LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next) {
1629 		if (newlink == NULL)
1630 			goto goaround;
1631 		rctl_rule_acquire(link->rrl_rule);
1632 		newlink->rrl_rule = link->rrl_rule;
1633 		newlink = LIST_NEXT(newlink, rrl_next);
1634 		rulecnt--;
1635 	}
1636 
1637 	LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next) {
1638 		if (newlink == NULL)
1639 			goto goaround;
1640 		rctl_rule_acquire(link->rrl_rule);
1641 		newlink->rrl_rule = link->rrl_rule;
1642 		newlink = LIST_NEXT(newlink, rrl_next);
1643 		rulecnt--;
1644 	}
1645 
1646 	LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next) {
1647 		if (newlink == NULL)
1648 			goto goaround;
1649 		rctl_rule_acquire(link->rrl_rule);
1650 		newlink->rrl_rule = link->rrl_rule;
1651 		newlink = LIST_NEXT(newlink, rrl_next);
1652 		rulecnt--;
1653 	}
1654 
1655 	if (rulecnt == 0) {
1656 		/*
1657 		 * Free the old rule list.
1658 		 */
1659 		while (!LIST_EMPTY(&p->p_racct->r_rule_links)) {
1660 			link = LIST_FIRST(&p->p_racct->r_rule_links);
1661 			LIST_REMOVE(link, rrl_next);
1662 			rctl_rule_release(link->rrl_rule);
1663 			uma_zfree(rctl_rule_link_zone, link);
1664 		}
1665 
1666 		/*
1667 		 * Replace lists and we're done.
1668 		 *
1669 		 * XXX: Is there any way to switch list heads instead
1670 		 *      of iterating here?
1671 		 */
1672 		while (!LIST_EMPTY(&newrules)) {
1673 			newlink = LIST_FIRST(&newrules);
1674 			LIST_REMOVE(newlink, rrl_next);
1675 			LIST_INSERT_HEAD(&p->p_racct->r_rule_links,
1676 			    newlink, rrl_next);
1677 		}
1678 
1679 		rw_wunlock(&rctl_lock);
1680 
1681 		return;
1682 	}
1683 
1684 goaround:
1685 	rw_wunlock(&rctl_lock);
1686 
1687 	/*
1688 	 * Rule list changed while we were not holding the rctl_lock.
1689 	 * Free the new list and try again.
1690 	 */
1691 	while (!LIST_EMPTY(&newrules)) {
1692 		newlink = LIST_FIRST(&newrules);
1693 		LIST_REMOVE(newlink, rrl_next);
1694 		if (newlink->rrl_rule != NULL)
1695 			rctl_rule_release(newlink->rrl_rule);
1696 		uma_zfree(rctl_rule_link_zone, newlink);
1697 	}
1698 
1699 	goto again;
1700 }
1701 
1702 /*
1703  * Assign RCTL rules to the newly created process.
1704  */
1705 int
1706 rctl_proc_fork(struct proc *parent, struct proc *child)
1707 {
1708 	int error;
1709 	struct rctl_rule_link *link;
1710 	struct rctl_rule *rule;
1711 
1712 	LIST_INIT(&child->p_racct->r_rule_links);
1713 
1714 	/*
1715 	 * No limits for kernel processes.
1716 	 */
1717 	if (child->p_flag & P_SYSTEM)
1718 		return (0);
1719 
1720 	/*
1721 	 * Nothing to inherit from P_SYSTEM parents.
1722 	 */
1723 	if (parent->p_racct == NULL) {
1724 		KASSERT(parent->p_flag & P_SYSTEM,
1725 		    ("non-system process without racct; p = %p", parent));
1726 		return (0);
1727 	}
1728 
1729 	rw_wlock(&rctl_lock);
1730 
1731 	/*
1732 	 * Go through limits applicable to the parent and assign them
1733 	 * to the child.  Rules with 'process' subject have to be duplicated
1734 	 * in order to make their rr_subject point to the new process.
1735 	 */
1736 	LIST_FOREACH(link, &parent->p_racct->r_rule_links, rrl_next) {
1737 		if (link->rrl_rule->rr_subject_type ==
1738 		    RCTL_SUBJECT_TYPE_PROCESS) {
1739 			rule = rctl_rule_duplicate(link->rrl_rule, M_NOWAIT);
1740 			if (rule == NULL)
1741 				goto fail;
1742 			KASSERT(rule->rr_subject.rs_proc == parent,
1743 			    ("rule->rr_subject.rs_proc != parent"));
1744 			rule->rr_subject.rs_proc = child;
1745 			error = rctl_racct_add_rule_locked(child->p_racct,
1746 			    rule);
1747 			rctl_rule_release(rule);
1748 			if (error != 0)
1749 				goto fail;
1750 		} else {
1751 			error = rctl_racct_add_rule_locked(child->p_racct,
1752 			    link->rrl_rule);
1753 			if (error != 0)
1754 				goto fail;
1755 		}
1756 	}
1757 
1758 	rw_wunlock(&rctl_lock);
1759 	return (0);
1760 
1761 fail:
1762 	while (!LIST_EMPTY(&child->p_racct->r_rule_links)) {
1763 		link = LIST_FIRST(&child->p_racct->r_rule_links);
1764 		LIST_REMOVE(link, rrl_next);
1765 		rctl_rule_release(link->rrl_rule);
1766 		uma_zfree(rctl_rule_link_zone, link);
1767 	}
1768 	rw_wunlock(&rctl_lock);
1769 	return (EAGAIN);
1770 }
1771 
1772 /*
1773  * Release rules attached to the racct.
1774  */
1775 void
1776 rctl_racct_release(struct racct *racct)
1777 {
1778 	struct rctl_rule_link *link;
1779 
1780 	rw_wlock(&rctl_lock);
1781 	while (!LIST_EMPTY(&racct->r_rule_links)) {
1782 		link = LIST_FIRST(&racct->r_rule_links);
1783 		LIST_REMOVE(link, rrl_next);
1784 		rctl_rule_release(link->rrl_rule);
1785 		uma_zfree(rctl_rule_link_zone, link);
1786 	}
1787 	rw_wunlock(&rctl_lock);
1788 }
1789 
1790 static void
1791 rctl_init(void)
1792 {
1793 
1794 	rctl_rule_link_zone = uma_zcreate("rctl_rule_link",
1795 	    sizeof(struct rctl_rule_link), NULL, NULL, NULL, NULL,
1796 	    UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
1797 	rctl_rule_zone = uma_zcreate("rctl_rule", sizeof(struct rctl_rule),
1798 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
1799 }
1800 
1801 #else /* !RCTL */
1802 
/*
 * rctl_get_racct(2) stub for the !RCTL branch of this file: the system
 * call always fails with ENOSYS.
 */
int
sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap)
{
	return (ENOSYS);
}
1809 
/*
 * rctl_get_rules(2) stub for the !RCTL branch of this file: the system
 * call always fails with ENOSYS.
 */
int
sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
{
	return (ENOSYS);
}
1816 
/*
 * rctl_get_limits(2) stub for the !RCTL branch of this file: the system
 * call always fails with ENOSYS.
 */
int
sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap)
{
	return (ENOSYS);
}
1823 
/*
 * rctl_add_rule(2) stub for the !RCTL branch of this file: the system
 * call always fails with ENOSYS.
 */
int
sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap)
{
	return (ENOSYS);
}
1830 
/*
 * rctl_remove_rule(2) stub for the !RCTL branch of this file: the system
 * call always fails with ENOSYS.
 */
int
sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap)
{
	return (ENOSYS);
}
1837 
1838 #endif /* !RCTL */
1839