xref: /freebsd/sys/kern/kern_racct.c (revision 675be9115aae86ad6b3d877155d4fd7822892105)
1 /*-
2  * Copyright (c) 2010 The FreeBSD Foundation
3  * All rights reserved.
4  *
5  * This software was developed by Edward Tomasz Napierala under sponsorship
6  * from the FreeBSD Foundation.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  * $FreeBSD$
30  */
31 
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34 
35 #include "opt_kdtrace.h"
36 
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/eventhandler.h>
40 #include <sys/jail.h>
41 #include <sys/kernel.h>
42 #include <sys/kthread.h>
43 #include <sys/lock.h>
44 #include <sys/loginclass.h>
45 #include <sys/malloc.h>
46 #include <sys/mutex.h>
47 #include <sys/proc.h>
48 #include <sys/racct.h>
49 #include <sys/resourcevar.h>
50 #include <sys/sbuf.h>
51 #include <sys/sched.h>
52 #include <sys/sdt.h>
53 #include <sys/sx.h>
54 #include <sys/sysent.h>
55 #include <sys/sysproto.h>
56 #include <sys/umtx.h>
57 
58 #ifdef RCTL
59 #include <sys/rctl.h>
60 #endif
61 
62 #ifdef RACCT
63 
64 FEATURE(racct, "Resource Accounting");
65 
66 static struct mtx racct_lock;
67 MTX_SYSINIT(racct_lock, &racct_lock, "racct lock", MTX_DEF);
68 
69 static uma_zone_t racct_zone;
70 
71 static void racct_sub_racct(struct racct *dest, const struct racct *src);
72 static void racct_sub_cred_locked(struct ucred *cred, int resource,
73 		uint64_t amount);
74 static void racct_add_cred_locked(struct ucred *cred, int resource,
75 		uint64_t amount);
76 
77 SDT_PROVIDER_DEFINE(racct);
78 SDT_PROBE_DEFINE3(racct, kernel, rusage, add, add, "struct proc *", "int",
79     "uint64_t");
80 SDT_PROBE_DEFINE3(racct, kernel, rusage, add_failure, add-failure,
81     "struct proc *", "int", "uint64_t");
82 SDT_PROBE_DEFINE3(racct, kernel, rusage, add_cred, add-cred, "struct ucred *",
83     "int", "uint64_t");
84 SDT_PROBE_DEFINE3(racct, kernel, rusage, add_force, add-force, "struct proc *",
85     "int", "uint64_t");
86 SDT_PROBE_DEFINE3(racct, kernel, rusage, set, set, "struct proc *", "int",
87     "uint64_t");
88 SDT_PROBE_DEFINE3(racct, kernel, rusage, set_failure, set-failure,
89     "struct proc *", "int", "uint64_t");
90 SDT_PROBE_DEFINE3(racct, kernel, rusage, sub, sub, "struct proc *", "int",
91     "uint64_t");
92 SDT_PROBE_DEFINE3(racct, kernel, rusage, sub_cred, sub-cred, "struct ucred *",
93     "int", "uint64_t");
94 SDT_PROBE_DEFINE1(racct, kernel, racct, create, create, "struct racct *");
95 SDT_PROBE_DEFINE1(racct, kernel, racct, destroy, destroy, "struct racct *");
96 SDT_PROBE_DEFINE2(racct, kernel, racct, join, join, "struct racct *",
97     "struct racct *");
98 SDT_PROBE_DEFINE2(racct, kernel, racct, join_failure, join-failure,
99     "struct racct *", "struct racct *");
100 SDT_PROBE_DEFINE2(racct, kernel, racct, leave, leave, "struct racct *",
101     "struct racct *");
102 
103 int racct_types[] = {
104 	[RACCT_CPU] =
105 		RACCT_IN_MILLIONS,
106 	[RACCT_DATA] =
107 		RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
108 	[RACCT_STACK] =
109 		RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
110 	[RACCT_CORE] =
111 		RACCT_DENIABLE,
112 	[RACCT_RSS] =
113 		RACCT_RECLAIMABLE,
114 	[RACCT_MEMLOCK] =
115 		RACCT_RECLAIMABLE | RACCT_DENIABLE,
116 	[RACCT_NPROC] =
117 		RACCT_RECLAIMABLE | RACCT_DENIABLE,
118 	[RACCT_NOFILE] =
119 		RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
120 	[RACCT_VMEM] =
121 		RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
122 	[RACCT_NPTS] =
123 		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
124 	[RACCT_SWAP] =
125 		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
126 	[RACCT_NTHR] =
127 		RACCT_RECLAIMABLE | RACCT_DENIABLE,
128 	[RACCT_MSGQQUEUED] =
129 		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
130 	[RACCT_MSGQSIZE] =
131 		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
132 	[RACCT_NMSGQ] =
133 		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
134 	[RACCT_NSEM] =
135 		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
136 	[RACCT_NSEMOP] =
137 		RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
138 	[RACCT_NSHM] =
139 		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
140 	[RACCT_SHMSIZE] =
141 		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
142 	[RACCT_WALLCLOCK] =
143 		RACCT_IN_MILLIONS };
144 
145 static void
146 racct_add_racct(struct racct *dest, const struct racct *src)
147 {
148 	int i;
149 
150 	mtx_assert(&racct_lock, MA_OWNED);
151 
152 	/*
153 	 * Update resource usage in dest.
154 	 */
155 	for (i = 0; i <= RACCT_MAX; i++) {
156 		KASSERT(dest->r_resources[i] >= 0,
157 		    ("racct propagation meltdown: dest < 0"));
158 		KASSERT(src->r_resources[i] >= 0,
159 		    ("racct propagation meltdown: src < 0"));
160 		dest->r_resources[i] += src->r_resources[i];
161 	}
162 }
163 
164 static void
165 racct_sub_racct(struct racct *dest, const struct racct *src)
166 {
167 	int i;
168 
169 	mtx_assert(&racct_lock, MA_OWNED);
170 
171 	/*
172 	 * Update resource usage in dest.
173 	 */
174 	for (i = 0; i <= RACCT_MAX; i++) {
175 		if (!RACCT_IS_SLOPPY(i)) {
176 			KASSERT(dest->r_resources[i] >= 0,
177 			    ("racct propagation meltdown: dest < 0"));
178 			KASSERT(src->r_resources[i] >= 0,
179 			    ("racct propagation meltdown: src < 0"));
180 			KASSERT(src->r_resources[i] <= dest->r_resources[i],
181 			    ("racct propagation meltdown: src > dest"));
182 		}
183 		if (RACCT_IS_RECLAIMABLE(i)) {
184 			dest->r_resources[i] -= src->r_resources[i];
185 			if (dest->r_resources[i] < 0) {
186 				KASSERT(RACCT_IS_SLOPPY(i),
187 				    ("racct_sub_racct: usage < 0"));
188 				dest->r_resources[i] = 0;
189 			}
190 		}
191 	}
192 }
193 
194 void
195 racct_create(struct racct **racctp)
196 {
197 
198 	SDT_PROBE(racct, kernel, racct, create, racctp, 0, 0, 0, 0);
199 
200 	KASSERT(*racctp == NULL, ("racct already allocated"));
201 
202 	*racctp = uma_zalloc(racct_zone, M_WAITOK | M_ZERO);
203 }
204 
205 static void
206 racct_destroy_locked(struct racct **racctp)
207 {
208 	int i;
209 	struct racct *racct;
210 
211 	SDT_PROBE(racct, kernel, racct, destroy, racctp, 0, 0, 0, 0);
212 
213 	mtx_assert(&racct_lock, MA_OWNED);
214 	KASSERT(racctp != NULL, ("NULL racctp"));
215 	KASSERT(*racctp != NULL, ("NULL racct"));
216 
217 	racct = *racctp;
218 
219 	for (i = 0; i <= RACCT_MAX; i++) {
220 		if (RACCT_IS_SLOPPY(i))
221 			continue;
222 		if (!RACCT_IS_RECLAIMABLE(i))
223 			continue;
224 		KASSERT(racct->r_resources[i] == 0,
225 		    ("destroying non-empty racct: "
226 		    "%ju allocated for resource %d\n",
227 		    racct->r_resources[i], i));
228 	}
229 	uma_zfree(racct_zone, racct);
230 	*racctp = NULL;
231 }
232 
233 void
234 racct_destroy(struct racct **racct)
235 {
236 
237 	mtx_lock(&racct_lock);
238 	racct_destroy_locked(racct);
239 	mtx_unlock(&racct_lock);
240 }
241 
242 /*
243  * Increase consumption of 'resource' by 'amount' for 'racct'
244  * and all its parents.  Differently from other cases, 'amount' here
245  * may be less than zero.
246  */
247 static void
248 racct_alloc_resource(struct racct *racct, int resource,
249     uint64_t amount)
250 {
251 
252 	mtx_assert(&racct_lock, MA_OWNED);
253 	KASSERT(racct != NULL, ("NULL racct"));
254 
255 	racct->r_resources[resource] += amount;
256 	if (racct->r_resources[resource] < 0) {
257 		KASSERT(RACCT_IS_SLOPPY(resource),
258 		    ("racct_alloc_resource: usage < 0"));
259 		racct->r_resources[resource] = 0;
260 	}
261 }
262 
263 static int
264 racct_add_locked(struct proc *p, int resource, uint64_t amount)
265 {
266 #ifdef RCTL
267 	int error;
268 #endif
269 
270 	if (p->p_flag & P_SYSTEM)
271 		return (0);
272 
273 	SDT_PROBE(racct, kernel, rusage, add, p, resource, amount, 0, 0);
274 
275 	/*
276 	 * We need proc lock to dereference p->p_ucred.
277 	 */
278 	PROC_LOCK_ASSERT(p, MA_OWNED);
279 
280 #ifdef RCTL
281 	error = rctl_enforce(p, resource, amount);
282 	if (error && RACCT_IS_DENIABLE(resource)) {
283 		SDT_PROBE(racct, kernel, rusage, add_failure, p, resource,
284 		    amount, 0, 0);
285 		return (error);
286 	}
287 #endif
288 	racct_alloc_resource(p->p_racct, resource, amount);
289 	racct_add_cred_locked(p->p_ucred, resource, amount);
290 
291 	return (0);
292 }
293 
294 /*
295  * Increase allocation of 'resource' by 'amount' for process 'p'.
296  * Return 0 if it's below limits, or errno, if it's not.
297  */
298 int
299 racct_add(struct proc *p, int resource, uint64_t amount)
300 {
301 	int error;
302 
303 	mtx_lock(&racct_lock);
304 	error = racct_add_locked(p, resource, amount);
305 	mtx_unlock(&racct_lock);
306 	return (error);
307 }
308 
309 static void
310 racct_add_cred_locked(struct ucred *cred, int resource, uint64_t amount)
311 {
312 	struct prison *pr;
313 
314 	SDT_PROBE(racct, kernel, rusage, add_cred, cred, resource, amount,
315 	    0, 0);
316 
317 	racct_alloc_resource(cred->cr_ruidinfo->ui_racct, resource, amount);
318 	for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent)
319 		racct_alloc_resource(pr->pr_prison_racct->prr_racct, resource,
320 		    amount);
321 	racct_alloc_resource(cred->cr_loginclass->lc_racct, resource, amount);
322 }
323 
324 /*
325  * Increase allocation of 'resource' by 'amount' for credential 'cred'.
326  * Doesn't check for limits and never fails.
327  *
328  * XXX: Shouldn't this ever return an error?
329  */
330 void
331 racct_add_cred(struct ucred *cred, int resource, uint64_t amount)
332 {
333 
334 	mtx_lock(&racct_lock);
335 	racct_add_cred_locked(cred, resource, amount);
336 	mtx_unlock(&racct_lock);
337 }
338 
339 /*
340  * Increase allocation of 'resource' by 'amount' for process 'p'.
341  * Doesn't check for limits and never fails.
342  */
343 void
344 racct_add_force(struct proc *p, int resource, uint64_t amount)
345 {
346 
347 	if (p->p_flag & P_SYSTEM)
348 		return;
349 
350 	SDT_PROBE(racct, kernel, rusage, add_force, p, resource, amount, 0, 0);
351 
352 	/*
353 	 * We need proc lock to dereference p->p_ucred.
354 	 */
355 	PROC_LOCK_ASSERT(p, MA_OWNED);
356 
357 	mtx_lock(&racct_lock);
358 	racct_alloc_resource(p->p_racct, resource, amount);
359 	mtx_unlock(&racct_lock);
360 	racct_add_cred(p->p_ucred, resource, amount);
361 }
362 
363 static int
364 racct_set_locked(struct proc *p, int resource, uint64_t amount)
365 {
366 	int64_t diff;
367 #ifdef RCTL
368 	int error;
369 #endif
370 
371 	if (p->p_flag & P_SYSTEM)
372 		return (0);
373 
374 	SDT_PROBE(racct, kernel, rusage, set, p, resource, amount, 0, 0);
375 
376 	/*
377 	 * We need proc lock to dereference p->p_ucred.
378 	 */
379 	PROC_LOCK_ASSERT(p, MA_OWNED);
380 
381 	diff = amount - p->p_racct->r_resources[resource];
382 #ifdef notyet
383 	KASSERT(diff >= 0 || RACCT_IS_RECLAIMABLE(resource),
384 	    ("racct_set: usage of non-reclaimable resource %d dropping",
385 	     resource));
386 #endif
387 #ifdef RCTL
388 	if (diff > 0) {
389 		error = rctl_enforce(p, resource, diff);
390 		if (error && RACCT_IS_DENIABLE(resource)) {
391 			SDT_PROBE(racct, kernel, rusage, set_failure, p,
392 			    resource, amount, 0, 0);
393 			return (error);
394 		}
395 	}
396 #endif
397 	racct_alloc_resource(p->p_racct, resource, diff);
398 	if (diff > 0)
399 		racct_add_cred_locked(p->p_ucred, resource, diff);
400 	else if (diff < 0)
401 		racct_sub_cred_locked(p->p_ucred, resource, -diff);
402 
403 	return (0);
404 }
405 
406 /*
407  * Set allocation of 'resource' to 'amount' for process 'p'.
408  * Return 0 if it's below limits, or errno, if it's not.
409  *
410  * Note that decreasing the allocation always returns 0,
411  * even if it's above the limit.
412  */
413 int
414 racct_set(struct proc *p, int resource, uint64_t amount)
415 {
416 	int error;
417 
418 	mtx_lock(&racct_lock);
419 	error = racct_set_locked(p, resource, amount);
420 	mtx_unlock(&racct_lock);
421 	return (error);
422 }
423 
424 void
425 racct_set_force(struct proc *p, int resource, uint64_t amount)
426 {
427 	int64_t diff;
428 
429 	if (p->p_flag & P_SYSTEM)
430 		return;
431 
432 	SDT_PROBE(racct, kernel, rusage, set, p, resource, amount, 0, 0);
433 
434 	/*
435 	 * We need proc lock to dereference p->p_ucred.
436 	 */
437 	PROC_LOCK_ASSERT(p, MA_OWNED);
438 
439 	mtx_lock(&racct_lock);
440 	diff = amount - p->p_racct->r_resources[resource];
441 	racct_alloc_resource(p->p_racct, resource, diff);
442 	if (diff > 0)
443 		racct_add_cred_locked(p->p_ucred, resource, diff);
444 	else if (diff < 0)
445 		racct_sub_cred_locked(p->p_ucred, resource, -diff);
446 	mtx_unlock(&racct_lock);
447 }
448 
/*
 * Returns the amount of 'resource' the process 'p' may keep allocated.
 * Allocating more than that would be denied, unless the resource is
 * marked undeniable.  What the process has already allocated does not
 * influence the result.
 *
 * With RCTL the value comes from rctl_get_limit(); without it the result
 * is UINT64_MAX.
 */
uint64_t
racct_get_limit(struct proc *p, int resource)
{
	uint64_t limit;

#ifdef RCTL
	limit = rctl_get_limit(p, resource);
#else
	limit = UINT64_MAX;
#endif
	return (limit);
}
465 
/*
 * Returns the amount of 'resource' the process 'p' may keep allocated.
 * Allocating more than that would be denied, unless the resource is
 * marked undeniable.  In contrast to racct_get_limit(), what the process
 * has already allocated does influence the result.
 *
 * With RCTL the value comes from rctl_get_available(); without it the
 * result is UINT64_MAX.
 */
uint64_t
racct_get_available(struct proc *p, int resource)
{
	uint64_t avail;

#ifdef RCTL
	avail = rctl_get_available(p, resource);
#else
	avail = UINT64_MAX;
#endif
	return (avail);
}
482 
483 /*
484  * Decrease allocation of 'resource' by 'amount' for process 'p'.
485  */
486 void
487 racct_sub(struct proc *p, int resource, uint64_t amount)
488 {
489 
490 	if (p->p_flag & P_SYSTEM)
491 		return;
492 
493 	SDT_PROBE(racct, kernel, rusage, sub, p, resource, amount, 0, 0);
494 
495 	/*
496 	 * We need proc lock to dereference p->p_ucred.
497 	 */
498 	PROC_LOCK_ASSERT(p, MA_OWNED);
499 	KASSERT(RACCT_IS_RECLAIMABLE(resource),
500 	    ("racct_sub: called for non-reclaimable resource %d", resource));
501 
502 	mtx_lock(&racct_lock);
503 	KASSERT(amount <= p->p_racct->r_resources[resource],
504 	    ("racct_sub: freeing %ju of resource %d, which is more "
505 	     "than allocated %jd for %s (pid %d)", amount, resource,
506 	    (intmax_t)p->p_racct->r_resources[resource], p->p_comm, p->p_pid));
507 
508 	racct_alloc_resource(p->p_racct, resource, -amount);
509 	racct_sub_cred_locked(p->p_ucred, resource, amount);
510 	mtx_unlock(&racct_lock);
511 }
512 
513 static void
514 racct_sub_cred_locked(struct ucred *cred, int resource, uint64_t amount)
515 {
516 	struct prison *pr;
517 
518 	SDT_PROBE(racct, kernel, rusage, sub_cred, cred, resource, amount,
519 	    0, 0);
520 
521 #ifdef notyet
522 	KASSERT(RACCT_IS_RECLAIMABLE(resource),
523 	    ("racct_sub_cred: called for non-reclaimable resource %d",
524 	     resource));
525 #endif
526 
527 	racct_alloc_resource(cred->cr_ruidinfo->ui_racct, resource, -amount);
528 	for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent)
529 		racct_alloc_resource(pr->pr_prison_racct->prr_racct, resource,
530 		    -amount);
531 	racct_alloc_resource(cred->cr_loginclass->lc_racct, resource, -amount);
532 }
533 
534 /*
535  * Decrease allocation of 'resource' by 'amount' for credential 'cred'.
536  */
537 void
538 racct_sub_cred(struct ucred *cred, int resource, uint64_t amount)
539 {
540 
541 	mtx_lock(&racct_lock);
542 	racct_sub_cred_locked(cred, resource, amount);
543 	mtx_unlock(&racct_lock);
544 }
545 
546 /*
547  * Inherit resource usage information from the parent process.
548  */
549 int
550 racct_proc_fork(struct proc *parent, struct proc *child)
551 {
552 	int i, error = 0;
553 
554 	/*
555 	 * Create racct for the child process.
556 	 */
557 	racct_create(&child->p_racct);
558 
559 	/*
560 	 * No resource accounting for kernel processes.
561 	 */
562 	if (child->p_flag & P_SYSTEM)
563 		return (0);
564 
565 	PROC_LOCK(parent);
566 	PROC_LOCK(child);
567 	mtx_lock(&racct_lock);
568 
569 #ifdef RCTL
570 	error = rctl_proc_fork(parent, child);
571 	if (error != 0)
572 		goto out;
573 #endif
574 
575 	/*
576 	 * Inherit resource usage.
577 	 */
578 	for (i = 0; i <= RACCT_MAX; i++) {
579 		if (parent->p_racct->r_resources[i] == 0 ||
580 		    !RACCT_IS_INHERITABLE(i))
581 			continue;
582 
583 		error = racct_set_locked(child, i,
584 		    parent->p_racct->r_resources[i]);
585 		if (error != 0)
586 			goto out;
587 	}
588 
589 	error = racct_add_locked(child, RACCT_NPROC, 1);
590 	error += racct_add_locked(child, RACCT_NTHR, 1);
591 
592 out:
593 	mtx_unlock(&racct_lock);
594 	PROC_UNLOCK(child);
595 	PROC_UNLOCK(parent);
596 
597 	return (error);
598 }
599 
/*
 * Called at the end of fork1(), to handle rules that require the process
 * to be fully initialized.
 *
 * With RCTL, rctl_enforce() is run for RACCT_NPROC and RACCT_NTHR with an
 * amount of zero, under the child's proc lock and racct_lock; its results
 * are ignored.  Without RCTL this function does nothing.
 */
void
racct_proc_fork_done(struct proc *child)
{

#ifdef RCTL
	PROC_LOCK(child);
	mtx_lock(&racct_lock);

	(void)rctl_enforce(child, RACCT_NPROC, 0);
	(void)rctl_enforce(child, RACCT_NTHR, 0);

	mtx_unlock(&racct_lock);
	PROC_UNLOCK(child);
#endif
}
617 
618 void
619 racct_proc_exit(struct proc *p)
620 {
621 	int i;
622 	uint64_t runtime;
623 
624 	PROC_LOCK(p);
625 	/*
626 	 * We don't need to calculate rux, proc_reap() has already done this.
627 	 */
628 	runtime = cputick2usec(p->p_rux.rux_runtime);
629 #ifdef notyet
630 	KASSERT(runtime >= p->p_prev_runtime, ("runtime < p_prev_runtime"));
631 #else
632 	if (runtime < p->p_prev_runtime)
633 		runtime = p->p_prev_runtime;
634 #endif
635 	mtx_lock(&racct_lock);
636 	racct_set_locked(p, RACCT_CPU, runtime);
637 
638 	for (i = 0; i <= RACCT_MAX; i++) {
639 		if (p->p_racct->r_resources[i] == 0)
640 			continue;
641 	    	if (!RACCT_IS_RECLAIMABLE(i))
642 			continue;
643 		racct_set_locked(p, i, 0);
644 	}
645 
646 	mtx_unlock(&racct_lock);
647 	PROC_UNLOCK(p);
648 
649 #ifdef RCTL
650 	rctl_racct_release(p->p_racct);
651 #endif
652 	racct_destroy(&p->p_racct);
653 }
654 
655 /*
656  * Called after credentials change, to move resource utilisation
657  * between raccts.
658  */
659 void
660 racct_proc_ucred_changed(struct proc *p, struct ucred *oldcred,
661     struct ucred *newcred)
662 {
663 	struct uidinfo *olduip, *newuip;
664 	struct loginclass *oldlc, *newlc;
665 	struct prison *oldpr, *newpr, *pr;
666 
667 	PROC_LOCK_ASSERT(p, MA_NOTOWNED);
668 
669 	newuip = newcred->cr_ruidinfo;
670 	olduip = oldcred->cr_ruidinfo;
671 	newlc = newcred->cr_loginclass;
672 	oldlc = oldcred->cr_loginclass;
673 	newpr = newcred->cr_prison;
674 	oldpr = oldcred->cr_prison;
675 
676 	mtx_lock(&racct_lock);
677 	if (newuip != olduip) {
678 		racct_sub_racct(olduip->ui_racct, p->p_racct);
679 		racct_add_racct(newuip->ui_racct, p->p_racct);
680 	}
681 	if (newlc != oldlc) {
682 		racct_sub_racct(oldlc->lc_racct, p->p_racct);
683 		racct_add_racct(newlc->lc_racct, p->p_racct);
684 	}
685 	if (newpr != oldpr) {
686 		for (pr = oldpr; pr != NULL; pr = pr->pr_parent)
687 			racct_sub_racct(pr->pr_prison_racct->prr_racct,
688 			    p->p_racct);
689 		for (pr = newpr; pr != NULL; pr = pr->pr_parent)
690 			racct_add_racct(pr->pr_prison_racct->prr_racct,
691 			    p->p_racct);
692 	}
693 	mtx_unlock(&racct_lock);
694 
695 #ifdef RCTL
696 	rctl_proc_ucred_changed(p, newcred);
697 #endif
698 }
699 
700 static void
701 racctd(void)
702 {
703 	struct thread *td;
704 	struct proc *p;
705 	struct timeval wallclock;
706 	uint64_t runtime;
707 
708 	for (;;) {
709 		sx_slock(&allproc_lock);
710 
711 		FOREACH_PROC_IN_SYSTEM(p) {
712 			if (p->p_state != PRS_NORMAL)
713 				continue;
714 			if (p->p_flag & P_SYSTEM)
715 				continue;
716 
717 			microuptime(&wallclock);
718 			timevalsub(&wallclock, &p->p_stats->p_start);
719 			PROC_LOCK(p);
720 			PROC_SLOCK(p);
721 			FOREACH_THREAD_IN_PROC(p, td) {
722 				ruxagg(p, td);
723 				thread_lock(td);
724 				thread_unlock(td);
725 			}
726 			runtime = cputick2usec(p->p_rux.rux_runtime);
727 			PROC_SUNLOCK(p);
728 #ifdef notyet
729 			KASSERT(runtime >= p->p_prev_runtime,
730 			    ("runtime < p_prev_runtime"));
731 #else
732 			if (runtime < p->p_prev_runtime)
733 				runtime = p->p_prev_runtime;
734 #endif
735 			p->p_prev_runtime = runtime;
736 			mtx_lock(&racct_lock);
737 			racct_set_locked(p, RACCT_CPU, runtime);
738 			racct_set_locked(p, RACCT_WALLCLOCK,
739 			    wallclock.tv_sec * 1000000 + wallclock.tv_usec);
740 			mtx_unlock(&racct_lock);
741 			PROC_UNLOCK(p);
742 		}
743 		sx_sunlock(&allproc_lock);
744 		pause("-", hz);
745 	}
746 }
747 
748 static struct kproc_desc racctd_kp = {
749 	"racctd",
750 	racctd,
751 	NULL
752 };
753 SYSINIT(racctd, SI_SUB_RACCTD, SI_ORDER_FIRST, kproc_start, &racctd_kp);
754 
755 static void
756 racct_init(void)
757 {
758 
759 	racct_zone = uma_zcreate("racct", sizeof(struct racct),
760 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
761 	/*
762 	 * XXX: Move this somewhere.
763 	 */
764 	prison0.pr_prison_racct = prison_racct_find("0");
765 }
766 SYSINIT(racct, SI_SUB_RACCT, SI_ORDER_FIRST, racct_init, NULL);
767 
768 #else /* !RACCT */
769 
/*
 * Stub used when RACCT is not compiled in: nothing is accounted and the
 * request always succeeds.
 */
int
racct_add(struct proc *p, int resource, uint64_t amount)
{

	/* No accounting, hence nothing that could be denied. */
	return (0);
}
776 
/*
 * Stub used when RACCT is not compiled in: does nothing.
 */
void
racct_add_cred(struct ucred *cred, int resource, uint64_t amount)
{

	/* Accounting is compiled out. */
}
781 
/*
 * Stub used when RACCT is not compiled in: does nothing.
 */
void
racct_add_force(struct proc *p, int resource, uint64_t amount)
{

	/* Accounting is compiled out. */
}
788 
/*
 * Stub used when RACCT is not compiled in: nothing is recorded and the
 * request always succeeds.
 */
int
racct_set(struct proc *p, int resource, uint64_t amount)
{

	/* No accounting, hence nothing that could be denied. */
	return (0);
}
795 
/*
 * Stub used when RACCT is not compiled in: does nothing.
 */
void
racct_set_force(struct proc *p, int resource, uint64_t amount)
{

	/* Accounting is compiled out. */
}
800 
/*
 * Stub used when RACCT is not compiled in: does nothing.
 */
void
racct_sub(struct proc *p, int resource, uint64_t amount)
{

	/* Accounting is compiled out. */
}
805 
/*
 * Stub used when RACCT is not compiled in: does nothing.
 */
void
racct_sub_cred(struct ucred *cred, int resource, uint64_t amount)
{

	/* Accounting is compiled out. */
}
810 
/*
 * Stub used when RACCT is not compiled in: reports UINT64_MAX for every
 * process and resource.
 */
uint64_t
racct_get_limit(struct proc *p, int resource)
{

	/* The largest value the return type can hold. */
	return (UINT64_MAX);
}
817 
/*
 * Stub used when RACCT is not compiled in: reports UINT64_MAX for every
 * process and resource.
 */
uint64_t
racct_get_available(struct proc *p, int resource)
{

	/* The largest value the return type can hold. */
	return (UINT64_MAX);
}
824 
/*
 * Stub used when RACCT is not compiled in: allocates nothing and leaves
 * '*racctp' untouched.
 */
void
racct_create(struct racct **racctp)
{

	/* Accounting is compiled out. */
}
829 
/*
 * Stub used when RACCT is not compiled in: frees nothing and leaves
 * '*racctp' untouched.
 */
void
racct_destroy(struct racct **racctp)
{

	/* Accounting is compiled out. */
}
834 
/*
 * Stub used when RACCT is not compiled in: nothing is inherited and the
 * call always succeeds.
 */
int
racct_proc_fork(struct proc *parent, struct proc *child)
{

	/* No accounting, hence nothing that could fail. */
	return (0);
}
841 
/*
 * Stub used when RACCT is not compiled in: does nothing.
 */
void
racct_proc_fork_done(struct proc *child)
{

	/* Accounting is compiled out. */
}
846 
/*
 * Stub used when RACCT is not compiled in: does nothing.
 */
void
racct_proc_exit(struct proc *p)
{

	/* Accounting is compiled out. */
}
851 
852 #endif /* !RACCT */
853