xref: /freebsd/sys/kern/kern_racct.c (revision 641a6cfb86023499caafe26a4d821a0b885cf00b)
1 /*-
2  * Copyright (c) 2010 The FreeBSD Foundation
3  * All rights reserved.
4  *
5  * This software was developed by Edward Tomasz Napierala under sponsorship
6  * from the FreeBSD Foundation.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  * $FreeBSD$
30  */
31 
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34 
35 #include "opt_kdtrace.h"
36 
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/eventhandler.h>
40 #include <sys/jail.h>
41 #include <sys/kernel.h>
42 #include <sys/kthread.h>
43 #include <sys/lock.h>
44 #include <sys/loginclass.h>
45 #include <sys/malloc.h>
46 #include <sys/mutex.h>
47 #include <sys/proc.h>
48 #include <sys/racct.h>
49 #include <sys/resourcevar.h>
50 #include <sys/sbuf.h>
51 #include <sys/sched.h>
52 #include <sys/sdt.h>
53 #include <sys/sx.h>
54 #include <sys/sysent.h>
55 #include <sys/sysproto.h>
56 #include <sys/umtx.h>
57 
58 #ifdef RCTL
59 #include <sys/rctl.h>
60 #endif
61 
62 #ifdef RACCT
63 
64 FEATURE(racct, "Resource Accounting");
65 
66 static struct mtx racct_lock;
67 MTX_SYSINIT(racct_lock, &racct_lock, "racct lock", MTX_DEF);
68 
69 static uma_zone_t racct_zone;
70 
71 static void racct_sub_racct(struct racct *dest, const struct racct *src);
72 static void racct_sub_cred_locked(struct ucred *cred, int resource,
73 		uint64_t amount);
74 static void racct_add_cred_locked(struct ucred *cred, int resource,
75 		uint64_t amount);
76 
77 SDT_PROVIDER_DEFINE(racct);
78 SDT_PROBE_DEFINE3(racct, kernel, rusage, add, add, "struct proc *", "int",
79     "uint64_t");
80 SDT_PROBE_DEFINE3(racct, kernel, rusage, add_failure, add-failure,
81     "struct proc *", "int", "uint64_t");
82 SDT_PROBE_DEFINE3(racct, kernel, rusage, add_cred, add-cred, "struct ucred *",
83     "int", "uint64_t");
84 SDT_PROBE_DEFINE3(racct, kernel, rusage, add_force, add-force, "struct proc *",
85     "int", "uint64_t");
86 SDT_PROBE_DEFINE3(racct, kernel, rusage, set, set, "struct proc *", "int",
87     "uint64_t");
88 SDT_PROBE_DEFINE3(racct, kernel, rusage, set_failure, set-failure,
89     "struct proc *", "int", "uint64_t");
90 SDT_PROBE_DEFINE3(racct, kernel, rusage, sub, sub, "struct proc *", "int",
91     "uint64_t");
92 SDT_PROBE_DEFINE3(racct, kernel, rusage, sub_cred, sub-cred, "struct ucred *",
93     "int", "uint64_t");
94 SDT_PROBE_DEFINE1(racct, kernel, racct, create, create, "struct racct *");
95 SDT_PROBE_DEFINE1(racct, kernel, racct, destroy, destroy, "struct racct *");
96 SDT_PROBE_DEFINE2(racct, kernel, racct, join, join, "struct racct *",
97     "struct racct *");
98 SDT_PROBE_DEFINE2(racct, kernel, racct, join_failure, join-failure,
99     "struct racct *", "struct racct *");
100 SDT_PROBE_DEFINE2(racct, kernel, racct, leave, leave, "struct racct *",
101     "struct racct *");
102 
103 int racct_types[] = {
104 	[RACCT_CPU] =
105 		RACCT_IN_MILLIONS,
106 	[RACCT_DATA] =
107 		RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
108 	[RACCT_STACK] =
109 		RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
110 	[RACCT_CORE] =
111 		RACCT_DENIABLE,
112 	[RACCT_RSS] =
113 		RACCT_RECLAIMABLE,
114 	[RACCT_MEMLOCK] =
115 		RACCT_RECLAIMABLE | RACCT_DENIABLE,
116 	[RACCT_NPROC] =
117 		RACCT_RECLAIMABLE | RACCT_DENIABLE,
118 	[RACCT_NOFILE] =
119 		RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
120 	[RACCT_VMEM] =
121 		RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
122 	[RACCT_NPTS] =
123 		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
124 	[RACCT_SWAP] =
125 		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
126 	[RACCT_NTHR] =
127 		RACCT_RECLAIMABLE | RACCT_DENIABLE,
128 	[RACCT_MSGQQUEUED] =
129 		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
130 	[RACCT_MSGQSIZE] =
131 		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
132 	[RACCT_NMSGQ] =
133 		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
134 	[RACCT_NSEM] =
135 		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
136 	[RACCT_NSEMOP] =
137 		RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
138 	[RACCT_NSHM] =
139 		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
140 	[RACCT_SHMSIZE] =
141 		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
142 	[RACCT_WALLCLOCK] =
143 		RACCT_IN_MILLIONS };
144 
145 static void
146 racct_add_racct(struct racct *dest, const struct racct *src)
147 {
148 	int i;
149 
150 	mtx_assert(&racct_lock, MA_OWNED);
151 
152 	/*
153 	 * Update resource usage in dest.
154 	 */
155 	for (i = 0; i <= RACCT_MAX; i++) {
156 		KASSERT(dest->r_resources[i] >= 0,
157 		    ("racct propagation meltdown: dest < 0"));
158 		KASSERT(src->r_resources[i] >= 0,
159 		    ("racct propagation meltdown: src < 0"));
160 		dest->r_resources[i] += src->r_resources[i];
161 	}
162 }
163 
164 static void
165 racct_sub_racct(struct racct *dest, const struct racct *src)
166 {
167 	int i;
168 
169 	mtx_assert(&racct_lock, MA_OWNED);
170 
171 	/*
172 	 * Update resource usage in dest.
173 	 */
174 	for (i = 0; i <= RACCT_MAX; i++) {
175 		if (!RACCT_IS_SLOPPY(i)) {
176 			KASSERT(dest->r_resources[i] >= 0,
177 			    ("racct propagation meltdown: dest < 0"));
178 			KASSERT(src->r_resources[i] >= 0,
179 			    ("racct propagation meltdown: src < 0"));
180 			KASSERT(src->r_resources[i] <= dest->r_resources[i],
181 			    ("racct propagation meltdown: src > dest"));
182 		}
183 		if (RACCT_IS_RECLAIMABLE(i)) {
184 			dest->r_resources[i] -= src->r_resources[i];
185 			if (dest->r_resources[i] < 0) {
186 				KASSERT(RACCT_IS_SLOPPY(i),
187 				    ("racct_sub_racct: usage < 0"));
188 				dest->r_resources[i] = 0;
189 			}
190 		}
191 	}
192 }
193 
194 void
195 racct_create(struct racct **racctp)
196 {
197 
198 	SDT_PROBE(racct, kernel, racct, create, racctp, 0, 0, 0, 0);
199 
200 	KASSERT(*racctp == NULL, ("racct already allocated"));
201 
202 	*racctp = uma_zalloc(racct_zone, M_WAITOK | M_ZERO);
203 }
204 
205 static void
206 racct_destroy_locked(struct racct **racctp)
207 {
208 	int i;
209 	struct racct *racct;
210 
211 	SDT_PROBE(racct, kernel, racct, destroy, racctp, 0, 0, 0, 0);
212 
213 	mtx_assert(&racct_lock, MA_OWNED);
214 	KASSERT(racctp != NULL, ("NULL racctp"));
215 	KASSERT(*racctp != NULL, ("NULL racct"));
216 
217 	racct = *racctp;
218 
219 	for (i = 0; i <= RACCT_MAX; i++) {
220 		if (RACCT_IS_SLOPPY(i))
221 			continue;
222 		if (!RACCT_IS_RECLAIMABLE(i))
223 			continue;
224 		KASSERT(racct->r_resources[i] == 0,
225 		    ("destroying non-empty racct: "
226 		    "%ju allocated for resource %d\n",
227 		    racct->r_resources[i], i));
228 	}
229 	uma_zfree(racct_zone, racct);
230 	*racctp = NULL;
231 }
232 
233 void
234 racct_destroy(struct racct **racct)
235 {
236 
237 	mtx_lock(&racct_lock);
238 	racct_destroy_locked(racct);
239 	mtx_unlock(&racct_lock);
240 }
241 
242 /*
243  * Increase consumption of 'resource' by 'amount' for 'racct'
244  * and all its parents.  Differently from other cases, 'amount' here
245  * may be less than zero.
246  */
247 static void
248 racct_alloc_resource(struct racct *racct, int resource,
249     uint64_t amount)
250 {
251 
252 	mtx_assert(&racct_lock, MA_OWNED);
253 	KASSERT(racct != NULL, ("NULL racct"));
254 
255 	racct->r_resources[resource] += amount;
256 	if (racct->r_resources[resource] < 0) {
257 		KASSERT(RACCT_IS_SLOPPY(resource),
258 		    ("racct_alloc_resource: usage < 0"));
259 		racct->r_resources[resource] = 0;
260 	}
261 }
262 
263 static int
264 racct_add_locked(struct proc *p, int resource, uint64_t amount)
265 {
266 #ifdef RCTL
267 	int error;
268 #endif
269 
270 	SDT_PROBE(racct, kernel, rusage, add, p, resource, amount, 0, 0);
271 
272 	/*
273 	 * We need proc lock to dereference p->p_ucred.
274 	 */
275 	PROC_LOCK_ASSERT(p, MA_OWNED);
276 
277 #ifdef RCTL
278 	error = rctl_enforce(p, resource, amount);
279 	if (error && RACCT_IS_DENIABLE(resource)) {
280 		SDT_PROBE(racct, kernel, rusage, add_failure, p, resource,
281 		    amount, 0, 0);
282 		return (error);
283 	}
284 #endif
285 	racct_alloc_resource(p->p_racct, resource, amount);
286 	racct_add_cred_locked(p->p_ucred, resource, amount);
287 
288 	return (0);
289 }
290 
291 /*
292  * Increase allocation of 'resource' by 'amount' for process 'p'.
293  * Return 0 if it's below limits, or errno, if it's not.
294  */
295 int
296 racct_add(struct proc *p, int resource, uint64_t amount)
297 {
298 	int error;
299 
300 	mtx_lock(&racct_lock);
301 	error = racct_add_locked(p, resource, amount);
302 	mtx_unlock(&racct_lock);
303 	return (error);
304 }
305 
306 static void
307 racct_add_cred_locked(struct ucred *cred, int resource, uint64_t amount)
308 {
309 	struct prison *pr;
310 
311 	SDT_PROBE(racct, kernel, rusage, add_cred, cred, resource, amount,
312 	    0, 0);
313 
314 	racct_alloc_resource(cred->cr_ruidinfo->ui_racct, resource, amount);
315 	for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent)
316 		racct_alloc_resource(pr->pr_prison_racct->prr_racct, resource,
317 		    amount);
318 	racct_alloc_resource(cred->cr_loginclass->lc_racct, resource, amount);
319 }
320 
321 /*
322  * Increase allocation of 'resource' by 'amount' for credential 'cred'.
323  * Doesn't check for limits and never fails.
324  *
325  * XXX: Shouldn't this ever return an error?
326  */
327 void
328 racct_add_cred(struct ucred *cred, int resource, uint64_t amount)
329 {
330 
331 	mtx_lock(&racct_lock);
332 	racct_add_cred_locked(cred, resource, amount);
333 	mtx_unlock(&racct_lock);
334 }
335 
336 /*
337  * Increase allocation of 'resource' by 'amount' for process 'p'.
338  * Doesn't check for limits and never fails.
339  */
340 void
341 racct_add_force(struct proc *p, int resource, uint64_t amount)
342 {
343 
344 	SDT_PROBE(racct, kernel, rusage, add_force, p, resource, amount, 0, 0);
345 
346 	/*
347 	 * We need proc lock to dereference p->p_ucred.
348 	 */
349 	PROC_LOCK_ASSERT(p, MA_OWNED);
350 
351 	mtx_lock(&racct_lock);
352 	racct_alloc_resource(p->p_racct, resource, amount);
353 	mtx_unlock(&racct_lock);
354 	racct_add_cred(p->p_ucred, resource, amount);
355 }
356 
357 static int
358 racct_set_locked(struct proc *p, int resource, uint64_t amount)
359 {
360 	int64_t diff;
361 #ifdef RCTL
362 	int error;
363 #endif
364 
365 	SDT_PROBE(racct, kernel, rusage, set, p, resource, amount, 0, 0);
366 
367 	/*
368 	 * We need proc lock to dereference p->p_ucred.
369 	 */
370 	PROC_LOCK_ASSERT(p, MA_OWNED);
371 
372 	diff = amount - p->p_racct->r_resources[resource];
373 #ifdef notyet
374 	KASSERT(diff >= 0 || RACCT_IS_RECLAIMABLE(resource),
375 	    ("racct_set: usage of non-reclaimable resource %d dropping",
376 	     resource));
377 #endif
378 #ifdef RCTL
379 	if (diff > 0) {
380 		error = rctl_enforce(p, resource, diff);
381 		if (error && RACCT_IS_DENIABLE(resource)) {
382 			SDT_PROBE(racct, kernel, rusage, set_failure, p,
383 			    resource, amount, 0, 0);
384 			return (error);
385 		}
386 	}
387 #endif
388 	racct_alloc_resource(p->p_racct, resource, diff);
389 	if (diff > 0)
390 		racct_add_cred_locked(p->p_ucred, resource, diff);
391 	else if (diff < 0)
392 		racct_sub_cred_locked(p->p_ucred, resource, -diff);
393 
394 	return (0);
395 }
396 
397 /*
398  * Set allocation of 'resource' to 'amount' for process 'p'.
399  * Return 0 if it's below limits, or errno, if it's not.
400  *
401  * Note that decreasing the allocation always returns 0,
402  * even if it's above the limit.
403  */
404 int
405 racct_set(struct proc *p, int resource, uint64_t amount)
406 {
407 	int error;
408 
409 	mtx_lock(&racct_lock);
410 	error = racct_set_locked(p, resource, amount);
411 	mtx_unlock(&racct_lock);
412 	return (error);
413 }
414 
415 void
416 racct_set_force(struct proc *p, int resource, uint64_t amount)
417 {
418 	int64_t diff;
419 
420 	SDT_PROBE(racct, kernel, rusage, set, p, resource, amount, 0, 0);
421 
422 	/*
423 	 * We need proc lock to dereference p->p_ucred.
424 	 */
425 	PROC_LOCK_ASSERT(p, MA_OWNED);
426 
427 	mtx_lock(&racct_lock);
428 	diff = amount - p->p_racct->r_resources[resource];
429 	racct_alloc_resource(p->p_racct, resource, diff);
430 	if (diff > 0)
431 		racct_add_cred_locked(p->p_ucred, resource, diff);
432 	else if (diff < 0)
433 		racct_sub_cred_locked(p->p_ucred, resource, -diff);
434 	mtx_unlock(&racct_lock);
435 }
436 
/*
 * Return the amount of 'resource' that process 'p' is allowed to keep
 * allocated.  Allocating more than that would be denied, unless the
 * resource is marked undeniable.  What the process has already
 * allocated is not taken into account.
 *
 * Without RCTL there are no limits and UINT64_MAX is returned.
 */
uint64_t
racct_get_limit(struct proc *p, int resource)
{
	uint64_t limit;

#ifdef RCTL
	limit = rctl_get_limit(p, resource);
#else
	limit = UINT64_MAX;
#endif
	return (limit);
}
453 
/*
 * Return the amount of 'resource' that process 'p' may allocate.
 * Allocating more than that would be denied, unless the resource is
 * marked undeniable.  In contrast to racct_get_limit(), what the process
 * has already allocated is taken into account.
 *
 * Without RCTL there are no limits and UINT64_MAX is returned.
 */
uint64_t
racct_get_available(struct proc *p, int resource)
{
	uint64_t avail;

#ifdef RCTL
	avail = rctl_get_available(p, resource);
#else
	avail = UINT64_MAX;
#endif
	return (avail);
}
470 
471 /*
472  * Decrease allocation of 'resource' by 'amount' for process 'p'.
473  */
474 void
475 racct_sub(struct proc *p, int resource, uint64_t amount)
476 {
477 
478 	SDT_PROBE(racct, kernel, rusage, sub, p, resource, amount, 0, 0);
479 
480 	/*
481 	 * We need proc lock to dereference p->p_ucred.
482 	 */
483 	PROC_LOCK_ASSERT(p, MA_OWNED);
484 	KASSERT(RACCT_IS_RECLAIMABLE(resource),
485 	    ("racct_sub: called for non-reclaimable resource %d", resource));
486 
487 	mtx_lock(&racct_lock);
488 	KASSERT(amount <= p->p_racct->r_resources[resource],
489 	    ("racct_sub: freeing %ju of resource %d, which is more "
490 	     "than allocated %jd for %s (pid %d)", amount, resource,
491 	    (intmax_t)p->p_racct->r_resources[resource], p->p_comm, p->p_pid));
492 
493 	racct_alloc_resource(p->p_racct, resource, -amount);
494 	racct_sub_cred_locked(p->p_ucred, resource, amount);
495 	mtx_unlock(&racct_lock);
496 }
497 
498 static void
499 racct_sub_cred_locked(struct ucred *cred, int resource, uint64_t amount)
500 {
501 	struct prison *pr;
502 
503 	SDT_PROBE(racct, kernel, rusage, sub_cred, cred, resource, amount,
504 	    0, 0);
505 
506 #ifdef notyet
507 	KASSERT(RACCT_IS_RECLAIMABLE(resource),
508 	    ("racct_sub_cred: called for non-reclaimable resource %d",
509 	     resource));
510 #endif
511 
512 	racct_alloc_resource(cred->cr_ruidinfo->ui_racct, resource, -amount);
513 	for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent)
514 		racct_alloc_resource(pr->pr_prison_racct->prr_racct, resource,
515 		    -amount);
516 	racct_alloc_resource(cred->cr_loginclass->lc_racct, resource, -amount);
517 }
518 
519 /*
520  * Decrease allocation of 'resource' by 'amount' for credential 'cred'.
521  */
522 void
523 racct_sub_cred(struct ucred *cred, int resource, uint64_t amount)
524 {
525 
526 	mtx_lock(&racct_lock);
527 	racct_sub_cred_locked(cred, resource, amount);
528 	mtx_unlock(&racct_lock);
529 }
530 
531 /*
532  * Inherit resource usage information from the parent process.
533  */
534 int
535 racct_proc_fork(struct proc *parent, struct proc *child)
536 {
537 	int i, error = 0;
538 
539 	/*
540 	 * Create racct for the child process.
541 	 */
542 	racct_create(&child->p_racct);
543 
544 	PROC_LOCK(parent);
545 	PROC_LOCK(child);
546 	mtx_lock(&racct_lock);
547 
548 #ifdef RCTL
549 	error = rctl_proc_fork(parent, child);
550 	if (error != 0)
551 		goto out;
552 #endif
553 
554 	/*
555 	 * Inherit resource usage.
556 	 */
557 	for (i = 0; i <= RACCT_MAX; i++) {
558 		if (parent->p_racct->r_resources[i] == 0 ||
559 		    !RACCT_IS_INHERITABLE(i))
560 			continue;
561 
562 		error = racct_set_locked(child, i,
563 		    parent->p_racct->r_resources[i]);
564 		if (error != 0)
565 			goto out;
566 	}
567 
568 	error = racct_add_locked(child, RACCT_NPROC, 1);
569 	error += racct_add_locked(child, RACCT_NTHR, 1);
570 
571 out:
572 	mtx_unlock(&racct_lock);
573 	PROC_UNLOCK(child);
574 	PROC_UNLOCK(parent);
575 
576 	if (error != 0)
577 		racct_proc_exit(child);
578 
579 	return (error);
580 }
581 
/*
 * Called at the end of fork1(), to handle rules that require the process
 * to be fully initialized.  With RCTL compiled in, rctl_enforce() is
 * invoked for RACCT_NPROC and RACCT_NTHR with an amount of zero and its
 * result is ignored; without RCTL this is a no-op.
 */
void
racct_proc_fork_done(struct proc *child)
{

#ifdef RCTL
	PROC_LOCK(child);
	mtx_lock(&racct_lock);

	/* Nothing new is charged here, hence the zero amounts. */
	rctl_enforce(child, RACCT_NPROC, 0);
	rctl_enforce(child, RACCT_NTHR, 0);

	mtx_unlock(&racct_lock);
	PROC_UNLOCK(child);
#endif
}
599 
600 void
601 racct_proc_exit(struct proc *p)
602 {
603 	int i;
604 	uint64_t runtime;
605 
606 	PROC_LOCK(p);
607 	/*
608 	 * We don't need to calculate rux, proc_reap() has already done this.
609 	 */
610 	runtime = cputick2usec(p->p_rux.rux_runtime);
611 #ifdef notyet
612 	KASSERT(runtime >= p->p_prev_runtime, ("runtime < p_prev_runtime"));
613 #else
614 	if (runtime < p->p_prev_runtime)
615 		runtime = p->p_prev_runtime;
616 #endif
617 	mtx_lock(&racct_lock);
618 	racct_set_locked(p, RACCT_CPU, runtime);
619 
620 	for (i = 0; i <= RACCT_MAX; i++) {
621 		if (p->p_racct->r_resources[i] == 0)
622 			continue;
623 	    	if (!RACCT_IS_RECLAIMABLE(i))
624 			continue;
625 		racct_set_locked(p, i, 0);
626 	}
627 
628 	mtx_unlock(&racct_lock);
629 	PROC_UNLOCK(p);
630 
631 #ifdef RCTL
632 	rctl_racct_release(p->p_racct);
633 #endif
634 	racct_destroy(&p->p_racct);
635 }
636 
637 /*
638  * Called after credentials change, to move resource utilisation
639  * between raccts.
640  */
641 void
642 racct_proc_ucred_changed(struct proc *p, struct ucred *oldcred,
643     struct ucred *newcred)
644 {
645 	struct uidinfo *olduip, *newuip;
646 	struct loginclass *oldlc, *newlc;
647 	struct prison *oldpr, *newpr, *pr;
648 
649 	PROC_LOCK_ASSERT(p, MA_NOTOWNED);
650 
651 	newuip = newcred->cr_ruidinfo;
652 	olduip = oldcred->cr_ruidinfo;
653 	newlc = newcred->cr_loginclass;
654 	oldlc = oldcred->cr_loginclass;
655 	newpr = newcred->cr_prison;
656 	oldpr = oldcred->cr_prison;
657 
658 	mtx_lock(&racct_lock);
659 	if (newuip != olduip) {
660 		racct_sub_racct(olduip->ui_racct, p->p_racct);
661 		racct_add_racct(newuip->ui_racct, p->p_racct);
662 	}
663 	if (newlc != oldlc) {
664 		racct_sub_racct(oldlc->lc_racct, p->p_racct);
665 		racct_add_racct(newlc->lc_racct, p->p_racct);
666 	}
667 	if (newpr != oldpr) {
668 		for (pr = oldpr; pr != NULL; pr = pr->pr_parent)
669 			racct_sub_racct(pr->pr_prison_racct->prr_racct,
670 			    p->p_racct);
671 		for (pr = newpr; pr != NULL; pr = pr->pr_parent)
672 			racct_add_racct(pr->pr_prison_racct->prr_racct,
673 			    p->p_racct);
674 	}
675 	mtx_unlock(&racct_lock);
676 
677 #ifdef RCTL
678 	rctl_proc_ucred_changed(p, newcred);
679 #endif
680 }
681 
682 void
683 racct_move(struct racct *dest, struct racct *src)
684 {
685 
686 	mtx_lock(&racct_lock);
687 
688 	racct_add_racct(dest, src);
689 	racct_sub_racct(src, src);
690 
691 	mtx_unlock(&racct_lock);
692 }
693 
694 static void
695 racctd(void)
696 {
697 	struct thread *td;
698 	struct proc *p;
699 	struct timeval wallclock;
700 	uint64_t runtime;
701 
702 	for (;;) {
703 		sx_slock(&allproc_lock);
704 
705 		FOREACH_PROC_IN_SYSTEM(p) {
706 			if (p->p_state != PRS_NORMAL)
707 				continue;
708 
709 			microuptime(&wallclock);
710 			timevalsub(&wallclock, &p->p_stats->p_start);
711 			PROC_LOCK(p);
712 			PROC_SLOCK(p);
713 			FOREACH_THREAD_IN_PROC(p, td)
714 				ruxagg(p, td);
715 			runtime = cputick2usec(p->p_rux.rux_runtime);
716 			PROC_SUNLOCK(p);
717 #ifdef notyet
718 			KASSERT(runtime >= p->p_prev_runtime,
719 			    ("runtime < p_prev_runtime"));
720 #else
721 			if (runtime < p->p_prev_runtime)
722 				runtime = p->p_prev_runtime;
723 #endif
724 			p->p_prev_runtime = runtime;
725 			mtx_lock(&racct_lock);
726 			racct_set_locked(p, RACCT_CPU, runtime);
727 			racct_set_locked(p, RACCT_WALLCLOCK,
728 			    (uint64_t)wallclock.tv_sec * 1000000 +
729 			    wallclock.tv_usec);
730 			mtx_unlock(&racct_lock);
731 			PROC_UNLOCK(p);
732 		}
733 		sx_sunlock(&allproc_lock);
734 		pause("-", hz);
735 	}
736 }
737 
738 static struct kproc_desc racctd_kp = {
739 	"racctd",
740 	racctd,
741 	NULL
742 };
743 SYSINIT(racctd, SI_SUB_RACCTD, SI_ORDER_FIRST, kproc_start, &racctd_kp);
744 
745 static void
746 racct_init(void)
747 {
748 
749 	racct_zone = uma_zcreate("racct", sizeof(struct racct),
750 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
751 	/*
752 	 * XXX: Move this somewhere.
753 	 */
754 	prison0.pr_prison_racct = prison_racct_find("0");
755 }
756 SYSINIT(racct, SI_SUB_RACCT, SI_ORDER_FIRST, racct_init, NULL);
757 
758 #else /* !RACCT */
759 
/*
 * Stub for kernels built without RACCT: nothing is accounted, so every
 * request is accepted.  Always returns 0.
 */
int
racct_add(struct proc *p, int resource, uint64_t amount)
{
	const int accepted = 0;

	return (accepted);
}
766 
/*
 * Stub for kernels built without RACCT: does nothing.
 */
void
racct_add_cred(struct ucred *cred, int resource, uint64_t amount)
{

	return;
}
771 
/*
 * Stub for kernels built without RACCT: does nothing.
 */
void
racct_add_force(struct proc *p, int resource, uint64_t amount)
{
}
778 
/*
 * Stub for kernels built without RACCT: nothing is accounted, so every
 * request is accepted.  Always returns 0.
 */
int
racct_set(struct proc *p, int resource, uint64_t amount)
{
	const int accepted = 0;

	return (accepted);
}
785 
/*
 * Stub for kernels built without RACCT: does nothing.
 */
void
racct_set_force(struct proc *p, int resource, uint64_t amount)
{

	return;
}
790 
/*
 * Stub for kernels built without RACCT: does nothing.
 */
void
racct_sub(struct proc *p, int resource, uint64_t amount)
{

	return;
}
795 
/*
 * Stub for kernels built without RACCT: does nothing.
 */
void
racct_sub_cred(struct ucred *cred, int resource, uint64_t amount)
{

	return;
}
800 
/*
 * Stub for kernels built without RACCT: there are no limits, so the
 * largest representable value is reported.
 */
uint64_t
racct_get_limit(struct proc *p, int resource)
{
	const uint64_t unlimited = UINT64_MAX;

	return (unlimited);
}
807 
/*
 * Stub for kernels built without RACCT: there are no limits, so the
 * largest representable value is reported.
 */
uint64_t
racct_get_available(struct proc *p, int resource)
{
	const uint64_t unlimited = UINT64_MAX;

	return (unlimited);
}
814 
/*
 * Stub for kernels built without RACCT: no racct is allocated and
 * '*racctp' is left untouched.
 */
void
racct_create(struct racct **racctp)
{

	return;
}
819 
/*
 * Stub for kernels built without RACCT: there is nothing to free and
 * '*racctp' is left untouched.
 */
void
racct_destroy(struct racct **racctp)
{

	return;
}
824 
/*
 * Stub for kernels built without RACCT: no accounting state is set up
 * for the child.  Always returns 0.
 */
int
racct_proc_fork(struct proc *parent, struct proc *child)
{
	const int ok = 0;

	return (ok);
}
831 
/*
 * Stub for kernels built without RACCT: does nothing.
 */
void
racct_proc_fork_done(struct proc *child)
{

	return;
}
836 
/*
 * Stub for kernels built without RACCT: does nothing.
 */
void
racct_proc_exit(struct proc *p)
{

	return;
}
841 
842 #endif /* !RACCT */
843