xref: /freebsd/sys/kern/kern_racct.c (revision 87a5818245a2c83f385075aef0e9d01e0ae840fa)
1 /*-
2  * Copyright (c) 2010 The FreeBSD Foundation
3  * All rights reserved.
4  *
5  * This software was developed by Edward Tomasz Napierala under sponsorship
6  * from the FreeBSD Foundation.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  * $FreeBSD$
30  */
31 
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34 
35 #include "opt_kdtrace.h"
36 
37 #include <sys/param.h>
38 #include <sys/eventhandler.h>
39 #include <sys/param.h>
40 #include <sys/jail.h>
41 #include <sys/kernel.h>
42 #include <sys/kthread.h>
43 #include <sys/lock.h>
44 #include <sys/loginclass.h>
45 #include <sys/malloc.h>
46 #include <sys/mutex.h>
47 #include <sys/proc.h>
48 #include <sys/racct.h>
49 #include <sys/resourcevar.h>
50 #include <sys/sbuf.h>
51 #include <sys/sched.h>
52 #include <sys/sdt.h>
53 #include <sys/sx.h>
54 #include <sys/sysent.h>
55 #include <sys/sysproto.h>
56 #include <sys/systm.h>
57 #include <sys/umtx.h>
58 
59 #ifdef RCTL
60 #include <sys/rctl.h>
61 #endif
62 
63 #ifdef RACCT
64 
/* Declare the "racct" kernel feature with its human-readable description. */
FEATURE(racct, "Resource Accounting");

/*
 * Mutex guarding the resource counters kept in struct racct; the helpers
 * below assert that it is owned before modifying r_resources[].
 */
static struct mtx racct_lock;
MTX_SYSINIT(racct_lock, &racct_lock, "racct lock", MTX_DEF);

/* UMA zone that struct racct objects are allocated from and freed to. */
static uma_zone_t racct_zone;

/*
 * Prototypes for static helpers; the *_cred_locked() ones are called
 * before their definitions appear in this file.
 */
static void racct_sub_racct(struct racct *dest, const struct racct *src);
static void racct_sub_cred_locked(struct ucred *cred, int resource,
		uint64_t amount);
static void racct_add_cred_locked(struct ucred *cred, int resource,
		uint64_t amount);
77 
/*
 * SDT provider and probe definitions.  The "rusage" probes are fired by
 * the racct_add/set/sub family with (subject, resource, amount); the
 * "racct" create/destroy probes are fired by racct_create() and
 * racct_destroy_locked().
 */
SDT_PROVIDER_DEFINE(racct);
SDT_PROBE_DEFINE3(racct, kernel, rusage, add, add, "struct proc *", "int",
    "uint64_t");
SDT_PROBE_DEFINE3(racct, kernel, rusage, add_failure, add-failure,
    "struct proc *", "int", "uint64_t");
SDT_PROBE_DEFINE3(racct, kernel, rusage, add_cred, add-cred, "struct ucred *",
    "int", "uint64_t");
SDT_PROBE_DEFINE3(racct, kernel, rusage, add_force, add-force, "struct proc *",
    "int", "uint64_t");
SDT_PROBE_DEFINE3(racct, kernel, rusage, set, set, "struct proc *", "int",
    "uint64_t");
SDT_PROBE_DEFINE3(racct, kernel, rusage, set_failure, set-failure,
    "struct proc *", "int", "uint64_t");
SDT_PROBE_DEFINE3(racct, kernel, rusage, sub, sub, "struct proc *", "int",
    "uint64_t");
SDT_PROBE_DEFINE3(racct, kernel, rusage, sub_cred, sub-cred, "struct ucred *",
    "int", "uint64_t");
SDT_PROBE_DEFINE1(racct, kernel, racct, create, create, "struct racct *");
SDT_PROBE_DEFINE1(racct, kernel, racct, destroy, destroy, "struct racct *");
SDT_PROBE_DEFINE2(racct, kernel, racct, join, join, "struct racct *",
    "struct racct *");
SDT_PROBE_DEFINE2(racct, kernel, racct, join_failure, join-failure,
    "struct racct *", "struct racct *");
SDT_PROBE_DEFINE2(racct, kernel, racct, leave, leave, "struct racct *",
    "struct racct *");
103 
104 int racct_types[] = {
105 	[RACCT_CPU] =
106 		RACCT_IN_THOUSANDS,
107 	[RACCT_FSIZE] =
108 		RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
109 	[RACCT_DATA] =
110 		RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
111 	[RACCT_STACK] =
112 		RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
113 	[RACCT_CORE] =
114 		RACCT_DENIABLE,
115 	[RACCT_RSS] =
116 		RACCT_RECLAIMABLE,
117 	[RACCT_MEMLOCK] =
118 		RACCT_RECLAIMABLE | RACCT_DENIABLE,
119 	[RACCT_NPROC] =
120 		RACCT_RECLAIMABLE | RACCT_DENIABLE,
121 	[RACCT_NOFILE] =
122 		RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
123 	[RACCT_SBSIZE] =
124 		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
125 	[RACCT_VMEM] =
126 		RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
127 	[RACCT_NPTS] =
128 		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
129 	[RACCT_SWAP] =
130 		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
131 	[RACCT_NTHR] =
132 		RACCT_RECLAIMABLE | RACCT_DENIABLE,
133 	[RACCT_MSGQQUEUED] =
134 		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
135 	[RACCT_MSGQSIZE] =
136 		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
137 	[RACCT_NMSGQ] =
138 		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
139 	[RACCT_NSEM] =
140 		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
141 	[RACCT_NSEMOP] =
142 		RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
143 	[RACCT_NSHM] =
144 		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
145 	[RACCT_SHMSIZE] =
146 		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
147 	[RACCT_WALLCLOCK] =
148 		RACCT_IN_THOUSANDS };
149 
150 static void
151 racct_add_racct(struct racct *dest, const struct racct *src)
152 {
153 	int i;
154 
155 	mtx_assert(&racct_lock, MA_OWNED);
156 
157 	/*
158 	 * Update resource usage in dest.
159 	 */
160 	for (i = 0; i <= RACCT_MAX; i++) {
161 		KASSERT(dest->r_resources[i] >= 0,
162 		    ("racct propagation meltdown: dest < 0"));
163 		KASSERT(src->r_resources[i] >= 0,
164 		    ("racct propagation meltdown: src < 0"));
165 		dest->r_resources[i] += src->r_resources[i];
166 	}
167 }
168 
169 static void
170 racct_sub_racct(struct racct *dest, const struct racct *src)
171 {
172 	int i;
173 
174 	mtx_assert(&racct_lock, MA_OWNED);
175 
176 	/*
177 	 * Update resource usage in dest.
178 	 */
179 	for (i = 0; i <= RACCT_MAX; i++) {
180 		if (!racct_is_sloppy(i)) {
181 			KASSERT(dest->r_resources[i] >= 0,
182 			    ("racct propagation meltdown: dest < 0"));
183 			KASSERT(src->r_resources[i] >= 0,
184 			    ("racct propagation meltdown: src < 0"));
185 			KASSERT(src->r_resources[i] <= dest->r_resources[i],
186 			    ("racct propagation meltdown: src > dest"));
187 		}
188 		if (racct_is_reclaimable(i)) {
189 			dest->r_resources[i] -= src->r_resources[i];
190 			if (dest->r_resources[i] < 0) {
191 				KASSERT(racct_is_sloppy(i),
192 				    ("racct_sub_racct: usage < 0"));
193 				dest->r_resources[i] = 0;
194 			}
195 		}
196 	}
197 }
198 
199 void
200 racct_create(struct racct **racctp)
201 {
202 
203 	SDT_PROBE(racct, kernel, racct, create, racctp, 0, 0, 0, 0);
204 
205 	KASSERT(*racctp == NULL, ("racct already allocated"));
206 
207 	*racctp = uma_zalloc(racct_zone, M_WAITOK | M_ZERO);
208 }
209 
210 static void
211 racct_destroy_locked(struct racct **racctp)
212 {
213 	int i;
214 	struct racct *racct;
215 
216 	SDT_PROBE(racct, kernel, racct, destroy, racctp, 0, 0, 0, 0);
217 
218 	mtx_assert(&racct_lock, MA_OWNED);
219 	KASSERT(racctp != NULL, ("NULL racctp"));
220 	KASSERT(*racctp != NULL, ("NULL racct"));
221 
222 	racct = *racctp;
223 
224 	for (i = 0; i <= RACCT_MAX; i++) {
225 		if (racct_is_sloppy(i))
226 			continue;
227 		if (!racct_is_reclaimable(i))
228 			continue;
229 		KASSERT(racct->r_resources[i] == 0,
230 		    ("destroying non-empty racct: "
231 		    "%ju allocated for resource %d\n",
232 		    racct->r_resources[i], i));
233 	}
234 	uma_zfree(racct_zone, racct);
235 	*racctp = NULL;
236 }
237 
238 void
239 racct_destroy(struct racct **racct)
240 {
241 
242 	mtx_lock(&racct_lock);
243 	racct_destroy_locked(racct);
244 	mtx_unlock(&racct_lock);
245 }
246 
247 /*
248  * Increase consumption of 'resource' by 'amount' for 'racct'
249  * and all its parents.  Differently from other cases, 'amount' here
250  * may be less than zero.
251  */
252 static void
253 racct_alloc_resource(struct racct *racct, int resource,
254     uint64_t amount)
255 {
256 
257 	mtx_assert(&racct_lock, MA_OWNED);
258 	KASSERT(racct != NULL, ("NULL racct"));
259 
260 	racct->r_resources[resource] += amount;
261 	if (racct->r_resources[resource] < 0) {
262 		KASSERT(racct_is_sloppy(resource),
263 		    ("racct_alloc_resource: usage < 0"));
264 		racct->r_resources[resource] = 0;
265 	}
266 }
267 
268 /*
269  * Increase allocation of 'resource' by 'amount' for process 'p'.
270  * Return 0 if it's below limits, or errno, if it's not.
271  */
272 int
273 racct_add(struct proc *p, int resource, uint64_t amount)
274 {
275 #ifdef RCTL
276 	int error;
277 #endif
278 
279 	if (p->p_flag & P_SYSTEM)
280 		return (0);
281 
282 	SDT_PROBE(racct, kernel, rusage, add, p, resource, amount, 0, 0);
283 
284 	/*
285 	 * We need proc lock to dereference p->p_ucred.
286 	 */
287 	PROC_LOCK_ASSERT(p, MA_OWNED);
288 
289 	mtx_lock(&racct_lock);
290 #ifdef RCTL
291 	error = rctl_enforce(p, resource, amount);
292 	if (error && racct_is_deniable(resource)) {
293 		SDT_PROBE(racct, kernel, rusage, add_failure, p, resource,
294 		    amount, 0, 0);
295 		mtx_unlock(&racct_lock);
296 		return (error);
297 	}
298 #endif
299 	racct_alloc_resource(p->p_racct, resource, amount);
300 	racct_add_cred_locked(p->p_ucred, resource, amount);
301 	mtx_unlock(&racct_lock);
302 
303 	return (0);
304 }
305 
306 static void
307 racct_add_cred_locked(struct ucred *cred, int resource, uint64_t amount)
308 {
309 	struct prison *pr;
310 
311 	SDT_PROBE(racct, kernel, rusage, add_cred, cred, resource, amount,
312 	    0, 0);
313 
314 	racct_alloc_resource(cred->cr_ruidinfo->ui_racct, resource, amount);
315 	for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent)
316 		racct_alloc_resource(pr->pr_prison_racct->prr_racct, resource,
317 		    amount);
318 	racct_alloc_resource(cred->cr_loginclass->lc_racct, resource, amount);
319 }
320 
321 /*
322  * Increase allocation of 'resource' by 'amount' for credential 'cred'.
323  * Doesn't check for limits and never fails.
324  *
325  * XXX: Shouldn't this ever return an error?
326  */
327 void
328 racct_add_cred(struct ucred *cred, int resource, uint64_t amount)
329 {
330 
331 	mtx_lock(&racct_lock);
332 	racct_add_cred_locked(cred, resource, amount);
333 	mtx_unlock(&racct_lock);
334 }
335 
336 /*
337  * Increase allocation of 'resource' by 'amount' for process 'p'.
338  * Doesn't check for limits and never fails.
339  */
340 void
341 racct_add_force(struct proc *p, int resource, uint64_t amount)
342 {
343 
344 	if (p->p_flag & P_SYSTEM)
345 		return;
346 
347 	SDT_PROBE(racct, kernel, rusage, add_force, p, resource, amount, 0, 0);
348 
349 	/*
350 	 * We need proc lock to dereference p->p_ucred.
351 	 */
352 	PROC_LOCK_ASSERT(p, MA_OWNED);
353 
354 	mtx_lock(&racct_lock);
355 	racct_alloc_resource(p->p_racct, resource, amount);
356 	mtx_unlock(&racct_lock);
357 	racct_add_cred(p->p_ucred, resource, amount);
358 }
359 
360 static int
361 racct_set_locked(struct proc *p, int resource, uint64_t amount)
362 {
363 	int64_t diff;
364 #ifdef RCTL
365 	int error;
366 #endif
367 
368 	if (p->p_flag & P_SYSTEM)
369 		return (0);
370 
371 	SDT_PROBE(racct, kernel, rusage, set, p, resource, amount, 0, 0);
372 
373 	/*
374 	 * We need proc lock to dereference p->p_ucred.
375 	 */
376 	PROC_LOCK_ASSERT(p, MA_OWNED);
377 
378 	diff = amount - p->p_racct->r_resources[resource];
379 #ifdef notyet
380 	KASSERT(diff >= 0 || racct_is_reclaimable(resource),
381 	    ("racct_set: usage of non-reclaimable resource %d dropping",
382 	     resource));
383 #endif
384 #ifdef RCTL
385 	if (diff > 0) {
386 		error = rctl_enforce(p, resource, diff);
387 		if (error && racct_is_deniable(resource)) {
388 			SDT_PROBE(racct, kernel, rusage, set_failure, p,
389 			    resource, amount, 0, 0);
390 			return (error);
391 		}
392 	}
393 #endif
394 	racct_alloc_resource(p->p_racct, resource, diff);
395 	if (diff > 0)
396 		racct_add_cred_locked(p->p_ucred, resource, diff);
397 	else if (diff < 0)
398 		racct_sub_cred_locked(p->p_ucred, resource, -diff);
399 
400 	return (0);
401 }
402 
403 /*
404  * Set allocation of 'resource' to 'amount' for process 'p'.
405  * Return 0 if it's below limits, or errno, if it's not.
406  *
407  * Note that decreasing the allocation always returns 0,
408  * even if it's above the limit.
409  */
410 int
411 racct_set(struct proc *p, int resource, uint64_t amount)
412 {
413 	int error;
414 
415 	mtx_lock(&racct_lock);
416 	error = racct_set_locked(p, resource, amount);
417 	mtx_unlock(&racct_lock);
418 	return (error);
419 }
420 
421 void
422 racct_set_force(struct proc *p, int resource, uint64_t amount)
423 {
424 	int64_t diff;
425 
426 	if (p->p_flag & P_SYSTEM)
427 		return;
428 
429 	SDT_PROBE(racct, kernel, rusage, set, p, resource, amount, 0, 0);
430 
431 	/*
432 	 * We need proc lock to dereference p->p_ucred.
433 	 */
434 	PROC_LOCK_ASSERT(p, MA_OWNED);
435 
436 	mtx_lock(&racct_lock);
437 	diff = amount - p->p_racct->r_resources[resource];
438 	racct_alloc_resource(p->p_racct, resource, diff);
439 	if (diff > 0)
440 		racct_add_cred_locked(p->p_ucred, resource, diff);
441 	else if (diff < 0)
442 		racct_sub_cred_locked(p->p_ucred, resource, -diff);
443 	mtx_unlock(&racct_lock);
444 }
445 
/*
 * Returns amount of 'resource' the process 'p' can keep allocated.
 * Allocating more than that would be denied, unless the resource
 * is marked undeniable.  Amount of already allocated resource does
 * not matter.
 *
 * Without RCTL there are no limits and UINT64_MAX is returned.
 */
uint64_t
racct_get_limit(struct proc *p, int resource)
{
	uint64_t limit;

#ifdef RCTL
	limit = rctl_get_limit(p, resource);
#else
	limit = UINT64_MAX;
#endif
	return (limit);
}
462 
/*
 * Returns amount of 'resource' the process 'p' can keep allocated.
 * Allocating more than that would be denied, unless the resource
 * is marked undeniable.  Amount of already allocated resource does
 * matter.
 *
 * Without RCTL there are no limits and UINT64_MAX is returned.
 */
uint64_t
racct_get_available(struct proc *p, int resource)
{
	uint64_t avail;

#ifdef RCTL
	avail = rctl_get_available(p, resource);
#else
	avail = UINT64_MAX;
#endif
	return (avail);
}
479 
480 /*
481  * Decrease allocation of 'resource' by 'amount' for process 'p'.
482  */
483 void
484 racct_sub(struct proc *p, int resource, uint64_t amount)
485 {
486 
487 	if (p->p_flag & P_SYSTEM)
488 		return;
489 
490 	SDT_PROBE(racct, kernel, rusage, sub, p, resource, amount, 0, 0);
491 
492 	/*
493 	 * We need proc lock to dereference p->p_ucred.
494 	 */
495 	PROC_LOCK_ASSERT(p, MA_OWNED);
496 	KASSERT(racct_is_reclaimable(resource),
497 	    ("racct_sub: called for non-reclaimable resource %d", resource));
498 
499 	mtx_lock(&racct_lock);
500 	KASSERT(amount <= p->p_racct->r_resources[resource],
501 	    ("racct_sub: freeing %ju of resource %d, which is more "
502 	     "than allocated %jd for %s (pid %d)", amount, resource,
503 	    (intmax_t)p->p_racct->r_resources[resource], p->p_comm, p->p_pid));
504 
505 	racct_alloc_resource(p->p_racct, resource, -amount);
506 	racct_sub_cred_locked(p->p_ucred, resource, amount);
507 	mtx_unlock(&racct_lock);
508 }
509 
510 static void
511 racct_sub_cred_locked(struct ucred *cred, int resource, uint64_t amount)
512 {
513 	struct prison *pr;
514 
515 	SDT_PROBE(racct, kernel, rusage, sub_cred, cred, resource, amount,
516 	    0, 0);
517 
518 #ifdef notyet
519 	KASSERT(racct_is_reclaimable(resource),
520 	    ("racct_sub_cred: called for non-reclaimable resource %d",
521 	     resource));
522 #endif
523 
524 	racct_alloc_resource(cred->cr_ruidinfo->ui_racct, resource, -amount);
525 	for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent)
526 		racct_alloc_resource(pr->pr_prison_racct->prr_racct, resource,
527 		    -amount);
528 	racct_alloc_resource(cred->cr_loginclass->lc_racct, resource, -amount);
529 }
530 
531 /*
532  * Decrease allocation of 'resource' by 'amount' for credential 'cred'.
533  */
534 void
535 racct_sub_cred(struct ucred *cred, int resource, uint64_t amount)
536 {
537 
538 	mtx_lock(&racct_lock);
539 	racct_sub_cred_locked(cred, resource, amount);
540 	mtx_unlock(&racct_lock);
541 }
542 
543 /*
544  * Inherit resource usage information from the parent process.
545  */
546 int
547 racct_proc_fork(struct proc *parent, struct proc *child)
548 {
549 	int i, error = 0;
550 
551 	/*
552 	 * Create racct for the child process.
553 	 */
554 	racct_create(&child->p_racct);
555 
556 	/*
557 	 * No resource accounting for kernel processes.
558 	 */
559 	if (child->p_flag & P_SYSTEM)
560 		return (0);
561 
562 	PROC_LOCK(parent);
563 	PROC_LOCK(child);
564 	mtx_lock(&racct_lock);
565 
566 	/*
567 	 * Inherit resource usage.
568 	 */
569 	for (i = 0; i <= RACCT_MAX; i++) {
570 		if (parent->p_racct->r_resources[i] == 0 ||
571 		    !racct_is_inheritable(i))
572 			continue;
573 
574 		error = racct_set_locked(child, i,
575 		    parent->p_racct->r_resources[i]);
576 		if (error != 0) {
577 			/*
578 			 * XXX: The only purpose of these two lines is
579 			 * to prevent from tripping checks in racct_destroy().
580 			 */
581 			for (i = 0; i <= RACCT_MAX; i++)
582 				racct_set_locked(child, i, 0);
583 			goto out;
584 		}
585 	}
586 
587 #ifdef RCTL
588 	error = rctl_proc_fork(parent, child);
589 	if (error != 0) {
590 		/*
591 		 * XXX: The only purpose of these two lines is to prevent from
592 		 * tripping checks in racct_destroy().
593 		 */
594 		for (i = 0; i <= RACCT_MAX; i++)
595 			racct_set_locked(child, i, 0);
596 	}
597 #endif
598 
599 out:
600 	if (error != 0)
601 		racct_destroy_locked(&child->p_racct);
602 	mtx_unlock(&racct_lock);
603 	PROC_UNLOCK(child);
604 	PROC_UNLOCK(parent);
605 
606 	return (error);
607 }
608 
609 void
610 racct_proc_exit(struct proc *p)
611 {
612 	uint64_t runtime;
613 
614 	PROC_LOCK(p);
615 	/*
616 	 * We don't need to calculate rux, proc_reap() has already done this.
617 	 */
618 	runtime = cputick2usec(p->p_rux.rux_runtime);
619 #ifdef notyet
620 	KASSERT(runtime >= p->p_prev_runtime, ("runtime < p_prev_runtime"));
621 #else
622 	if (runtime < p->p_prev_runtime)
623 		runtime = p->p_prev_runtime;
624 #endif
625 	racct_set(p, RACCT_CPU, runtime);
626 
627 	/*
628 	 * XXX: Free this some other way.
629 	 */
630 	racct_set(p, RACCT_FSIZE, 0);
631 	racct_set(p, RACCT_NPTS, 0);
632 	racct_set(p, RACCT_NTHR, 0);
633 	racct_set(p, RACCT_RSS, 0);
634 	PROC_UNLOCK(p);
635 
636 #ifdef RCTL
637 	rctl_racct_release(p->p_racct);
638 #endif
639 	racct_destroy(&p->p_racct);
640 }
641 
642 /*
643  * Called after credentials change, to move resource utilisation
644  * between raccts.
645  */
646 void
647 racct_proc_ucred_changed(struct proc *p, struct ucred *oldcred,
648     struct ucred *newcred)
649 {
650 	struct uidinfo *olduip, *newuip;
651 	struct loginclass *oldlc, *newlc;
652 	struct prison *oldpr, *newpr, *pr;
653 
654 	PROC_LOCK_ASSERT(p, MA_NOTOWNED);
655 
656 	newuip = newcred->cr_ruidinfo;
657 	olduip = oldcred->cr_ruidinfo;
658 	newlc = newcred->cr_loginclass;
659 	oldlc = oldcred->cr_loginclass;
660 	newpr = newcred->cr_prison;
661 	oldpr = oldcred->cr_prison;
662 
663 	mtx_lock(&racct_lock);
664 	if (newuip != olduip) {
665 		racct_sub_racct(olduip->ui_racct, p->p_racct);
666 		racct_add_racct(newuip->ui_racct, p->p_racct);
667 	}
668 	if (newlc != oldlc) {
669 		racct_sub_racct(oldlc->lc_racct, p->p_racct);
670 		racct_add_racct(newlc->lc_racct, p->p_racct);
671 	}
672 	if (newpr != oldpr) {
673 		for (pr = oldpr; pr != NULL; pr = pr->pr_parent)
674 			racct_sub_racct(pr->pr_prison_racct->prr_racct,
675 			    p->p_racct);
676 		for (pr = newpr; pr != NULL; pr = pr->pr_parent)
677 			racct_add_racct(pr->pr_prison_racct->prr_racct,
678 			    p->p_racct);
679 	}
680 	mtx_unlock(&racct_lock);
681 
682 #ifdef RCTL
683 	rctl_proc_ucred_changed(p, newcred);
684 #endif
685 }
686 
687 static void
688 racctd(void)
689 {
690 	struct thread *td;
691 	struct proc *p;
692 	struct timeval wallclock;
693 	uint64_t runtime;
694 
695 	for (;;) {
696 		sx_slock(&allproc_lock);
697 
698 		FOREACH_PROC_IN_SYSTEM(p) {
699 			if (p->p_state != PRS_NORMAL)
700 				continue;
701 			if (p->p_flag & P_SYSTEM)
702 				continue;
703 
704 			microuptime(&wallclock);
705 			timevalsub(&wallclock, &p->p_stats->p_start);
706 			PROC_LOCK(p);
707 			PROC_SLOCK(p);
708 			FOREACH_THREAD_IN_PROC(p, td) {
709 				ruxagg(p, td);
710 				thread_lock(td);
711 				thread_unlock(td);
712 			}
713 			runtime = cputick2usec(p->p_rux.rux_runtime);
714 			PROC_SUNLOCK(p);
715 #ifdef notyet
716 			KASSERT(runtime >= p->p_prev_runtime,
717 			    ("runtime < p_prev_runtime"));
718 #else
719 			if (runtime < p->p_prev_runtime)
720 				runtime = p->p_prev_runtime;
721 #endif
722 			p->p_prev_runtime = runtime;
723 			mtx_lock(&racct_lock);
724 			racct_set_locked(p, RACCT_CPU, runtime);
725 			racct_set_locked(p, RACCT_WALLCLOCK,
726 			    wallclock.tv_sec * 1000000 + wallclock.tv_usec);
727 			mtx_unlock(&racct_lock);
728 			PROC_UNLOCK(p);
729 		}
730 		sx_sunlock(&allproc_lock);
731 		pause("-", hz);
732 	}
733 }
734 
/*
 * Descriptor naming the "racctd" kernel process and its entry point,
 * racctd(); it is handed to kproc_start() by the SYSINIT below.
 */
static struct kproc_desc racctd_kp = {
	"racctd",
	racctd,
	NULL
};
SYSINIT(racctd, SI_SUB_RACCTD, SI_ORDER_FIRST, kproc_start, &racctd_kp);
741 
742 static void
743 racct_init(void)
744 {
745 
746 	racct_zone = uma_zcreate("racct", sizeof(struct racct),
747 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
748 	/*
749 	 * XXX: Move this somewhere.
750 	 */
751 	prison0.pr_prison_racct = prison_racct_find("0");
752 }
753 SYSINIT(racct, SI_SUB_RACCT, SI_ORDER_FIRST, racct_init, NULL);
754 
755 #else /* !RACCT */
756 
/*
 * RACCT not compiled in: nothing is accounted, so every request succeeds.
 */
int
racct_add(struct proc *p, int resource, uint64_t amount)
{

	return (0);
}
763 
/*
 * RACCT not compiled in: no-op.
 */
void
racct_add_cred(struct ucred *cred, int resource, uint64_t amount)
{
}
768 
/*
 * RACCT not compiled in: no-op.
 */
void
racct_add_force(struct proc *p, int resource, uint64_t amount)
{
}
775 
/*
 * RACCT not compiled in: nothing is accounted, so every request succeeds.
 */
int
racct_set(struct proc *p, int resource, uint64_t amount)
{

	return (0);
}
782 
/*
 * RACCT not compiled in: no-op.
 */
void
racct_set_force(struct proc *p, int resource, uint64_t amount)
{
}
787 
/*
 * RACCT not compiled in: no-op.
 */
void
racct_sub(struct proc *p, int resource, uint64_t amount)
{
}
792 
/*
 * RACCT not compiled in: no-op.
 */
void
racct_sub_cred(struct ucred *cred, int resource, uint64_t amount)
{
}
797 
/*
 * RACCT not compiled in: there are no limits, report UINT64_MAX.
 */
uint64_t
racct_get_limit(struct proc *p, int resource)
{

	return (UINT64_MAX);
}
804 
/*
 * RACCT not compiled in: there are no limits, report UINT64_MAX.
 */
uint64_t
racct_get_available(struct proc *p, int resource)
{

	return (UINT64_MAX);
}
811 
/*
 * RACCT not compiled in: no racct is allocated; '*racctp' is left alone.
 */
void
racct_create(struct racct **racctp)
{
}
816 
/*
 * RACCT not compiled in: nothing was allocated, so nothing is freed.
 */
void
racct_destroy(struct racct **racctp)
{
}
821 
/*
 * RACCT not compiled in: nothing to inherit, fork accounting always
 * succeeds.
 */
int
racct_proc_fork(struct proc *parent, struct proc *child)
{

	return (0);
}
828 
/*
 * RACCT not compiled in: no-op.
 */
void
racct_proc_exit(struct proc *p)
{
}
833 
834 #endif /* !RACCT */
835