xref: /freebsd/sys/kern/kern_resource.c (revision 77b7cdf1999ee965ad494fddd184b18f532ac91a)
1 /*-
2  * Copyright (c) 1982, 1986, 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the University of
21  *	California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  *	@(#)kern_resource.c	8.5 (Berkeley) 1/21/94
39  * $FreeBSD$
40  */
41 
42 #include "opt_compat.h"
43 
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/sysproto.h>
47 #include <sys/file.h>
48 #include <sys/kernel.h>
49 #include <sys/lock.h>
50 #include <sys/malloc.h>
51 #include <sys/mutex.h>
52 #include <sys/proc.h>
53 #include <sys/resourcevar.h>
54 #include <sys/sched.h>
55 #include <sys/sx.h>
56 #include <sys/sysent.h>
57 #include <sys/time.h>
58 
59 #include <vm/vm.h>
60 #include <vm/vm_param.h>
61 #include <vm/pmap.h>
62 #include <vm/vm_map.h>
63 
64 static int donice(struct thread *td, struct proc *chgp, int n);
65 
66 static MALLOC_DEFINE(M_UIDINFO, "uidinfo", "uidinfo structures");
67 #define	UIHASH(uid)	(&uihashtbl[(uid) & uihash])
68 static struct mtx uihashtbl_mtx;
69 static LIST_HEAD(uihashhead, uidinfo) *uihashtbl;
70 static u_long uihash;		/* size of hash table - 1 */
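/*
 * Illustrative note: uihashinit() below uses hashinit(), which sizes the
 * table to a power of two and stores (bucket count - 1) in uihash, so
 * "(uid) & uihash" acts as a cheap uid % bucket_count.  For example, with
 * 16 buckets (uihash == 15), uid 1003 (binary 1111101011) hashes to
 * bucket 1003 & 15 == 11.
 */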
71 
72 static struct uidinfo	*uilookup(uid_t uid);
73 
74 /*
75  * Resource controls and accounting.
76  */
77 
78 #ifndef _SYS_SYSPROTO_H_
79 struct getpriority_args {
80 	int	which;
81 	int	who;
82 };
83 #endif
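/*
 * getpriority() returns the lowest (most favorable) nice value found among
 * the processes selected by 'which'/'who'; who == 0 selects the caller's
 * own process, process group or uid respectively.
 */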
84 /*
85  * MPSAFE
86  */
87 int
88 getpriority(td, uap)
89 	struct thread *td;
90 	register struct getpriority_args *uap;
91 {
92 	struct proc *p;
93 	int low = PRIO_MAX + 1;
94 	int error = 0;
95 	struct ksegrp *kg;
96 
97 	switch (uap->which) {
98 	case PRIO_PROCESS:
99 		if (uap->who == 0)
100 			low = td->td_ksegrp->kg_nice;
101 		else {
102 			p = pfind(uap->who);
103 			if (p == NULL)
104 				break;
105 			if (p_cansee(td, p) == 0) {
106 				FOREACH_KSEGRP_IN_PROC(p, kg) {
107 					if (kg->kg_nice < low)
108 						low = kg->kg_nice;
109 				}
110 			}
111 			PROC_UNLOCK(p);
112 		}
113 		break;
114 
115 	case PRIO_PGRP: {
116 		register struct pgrp *pg;
117 
118 		sx_slock(&proctree_lock);
119 		if (uap->who == 0) {
120 			pg = td->td_proc->p_pgrp;
121 			PGRP_LOCK(pg);
122 		} else {
123 			pg = pgfind(uap->who);
124 			if (pg == NULL) {
125 				sx_sunlock(&proctree_lock);
126 				break;
127 			}
128 		}
129 		sx_sunlock(&proctree_lock);
130 		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
131 			PROC_LOCK(p);
132 			if (!p_cansee(td, p)) {
133 				FOREACH_KSEGRP_IN_PROC(p, kg) {
134 					if (kg->kg_nice < low)
135 						low = kg->kg_nice;
136 				}
137 			}
138 			PROC_UNLOCK(p);
139 		}
140 		PGRP_UNLOCK(pg);
141 		break;
142 	}
143 
144 	case PRIO_USER:
145 		if (uap->who == 0)
146 			uap->who = td->td_ucred->cr_uid;
147 		sx_slock(&allproc_lock);
148 		LIST_FOREACH(p, &allproc, p_list) {
149 			PROC_LOCK(p);
150 			if (!p_cansee(td, p) &&
151 			    p->p_ucred->cr_uid == uap->who) {
152 				FOREACH_KSEGRP_IN_PROC(p, kg) {
153 					if (kg->kg_nice < low)
154 						low = kg->kg_nice;
155 				}
156 			}
157 			PROC_UNLOCK(p);
158 		}
159 		sx_sunlock(&allproc_lock);
160 		break;
161 
162 	default:
163 		error = EINVAL;
164 		break;
165 	}
166 	if (low == PRIO_MAX + 1 && error == 0)
167 		error = ESRCH;
168 	td->td_retval[0] = low;
169 	return (error);
170 }
171 
172 #ifndef _SYS_SYSPROTO_H_
173 struct setpriority_args {
174 	int	which;
175 	int	who;
176 	int	prio;
177 };
178 #endif
179 /*
180  * MPSAFE
181  */
182 /* ARGSUSED */
183 int
184 setpriority(td, uap)
185 	struct thread *td;
186 	register struct setpriority_args *uap;
187 {
188 	struct proc *curp = td->td_proc;
189 	register struct proc *p;
190 	int found = 0, error = 0;
191 
192 	switch (uap->which) {
193 	case PRIO_PROCESS:
194 		if (uap->who == 0) {
195 			PROC_LOCK(curp);
196 			error = donice(td, curp, uap->prio);
197 			PROC_UNLOCK(curp);
198 		} else {
199 			p = pfind(uap->who);
200 			if (p == NULL)
201 				break;
202 			if (p_cansee(td, p) == 0)
203 				error = donice(td, p, uap->prio);
204 			PROC_UNLOCK(p);
205 		}
206 		found++;
207 		break;
208 
209 	case PRIO_PGRP: {
210 		register struct pgrp *pg;
211 
212 		sx_slock(&proctree_lock);
213 		if (uap->who == 0) {
214 			pg = curp->p_pgrp;
215 			PGRP_LOCK(pg);
216 		} else {
217 			pg = pgfind(uap->who);
218 			if (pg == NULL) {
219 				sx_sunlock(&proctree_lock);
220 				break;
221 			}
222 		}
223 		sx_sunlock(&proctree_lock);
224 		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
225 			PROC_LOCK(p);
226 			if (!p_cansee(td, p)) {
227 				error = donice(td, p, uap->prio);
228 				found++;
229 			}
230 			PROC_UNLOCK(p);
231 		}
232 		PGRP_UNLOCK(pg);
233 		break;
234 	}
235 
236 	case PRIO_USER:
237 		if (uap->who == 0)
238 			uap->who = td->td_ucred->cr_uid;
239 		sx_slock(&allproc_lock);
240 		FOREACH_PROC_IN_SYSTEM(p) {
241 			PROC_LOCK(p);
242 			if (p->p_ucred->cr_uid == uap->who &&
243 			    !p_cansee(td, p)) {
244 				error = donice(td, p, uap->prio);
245 				found++;
246 			}
247 			PROC_UNLOCK(p);
248 		}
249 		sx_sunlock(&allproc_lock);
250 		break;
251 
252 	default:
253 		error = EINVAL;
254 		break;
255 	}
256 	if (found == 0 && error == 0)
257 		error = ESRCH;
258 	return (error);
259 }
260 
261 /*
262  * Set "nice" for a process.  Doesn't really understand threaded processes
263  * well, but does try.  Has the unfortunate side effect of making all the
264  * NICE values for a process's ksegrps the same.  This suggests that the
265  * NICE value should be stored as a per-process nice plus per-ksegrp deltas
266  * (but that is not done yet).
267  */
268 static int
269 donice(struct thread *td, struct proc *p, int n)
270 {
271 	int	error;
272 	int low = PRIO_MAX + 1;
273 	struct ksegrp *kg;
274 
275 	PROC_LOCK_ASSERT(p, MA_OWNED);
276 	if ((error = p_cansched(td, p)))
277 		return (error);
278 	if (n > PRIO_MAX)
279 		n = PRIO_MAX;
280 	if (n < PRIO_MIN)
281 		n = PRIO_MIN;
282 	/*
283 	 * Unprivileged callers may only set nice to the lowest current nice
284 	 * or above; e.g. nices of 4, 3, 2 allow a nice to 3 but not to 1.
285 	 */
286 	FOREACH_KSEGRP_IN_PROC(p, kg) {
287 		if (kg->kg_nice < low)
288 			low = kg->kg_nice;
289 	}
290  	if (n < low && suser(td))
291 		return (EACCES);
292 	mtx_lock_spin(&sched_lock);
293 	FOREACH_KSEGRP_IN_PROC(p, kg) {
294 		sched_nice(kg, n);
295 	}
296 	mtx_unlock_spin(&sched_lock);
297 	return (0);
298 }
299 
300 /* rtprio system call */
301 #ifndef _SYS_SYSPROTO_H_
302 struct rtprio_args {
303 	int		function;
304 	pid_t		pid;
305 	struct rtprio	*rtp;
306 };
307 #endif
308 
309 /*
310  * Set realtime priority
311  */
312 
313 /*
314  * MPSAFE
315  */
316 /* ARGSUSED */
317 int
318 rtprio(td, uap)
319 	struct thread *td;
320 	register struct rtprio_args *uap;
321 {
322 	struct proc *curp = td->td_proc;
323 	register struct proc *p;
324 	struct rtprio rtp;
325 	int error, cierror = 0;
326 
327 	/* Perform copyin before acquiring locks if needed. */
328 	if (uap->function == RTP_SET)
329 		cierror = copyin(uap->rtp, &rtp, sizeof(struct rtprio));
330 
331 	if (uap->pid == 0) {
332 		p = curp;
333 		PROC_LOCK(p);
334 	} else {
335 		p = pfind(uap->pid);
336 		if (p == NULL)
337 			return (ESRCH);
338 	}
339 
340 	switch (uap->function) {
341 	case RTP_LOOKUP:
342 		if ((error = p_cansee(td, p)))
343 			break;
344 		mtx_lock_spin(&sched_lock);
345 		pri_to_rtp(FIRST_KSEGRP_IN_PROC(p), &rtp);
346 		mtx_unlock_spin(&sched_lock);
347 		PROC_UNLOCK(p);
348 		return (copyout(&rtp, uap->rtp, sizeof(struct rtprio)));
349 	case RTP_SET:
350 		if ((error = p_cansched(td, p)) || (error = cierror))
351 			break;
352 		/* disallow setting rtprio in most cases if not superuser */
353 		if (suser(td) != 0) {
354 			/* can't set someone else's */
355 			if (uap->pid) {
356 				error = EPERM;
357 				break;
358 			}
359 			/* can't set realtime priority */
360 /*
361  * Realtime priority has to be restricted for reasons which should be
362  * obvious. However, for idle priority, there is a potential for
363  * system deadlock if an idleprio process gains a lock on a resource
364  * that other processes need (and the idleprio process can't run
365  * due to a CPU-bound normal process). Fix me! XXX
366  */
367 #if 0
368  			if (RTP_PRIO_IS_REALTIME(rtp.type))
369 #endif
370 			if (rtp.type != RTP_PRIO_NORMAL) {
371 				error = EPERM;
372 				break;
373 			}
374 		}
375 		mtx_lock_spin(&sched_lock);
376 		error = rtp_to_pri(&rtp, FIRST_KSEGRP_IN_PROC(p));
377 		mtx_unlock_spin(&sched_lock);
378 		break;
379 	default:
380 		error = EINVAL;
381 		break;
382 	}
383 	PROC_UNLOCK(p);
384 	return (error);
385 }
386 
387 int
388 rtp_to_pri(struct rtprio *rtp, struct ksegrp *kg)
389 {
390 
391 	mtx_assert(&sched_lock, MA_OWNED);
392 	if (rtp->prio > RTP_PRIO_MAX)
393 		return (EINVAL);
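	/*
	 * Each rtprio class maps onto a contiguous band of kernel priorities;
	 * e.g. {RTP_PRIO_REALTIME, prio} becomes PRI_MIN_REALTIME + prio.
	 */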
394 	switch (RTP_PRIO_BASE(rtp->type)) {
395 	case RTP_PRIO_REALTIME:
396 		kg->kg_user_pri = PRI_MIN_REALTIME + rtp->prio;
397 		break;
398 	case RTP_PRIO_NORMAL:
399 		kg->kg_user_pri = PRI_MIN_TIMESHARE + rtp->prio;
400 		break;
401 	case RTP_PRIO_IDLE:
402 		kg->kg_user_pri = PRI_MIN_IDLE + rtp->prio;
403 		break;
404 	default:
405 		return (EINVAL);
406 	}
407 	sched_class(kg, rtp->type);
408 	if (curthread->td_ksegrp == kg) {
409 		curthread->td_base_pri = kg->kg_user_pri;
410 		curthread->td_priority = kg->kg_user_pri; /* XXX dubious */
411 	}
412 	return (0);
413 }
414 
415 void
416 pri_to_rtp(struct ksegrp *kg, struct rtprio *rtp)
417 {
418 
419 	mtx_assert(&sched_lock, MA_OWNED);
420 	switch (PRI_BASE(kg->kg_pri_class)) {
421 	case PRI_REALTIME:
422 		rtp->prio = kg->kg_user_pri - PRI_MIN_REALTIME;
423 		break;
424 	case PRI_TIMESHARE:
425 		rtp->prio = kg->kg_user_pri - PRI_MIN_TIMESHARE;
426 		break;
427 	case PRI_IDLE:
428 		rtp->prio = kg->kg_user_pri - PRI_MIN_IDLE;
429 		break;
430 	default:
431 		break;
432 	}
433 	rtp->type = kg->kg_pri_class;
434 }
435 
436 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
437 #ifndef _SYS_SYSPROTO_H_
438 struct osetrlimit_args {
439 	u_int	which;
440 	struct	orlimit *rlp;
441 };
442 #endif
443 /*
444  * MPSAFE
445  */
446 /* ARGSUSED */
447 int
448 osetrlimit(td, uap)
449 	struct thread *td;
450 	register struct osetrlimit_args *uap;
451 {
452 	struct orlimit olim;
453 	struct rlimit lim;
454 	int error;
455 
456 	if ((error = copyin(uap->rlp, &olim, sizeof(struct orlimit))))
457 		return (error);
458 	lim.rlim_cur = olim.rlim_cur;
459 	lim.rlim_max = olim.rlim_max;
460 	mtx_lock(&Giant);
461 	error = dosetrlimit(td, uap->which, &lim);
462 	mtx_unlock(&Giant);
463 	return (error);
464 }
465 
466 #ifndef _SYS_SYSPROTO_H_
467 struct ogetrlimit_args {
468 	u_int	which;
469 	struct	orlimit *rlp;
470 };
471 #endif
472 /*
473  * MPSAFE
474  */
475 /* ARGSUSED */
476 int
477 ogetrlimit(td, uap)
478 	struct thread *td;
479 	register struct ogetrlimit_args *uap;
480 {
481 	struct proc *p = td->td_proc;
482 	struct orlimit olim;
483 	int error;
484 
485 	if (uap->which >= RLIM_NLIMITS)
486 		return (EINVAL);
487 	mtx_lock(&Giant);
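	/*
	 * The old interface's orlimit fields are narrower than rlim_t (an
	 * assumption based on the clamping below), so an infinite limit (-1)
	 * is reported as 0x7fffffff rather than being sign-extended.
	 */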
488 	olim.rlim_cur = p->p_rlimit[uap->which].rlim_cur;
489 	if (olim.rlim_cur == -1)
490 		olim.rlim_cur = 0x7fffffff;
491 	olim.rlim_max = p->p_rlimit[uap->which].rlim_max;
492 	if (olim.rlim_max == -1)
493 		olim.rlim_max = 0x7fffffff;
494 	error = copyout(&olim, uap->rlp, sizeof(olim));
495 	mtx_unlock(&Giant);
496 	return (error);
497 }
498 #endif /* COMPAT_43 || COMPAT_SUNOS */
499 
500 #ifndef _SYS_SYSPROTO_H_
501 struct __setrlimit_args {
502 	u_int	which;
503 	struct	rlimit *rlp;
504 };
505 #endif
506 /*
507  * MPSAFE
508  */
509 /* ARGSUSED */
510 int
511 setrlimit(td, uap)
512 	struct thread *td;
513 	register struct __setrlimit_args *uap;
514 {
515 	struct rlimit alim;
516 	int error;
517 
518 	if ((error = copyin(uap->rlp, &alim, sizeof (struct rlimit))))
519 		return (error);
520 	mtx_lock(&Giant);
521 	error = dosetrlimit(td, uap->which, &alim);
522 	mtx_unlock(&Giant);
523 	return (error);
524 }
525 
526 int
527 dosetrlimit(td, which, limp)
528 	struct thread *td;
529 	u_int which;
530 	struct rlimit *limp;
531 {
532 	struct proc *p = td->td_proc;
533 	register struct rlimit *alimp;
534 	int error;
535 
536 	GIANT_REQUIRED;
537 
538 	if (which >= RLIM_NLIMITS)
539 		return (EINVAL);
540 	alimp = &p->p_rlimit[which];
541 
542 	/*
543 	 * Preserve historical bugs by treating negative limits as unsigned.
544 	 */
545 	if (limp->rlim_cur < 0)
546 		limp->rlim_cur = RLIM_INFINITY;
547 	if (limp->rlim_max < 0)
548 		limp->rlim_max = RLIM_INFINITY;
549 
550 	if (limp->rlim_cur > alimp->rlim_max ||
551 	    limp->rlim_max > alimp->rlim_max)
552 		if ((error = suser_cred(td->td_ucred, PRISON_ROOT)))
553 			return (error);
554 	if (limp->rlim_cur > limp->rlim_max)
555 		limp->rlim_cur = limp->rlim_max;
556 	if (p->p_limit->p_refcnt > 1) {
557 		p->p_limit->p_refcnt--;
558 		p->p_limit = limcopy(p->p_limit);
559 		alimp = &p->p_rlimit[which];
560 	}
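	/*
	 * Note: p_limit is shared copy-on-write across fork() (see limcopy()
	 * below), so the un-sharing above keeps this change local to the
	 * current process.
	 */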
561 
562 	switch (which) {
563 
564 	case RLIMIT_CPU:
565 		mtx_lock_spin(&sched_lock);
566 		p->p_cpulimit = limp->rlim_cur;
567 		mtx_unlock_spin(&sched_lock);
568 		break;
569 	case RLIMIT_DATA:
570 		if (limp->rlim_cur > maxdsiz)
571 			limp->rlim_cur = maxdsiz;
572 		if (limp->rlim_max > maxdsiz)
573 			limp->rlim_max = maxdsiz;
574 		break;
575 
576 	case RLIMIT_STACK:
577 		if (limp->rlim_cur > maxssiz)
578 			limp->rlim_cur = maxssiz;
579 		if (limp->rlim_max > maxssiz)
580 			limp->rlim_max = maxssiz;
581 		/*
582 		 * Stack is allocated to the max at exec time with only
583 		 * "rlim_cur" bytes accessible.  If the limit is being raised,
584 		 * make more accessible; if lowered, make the excess inaccessible.
585 		 */
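		/*
		 * Illustrative example (numbers are hypothetical): if the soft
		 * limit grows from 8MB to 16MB, the code below maps
		 * [sv_usrstack - 16MB, sv_usrstack - 8MB) with the ABI's stack
		 * protection; shrinking instead marks the vacated range
		 * VM_PROT_NONE.
		 */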
586 		if (limp->rlim_cur != alimp->rlim_cur) {
587 			vm_offset_t addr;
588 			vm_size_t size;
589 			vm_prot_t prot;
590 
591 			if (limp->rlim_cur > alimp->rlim_cur) {
592 				prot = p->p_sysent->sv_stackprot;
593 				size = limp->rlim_cur - alimp->rlim_cur;
594 				addr = p->p_sysent->sv_usrstack -
595 				    limp->rlim_cur;
596 			} else {
597 				prot = VM_PROT_NONE;
598 				size = alimp->rlim_cur - limp->rlim_cur;
599 				addr = p->p_sysent->sv_usrstack -
600 				    alimp->rlim_cur;
601 			}
602 			addr = trunc_page(addr);
603 			size = round_page(size);
604 			(void) vm_map_protect(&p->p_vmspace->vm_map,
605 					      addr, addr+size, prot, FALSE);
606 		}
607 		break;
608 
609 	case RLIMIT_NOFILE:
610 		if (limp->rlim_cur > maxfilesperproc)
611 			limp->rlim_cur = maxfilesperproc;
612 		if (limp->rlim_max > maxfilesperproc)
613 			limp->rlim_max = maxfilesperproc;
614 		break;
615 
616 	case RLIMIT_NPROC:
617 		if (limp->rlim_cur > maxprocperuid)
618 			limp->rlim_cur = maxprocperuid;
619 		if (limp->rlim_max > maxprocperuid)
620 			limp->rlim_max = maxprocperuid;
621 		if (limp->rlim_cur < 1)
622 			limp->rlim_cur = 1;
623 		if (limp->rlim_max < 1)
624 			limp->rlim_max = 1;
625 		break;
626 	}
627 	*alimp = *limp;
628 	return (0);
629 }
630 
631 #ifndef _SYS_SYSPROTO_H_
632 struct __getrlimit_args {
633 	u_int	which;
634 	struct	rlimit *rlp;
635 };
636 #endif
637 /*
638  * MPSAFE
639  */
640 /* ARGSUSED */
641 int
642 getrlimit(td, uap)
643 	struct thread *td;
644 	register struct __getrlimit_args *uap;
645 {
646 	int error;
647 	struct proc *p = td->td_proc;
648 
649 	if (uap->which >= RLIM_NLIMITS)
650 		return (EINVAL);
651 	mtx_lock(&Giant);
652 	error = copyout(&p->p_rlimit[uap->which], uap->rlp,
653 		    sizeof (struct rlimit));
654 	mtx_unlock(&Giant);
655 	return(error);
656 }
657 
658 /*
659  * Transform the running time and tick information in proc p into user,
660  * system, and interrupt time usage.
661  */
662 void
663 calcru(p, up, sp, ip)
664 	struct proc *p;
665 	struct timeval *up;
666 	struct timeval *sp;
667 	struct timeval *ip;
668 {
669 	/* {user, system, interrupt, total} {ticks, usec}; previous tu: */
670 	u_int64_t ut, uu, st, su, it, iu, tt, tu, ptu;
671 	struct timeval tv;
672 	struct bintime bt;
673 
674 	mtx_assert(&sched_lock, MA_OWNED);
675 	/* XXX: why spl-protect ?  worst case is an off-by-one report */
676 
677 	ut = p->p_uticks;
678 	st = p->p_sticks;
679 	it = p->p_iticks;
680 
681 	tt = ut + st + it;
682 	if (tt == 0) {
683 		st = 1;
684 		tt = 1;
685 	}
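	/*
	 * (The fudge above charges one system tick when none were recorded,
	 * so the proportional split below never divides by zero.)
	 */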
686 
687 	if (curthread->td_proc == p) {
688 		/*
689 		 * Adjust for the current time slice.  This is actually fairly
690 		 * important since the error here is on the order of a time
691 		 * quantum, which is much greater than the sampling error.
692 		 * XXXKSE use a different test due to threads on other
693 		 * processors also being 'current'.
694 		 */
695 
696 		binuptime(&bt);
697 		bintime_sub(&bt, PCPU_PTR(switchtime));
698 		bintime_add(&bt, &p->p_runtime);
699 	} else {
700 		bt = p->p_runtime;
701 	}
702 	bintime2timeval(&bt, &tv);
703 	tu = (u_int64_t)tv.tv_sec * 1000000 + tv.tv_usec;
704 	ptu = p->p_uu + p->p_su + p->p_iu;
705 	if (tu < ptu || (int64_t)tu < 0) {
706 		/* XXX no %qd in kernel.  Truncate. */
707 		printf("calcru: negative time of %ld usec for pid %d (%s)\n",
708 		       (long)tu, p->p_pid, p->p_comm);
709 		tu = ptu;
710 	}
711 
712 	/* Subdivide tu. */
713 	uu = (tu * ut) / tt;
714 	su = (tu * st) / tt;
715 	iu = tu - uu - su;
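	/*
	 * Illustrative example: with tu = 3000000us of wall time and tick
	 * counts ut = 2, st = 1, it = 0 (tt = 3), the split gives
	 * uu = 2000000, su = 1000000 and iu = 0.
	 */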
716 
717 	/* Enforce monotonicity. */
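	/*
	 * (The per-category totals are cached in p_uu/p_su/p_iu, so the values
	 * reported to userland must never appear to run backwards between
	 * successive calls.)
	 */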
718 	if (uu < p->p_uu || su < p->p_su || iu < p->p_iu) {
719 		if (uu < p->p_uu)
720 			uu = p->p_uu;
721 		else if (uu + p->p_su + p->p_iu > tu)
722 			uu = tu - p->p_su - p->p_iu;
723 		if (st == 0)
724 			su = p->p_su;
725 		else {
726 			su = ((tu - uu) * st) / (st + it);
727 			if (su < p->p_su)
728 				su = p->p_su;
729 			else if (uu + su + p->p_iu > tu)
730 				su = tu - uu - p->p_iu;
731 		}
732 		KASSERT(uu + su + p->p_iu <= tu,
733 		    	("calcru: monotonisation botch 1"));
734 		iu = tu - uu - su;
735 		KASSERT(iu >= p->p_iu,
736 		    	("calcru: monotonisation botch 2"));
737 	}
738 	p->p_uu = uu;
739 	p->p_su = su;
740 	p->p_iu = iu;
741 
742 	up->tv_sec = uu / 1000000;
743 	up->tv_usec = uu % 1000000;
744 	sp->tv_sec = su / 1000000;
745 	sp->tv_usec = su % 1000000;
746 	if (ip != NULL) {
747 		ip->tv_sec = iu / 1000000;
748 		ip->tv_usec = iu % 1000000;
749 	}
750 }
751 
752 #ifndef _SYS_SYSPROTO_H_
753 struct getrusage_args {
754 	int	who;
755 	struct	rusage *rusage;
756 };
757 #endif
758 /*
759  * MPSAFE
760  */
761 /* ARGSUSED */
762 int
763 getrusage(td, uap)
764 	register struct thread *td;
765 	register struct getrusage_args *uap;
766 {
767 	struct proc *p = td->td_proc;
768 	register struct rusage *rup;
769 	int error = 0;
770 
771 	mtx_lock(&Giant);
772 
773 	switch (uap->who) {
774 	case RUSAGE_SELF:
775 		rup = &p->p_stats->p_ru;
776 		mtx_lock_spin(&sched_lock);
777 		calcru(p, &rup->ru_utime, &rup->ru_stime, NULL);
778 		mtx_unlock_spin(&sched_lock);
779 		break;
780 
781 	case RUSAGE_CHILDREN:
782 		rup = &p->p_stats->p_cru;
783 		break;
784 
785 	default:
786 		rup = NULL;
787 		error = EINVAL;
788 		break;
789 	}
790 	mtx_unlock(&Giant);
791 	if (error == 0) {
792 		/* XXX Unlocked access to p_stats->p_ru or p_cru. */
793 		error = copyout(rup, uap->rusage, sizeof (struct rusage));
794 	}
795 	return(error);
796 }
797 
798 void
799 ruadd(ru, ru2)
800 	register struct rusage *ru, *ru2;
801 {
802 	register long *ip, *ip2;
803 	register int i;
804 
805 	timevaladd(&ru->ru_utime, &ru2->ru_utime);
806 	timevaladd(&ru->ru_stime, &ru2->ru_stime);
807 	if (ru->ru_maxrss < ru2->ru_maxrss)
808 		ru->ru_maxrss = ru2->ru_maxrss;
809 	ip = &ru->ru_first; ip2 = &ru2->ru_first;
810 	for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--)
811 		*ip++ += *ip2++;
812 }
813 
814 /*
815  * Make a copy of the plimit structure.
816  * We share these structures copy-on-write after fork,
817  * and copy when a limit is changed.
818  */
819 struct plimit *
820 limcopy(lim)
821 	struct plimit *lim;
822 {
823 	register struct plimit *copy;
824 
825 	MALLOC(copy, struct plimit *, sizeof(struct plimit),
826 	    M_SUBPROC, M_WAITOK);
827 	bcopy(lim->pl_rlimit, copy->pl_rlimit, sizeof(struct plimit));
828 	copy->p_refcnt = 1;
829 	return (copy);
830 }
831 
832 /*
833  * Find the uidinfo structure for a uid.  This structure is used to
834  * track the total resource consumption (process count, socket buffer
835  * size, etc.) for the uid and impose limits.
836  */
837 void
838 uihashinit()
839 {
840 
841 	uihashtbl = hashinit(maxproc / 16, M_UIDINFO, &uihash);
842 	mtx_init(&uihashtbl_mtx, "uidinfo hash", NULL, MTX_DEF);
843 }
844 
845 /*
846  * Look up the uidinfo struct for the given uid.
847  * uihashtbl_mtx must be held.
848  */
849 static struct uidinfo *
850 uilookup(uid)
851 	uid_t uid;
852 {
853 	struct	uihashhead *uipp;
854 	struct	uidinfo *uip;
855 
856 	mtx_assert(&uihashtbl_mtx, MA_OWNED);
857 	uipp = UIHASH(uid);
858 	LIST_FOREACH(uip, uipp, ui_hash)
859 		if (uip->ui_uid == uid)
860 			break;
861 
862 	return (uip);
863 }
864 
865 /*
866  * Find or allocate a struct uidinfo for a particular uid.
867  * The returned structure has its reference count incremented;
868  * uifree() must be called when that reference is released.
869  */
870 struct uidinfo *
871 uifind(uid)
872 	uid_t uid;
873 {
874 	struct	uidinfo *uip;
875 
876 	mtx_lock(&uihashtbl_mtx);
877 	uip = uilookup(uid);
878 	if (uip == NULL) {
879 		struct  uidinfo *old_uip;
880 
881 		mtx_unlock(&uihashtbl_mtx);
882 		uip = malloc(sizeof(*uip), M_UIDINFO, M_WAITOK | M_ZERO);
883 		mtx_lock(&uihashtbl_mtx);
884 		/*
885 		 * There's a chance someone created our uidinfo while we
886 		 * were in malloc and not holding the lock, so we have to
887 		 * make sure we don't insert a duplicate uidinfo.
888 		 */
889 		if ((old_uip = uilookup(uid)) != NULL) {
890 			/* someone else beat us to it */
891 			free(uip, M_UIDINFO);
892 			uip = old_uip;
893 		} else {
894 			uip->ui_mtxp = mtx_pool_alloc();
895 			uip->ui_uid = uid;
896 			LIST_INSERT_HEAD(UIHASH(uid), uip, ui_hash);
897 		}
898 	}
899 	uihold(uip);
900 	mtx_unlock(&uihashtbl_mtx);
901 	return (uip);
902 }
903 
904 /*
905  * Place another refcount on a uidinfo struct.
906  */
907 void
908 uihold(uip)
909 	struct uidinfo *uip;
910 {
911 
912 	UIDINFO_LOCK(uip);
913 	uip->ui_ref++;
914 	UIDINFO_UNLOCK(uip);
915 }
916 
917 /*-
918  * Since uidinfo structs have a long lifetime, we use an
919  * opportunistic refcounting scheme to avoid locking the lookup hash
920  * for each release.
921  *
922  * If the refcount hits 0, we need to free the structure,
923  * which means we need to lock the hash.
924  * Optimal case:
925  *   After locking the struct and lowering the refcount, if we find
926  *   that we don't need to free, simply unlock and return.
927  * Suboptimal case:
928  *   If lowering the refcount means the structure must be freed, bump
929  *   the count back up, release the lock and reacquire the locks in
930  *   the proper order to try again.
931  */
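/*
 * The lock order used below is uihashtbl_mtx before the per-uidinfo lock,
 * matching uifind() above.
 */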
932 void
933 uifree(uip)
934 	struct uidinfo *uip;
935 {
936 
937 	/* Prepare for optimal case. */
938 	UIDINFO_LOCK(uip);
939 
940 	if (--uip->ui_ref != 0) {
941 		UIDINFO_UNLOCK(uip);
942 		return;
943 	}
944 
945 	/* Prepare for suboptimal case. */
946 	uip->ui_ref++;
947 	UIDINFO_UNLOCK(uip);
948 	mtx_lock(&uihashtbl_mtx);
949 	UIDINFO_LOCK(uip);
950 
951 	/*
952 	 * We must subtract one from the count again because we backed out
953 	 * our initial subtraction before dropping the lock.
954 	 * Since another thread may have added a reference after we dropped the
955 	 * initial lock we have to test for zero again.
956 	 */
957 	if (--uip->ui_ref == 0) {
958 		LIST_REMOVE(uip, ui_hash);
959 		mtx_unlock(&uihashtbl_mtx);
960 		if (uip->ui_sbsize != 0)
961 			/* XXX no %qd in kernel.  Truncate. */
962 			printf("freeing uidinfo: uid = %d, sbsize = %ld\n",
963 			    uip->ui_uid, (long)uip->ui_sbsize);
964 		if (uip->ui_proccnt != 0)
965 			printf("freeing uidinfo: uid = %d, proccnt = %ld\n",
966 			    uip->ui_uid, uip->ui_proccnt);
967 		UIDINFO_UNLOCK(uip);
968 		FREE(uip, M_UIDINFO);
969 		return;
970 	}
971 
972 	mtx_unlock(&uihashtbl_mtx);
973 	UIDINFO_UNLOCK(uip);
974 }
975 
976 /*
977  * Change the count of processes a given user is running.
978  * When 'max' is 0, don't enforce a limit.
979  */
980 int
981 chgproccnt(uip, diff, max)
982 	struct	uidinfo	*uip;
983 	int	diff;
984 	int	max;
985 {
986 
987 	UIDINFO_LOCK(uip);
988 	/* don't allow them to exceed max, but allow subtraction */
989 	if (diff > 0 && uip->ui_proccnt + diff > max && max != 0) {
990 		UIDINFO_UNLOCK(uip);
991 		return (0);
992 	}
993 	uip->ui_proccnt += diff;
994 	if (uip->ui_proccnt < 0)
995 		printf("negative proccnt for uid = %d\n", uip->ui_uid);
996 	UIDINFO_UNLOCK(uip);
997 	return (1);
998 }
999 
1000 /*
1001  * Change the total socket buffer size a user has used.
1002  */
1003 int
1004 chgsbsize(uip, hiwat, to, max)
1005 	struct	uidinfo	*uip;
1006 	u_int  *hiwat;
1007 	u_int	to;
1008 	rlim_t	max;
1009 {
1010 	rlim_t new;
1011 	int s;
1012 
1013 	s = splnet();
1014 	UIDINFO_LOCK(uip);
1015 	new = uip->ui_sbsize + to - *hiwat;
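	/*
	 * Illustrative example: raising a socket buffer high-water mark from
	 * 32768 to 65536 adds 32768 to ui_sbsize; if that would push the
	 * total past 'max' the request is refused and 0 is returned.
	 */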
1016 	/* don't allow them to exceed max, but allow subtraction */
1017 	if (to > *hiwat && new > max) {
1018 		splx(s);
1019 		UIDINFO_UNLOCK(uip);
1020 		return (0);
1021 	}
1022 	uip->ui_sbsize = new;
1023 	*hiwat = to;
1024 	if (uip->ui_sbsize < 0)
1025 		printf("negative sbsize for uid = %d\n", uip->ui_uid);
1026 	splx(s);
1027 	UIDINFO_UNLOCK(uip);
1028 	return (1);
1029 }
1030