xref: /freebsd/sys/kern/kern_resource.c (revision f6a4109212fd8fbabc731f07b2dd5c7e07fbec33)
1 /*-
2  * Copyright (c) 1982, 1986, 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the University of
21  *	California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  *	@(#)kern_resource.c	8.5 (Berkeley) 1/21/94
39  */
40 
41 #include <sys/cdefs.h>
42 __FBSDID("$FreeBSD$");
43 
44 #include "opt_compat.h"
45 
46 #include <sys/param.h>
47 #include <sys/systm.h>
48 #include <sys/sysproto.h>
49 #include <sys/file.h>
50 #include <sys/kernel.h>
51 #include <sys/lock.h>
52 #include <sys/malloc.h>
53 #include <sys/mutex.h>
54 #include <sys/proc.h>
55 #include <sys/resourcevar.h>
56 #include <sys/sched.h>
57 #include <sys/sx.h>
58 #include <sys/sysent.h>
59 #include <sys/time.h>
60 
61 #include <vm/vm.h>
62 #include <vm/vm_param.h>
63 #include <vm/pmap.h>
64 #include <vm/vm_map.h>
65 
66 static int donice(struct thread *td, struct proc *chgp, int n);
67 
68 static MALLOC_DEFINE(M_PLIMIT, "plimit", "plimit structures");
69 static MALLOC_DEFINE(M_UIDINFO, "uidinfo", "uidinfo structures");
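/*
 * Per-uid accounting is kept in a hash table of uidinfo structures,
 * keyed by uid; uihash is the mask returned by hashinit() (the table
 * size minus one) and UIHASH() selects the bucket for a given uid.
 */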
70 #define	UIHASH(uid)	(&uihashtbl[(uid) & uihash])
71 static struct mtx uihashtbl_mtx;
72 static LIST_HEAD(uihashhead, uidinfo) *uihashtbl;
73 static u_long uihash;		/* size of hash table - 1 */
74 
75 static struct uidinfo	*uilookup(uid_t uid);
76 
77 /*
78  * Resource controls and accounting.
79  */
80 
81 #ifndef _SYS_SYSPROTO_H_
82 struct getpriority_args {
83 	int	which;
84 	int	who;
85 };
86 #endif
87 /*
88  * MPSAFE
89  */
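/*
 * getpriority() returns the lowest nice value (i.e. the highest
 * scheduling priority) found among the processes selected by
 * 'which' and 'who', or ESRCH if no matching process was found.
 */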
90 int
91 getpriority(td, uap)
92 	struct thread *td;
93 	register struct getpriority_args *uap;
94 {
95 	struct ksegrp *kg;
96 	struct proc *p;
97 	int error, low;
98 
99 	error = 0;
100 	low = PRIO_MAX + 1;
101 	switch (uap->which) {
102 
103 	case PRIO_PROCESS:
104 		if (uap->who == 0)
105 			low = td->td_ksegrp->kg_nice;
106 		else {
107 			p = pfind(uap->who);
108 			if (p == NULL)
109 				break;
110 			if (p_cansee(td, p) == 0) {
111 				FOREACH_KSEGRP_IN_PROC(p, kg) {
112 					if (kg->kg_nice < low)
113 						low = kg->kg_nice;
114 				}
115 			}
116 			PROC_UNLOCK(p);
117 		}
118 		break;
119 
120 	case PRIO_PGRP: {
121 		register struct pgrp *pg;
122 
123 		sx_slock(&proctree_lock);
124 		if (uap->who == 0) {
125 			pg = td->td_proc->p_pgrp;
126 			PGRP_LOCK(pg);
127 		} else {
128 			pg = pgfind(uap->who);
129 			if (pg == NULL) {
130 				sx_sunlock(&proctree_lock);
131 				break;
132 			}
133 		}
134 		sx_sunlock(&proctree_lock);
135 		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
136 			PROC_LOCK(p);
137 			if (!p_cansee(td, p)) {
138 				FOREACH_KSEGRP_IN_PROC(p, kg) {
139 					if (kg->kg_nice < low)
140 						low = kg->kg_nice;
141 				}
142 			}
143 			PROC_UNLOCK(p);
144 		}
145 		PGRP_UNLOCK(pg);
146 		break;
147 	}
148 
149 	case PRIO_USER:
150 		if (uap->who == 0)
151 			uap->who = td->td_ucred->cr_uid;
152 		sx_slock(&allproc_lock);
153 		LIST_FOREACH(p, &allproc, p_list) {
154 			PROC_LOCK(p);
155 			if (!p_cansee(td, p) &&
156 			    p->p_ucred->cr_uid == uap->who) {
157 				FOREACH_KSEGRP_IN_PROC(p, kg) {
158 					if (kg->kg_nice < low)
159 						low = kg->kg_nice;
160 				}
161 			}
162 			PROC_UNLOCK(p);
163 		}
164 		sx_sunlock(&allproc_lock);
165 		break;
166 
167 	default:
168 		error = EINVAL;
169 		break;
170 	}
171 	if (low == PRIO_MAX + 1 && error == 0)
172 		error = ESRCH;
173 	td->td_retval[0] = low;
174 	return (error);
175 }
176 
177 #ifndef _SYS_SYSPROTO_H_
178 struct setpriority_args {
179 	int	which;
180 	int	who;
181 	int	prio;
182 };
183 #endif
184 /*
185  * MPSAFE
186  */
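/*
 * setpriority() applies the new nice value, via donice(), to each
 * process selected by 'which' and 'who' that the caller may see;
 * ESRCH is returned if no matching process was found.
 */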
187 int
188 setpriority(td, uap)
189 	struct thread *td;
190 	register struct setpriority_args *uap;
191 {
192 	struct proc *curp;
193 	register struct proc *p;
194 	int found = 0, error = 0;
195 
196 	curp = td->td_proc;
197 	switch (uap->which) {
198 	case PRIO_PROCESS:
199 		if (uap->who == 0) {
200 			PROC_LOCK(curp);
201 			error = donice(td, curp, uap->prio);
202 			PROC_UNLOCK(curp);
203 		} else {
204 			p = pfind(uap->who);
205 			if (p == NULL)
206 				break;
207 			if (p_cansee(td, p) == 0)
208 				error = donice(td, p, uap->prio);
209 			PROC_UNLOCK(p);
210 		}
211 		found++;
212 		break;
213 
214 	case PRIO_PGRP: {
215 		register struct pgrp *pg;
216 
217 		sx_slock(&proctree_lock);
218 		if (uap->who == 0) {
219 			pg = curp->p_pgrp;
220 			PGRP_LOCK(pg);
221 		} else {
222 			pg = pgfind(uap->who);
223 			if (pg == NULL) {
224 				sx_sunlock(&proctree_lock);
225 				break;
226 			}
227 		}
228 		sx_sunlock(&proctree_lock);
229 		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
230 			PROC_LOCK(p);
231 			if (!p_cansee(td, p)) {
232 				error = donice(td, p, uap->prio);
233 				found++;
234 			}
235 			PROC_UNLOCK(p);
236 		}
237 		PGRP_UNLOCK(pg);
238 		break;
239 	}
240 
241 	case PRIO_USER:
242 		if (uap->who == 0)
243 			uap->who = td->td_ucred->cr_uid;
244 		sx_slock(&allproc_lock);
245 		FOREACH_PROC_IN_SYSTEM(p) {
246 			PROC_LOCK(p);
247 			if (p->p_ucred->cr_uid == uap->who &&
248 			    !p_cansee(td, p)) {
249 				error = donice(td, p, uap->prio);
250 				found++;
251 			}
252 			PROC_UNLOCK(p);
253 		}
254 		sx_sunlock(&allproc_lock);
255 		break;
256 
257 	default:
258 		error = EINVAL;
259 		break;
260 	}
261 	if (found == 0 && error == 0)
262 		error = ESRCH;
263 	return (error);
264 }
265 
266 /*
267  * Set "nice" for a process.  Does not handle threaded processes especially
268  * well, but makes an attempt.  Has the unfortunate side effect of making all
269  * the nice values for a process's ksegrps the same, which suggests that
270  * nice values should be stored as a per-process nice plus per-ksegrp deltas
271  * (not done yet).
272  */
273 static int
274 donice(struct thread *td, struct proc *p, int n)
275 {
276 	struct ksegrp *kg;
277 	int error, low;
278 
279 	low = PRIO_MAX + 1;
280 	PROC_LOCK_ASSERT(p, MA_OWNED);
281 	if ((error = p_cansched(td, p)))
282 		return (error);
283 	if (n > PRIO_MAX)
284 		n = PRIO_MAX;
285 	if (n < PRIO_MIN)
286 		n = PRIO_MIN;
287 	/*
288 	 * Unless the caller is the superuser, only allow nice to be set to the
289 	 * lowest existing nice or above.  E.g., for nices of 4,3,2 allow a nice of 3 but not 1.
290 	 */
291 	FOREACH_KSEGRP_IN_PROC(p, kg) {
292 		if (kg->kg_nice < low)
293 			low = kg->kg_nice;
294 	}
295 	if (n < low && suser(td) != 0)
296 		return (EACCES);
297 	mtx_lock_spin(&sched_lock);
298 	FOREACH_KSEGRP_IN_PROC(p, kg) {
299 		sched_nice(kg, n);
300 	}
301 	mtx_unlock_spin(&sched_lock);
302 	return (0);
303 }
304 
305 /*
306  * Set realtime priority
307  *
308  * MPSAFE
309  */
310 #ifndef _SYS_SYSPROTO_H_
311 struct rtprio_args {
312 	int		function;
313 	pid_t		pid;
314 	struct rtprio	*rtp;
315 };
316 #endif
317 
318 int
319 rtprio(td, uap)
320 	struct thread *td;
321 	register struct rtprio_args *uap;
322 {
323 	struct proc *curp;
324 	register struct proc *p;
325 	struct rtprio rtp;
326 	int cierror, error;
327 
328 	/* Perform copyin before acquiring locks if needed. */
329 	if (uap->function == RTP_SET)
330 		cierror = copyin(uap->rtp, &rtp, sizeof(struct rtprio));
331 	else
332 		cierror = 0;
333 
334 	curp = td->td_proc;
335 	if (uap->pid == 0) {
336 		p = curp;
337 		PROC_LOCK(p);
338 	} else {
339 		p = pfind(uap->pid);
340 		if (p == NULL)
341 			return (ESRCH);
342 	}
343 
344 	switch (uap->function) {
345 	case RTP_LOOKUP:
346 		if ((error = p_cansee(td, p)))
347 			break;
348 		mtx_lock_spin(&sched_lock);
349 		pri_to_rtp(FIRST_KSEGRP_IN_PROC(p), &rtp);
350 		mtx_unlock_spin(&sched_lock);
351 		PROC_UNLOCK(p);
352 		return (copyout(&rtp, uap->rtp, sizeof(struct rtprio)));
353 	case RTP_SET:
354 		if ((error = p_cansched(td, p)) || (error = cierror))
355 			break;
356 		/* disallow setting rtprio in most cases if not superuser */
357 		if (suser(td) != 0) {
358 			/* can't set someone else's */
359 			if (uap->pid) {
360 				error = EPERM;
361 				break;
362 			}
363 			/* can't set realtime priority */
364 /*
365  * Realtime priority has to be restricted for reasons which should be
366  * obvious. However, for idle priority, there is a potential for
367  * system deadlock if an idleprio process gains a lock on a resource
368  * that other processes need (and the idleprio process can't run
369  * due to a CPU-bound normal process). Fix me! XXX
370  */
371 #if 0
372  			if (RTP_PRIO_IS_REALTIME(rtp.type))
373 #endif
374 			if (rtp.type != RTP_PRIO_NORMAL) {
375 				error = EPERM;
376 				break;
377 			}
378 		}
379 		mtx_lock_spin(&sched_lock);
380 		error = rtp_to_pri(&rtp, FIRST_KSEGRP_IN_PROC(p));
381 		mtx_unlock_spin(&sched_lock);
382 		break;
383 	default:
384 		error = EINVAL;
385 		break;
386 	}
387 	PROC_UNLOCK(p);
388 	return (error);
389 }
390 
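/*
 * Translate an rtprio class and priority into the ksegrp's scheduling
 * class and user priority; the inverse of pri_to_rtp() below.
 */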
391 int
392 rtp_to_pri(struct rtprio *rtp, struct ksegrp *kg)
393 {
394 
395 	mtx_assert(&sched_lock, MA_OWNED);
396 	if (rtp->prio > RTP_PRIO_MAX)
397 		return (EINVAL);
398 	switch (RTP_PRIO_BASE(rtp->type)) {
399 	case RTP_PRIO_REALTIME:
400 		kg->kg_user_pri = PRI_MIN_REALTIME + rtp->prio;
401 		break;
402 	case RTP_PRIO_NORMAL:
403 		kg->kg_user_pri = PRI_MIN_TIMESHARE + rtp->prio;
404 		break;
405 	case RTP_PRIO_IDLE:
406 		kg->kg_user_pri = PRI_MIN_IDLE + rtp->prio;
407 		break;
408 	default:
409 		return (EINVAL);
410 	}
411 	sched_class(kg, rtp->type);
412 	if (curthread->td_ksegrp == kg) {
413 		curthread->td_base_pri = kg->kg_user_pri;
414 		sched_prio(curthread, kg->kg_user_pri); /* XXX dubious */
415 	}
416 	return (0);
417 }
418 
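/*
 * Translate the ksegrp's scheduling class and user priority back into
 * an rtprio structure; the inverse of rtp_to_pri() above.
 */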
419 void
420 pri_to_rtp(struct ksegrp *kg, struct rtprio *rtp)
421 {
422 
423 	mtx_assert(&sched_lock, MA_OWNED);
424 	switch (PRI_BASE(kg->kg_pri_class)) {
425 	case PRI_REALTIME:
426 		rtp->prio = kg->kg_user_pri - PRI_MIN_REALTIME;
427 		break;
428 	case PRI_TIMESHARE:
429 		rtp->prio = kg->kg_user_pri - PRI_MIN_TIMESHARE;
430 		break;
431 	case PRI_IDLE:
432 		rtp->prio = kg->kg_user_pri - PRI_MIN_IDLE;
433 		break;
434 	default:
435 		break;
436 	}
437 	rtp->type = kg->kg_pri_class;
438 }
439 
440 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
441 #ifndef _SYS_SYSPROTO_H_
442 struct osetrlimit_args {
443 	u_int	which;
444 	struct	orlimit *rlp;
445 };
446 #endif
447 /*
448  * MPSAFE
449  */
450 int
451 osetrlimit(td, uap)
452 	struct thread *td;
453 	register struct osetrlimit_args *uap;
454 {
455 	struct orlimit olim;
456 	struct rlimit lim;
457 	int error;
458 
459 	if ((error = copyin(uap->rlp, &olim, sizeof(struct orlimit))))
460 		return (error);
461 	lim.rlim_cur = olim.rlim_cur;
462 	lim.rlim_max = olim.rlim_max;
463 	error = kern_setrlimit(td, uap->which, &lim);
464 	return (error);
465 }
466 
467 #ifndef _SYS_SYSPROTO_H_
468 struct ogetrlimit_args {
469 	u_int	which;
470 	struct	orlimit *rlp;
471 };
472 #endif
473 /*
474  * MPSAFE
475  */
476 int
477 ogetrlimit(td, uap)
478 	struct thread *td;
479 	register struct ogetrlimit_args *uap;
480 {
481 	struct orlimit olim;
482 	struct rlimit rl;
483 	struct proc *p;
484 	int error;
485 
486 	if (uap->which >= RLIM_NLIMITS)
487 		return (EINVAL);
488 	p = td->td_proc;
489 	PROC_LOCK(p);
490 	lim_rlimit(p, uap->which, &rl);
491 	PROC_UNLOCK(p);
492 
493 	/*
494 	 * XXX would be more correct to convert only RLIM_INFINITY to the
495 	 * old RLIM_INFINITY and fail with EOVERFLOW for other larger
496 	 * values.  Most 64->32 and 32->16 conversions, including some quite
497 	 * important ones such as those of uids, are even more broken than what
498 	 * we do here (they blindly truncate).  We don't do this correctly
499 	 * here since we have little experience with EOVERFLOW yet.
500 	 * Elsewhere, getuid() can't fail...
501 	 */
502 	olim.rlim_cur = rl.rlim_cur > 0x7fffffff ? 0x7fffffff : rl.rlim_cur;
503 	olim.rlim_max = rl.rlim_max > 0x7fffffff ? 0x7fffffff : rl.rlim_max;
504 	error = copyout(&olim, uap->rlp, sizeof(olim));
505 	return (error);
506 }
507 #endif /* COMPAT_43 || COMPAT_SUNOS */
508 
509 #ifndef _SYS_SYSPROTO_H_
510 struct __setrlimit_args {
511 	u_int	which;
512 	struct	rlimit *rlp;
513 };
514 #endif
515 /*
516  * MPSAFE
517  */
518 int
519 setrlimit(td, uap)
520 	struct thread *td;
521 	register struct __setrlimit_args *uap;
522 {
523 	struct rlimit alim;
524 	int error;
525 
526 	if ((error = copyin(uap->rlp, &alim, sizeof(struct rlimit))))
527 		return (error);
528 	error = kern_setrlimit(td, uap->which, &alim);
529 	return (error);
530 }
531 
532 int
533 kern_setrlimit(td, which, limp)
534 	struct thread *td;
535 	u_int which;
536 	struct rlimit *limp;
537 {
538 	struct plimit *newlim, *oldlim;
539 	struct proc *p;
540 	register struct rlimit *alimp;
541 	rlim_t oldssiz;
542 	int error;
543 
544 	if (which >= RLIM_NLIMITS)
545 		return (EINVAL);
546 
547 	/*
548 	 * Preserve historical bugs by treating negative limits as unsigned.
549 	 */
550 	if (limp->rlim_cur < 0)
551 		limp->rlim_cur = RLIM_INFINITY;
552 	if (limp->rlim_max < 0)
553 		limp->rlim_max = RLIM_INFINITY;
554 
555 	oldssiz = 0;
556 	p = td->td_proc;
557 	newlim = lim_alloc();
558 	PROC_LOCK(p);
559 	oldlim = p->p_limit;
560 	alimp = &oldlim->pl_rlimit[which];
561 	if (limp->rlim_cur > alimp->rlim_max ||
562 	    limp->rlim_max > alimp->rlim_max)
563 		if ((error = suser_cred(td->td_ucred, PRISON_ROOT))) {
564 			PROC_UNLOCK(p);
565 			lim_free(newlim);
566 			return (error);
567 		}
568 	if (limp->rlim_cur > limp->rlim_max)
569 		limp->rlim_cur = limp->rlim_max;
570 	lim_copy(newlim, oldlim);
571 	alimp = &newlim->pl_rlimit[which];
572 
573 	switch (which) {
574 
575 	case RLIMIT_CPU:
576 		mtx_lock_spin(&sched_lock);
577 		p->p_cpulimit = limp->rlim_cur;
578 		mtx_unlock_spin(&sched_lock);
579 		break;
580 	case RLIMIT_DATA:
581 		if (limp->rlim_cur > maxdsiz)
582 			limp->rlim_cur = maxdsiz;
583 		if (limp->rlim_max > maxdsiz)
584 			limp->rlim_max = maxdsiz;
585 		break;
586 
587 	case RLIMIT_STACK:
588 		if (limp->rlim_cur > maxssiz)
589 			limp->rlim_cur = maxssiz;
590 		if (limp->rlim_max > maxssiz)
591 			limp->rlim_max = maxssiz;
592 		oldssiz = alimp->rlim_cur;
593 		break;
594 
595 	case RLIMIT_NOFILE:
596 		if (limp->rlim_cur > maxfilesperproc)
597 			limp->rlim_cur = maxfilesperproc;
598 		if (limp->rlim_max > maxfilesperproc)
599 			limp->rlim_max = maxfilesperproc;
600 		break;
601 
602 	case RLIMIT_NPROC:
603 		if (limp->rlim_cur > maxprocperuid)
604 			limp->rlim_cur = maxprocperuid;
605 		if (limp->rlim_max > maxprocperuid)
606 			limp->rlim_max = maxprocperuid;
607 		if (limp->rlim_cur < 1)
608 			limp->rlim_cur = 1;
609 		if (limp->rlim_max < 1)
610 			limp->rlim_max = 1;
611 		break;
612 	}
613 	*alimp = *limp;
614 	p->p_limit = newlim;
615 	PROC_UNLOCK(p);
616 	lim_free(oldlim);
617 
618 	if (which == RLIMIT_STACK) {
619 		/*
620 		 * Stack is allocated to the max at exec time with only
621 		 * "rlim_cur" bytes accessible.  If the stack limit is going up,
622 		 * make more of it accessible; if going down, make the excess inaccessible.
623 		 */
624 		if (limp->rlim_cur != oldssiz) {
625 			vm_offset_t addr;
626 			vm_size_t size;
627 			vm_prot_t prot;
628 
629 			mtx_lock(&Giant);
630 			if (limp->rlim_cur > oldssiz) {
631 				prot = p->p_sysent->sv_stackprot;
632 				size = limp->rlim_cur - oldssiz;
633 				addr = p->p_sysent->sv_usrstack -
634 				    limp->rlim_cur;
635 			} else {
636 				prot = VM_PROT_NONE;
637 				size = oldssiz - limp->rlim_cur;
638 				addr = p->p_sysent->sv_usrstack -
639 				    oldssiz;
640 			}
641 			addr = trunc_page(addr);
642 			size = round_page(size);
643 			(void) vm_map_protect(&p->p_vmspace->vm_map,
644 					      addr, addr+size, prot, FALSE);
645 			mtx_unlock(&Giant);
646 		}
647 	}
648 	return (0);
649 }
650 
651 #ifndef _SYS_SYSPROTO_H_
652 struct __getrlimit_args {
653 	u_int	which;
654 	struct	rlimit *rlp;
655 };
656 #endif
657 /*
658  * MPSAFE
659  */
660 /* ARGSUSED */
661 int
662 getrlimit(td, uap)
663 	struct thread *td;
664 	register struct __getrlimit_args *uap;
665 {
666 	struct rlimit rlim;
667 	struct proc *p;
668 	int error;
669 
670 	if (uap->which >= RLIM_NLIMITS)
671 		return (EINVAL);
672 	p = td->td_proc;
673 	PROC_LOCK(p);
674 	lim_rlimit(p, uap->which, &rlim);
675 	PROC_UNLOCK(p);
676 	error = copyout(&rlim, uap->rlp, sizeof(struct rlimit));
677 	return(error);
678 }
679 
680 /*
681  * Transform the running time and tick information in proc p into user,
682  * system, and interrupt time usage.
683  */
684 void
685 calcru(p, up, sp, ip)
686 	struct proc *p;
687 	struct timeval *up;
688 	struct timeval *sp;
689 	struct timeval *ip;
690 {
691 	struct bintime bt;
692 	struct timeval tv;
693 	/* {user, system, interrupt, total} x {ticks, usec} -> ut/uu, st/su, it/iu, tt/tu; ptu is the previous total usec. */
694 	u_int64_t ut, uu, st, su, it, iu, tt, tu, ptu;
695 
696 	mtx_assert(&sched_lock, MA_OWNED);
697 	/* XXX: why spl-protect ?  worst case is an off-by-one report */
698 
699 	ut = p->p_uticks;
700 	st = p->p_sticks;
701 	it = p->p_iticks;
702 
703 	tt = ut + st + it;
704 	if (tt == 0) {
705 		st = 1;
706 		tt = 1;
707 	}
708 	if (p == curthread->td_proc) {
709 		/*
710 		 * Adjust for the current time slice.  This is actually fairly
711 		 * important since the error here is on the order of a time
712 		 * quantum, which is much greater than the sampling error.
713 		 * XXXKSE use a different test due to threads on other
714 		 * processors also being 'current'.
715 		 */
716 		binuptime(&bt);
717 		bintime_sub(&bt, PCPU_PTR(switchtime));
718 		bintime_add(&bt, &p->p_runtime);
719 	} else
720 		bt = p->p_runtime;
721 	bintime2timeval(&bt, &tv);
722 	tu = (u_int64_t)tv.tv_sec * 1000000 + tv.tv_usec;
723 	ptu = p->p_uu + p->p_su + p->p_iu;
724 	if (tu < ptu || (int64_t)tu < 0) {
725 		printf("calcru: negative time of %jd usec for pid %d (%s)\n",
726 		    (intmax_t)tu, p->p_pid, p->p_comm);
727 		tu = ptu;
728 	}
729 
730 	/* Subdivide tu. */
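	/*
	 * E.g., ut = 30, st = 10, it = 0 ticks (tt = 40) and tu = 4000000
	 * usec of run time give uu = 3000000, su = 1000000 and iu = 0.
	 */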
731 	uu = (tu * ut) / tt;
732 	su = (tu * st) / tt;
733 	iu = tu - uu - su;
734 
735 	/* Enforce monotonicity. */
736 	if (uu < p->p_uu || su < p->p_su || iu < p->p_iu) {
737 		if (uu < p->p_uu)
738 			uu = p->p_uu;
739 		else if (uu + p->p_su + p->p_iu > tu)
740 			uu = tu - p->p_su - p->p_iu;
741 		if (st == 0)
742 			su = p->p_su;
743 		else {
744 			su = ((tu - uu) * st) / (st + it);
745 			if (su < p->p_su)
746 				su = p->p_su;
747 			else if (uu + su + p->p_iu > tu)
748 				su = tu - uu - p->p_iu;
749 		}
750 		KASSERT(uu + su + p->p_iu <= tu,
751 		    ("calcru: monotonisation botch 1"));
752 		iu = tu - uu - su;
753 		KASSERT(iu >= p->p_iu,
754 		    ("calcru: monotonisation botch 2"));
755 	}
756 	p->p_uu = uu;
757 	p->p_su = su;
758 	p->p_iu = iu;
759 
760 	up->tv_sec = uu / 1000000;
761 	up->tv_usec = uu % 1000000;
762 	sp->tv_sec = su / 1000000;
763 	sp->tv_usec = su % 1000000;
764 	if (ip != NULL) {
765 		ip->tv_sec = iu / 1000000;
766 		ip->tv_usec = iu % 1000000;
767 	}
768 }
769 
770 #ifndef _SYS_SYSPROTO_H_
771 struct getrusage_args {
772 	int	who;
773 	struct	rusage *rusage;
774 };
775 #endif
776 /*
777  * MPSAFE
778  */
779 /* ARGSUSED */
780 int
781 getrusage(td, uap)
782 	register struct thread *td;
783 	register struct getrusage_args *uap;
784 {
785 	struct rusage ru;
786 	struct proc *p;
787 
788 	p = td->td_proc;
789 	switch (uap->who) {
790 
791 	case RUSAGE_SELF:
792 		mtx_lock(&Giant);
793 		mtx_lock_spin(&sched_lock);
794 		calcru(p, &p->p_stats->p_ru.ru_utime, &p->p_stats->p_ru.ru_stime,
795 		    NULL);
796 		mtx_unlock_spin(&sched_lock);
797 		ru = p->p_stats->p_ru;
798 		mtx_unlock(&Giant);
799 		break;
800 
801 	case RUSAGE_CHILDREN:
802 		mtx_lock(&Giant);
803 		ru = p->p_stats->p_cru;
804 		mtx_unlock(&Giant);
805 		break;
806 
807 	default:
808 		return (EINVAL);
809 		break;
810 	}
811 	return (copyout(&ru, uap->rusage, sizeof(struct rusage)));
812 }
813 
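/*
 * Accumulate the resource usage in ru2 into ru: the utime and stime
 * values are summed, ru_maxrss takes the larger of the two values and
 * the remaining counters are added field by field.
 */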
814 void
815 ruadd(ru, ru2)
816 	register struct rusage *ru, *ru2;
817 {
818 	register long *ip, *ip2;
819 	register int i;
820 
821 	timevaladd(&ru->ru_utime, &ru2->ru_utime);
822 	timevaladd(&ru->ru_stime, &ru2->ru_stime);
823 	if (ru->ru_maxrss < ru2->ru_maxrss)
824 		ru->ru_maxrss = ru2->ru_maxrss;
825 	ip = &ru->ru_first; ip2 = &ru2->ru_first;
826 	for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--)
827 		*ip++ += *ip2++;
828 }
829 
830 /*
831  * Allocate a new resource limits structure and initialize its
832  * reference count and mutex pointer.
833  */
834 struct plimit *
835 lim_alloc()
836 {
837 	struct plimit *limp;
838 
839 	limp = (struct plimit *)malloc(sizeof(struct plimit), M_PLIMIT,
840 	    M_WAITOK);
841 	limp->pl_refcnt = 1;
842 	limp->pl_mtx = mtx_pool_alloc(mtxpool_sleep);
843 	return (limp);
844 }
845 
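/*
 * Acquire an additional reference to an existing plimit structure.
 */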
846 struct plimit *
847 lim_hold(limp)
848 	struct plimit *limp;
849 {
850 
851 	LIM_LOCK(limp);
852 	limp->pl_refcnt++;
853 	LIM_UNLOCK(limp);
854 	return (limp);
855 }
856 
857 void
858 lim_free(limp)
859 	struct plimit *limp;
860 {
861 
862 	LIM_LOCK(limp);
863 	KASSERT(limp->pl_refcnt > 0, ("plimit refcnt underflow"));
864 	if (--limp->pl_refcnt == 0) {
865 		LIM_UNLOCK(limp);
866 		free((void *)limp, M_PLIMIT);
867 		return;
868 	}
869 	LIM_UNLOCK(limp);
870 }
871 
872 /*
873  * Make a copy of the plimit structure.
874  * We share these structures copy-on-write after fork.
875  */
876 void
877 lim_copy(dst, src)
878 	struct plimit *dst, *src;
879 {
880 
881 	KASSERT(dst->pl_refcnt == 1, ("lim_copy to shared limit"));
882 	bcopy(src->pl_rlimit, dst->pl_rlimit, sizeof(src->pl_rlimit));
883 }
884 
885 /*
886  * Return the hard limit for a particular system resource.  The
887  * which parameter specifies the index into the rlimit array.
888  */
889 rlim_t
890 lim_max(struct proc *p, int which)
891 {
892 	struct rlimit rl;
893 
894 	lim_rlimit(p, which, &rl);
895 	return (rl.rlim_max);
896 }
897 
898 /*
899  * Return the current (soft) limit for a particular system resource.
900  * The which parameter specifies the index into the rlimit array.
901  */
902 rlim_t
903 lim_cur(struct proc *p, int which)
904 {
905 	struct rlimit rl;
906 
907 	lim_rlimit(p, which, &rl);
908 	return (rl.rlim_cur);
909 }
910 
911 /*
912  * Return a copy of the entire rlimit structure for the system limit
913  * specified by 'which' in the rlimit structure pointed to by 'rlp'.
914  */
915 void
916 lim_rlimit(struct proc *p, int which, struct rlimit *rlp)
917 {
918 
919 	PROC_LOCK_ASSERT(p, MA_OWNED);
920 	KASSERT(which >= 0 && which < RLIM_NLIMITS,
921 	    ("request for invalid resource limit"));
922 	*rlp = p->p_limit->pl_rlimit[which];
923 }
924 
925 /*
926  * Find the uidinfo structure for a uid.  This structure is used to
927  * track the total resource consumption (process count, socket buffer
928  * size, etc.) for the uid and impose limits.
929  */
930 void
931 uihashinit()
932 {
933 
934 	uihashtbl = hashinit(maxproc / 16, M_UIDINFO, &uihash);
935 	mtx_init(&uihashtbl_mtx, "uidinfo hash", NULL, MTX_DEF);
936 }
937 
938 /*
939  * Look up a uidinfo struct for the parameter uid.
940  * uihashtbl_mtx must be locked.
941  */
942 static struct uidinfo *
943 uilookup(uid)
944 	uid_t uid;
945 {
946 	struct uihashhead *uipp;
947 	struct uidinfo *uip;
948 
949 	mtx_assert(&uihashtbl_mtx, MA_OWNED);
950 	uipp = UIHASH(uid);
951 	LIST_FOREACH(uip, uipp, ui_hash)
952 		if (uip->ui_uid == uid)
953 			break;
954 
955 	return (uip);
956 }
957 
958 /*
959  * Find or allocate a struct uidinfo for a particular uid.
960  * Increase the refcount on the returned uidinfo struct; the caller
961  * should call uifree() on it once the reference is no longer needed.
962  */
963 struct uidinfo *
964 uifind(uid)
965 	uid_t uid;
966 {
967 	struct uidinfo *old_uip, *uip;
968 
969 	mtx_lock(&uihashtbl_mtx);
970 	uip = uilookup(uid);
971 	if (uip == NULL) {
972 		mtx_unlock(&uihashtbl_mtx);
973 		uip = malloc(sizeof(*uip), M_UIDINFO, M_WAITOK | M_ZERO);
974 		mtx_lock(&uihashtbl_mtx);
975 		/*
976 		 * There's a chance someone created our uidinfo while we
977 		 * were in malloc and not holding the lock, so we have to
978 		 * make sure we don't insert a duplicate uidinfo.
979 		 */
980 		if ((old_uip = uilookup(uid)) != NULL) {
981 			/* Someone else beat us to it. */
982 			free(uip, M_UIDINFO);
983 			uip = old_uip;
984 		} else {
985 			uip->ui_mtxp = mtx_pool_alloc(mtxpool_sleep);
986 			uip->ui_uid = uid;
987 			LIST_INSERT_HEAD(UIHASH(uid), uip, ui_hash);
988 		}
989 	}
990 	uihold(uip);
991 	mtx_unlock(&uihashtbl_mtx);
992 	return (uip);
993 }
994 
995 /*
996  * Place another refcount on a uidinfo struct.
997  */
998 void
999 uihold(uip)
1000 	struct uidinfo *uip;
1001 {
1002 
1003 	UIDINFO_LOCK(uip);
1004 	uip->ui_ref++;
1005 	UIDINFO_UNLOCK(uip);
1006 }
1007 
1008 /*-
1009  * Since uidinfo structs have a long lifetime, we use an
1010  * opportunistic refcounting scheme to avoid locking the lookup hash
1011  * for each release.
1012  *
1013  * If the refcount hits 0, we need to free the structure,
1014  * which means we need to lock the hash.
1015  * Optimal case:
1016  *   After locking the struct and lowering the refcount, if we find
1017  *   that we don't need to free, simply unlock and return.
1018  * Suboptimal case:
1019  *   If lowering the refcount means the struct must be freed, bump the
1020  *   count back up, drop the lock and acquire the locks in the proper
1021  *   order to try again.
1022  */
1023 void
1024 uifree(uip)
1025 	struct uidinfo *uip;
1026 {
1027 
1028 	/* Prepare for optimal case. */
1029 	UIDINFO_LOCK(uip);
1030 
1031 	if (--uip->ui_ref != 0) {
1032 		UIDINFO_UNLOCK(uip);
1033 		return;
1034 	}
1035 
1036 	/* Prepare for suboptimal case. */
1037 	uip->ui_ref++;
1038 	UIDINFO_UNLOCK(uip);
1039 	mtx_lock(&uihashtbl_mtx);
1040 	UIDINFO_LOCK(uip);
1041 
1042 	/*
1043 	 * We must subtract one from the count again because we backed out
1044 	 * our initial subtraction before dropping the lock.
1045 	 * Since another thread may have added a reference after we dropped the
1046 	 * initial lock we have to test for zero again.
1047 	 */
1048 	if (--uip->ui_ref == 0) {
1049 		LIST_REMOVE(uip, ui_hash);
1050 		mtx_unlock(&uihashtbl_mtx);
1051 		if (uip->ui_sbsize != 0)
1052 			/* XXX no %qd in kernel.  Truncate. */
1053 			printf("freeing uidinfo: uid = %d, sbsize = %ld\n",
1054 			    uip->ui_uid, (long)uip->ui_sbsize);
1055 		if (uip->ui_proccnt != 0)
1056 			printf("freeing uidinfo: uid = %d, proccnt = %ld\n",
1057 			    uip->ui_uid, uip->ui_proccnt);
1058 		UIDINFO_UNLOCK(uip);
1059 		FREE(uip, M_UIDINFO);
1060 		return;
1061 	}
1062 
1063 	mtx_unlock(&uihashtbl_mtx);
1064 	UIDINFO_UNLOCK(uip);
1065 }
1066 
1067 /*
1068  * Change the count of the number of processes a given user is
1069  * running.  When 'max' is 0, don't enforce a limit.
1070  */
1071 int
1072 chgproccnt(uip, diff, max)
1073 	struct	uidinfo	*uip;
1074 	int	diff;
1075 	int	max;
1076 {
1077 
1078 	UIDINFO_LOCK(uip);
1079 	/* Don't allow them to exceed max, but allow subtraction. */
1080 	if (diff > 0 && uip->ui_proccnt + diff > max && max != 0) {
1081 		UIDINFO_UNLOCK(uip);
1082 		return (0);
1083 	}
1084 	uip->ui_proccnt += diff;
1085 	if (uip->ui_proccnt < 0)
1086 		printf("negative proccnt for uid = %d\n", uip->ui_uid);
1087 	UIDINFO_UNLOCK(uip);
1088 	return (1);
1089 }
1090 
1091 /*
1092  * Change the total socket buffer size a user has used.
1093  */
1094 int
1095 chgsbsize(uip, hiwat, to, max)
1096 	struct	uidinfo	*uip;
1097 	u_int  *hiwat;
1098 	u_int	to;
1099 	rlim_t	max;
1100 {
1101 	rlim_t new;
1102 	int s;
1103 
1104 	s = splnet();
1105 	UIDINFO_LOCK(uip);
1106 	new = uip->ui_sbsize + to - *hiwat;
1107 	/* Don't allow them to exceed max, but allow subtraction */
1108 	if (to > *hiwat && new > max) {
1109 		splx(s);
1110 		UIDINFO_UNLOCK(uip);
1111 		return (0);
1112 	}
1113 	uip->ui_sbsize = new;
1114 	*hiwat = to;
1115 	if (uip->ui_sbsize < 0)
1116 		printf("negative sbsize for uid = %d\n", uip->ui_uid);
1117 	splx(s);
1118 	UIDINFO_UNLOCK(uip);
1119 	return (1);
1120 }
1121