xref: /freebsd/sys/kern/kern_resource.c (revision 6b3455a7665208c366849f0b2b3bc916fb97516e)
1 /*-
2  * Copyright (c) 1982, 1986, 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 4. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  *	@(#)kern_resource.c	8.5 (Berkeley) 1/21/94
35  */
36 
37 #include <sys/cdefs.h>
38 __FBSDID("$FreeBSD$");
39 
40 #include "opt_compat.h"
41 
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/sysproto.h>
45 #include <sys/file.h>
46 #include <sys/kernel.h>
47 #include <sys/lock.h>
48 #include <sys/malloc.h>
49 #include <sys/mutex.h>
50 #include <sys/proc.h>
51 #include <sys/resourcevar.h>
52 #include <sys/sched.h>
53 #include <sys/sx.h>
54 #include <sys/sysent.h>
55 #include <sys/time.h>
56 
57 #include <vm/vm.h>
58 #include <vm/vm_param.h>
59 #include <vm/pmap.h>
60 #include <vm/vm_map.h>
61 
62 static int donice(struct thread *td, struct proc *chgp, int n);
63 
64 static MALLOC_DEFINE(M_PLIMIT, "plimit", "plimit structures");
65 static MALLOC_DEFINE(M_UIDINFO, "uidinfo", "uidinfo structures");
66 #define	UIHASH(uid)	(&uihashtbl[(uid) & uihash])
67 static struct mtx uihashtbl_mtx;
68 static LIST_HEAD(uihashhead, uidinfo) *uihashtbl;
69 static u_long uihash;		/* size of hash table - 1 */
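/*
 * Illustrative note on the hash above: hashinit() sizes the table to a
 * power of two and stores that size minus one in uihash, so UIHASH()
 * can mask instead of divide.  For example, with 64 buckets uihash is
 * 63 and uid 1001 lands in bucket (1001 & 63) == 41.
 */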
70 
71 static struct uidinfo	*uilookup(uid_t uid);
72 
73 /*
74  * Resource controls and accounting.
75  */
76 
77 #ifndef _SYS_SYSPROTO_H_
78 struct getpriority_args {
79 	int	which;
80 	int	who;
81 };
82 #endif
83 /*
84  * MPSAFE
85  */
86 int
87 getpriority(td, uap)
88 	struct thread *td;
89 	register struct getpriority_args *uap;
90 {
91 	struct proc *p;
92 	int error, low;
93 
94 	error = 0;
95 	low = PRIO_MAX + 1;
96 	switch (uap->which) {
97 
98 	case PRIO_PROCESS:
99 		if (uap->who == 0)
100 			low = td->td_proc->p_nice;
101 		else {
102 			p = pfind(uap->who);
103 			if (p == NULL)
104 				break;
105 			if (p_cansee(td, p) == 0) {
106 				low = p->p_nice;
107 			}
108 			PROC_UNLOCK(p);
109 		}
110 		break;
111 
112 	case PRIO_PGRP: {
113 		register struct pgrp *pg;
114 
115 		sx_slock(&proctree_lock);
116 		if (uap->who == 0) {
117 			pg = td->td_proc->p_pgrp;
118 			PGRP_LOCK(pg);
119 		} else {
120 			pg = pgfind(uap->who);
121 			if (pg == NULL) {
122 				sx_sunlock(&proctree_lock);
123 				break;
124 			}
125 		}
126 		sx_sunlock(&proctree_lock);
127 		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
128 			PROC_LOCK(p);
129 			if (!p_cansee(td, p)) {
130 				if (p->p_nice < low)
131 					low = p->p_nice;
132 			}
133 			PROC_UNLOCK(p);
134 		}
135 		PGRP_UNLOCK(pg);
136 		break;
137 	}
138 
139 	case PRIO_USER:
140 		if (uap->who == 0)
141 			uap->who = td->td_ucred->cr_uid;
142 		sx_slock(&allproc_lock);
143 		LIST_FOREACH(p, &allproc, p_list) {
144 			PROC_LOCK(p);
145 			if (!p_cansee(td, p) &&
146 			    p->p_ucred->cr_uid == uap->who) {
147 				if (p->p_nice < low)
148 					low = p->p_nice;
149 			}
150 			PROC_UNLOCK(p);
151 		}
152 		sx_sunlock(&allproc_lock);
153 		break;
154 
155 	default:
156 		error = EINVAL;
157 		break;
158 	}
159 	if (low == PRIO_MAX + 1 && error == 0)
160 		error = ESRCH;
161 	td->td_retval[0] = low;
162 	return (error);
163 }
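/*
 * Userland note (illustrative sketch, not kernel code): because nice
 * values may legitimately be negative, a return value of -1 from
 * getpriority(2) is ambiguous, so callers conventionally clear errno
 * before the call:
 *
 *	errno = 0;
 *	prio = getpriority(PRIO_PROCESS, 0);
 *	if (prio == -1 && errno != 0)
 *		err(1, "getpriority");
 */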
164 
165 #ifndef _SYS_SYSPROTO_H_
166 struct setpriority_args {
167 	int	which;
168 	int	who;
169 	int	prio;
170 };
171 #endif
172 /*
173  * MPSAFE
174  */
175 int
176 setpriority(td, uap)
177 	struct thread *td;
178 	register struct setpriority_args *uap;
179 {
180 	struct proc *curp;
181 	register struct proc *p;
182 	int found = 0, error = 0;
183 
184 	curp = td->td_proc;
185 	switch (uap->which) {
186 	case PRIO_PROCESS:
187 		if (uap->who == 0) {
188 			PROC_LOCK(curp);
189 			error = donice(td, curp, uap->prio);
190 			PROC_UNLOCK(curp);
191 		} else {
192 			p = pfind(uap->who);
193 			if (p == NULL)
194 				break;
195 			if (p_cansee(td, p) == 0)
196 				error = donice(td, p, uap->prio);
197 			PROC_UNLOCK(p);
198 		}
199 		found++;
200 		break;
201 
202 	case PRIO_PGRP: {
203 		register struct pgrp *pg;
204 
205 		sx_slock(&proctree_lock);
206 		if (uap->who == 0) {
207 			pg = curp->p_pgrp;
208 			PGRP_LOCK(pg);
209 		} else {
210 			pg = pgfind(uap->who);
211 			if (pg == NULL) {
212 				sx_sunlock(&proctree_lock);
213 				break;
214 			}
215 		}
216 		sx_sunlock(&proctree_lock);
217 		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
218 			PROC_LOCK(p);
219 			if (!p_cansee(td, p)) {
220 				error = donice(td, p, uap->prio);
221 				found++;
222 			}
223 			PROC_UNLOCK(p);
224 		}
225 		PGRP_UNLOCK(pg);
226 		break;
227 	}
228 
229 	case PRIO_USER:
230 		if (uap->who == 0)
231 			uap->who = td->td_ucred->cr_uid;
232 		sx_slock(&allproc_lock);
233 		FOREACH_PROC_IN_SYSTEM(p) {
234 			PROC_LOCK(p);
235 			if (p->p_ucred->cr_uid == uap->who &&
236 			    !p_cansee(td, p)) {
237 				error = donice(td, p, uap->prio);
238 				found++;
239 			}
240 			PROC_UNLOCK(p);
241 		}
242 		sx_sunlock(&allproc_lock);
243 		break;
244 
245 	default:
246 		error = EINVAL;
247 		break;
248 	}
249 	if (found == 0 && error == 0)
250 		error = ESRCH;
251 	return (error);
252 }
253 
254 /*
255  * Set "nice" for a (whole) process.
256  */
257 static int
258 donice(struct thread *td, struct proc *p, int n)
259 {
260 	int error;
261 
262 	PROC_LOCK_ASSERT(p, MA_OWNED);
263 	if ((error = p_cansched(td, p)))
264 		return (error);
265 	if (n > PRIO_MAX)
266 		n = PRIO_MAX;
267 	if (n < PRIO_MIN)
268 		n = PRIO_MIN;
269 	if (n < p->p_nice && suser(td) != 0)
270 		return (EACCES);
271 	mtx_lock_spin(&sched_lock);
272 	sched_nice(p, n);
273 	mtx_unlock_spin(&sched_lock);
274 	return (0);
275 }
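/*
 * Example of the policy above (illustrative): an unprivileged process
 * running at nice 0 may raise its own nice value, e.g. via
 * setpriority(PRIO_PROCESS, 0, 10), but a later attempt to return to a
 * lower value such as 0 hits the "n < p->p_nice" check and fails with
 * EACCES unless the caller passes the suser() check.
 */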
276 
277 /*
278  * Set realtime priority
279  *
280  * MPSAFE
281  */
282 #ifndef _SYS_SYSPROTO_H_
283 struct rtprio_args {
284 	int		function;
285 	pid_t		pid;
286 	struct rtprio	*rtp;
287 };
288 #endif
289 
290 int
291 rtprio(td, uap)
292 	struct thread *td;		/* curthread */
293 	register struct rtprio_args *uap;
294 {
295 	struct proc *curp;
296 	struct proc *p;
297 	struct ksegrp *kg;
298 	struct rtprio rtp;
299 	int cierror, error;
300 
301 	/* Perform copyin before acquiring locks if needed. */
302 	if (uap->function == RTP_SET)
303 		cierror = copyin(uap->rtp, &rtp, sizeof(struct rtprio));
304 	else
305 		cierror = 0;
306 
307 	curp = td->td_proc;
308 	if (uap->pid == 0) {
309 		p = curp;
310 		PROC_LOCK(p);
311 	} else {
312 		p = pfind(uap->pid);
313 		if (p == NULL)
314 			return (ESRCH);
315 	}
316 
317 	switch (uap->function) {
318 	case RTP_LOOKUP:
319 		if ((error = p_cansee(td, p)))
320 			break;
321 		mtx_lock_spin(&sched_lock);
322 		/*
323 		 * Return OUR priority if no pid specified,
324 		 * or if one is, report the highest priority
325 		 * in the process. There isn't much more you can do as
326 		 * there is only room to return a single priority.
327 		 * XXXKSE  Maybe need a new interface to report
328 		 * priorities of multiple system scope threads.
329 		 * Note: specifying our own pid is not the same
330 		 * as leaving it zero.
331 		 */
332 		if (uap->pid == 0) {
333 			pri_to_rtp(td->td_ksegrp, &rtp);
334 		} else {
335 			struct rtprio rtp2;
336 
337 			rtp.type = RTP_PRIO_IDLE;
338 			rtp.prio = RTP_PRIO_MAX;
339 			FOREACH_KSEGRP_IN_PROC(p, kg) {
340 				pri_to_rtp(kg, &rtp2);
341 				if ((rtp2.type <  rtp.type) ||
342 				    ((rtp2.type == rtp.type) &&
343 				     (rtp2.prio < rtp.prio))) {
344 					rtp.type = rtp2.type;
345 					rtp.prio = rtp2.prio;
346 				}
347 			}
348 		}
349 		mtx_unlock_spin(&sched_lock);
350 		PROC_UNLOCK(p);
351 		return (copyout(&rtp, uap->rtp, sizeof(struct rtprio)));
352 	case RTP_SET:
353 		if ((error = p_cansched(td, p)) || (error = cierror))
354 			break;
355 		/* disallow setting rtprio in most cases if not superuser */
356 		if (suser(td) != 0) {
357 			/* can't set someone else's */
358 			if (uap->pid) {
359 				error = EPERM;
360 				break;
361 			}
362 			/* can't set realtime priority */
363 /*
364  * Realtime priority has to be restricted for reasons which should be
365  * obvious. However, for idle priority, there is a potential for
366  * system deadlock if an idleprio process gains a lock on a resource
367  * that other processes need (and the idleprio process can't run
368  * due to a CPU-bound normal process). Fix me! XXX
369  */
370 #if 0
371  			if (RTP_PRIO_IS_REALTIME(rtp.type))
372 #endif
373 			if (rtp.type != RTP_PRIO_NORMAL) {
374 				error = EPERM;
375 				break;
376 			}
377 		}
378 		mtx_lock_spin(&sched_lock);
379 		/*
380 		 * If we are setting our own priority, set just our
381 		 * KSEGRP, but if we are doing another process,
382 		 * do all the groups on that process.  If we
383 		 * explicitly specify our own pid, we do the latter.
384 		 */
385 		if (uap->pid == 0) {
386 			error = rtp_to_pri(&rtp, td->td_ksegrp);
387 		} else {
388 			FOREACH_KSEGRP_IN_PROC(p, kg) {
389 				if ((error = rtp_to_pri(&rtp, kg)) != 0) {
390 					break;
391 				}
392 			}
393 		}
394 		mtx_unlock_spin(&sched_lock);
395 		break;
396 	default:
397 		error = EINVAL;
398 		break;
399 	}
400 	PROC_UNLOCK(p);
401 	return (error);
402 }
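/*
 * Userland usage sketch (illustrative; see rtprio(2)): moving the
 * calling process into the idle class might look like
 *
 *	struct rtprio rtp;
 *
 *	rtp.type = RTP_PRIO_IDLE;
 *	rtp.prio = RTP_PRIO_MAX;
 *	if (rtprio(RTP_SET, 0, &rtp) != 0)
 *		err(1, "rtprio");
 *
 * Note that with the "rtp.type != RTP_PRIO_NORMAL" restriction above,
 * this particular request still requires superuser privilege.
 */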
403 
404 int
405 rtp_to_pri(struct rtprio *rtp, struct ksegrp *kg)
406 {
407 
408 	mtx_assert(&sched_lock, MA_OWNED);
409 	if (rtp->prio > RTP_PRIO_MAX)
410 		return (EINVAL);
411 	switch (RTP_PRIO_BASE(rtp->type)) {
412 	case RTP_PRIO_REALTIME:
413 		kg->kg_user_pri = PRI_MIN_REALTIME + rtp->prio;
414 		break;
415 	case RTP_PRIO_NORMAL:
416 		kg->kg_user_pri = PRI_MIN_TIMESHARE + rtp->prio;
417 		break;
418 	case RTP_PRIO_IDLE:
419 		kg->kg_user_pri = PRI_MIN_IDLE + rtp->prio;
420 		break;
421 	default:
422 		return (EINVAL);
423 	}
424 	sched_class(kg, rtp->type);
425 	if (curthread->td_ksegrp == kg) {
426 		curthread->td_base_pri = kg->kg_user_pri;
427 		sched_prio(curthread, kg->kg_user_pri); /* XXX dubious */
428 	}
429 	return (0);
430 }
431 
432 void
433 pri_to_rtp(struct ksegrp *kg, struct rtprio *rtp)
434 {
435 
436 	mtx_assert(&sched_lock, MA_OWNED);
437 	switch (PRI_BASE(kg->kg_pri_class)) {
438 	case PRI_REALTIME:
439 		rtp->prio = kg->kg_user_pri - PRI_MIN_REALTIME;
440 		break;
441 	case PRI_TIMESHARE:
442 		rtp->prio = kg->kg_user_pri - PRI_MIN_TIMESHARE;
443 		break;
444 	case PRI_IDLE:
445 		rtp->prio = kg->kg_user_pri - PRI_MIN_IDLE;
446 		break;
447 	default:
448 		break;
449 	}
450 	rtp->type = kg->kg_pri_class;
451 }
452 
453 #if defined(COMPAT_43)
454 #ifndef _SYS_SYSPROTO_H_
455 struct osetrlimit_args {
456 	u_int	which;
457 	struct	orlimit *rlp;
458 };
459 #endif
460 /*
461  * MPSAFE
462  */
463 int
464 osetrlimit(td, uap)
465 	struct thread *td;
466 	register struct osetrlimit_args *uap;
467 {
468 	struct orlimit olim;
469 	struct rlimit lim;
470 	int error;
471 
472 	if ((error = copyin(uap->rlp, &olim, sizeof(struct orlimit))))
473 		return (error);
474 	lim.rlim_cur = olim.rlim_cur;
475 	lim.rlim_max = olim.rlim_max;
476 	error = kern_setrlimit(td, uap->which, &lim);
477 	return (error);
478 }
479 
480 #ifndef _SYS_SYSPROTO_H_
481 struct ogetrlimit_args {
482 	u_int	which;
483 	struct	orlimit *rlp;
484 };
485 #endif
486 /*
487  * MPSAFE
488  */
489 int
490 ogetrlimit(td, uap)
491 	struct thread *td;
492 	register struct ogetrlimit_args *uap;
493 {
494 	struct orlimit olim;
495 	struct rlimit rl;
496 	struct proc *p;
497 	int error;
498 
499 	if (uap->which >= RLIM_NLIMITS)
500 		return (EINVAL);
501 	p = td->td_proc;
502 	PROC_LOCK(p);
503 	lim_rlimit(p, uap->which, &rl);
504 	PROC_UNLOCK(p);
505 
506 	/*
507 	 * XXX would be more correct to convert only RLIM_INFINITY to the
508 	 * old RLIM_INFINITY and fail with EOVERFLOW for other larger
509 	 * values.  Most 64->32 and 32->16 conversions, including not
510  * unimportant ones of uids, are even more broken than what we
511 	 * do here (they blindly truncate).  We don't do this correctly
512 	 * here since we have little experience with EOVERFLOW yet.
513 	 * Elsewhere, getuid() can't fail...
514 	 */
515 	olim.rlim_cur = rl.rlim_cur > 0x7fffffff ? 0x7fffffff : rl.rlim_cur;
516 	olim.rlim_max = rl.rlim_max > 0x7fffffff ? 0x7fffffff : rl.rlim_max;
517 	error = copyout(&olim, uap->rlp, sizeof(olim));
518 	return (error);
519 }
520 #endif /* COMPAT_43 */
521 
522 #ifndef _SYS_SYSPROTO_H_
523 struct __setrlimit_args {
524 	u_int	which;
525 	struct	rlimit *rlp;
526 };
527 #endif
528 /*
529  * MPSAFE
530  */
531 int
532 setrlimit(td, uap)
533 	struct thread *td;
534 	register struct __setrlimit_args *uap;
535 {
536 	struct rlimit alim;
537 	int error;
538 
539 	if ((error = copyin(uap->rlp, &alim, sizeof(struct rlimit))))
540 		return (error);
541 	error = kern_setrlimit(td, uap->which, &alim);
542 	return (error);
543 }
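/*
 * Userland usage sketch (illustrative; see getrlimit(2)): lowering the
 * soft descriptor limit, which any process may do, might look like
 *
 *	struct rlimit rl;
 *
 *	if (getrlimit(RLIMIT_NOFILE, &rl) == 0) {
 *		rl.rlim_cur = 64;
 *		(void)setrlimit(RLIMIT_NOFILE, &rl);
 *	}
 *
 * Raising rlim_cur or rlim_max above the current hard limit is the
 * case that hits the suser_cred() check in kern_setrlimit() below.
 */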
544 
545 int
546 kern_setrlimit(td, which, limp)
547 	struct thread *td;
548 	u_int which;
549 	struct rlimit *limp;
550 {
551 	struct plimit *newlim, *oldlim;
552 	struct proc *p;
553 	register struct rlimit *alimp;
554 	rlim_t oldssiz;
555 	int error;
556 
557 	if (which >= RLIM_NLIMITS)
558 		return (EINVAL);
559 
560 	/*
561 	 * Preserve historical bugs by treating negative limits as unsigned.
562 	 */
563 	if (limp->rlim_cur < 0)
564 		limp->rlim_cur = RLIM_INFINITY;
565 	if (limp->rlim_max < 0)
566 		limp->rlim_max = RLIM_INFINITY;
567 
568 	oldssiz = 0;
569 	p = td->td_proc;
570 	newlim = lim_alloc();
571 	PROC_LOCK(p);
572 	oldlim = p->p_limit;
573 	alimp = &oldlim->pl_rlimit[which];
574 	if (limp->rlim_cur > alimp->rlim_max ||
575 	    limp->rlim_max > alimp->rlim_max)
576 		if ((error = suser_cred(td->td_ucred, SUSER_ALLOWJAIL))) {
577 			PROC_UNLOCK(p);
578 			lim_free(newlim);
579 			return (error);
580 		}
581 	if (limp->rlim_cur > limp->rlim_max)
582 		limp->rlim_cur = limp->rlim_max;
583 	lim_copy(newlim, oldlim);
584 	alimp = &newlim->pl_rlimit[which];
585 
586 	switch (which) {
587 
588 	case RLIMIT_CPU:
589 		mtx_lock_spin(&sched_lock);
590 		p->p_cpulimit = limp->rlim_cur;
591 		mtx_unlock_spin(&sched_lock);
592 		break;
593 	case RLIMIT_DATA:
594 		if (limp->rlim_cur > maxdsiz)
595 			limp->rlim_cur = maxdsiz;
596 		if (limp->rlim_max > maxdsiz)
597 			limp->rlim_max = maxdsiz;
598 		break;
599 
600 	case RLIMIT_STACK:
601 		if (limp->rlim_cur > maxssiz)
602 			limp->rlim_cur = maxssiz;
603 		if (limp->rlim_max > maxssiz)
604 			limp->rlim_max = maxssiz;
605 		oldssiz = alimp->rlim_cur;
606 		break;
607 
608 	case RLIMIT_NOFILE:
609 		if (limp->rlim_cur > maxfilesperproc)
610 			limp->rlim_cur = maxfilesperproc;
611 		if (limp->rlim_max > maxfilesperproc)
612 			limp->rlim_max = maxfilesperproc;
613 		break;
614 
615 	case RLIMIT_NPROC:
616 		if (limp->rlim_cur > maxprocperuid)
617 			limp->rlim_cur = maxprocperuid;
618 		if (limp->rlim_max > maxprocperuid)
619 			limp->rlim_max = maxprocperuid;
620 		if (limp->rlim_cur < 1)
621 			limp->rlim_cur = 1;
622 		if (limp->rlim_max < 1)
623 			limp->rlim_max = 1;
624 		break;
625 	}
626 	*alimp = *limp;
627 	p->p_limit = newlim;
628 	PROC_UNLOCK(p);
629 	lim_free(oldlim);
630 
631 	if (which == RLIMIT_STACK) {
632 		/*
633 		 * Stack is allocated to the max at exec time with only
634 		 * "rlim_cur" bytes accessible.  If the stack limit is going
635 		 * up, make more accessible; if going down, make it inaccessible.
636 		 */
637 		if (limp->rlim_cur != oldssiz) {
638 			vm_offset_t addr;
639 			vm_size_t size;
640 			vm_prot_t prot;
641 
642 			mtx_lock(&Giant);
643 			if (limp->rlim_cur > oldssiz) {
644 				prot = p->p_sysent->sv_stackprot;
645 				size = limp->rlim_cur - oldssiz;
646 				addr = p->p_sysent->sv_usrstack -
647 				    limp->rlim_cur;
648 			} else {
649 				prot = VM_PROT_NONE;
650 				size = oldssiz - limp->rlim_cur;
651 				addr = p->p_sysent->sv_usrstack -
652 				    oldssiz;
653 			}
654 			addr = trunc_page(addr);
655 			size = round_page(size);
656 			(void) vm_map_protect(&p->p_vmspace->vm_map,
657 					      addr, addr+size, prot, FALSE);
658 			mtx_unlock(&Giant);
659 		}
660 	}
661 	return (0);
662 }
663 
664 #ifndef _SYS_SYSPROTO_H_
665 struct __getrlimit_args {
666 	u_int	which;
667 	struct	rlimit *rlp;
668 };
669 #endif
670 /*
671  * MPSAFE
672  */
673 /* ARGSUSED */
674 int
675 getrlimit(td, uap)
676 	struct thread *td;
677 	register struct __getrlimit_args *uap;
678 {
679 	struct rlimit rlim;
680 	struct proc *p;
681 	int error;
682 
683 	if (uap->which >= RLIM_NLIMITS)
684 		return (EINVAL);
685 	p = td->td_proc;
686 	PROC_LOCK(p);
687 	lim_rlimit(p, uap->which, &rlim);
688 	PROC_UNLOCK(p);
689 	error = copyout(&rlim, uap->rlp, sizeof(struct rlimit));
690 	return(error);
691 }
692 
693 /*
694  * Transform the running time and tick information in proc p into user,
695  * system, and interrupt time usage.
696  */
697 void
698 calcru(p, up, sp, ip)
699 	struct proc *p;
700 	struct timeval *up;
701 	struct timeval *sp;
702 	struct timeval *ip;
703 {
704 	struct bintime bt, rt;
705 	struct timeval tv;
706 	struct thread *td;
707 	/* {user, system, interrupt, total} {ticks, usec}; previous tu: */
708 	u_int64_t ut, uu, st, su, it, iu, tt, tu, ptu;
709 	int problemcase;
710 
711 	mtx_assert(&sched_lock, MA_OWNED);
712 	/* XXX: why spl-protect ?  worst case is an off-by-one report */
713 
714 	ut = p->p_uticks;
715 	st = p->p_sticks;
716 	it = p->p_iticks;
717 
718 	tt = ut + st + it;
719 	if (tt == 0) {
720 		st = 1;
721 		tt = 1;
722 	}
723 	rt = p->p_runtime;
724 	problemcase = 0;
725 	FOREACH_THREAD_IN_PROC(p, td) {
726 		/*
727 		 * Adjust for the current time slice.  This is actually fairly
728 		 * important since the error here is on the order of a time
729 		 * quantum, which is much greater than the sampling error.
730 		 */
731 		if (td == curthread) {
732 			binuptime(&bt);
733 			bintime_sub(&bt, PCPU_PTR(switchtime));
734 			bintime_add(&rt, &bt);
735 		} else if (TD_IS_RUNNING(td)) {
736 			/*
737 			 * XXX: this case should add the difference between
738 			 * the current time and the switch time as above,
739 			 * but the switch time is inaccessible, so we can't
740 			 * do the adjustment and will end up with a wrong
741 			 * runtime.  A previous call with a different
742 			 * curthread may have obtained a (right or wrong)
743 			 * runtime that is in advance of ours.  Just set a
744 			 * flag to avoid warning about this known problem.
745 			 */
746 			problemcase = 1;
747 		}
748 	}
749 	bintime2timeval(&rt, &tv);
750 	tu = (u_int64_t)tv.tv_sec * 1000000 + tv.tv_usec;
751 	ptu = p->p_uu + p->p_su + p->p_iu;
752 	if (tu < ptu) {
753 		if (!problemcase)
754 			printf(
755 "calcru: runtime went backwards from %ju usec to %ju usec for pid %d (%s)\n",
756 			    (uintmax_t)ptu, (uintmax_t)tu, p->p_pid, p->p_comm);
757 		tu = ptu;
758 	}
759 	if ((int64_t)tu < 0) {
760 		printf("calcru: negative runtime of %jd usec for pid %d (%s)\n",
761 		    (intmax_t)tu, p->p_pid, p->p_comm);
762 		tu = ptu;
763 	}
764 
765 	/* Subdivide tu. */
766 	uu = (tu * ut) / tt;
767 	su = (tu * st) / tt;
768 	iu = tu - uu - su;
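	/*
	 * Worked example (illustrative): with ut = 3, st = 1 and it = 0
	 * (so tt = 4) and tu = 4000000 usec of total runtime, the split
	 * above yields uu = 3000000, su = 1000000 and iu = 0.
	 */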
769 
770 	/* Enforce monotonicity. */
771 	if (uu < p->p_uu || su < p->p_su || iu < p->p_iu) {
772 		if (uu < p->p_uu)
773 			uu = p->p_uu;
774 		else if (uu + p->p_su + p->p_iu > tu)
775 			uu = tu - p->p_su - p->p_iu;
776 		if (st == 0)
777 			su = p->p_su;
778 		else {
779 			su = ((tu - uu) * st) / (st + it);
780 			if (su < p->p_su)
781 				su = p->p_su;
782 			else if (uu + su + p->p_iu > tu)
783 				su = tu - uu - p->p_iu;
784 		}
785 		KASSERT(uu + su + p->p_iu <= tu,
786 		    ("calcru: monotonisation botch 1"));
787 		iu = tu - uu - su;
788 		KASSERT(iu >= p->p_iu,
789 		    ("calcru: monotonisation botch 2"));
790 	}
791 	p->p_uu = uu;
792 	p->p_su = su;
793 	p->p_iu = iu;
794 
795 	up->tv_sec = uu / 1000000;
796 	up->tv_usec = uu % 1000000;
797 	sp->tv_sec = su / 1000000;
798 	sp->tv_usec = su % 1000000;
799 	if (ip != NULL) {
800 		ip->tv_sec = iu / 1000000;
801 		ip->tv_usec = iu % 1000000;
802 	}
803 }
804 
805 #ifndef _SYS_SYSPROTO_H_
806 struct getrusage_args {
807 	int	who;
808 	struct	rusage *rusage;
809 };
810 #endif
811 /*
812  * MPSAFE
813  */
814 /* ARGSUSED */
815 int
816 getrusage(td, uap)
817 	register struct thread *td;
818 	register struct getrusage_args *uap;
819 {
820 	struct rusage ru;
821 	struct proc *p;
822 
823 	p = td->td_proc;
824 	switch (uap->who) {
825 
826 	case RUSAGE_SELF:
827 		mtx_lock(&Giant);
828 		mtx_lock_spin(&sched_lock);
829 		calcru(p, &p->p_stats->p_ru.ru_utime, &p->p_stats->p_ru.ru_stime,
830 		    NULL);
831 		mtx_unlock_spin(&sched_lock);
832 		ru = p->p_stats->p_ru;
833 		mtx_unlock(&Giant);
834 		break;
835 
836 	case RUSAGE_CHILDREN:
837 		mtx_lock(&Giant);
838 		ru = p->p_stats->p_cru;
839 		mtx_unlock(&Giant);
840 		break;
841 
842 	default:
843 		return (EINVAL);
844 		break;
845 	}
846 	return (copyout(&ru, uap->rusage, sizeof(struct rusage)));
847 }
848 
849 void
850 ruadd(ru, ru2)
851 	register struct rusage *ru, *ru2;
852 {
853 	register long *ip, *ip2;
854 	register int i;
855 
856 	timevaladd(&ru->ru_utime, &ru2->ru_utime);
857 	timevaladd(&ru->ru_stime, &ru2->ru_stime);
858 	if (ru->ru_maxrss < ru2->ru_maxrss)
859 		ru->ru_maxrss = ru2->ru_maxrss;
860 	ip = &ru->ru_first; ip2 = &ru2->ru_first;
861 	for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--)
862 		*ip++ += *ip2++;
863 }
864 
865 /*
866  * Allocate a new resource limits structure and initialize its
867  * reference count and mutex pointer.
868  */
869 struct plimit *
870 lim_alloc()
871 {
872 	struct plimit *limp;
873 
874 	limp = (struct plimit *)malloc(sizeof(struct plimit), M_PLIMIT,
875 	    M_WAITOK);
876 	limp->pl_refcnt = 1;
877 	limp->pl_mtx = mtx_pool_alloc(mtxpool_sleep);
878 	return (limp);
879 }
880 
881 struct plimit *
882 lim_hold(limp)
883 	struct plimit *limp;
884 {
885 
886 	LIM_LOCK(limp);
887 	limp->pl_refcnt++;
888 	LIM_UNLOCK(limp);
889 	return (limp);
890 }
891 
892 void
893 lim_free(limp)
894 	struct plimit *limp;
895 {
896 
897 	LIM_LOCK(limp);
898 	KASSERT(limp->pl_refcnt > 0, ("plimit refcnt underflow"));
899 	if (--limp->pl_refcnt == 0) {
900 		LIM_UNLOCK(limp);
901 		free((void *)limp, M_PLIMIT);
902 		return;
903 	}
904 	LIM_UNLOCK(limp);
905 }
906 
907 /*
908  * Make a copy of the plimit structure.
909  * We share these structures copy-on-write after fork.
910  */
911 void
912 lim_copy(dst, src)
913 	struct plimit *dst, *src;
914 {
915 
916 	KASSERT(dst->pl_refcnt == 1, ("lim_copy to shared limit"));
917 	bcopy(src->pl_rlimit, dst->pl_rlimit, sizeof(src->pl_rlimit));
918 }
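/*
 * The usual update pattern (a sketch of what kern_setrlimit() above
 * does): allocate a fresh plimit, copy the current one into it, modify
 * the private copy, publish it and drop the reference on the old one:
 *
 *	newlim = lim_alloc();
 *	PROC_LOCK(p);
 *	oldlim = p->p_limit;
 *	lim_copy(newlim, oldlim);
 *	newlim->pl_rlimit[which] = *limp;
 *	p->p_limit = newlim;
 *	PROC_UNLOCK(p);
 *	lim_free(oldlim);
 */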
919 
920 /*
921  * Return the hard limit for a particular system resource.  The
922  * which parameter specifies the index into the rlimit array.
923  */
924 rlim_t
925 lim_max(struct proc *p, int which)
926 {
927 	struct rlimit rl;
928 
929 	lim_rlimit(p, which, &rl);
930 	return (rl.rlim_max);
931 }
932 
933 /*
934  * Return the current (soft) limit for a particular system resource.
935  * The which parameter specifies the index into the rlimit array.
936  */
937 rlim_t
938 lim_cur(struct proc *p, int which)
939 {
940 	struct rlimit rl;
941 
942 	lim_rlimit(p, which, &rl);
943 	return (rl.rlim_cur);
944 }
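/*
 * Usage sketch (illustrative): lim_rlimit() below asserts that the
 * process lock is held, so callers of lim_cur()/lim_max() do e.g.
 *
 *	PROC_LOCK(p);
 *	nofile = lim_cur(p, RLIMIT_NOFILE);
 *	PROC_UNLOCK(p);
 */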
945 
946 /*
947  * Return a copy of the entire rlimit structure for the system limit
948  * specified by 'which' in the rlimit structure pointed to by 'rlp'.
949  */
950 void
951 lim_rlimit(struct proc *p, int which, struct rlimit *rlp)
952 {
953 
954 	PROC_LOCK_ASSERT(p, MA_OWNED);
955 	KASSERT(which >= 0 && which < RLIM_NLIMITS,
956 	    ("request for invalid resource limit"));
957 	*rlp = p->p_limit->pl_rlimit[which];
958 }
959 
960 /*
961  * Find the uidinfo structure for a uid.  This structure is used to
962  * track the total resource consumption (process count, socket buffer
963  * size, etc.) for the uid and impose limits.
964  */
965 void
966 uihashinit()
967 {
968 
969 	uihashtbl = hashinit(maxproc / 16, M_UIDINFO, &uihash);
970 	mtx_init(&uihashtbl_mtx, "uidinfo hash", NULL, MTX_DEF);
971 }
972 
973 /*
974  * Look up a uidinfo struct for the parameter uid.
975  * uihashtbl_mtx must be locked.
976  */
977 static struct uidinfo *
978 uilookup(uid)
979 	uid_t uid;
980 {
981 	struct uihashhead *uipp;
982 	struct uidinfo *uip;
983 
984 	mtx_assert(&uihashtbl_mtx, MA_OWNED);
985 	uipp = UIHASH(uid);
986 	LIST_FOREACH(uip, uipp, ui_hash)
987 		if (uip->ui_uid == uid)
988 			break;
989 
990 	return (uip);
991 }
992 
993 /*
994  * Find or allocate a struct uidinfo for a particular uid.
995  * Increase refcount on uidinfo struct returned.
996  * uifree() should be called on a struct uidinfo when released.
997  */
998 struct uidinfo *
999 uifind(uid)
1000 	uid_t uid;
1001 {
1002 	struct uidinfo *old_uip, *uip;
1003 
1004 	mtx_lock(&uihashtbl_mtx);
1005 	uip = uilookup(uid);
1006 	if (uip == NULL) {
1007 		mtx_unlock(&uihashtbl_mtx);
1008 		uip = malloc(sizeof(*uip), M_UIDINFO, M_WAITOK | M_ZERO);
1009 		mtx_lock(&uihashtbl_mtx);
1010 		/*
1011 		 * There's a chance someone created our uidinfo while we
1012 		 * were in malloc and not holding the lock, so we have to
1013 		 * make sure we don't insert a duplicate uidinfo.
1014 		 */
1015 		if ((old_uip = uilookup(uid)) != NULL) {
1016 			/* Someone else beat us to it. */
1017 			free(uip, M_UIDINFO);
1018 			uip = old_uip;
1019 		} else {
1020 			uip->ui_mtxp = mtx_pool_alloc(mtxpool_sleep);
1021 			uip->ui_uid = uid;
1022 			LIST_INSERT_HEAD(UIHASH(uid), uip, ui_hash);
1023 		}
1024 	}
1025 	uihold(uip);
1026 	mtx_unlock(&uihashtbl_mtx);
1027 	return (uip);
1028 }
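/*
 * Typical lifecycle sketch (illustrative): a consumer that charges a
 * resource against a uid looks the uidinfo up once, keeps the
 * reference while the charge is outstanding and releases it with
 * uifree():
 *
 *	uip = uifind(uid);
 *	...
 *	(void)chgproccnt(uip, 1, maxprocperuid);
 *	...
 *	(void)chgproccnt(uip, -1, 0);
 *	uifree(uip);
 */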
1029 
1030 /*
1031  * Place another refcount on a uidinfo struct.
1032  */
1033 void
1034 uihold(uip)
1035 	struct uidinfo *uip;
1036 {
1037 
1038 	UIDINFO_LOCK(uip);
1039 	uip->ui_ref++;
1040 	UIDINFO_UNLOCK(uip);
1041 }
1042 
1043 /*-
1044  * Since uidinfo structs have a long lifetime, we use an
1045  * opportunistic refcounting scheme to avoid locking the lookup hash
1046  * for each release.
1047  *
1048  * If the refcount hits 0, we need to free the structure,
1049  * which means we need to lock the hash.
1050  * Optimal case:
1051  *   After locking the struct and lowering the refcount, if we find
1052  *   that we don't need to free, simply unlock and return.
1053  * Suboptimal case:
1054  *   If lowering the refcount results in a need to free, bump the
1055  *   count back up, drop the lock and acquire the locks in the
1056  *   proper order to try again.
1057  */
1058 void
1059 uifree(uip)
1060 	struct uidinfo *uip;
1061 {
1062 
1063 	/* Prepare for optimal case. */
1064 	UIDINFO_LOCK(uip);
1065 
1066 	if (--uip->ui_ref != 0) {
1067 		UIDINFO_UNLOCK(uip);
1068 		return;
1069 	}
1070 
1071 	/* Prepare for suboptimal case. */
1072 	uip->ui_ref++;
1073 	UIDINFO_UNLOCK(uip);
1074 	mtx_lock(&uihashtbl_mtx);
1075 	UIDINFO_LOCK(uip);
1076 
1077 	/*
1078 	 * We must subtract one from the count again because we backed out
1079 	 * our initial subtraction before dropping the lock.
1080 	 * Since another thread may have added a reference after we dropped the
1081 	 * initial lock we have to test for zero again.
1082 	 */
1083 	if (--uip->ui_ref == 0) {
1084 		LIST_REMOVE(uip, ui_hash);
1085 		mtx_unlock(&uihashtbl_mtx);
1086 		if (uip->ui_sbsize != 0)
1087 			printf("freeing uidinfo: uid = %d, sbsize = %jd\n",
1088 			    uip->ui_uid, (intmax_t)uip->ui_sbsize);
1089 		if (uip->ui_proccnt != 0)
1090 			printf("freeing uidinfo: uid = %d, proccnt = %ld\n",
1091 			    uip->ui_uid, uip->ui_proccnt);
1092 		UIDINFO_UNLOCK(uip);
1093 		FREE(uip, M_UIDINFO);
1094 		return;
1095 	}
1096 
1097 	mtx_unlock(&uihashtbl_mtx);
1098 	UIDINFO_UNLOCK(uip);
1099 }
1100 
1101 /*
1102  * Change the count associated with the number of processes
1103  * a given user is using.  When 'max' is 0, don't enforce a limit.
1104  */
1105 int
1106 chgproccnt(uip, diff, max)
1107 	struct	uidinfo	*uip;
1108 	int	diff;
1109 	int	max;
1110 {
1111 
1112 	UIDINFO_LOCK(uip);
1113 	/* Don't allow them to exceed max, but allow subtraction. */
1114 	if (diff > 0 && uip->ui_proccnt + diff > max && max != 0) {
1115 		UIDINFO_UNLOCK(uip);
1116 		return (0);
1117 	}
1118 	uip->ui_proccnt += diff;
1119 	if (uip->ui_proccnt < 0)
1120 		printf("negative proccnt for uid = %d\n", uip->ui_uid);
1121 	UIDINFO_UNLOCK(uip);
1122 	return (1);
1123 }
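/*
 * Note on the return value (illustrative): chgproccnt() returns 0 when
 * the increment would push the user over 'max' and 1 otherwise, so a
 * caller enforcing RLIMIT_NPROC typically treats 0 as "limit hit":
 *
 *	if (!chgproccnt(uip, 1, lim_cur(p, RLIMIT_NPROC)))
 *		return (EAGAIN);	/* hypothetical error handling */
 */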
1124 
1125 /*
1126  * Change the total socket buffer size a user has used.
1127  */
1128 int
1129 chgsbsize(uip, hiwat, to, max)
1130 	struct	uidinfo	*uip;
1131 	u_int  *hiwat;
1132 	u_int	to;
1133 	rlim_t	max;
1134 {
1135 	rlim_t new;
1136 	int s;
1137 
1138 	s = splnet();
1139 	UIDINFO_LOCK(uip);
1140 	new = uip->ui_sbsize + to - *hiwat;
1141 	/* Don't allow them to exceed max, but allow subtraction */
1142 	/* Don't allow them to exceed max, but allow subtraction. */
1143 		splx(s);
1144 		UIDINFO_UNLOCK(uip);
1145 		return (0);
1146 	}
1147 	uip->ui_sbsize = new;
1148 	*hiwat = to;
1149 	if (uip->ui_sbsize < 0)
1150 		printf("negative sbsize for uid = %d\n", uip->ui_uid);
1151 	splx(s);
1152 	UIDINFO_UNLOCK(uip);
1153 	return (1);
1154 }
1155