xref: /freebsd/sys/kern/kern_resource.c (revision 2357939bc239bd5334a169b62313806178dd8f30)
1 /*-
2  * Copyright (c) 1982, 1986, 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 4. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  *	@(#)kern_resource.c	8.5 (Berkeley) 1/21/94
35  */
36 
37 #include <sys/cdefs.h>
38 __FBSDID("$FreeBSD$");
39 
40 #include "opt_compat.h"
41 
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/sysproto.h>
45 #include <sys/file.h>
46 #include <sys/kernel.h>
47 #include <sys/lock.h>
48 #include <sys/malloc.h>
49 #include <sys/mutex.h>
50 #include <sys/proc.h>
51 #include <sys/resourcevar.h>
52 #include <sys/sched.h>
53 #include <sys/sx.h>
54 #include <sys/sysent.h>
55 #include <sys/time.h>
56 
57 #include <vm/vm.h>
58 #include <vm/vm_param.h>
59 #include <vm/pmap.h>
60 #include <vm/vm_map.h>
61 
62 static int donice(struct thread *td, struct proc *chgp, int n);
63 
64 static MALLOC_DEFINE(M_PLIMIT, "plimit", "plimit structures");
65 static MALLOC_DEFINE(M_UIDINFO, "uidinfo", "uidinfo structures");
66 #define	UIHASH(uid)	(&uihashtbl[(uid) & uihash])
67 static struct mtx uihashtbl_mtx;
68 static LIST_HEAD(uihashhead, uidinfo) *uihashtbl;
69 static u_long uihash;		/* size of hash table - 1 */
70 
71 static struct uidinfo	*uilookup(uid_t uid);
72 
73 /*
74  * Resource controls and accounting.
75  */
76 
77 #ifndef _SYS_SYSPROTO_H_
78 struct getpriority_args {
79 	int	which;
80 	int	who;
81 };
82 #endif
83 /*
84  * MPSAFE
85  */
86 int
87 getpriority(td, uap)
88 	struct thread *td;
89 	register struct getpriority_args *uap;
90 {
91 	struct ksegrp *kg;
92 	struct proc *p;
93 	int error, low;
94 
95 	error = 0;
96 	low = PRIO_MAX + 1;
97 	switch (uap->which) {
98 
99 	case PRIO_PROCESS:
100 		if (uap->who == 0)
101 			low = td->td_ksegrp->kg_nice;
102 		else {
103 			p = pfind(uap->who);
104 			if (p == NULL)
105 				break;
106 			if (p_cansee(td, p) == 0) {
107 				FOREACH_KSEGRP_IN_PROC(p, kg) {
108 					if (kg->kg_nice < low)
109 						low = kg->kg_nice;
110 				}
111 			}
112 			PROC_UNLOCK(p);
113 		}
114 		break;
115 
116 	case PRIO_PGRP: {
117 		register struct pgrp *pg;
118 
119 		sx_slock(&proctree_lock);
120 		if (uap->who == 0) {
121 			pg = td->td_proc->p_pgrp;
122 			PGRP_LOCK(pg);
123 		} else {
124 			pg = pgfind(uap->who);
125 			if (pg == NULL) {
126 				sx_sunlock(&proctree_lock);
127 				break;
128 			}
129 		}
130 		sx_sunlock(&proctree_lock);
131 		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
132 			PROC_LOCK(p);
133 			if (!p_cansee(td, p)) {
134 				FOREACH_KSEGRP_IN_PROC(p, kg) {
135 					if (kg->kg_nice < low)
136 						low = kg->kg_nice;
137 				}
138 			}
139 			PROC_UNLOCK(p);
140 		}
141 		PGRP_UNLOCK(pg);
142 		break;
143 	}
144 
145 	case PRIO_USER:
146 		if (uap->who == 0)
147 			uap->who = td->td_ucred->cr_uid;
148 		sx_slock(&allproc_lock);
149 		LIST_FOREACH(p, &allproc, p_list) {
150 			PROC_LOCK(p);
151 			if (!p_cansee(td, p) &&
152 			    p->p_ucred->cr_uid == uap->who) {
153 				FOREACH_KSEGRP_IN_PROC(p, kg) {
154 					if (kg->kg_nice < low)
155 						low = kg->kg_nice;
156 				}
157 			}
158 			PROC_UNLOCK(p);
159 		}
160 		sx_sunlock(&allproc_lock);
161 		break;
162 
163 	default:
164 		error = EINVAL;
165 		break;
166 	}
167 	if (low == PRIO_MAX + 1 && error == 0)
168 		error = ESRCH;
169 	td->td_retval[0] = low;
170 	return (error);
171 }
172 
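/*
 * Usage note (illustrative, per the getpriority(2) interface): because a
 * nice value of -1 is legitimate, userland callers are expected to clear
 * errno before the call and test it afterwards, e.g.:
 *
 *	errno = 0;
 *	prio = getpriority(PRIO_PROCESS, 0);
 *	if (prio == -1 && errno != 0)
 *		err(1, "getpriority");
 */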
173 #ifndef _SYS_SYSPROTO_H_
174 struct setpriority_args {
175 	int	which;
176 	int	who;
177 	int	prio;
178 };
179 #endif
180 /*
181  * MPSAFE
182  */
183 int
184 setpriority(td, uap)
185 	struct thread *td;
186 	register struct setpriority_args *uap;
187 {
188 	struct proc *curp;
189 	register struct proc *p;
190 	int found = 0, error = 0;
191 
192 	curp = td->td_proc;
193 	switch (uap->which) {
194 	case PRIO_PROCESS:
195 		if (uap->who == 0) {
196 			PROC_LOCK(curp);
197 			error = donice(td, curp, uap->prio);
198 			PROC_UNLOCK(curp);
199 		} else {
200 			p = pfind(uap->who);
201 			if (p == 0)
202 				break;
203 			if (p_cansee(td, p) == 0)
204 				error = donice(td, p, uap->prio);
205 			PROC_UNLOCK(p);
206 		}
207 		found++;
208 		break;
209 
210 	case PRIO_PGRP: {
211 		register struct pgrp *pg;
212 
213 		sx_slock(&proctree_lock);
214 		if (uap->who == 0) {
215 			pg = curp->p_pgrp;
216 			PGRP_LOCK(pg);
217 		} else {
218 			pg = pgfind(uap->who);
219 			if (pg == NULL) {
220 				sx_sunlock(&proctree_lock);
221 				break;
222 			}
223 		}
224 		sx_sunlock(&proctree_lock);
225 		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
226 			PROC_LOCK(p);
227 			if (!p_cansee(td, p)) {
228 				error = donice(td, p, uap->prio);
229 				found++;
230 			}
231 			PROC_UNLOCK(p);
232 		}
233 		PGRP_UNLOCK(pg);
234 		break;
235 	}
236 
237 	case PRIO_USER:
238 		if (uap->who == 0)
239 			uap->who = td->td_ucred->cr_uid;
240 		sx_slock(&allproc_lock);
241 		FOREACH_PROC_IN_SYSTEM(p) {
242 			PROC_LOCK(p);
243 			if (p->p_ucred->cr_uid == uap->who &&
244 			    !p_cansee(td, p)) {
245 				error = donice(td, p, uap->prio);
246 				found++;
247 			}
248 			PROC_UNLOCK(p);
249 		}
250 		sx_sunlock(&allproc_lock);
251 		break;
252 
253 	default:
254 		error = EINVAL;
255 		break;
256 	}
257 	if (found == 0 && error == 0)
258 		error = ESRCH;
259 	return (error);
260 }
261 
262 /*
263  * Set "nice" for a process.  This does not handle threaded processes
264  * especially well, but it tries: it has the unfortunate side effect of
265  * making all the nice values for a process's ksegrps the same.  This
266  * suggests that nice should be stored as a process-wide value with
267  * per-ksegrp deltas (but that is not done yet).
268  */
269 static int
270 donice(struct thread *td, struct proc *p, int n)
271 {
272 	struct ksegrp *kg;
273 	int error, low;
274 
275 	low = PRIO_MAX + 1;
276 	PROC_LOCK_ASSERT(p, MA_OWNED);
277 	if ((error = p_cansched(td, p)))
278 		return (error);
279 	if (n > PRIO_MAX)
280 		n = PRIO_MAX;
281 	if (n < PRIO_MIN)
282 		n = PRIO_MIN;
283 	/*
284 	 * Unprivileged users may only set nice to at least the lowest nice
285 	 * in the process: e.g., for nices of 4,3,2 allow 3 but not 1.
286 	 */
287 	FOREACH_KSEGRP_IN_PROC(p, kg) {
288 		if (kg->kg_nice < low)
289 			low = kg->kg_nice;
290 	}
291  	if (n < low && suser(td) != 0)
292 		return (EACCES);
293 	mtx_lock_spin(&sched_lock);
294 	FOREACH_KSEGRP_IN_PROC(p, kg) {
295 		sched_nice(kg, n);
296 	}
297 	mtx_unlock_spin(&sched_lock);
298 	return (0);
299 }
300 
301 /*
302  * Set realtime priority
303  *
304  * MPSAFE
305  */
306 #ifndef _SYS_SYSPROTO_H_
307 struct rtprio_args {
308 	int		function;
309 	pid_t		pid;
310 	struct rtprio	*rtp;
311 };
312 #endif
313 
314 int
315 rtprio(td, uap)
316 	struct thread *td;		/* curthread */
317 	register struct rtprio_args *uap;
318 {
319 	struct proc *curp;
320 	struct proc *p;
321 	struct ksegrp *kg;
322 	struct rtprio rtp;
323 	int cierror, error;
324 
325 	/* Perform copyin before acquiring locks if needed. */
326 	if (uap->function == RTP_SET)
327 		cierror = copyin(uap->rtp, &rtp, sizeof(struct rtprio));
328 	else
329 		cierror = 0;
330 
331 	curp = td->td_proc;
332 	if (uap->pid == 0) {
333 		p = curp;
334 		PROC_LOCK(p);
335 	} else {
336 		p = pfind(uap->pid);
337 		if (p == NULL)
338 			return (ESRCH);
339 	}
340 
341 	switch (uap->function) {
342 	case RTP_LOOKUP:
343 		if ((error = p_cansee(td, p)))
344 			break;
345 		mtx_lock_spin(&sched_lock);
346 		/*
347 		 * Return OUR priority if no pid is specified; if one
348 		 * is, report the highest priority in that process.
349 		 * There isn't much more we can do, as there is only
350 		 * room to return a single priority.
351 		 * XXXKSE  We may need a new interface to report the
352 		 * priorities of multiple system scope threads.
353 		 * Note: specifying our own pid is not the same
354 		 * as leaving it zero.
355 		 */
356 		if (uap->pid == 0) {
357 			pri_to_rtp(td->td_ksegrp, &rtp);
358 		} else {
359 			struct rtprio rtp2;
360 
361 			rtp.type = RTP_PRIO_IDLE;
362 			rtp.prio = RTP_PRIO_MAX;
363 			FOREACH_KSEGRP_IN_PROC(p, kg) {
364 				pri_to_rtp(kg, &rtp2);
365 				if ((rtp2.type <  rtp.type) ||
366 				    ((rtp2.type == rtp.type) &&
367 				     (rtp2.prio < rtp.prio))) {
368 					rtp.type = rtp2.type;
369 					rtp.prio = rtp2.prio;
370 				}
371 			}
372 		}
373 		mtx_unlock_spin(&sched_lock);
374 		PROC_UNLOCK(p);
375 		return (copyout(&rtp, uap->rtp, sizeof(struct rtprio)));
376 	case RTP_SET:
377 		if ((error = p_cansched(td, p)) || (error = cierror))
378 			break;
379 		/* disallow setting rtprio in most cases if not superuser */
380 		if (suser(td) != 0) {
381 			/* can't set someone else's */
382 			if (uap->pid) {
383 				error = EPERM;
384 				break;
385 			}
386 			/* can't set realtime priority */
387 /*
388  * Realtime priority must be restricted because an unprivileged realtime
389  * process could monopolize the CPU.  For idle priority, there is also a
390  * potential for system deadlock if an idleprio process gains a lock on a
391  * resource that other processes need (and the idleprio process can't run
392  * due to a CPU-bound normal process). Fix me! XXX
393  */
394 #if 0
395  			if (RTP_PRIO_IS_REALTIME(rtp.type))
396 #endif
397 			if (rtp.type != RTP_PRIO_NORMAL) {
398 				error = EPERM;
399 				break;
400 			}
401 		}
402 		mtx_lock_spin(&sched_lock);
403 		/*
404 		 * If we are setting our own priority, set just our
405 		 * own KSEGRP; but if we are acting on another process,
406 		 * set all the ksegrps in that process.  Specifying our
407 		 * own pid, rather than zero, also does the latter.
408 		 */
409 		if (uap->pid == 0) {
410 			error = rtp_to_pri(&rtp, td->td_ksegrp);
411 		} else {
412 			FOREACH_KSEGRP_IN_PROC(p, kg) {
413 				if ((error = rtp_to_pri(&rtp, kg)) != 0) {
414 					break;
415 				}
416 			}
417 		}
418 		mtx_unlock_spin(&sched_lock);
419 		break;
420 	default:
421 		error = EINVAL;
422 		break;
423 	}
424 	PROC_UNLOCK(p);
425 	return (error);
426 }
427 
428 int
429 rtp_to_pri(struct rtprio *rtp, struct ksegrp *kg)
430 {
431 
432 	mtx_assert(&sched_lock, MA_OWNED);
433 	if (rtp->prio > RTP_PRIO_MAX)
434 		return (EINVAL);
435 	switch (RTP_PRIO_BASE(rtp->type)) {
436 	case RTP_PRIO_REALTIME:
437 		kg->kg_user_pri = PRI_MIN_REALTIME + rtp->prio;
438 		break;
439 	case RTP_PRIO_NORMAL:
440 		kg->kg_user_pri = PRI_MIN_TIMESHARE + rtp->prio;
441 		break;
442 	case RTP_PRIO_IDLE:
443 		kg->kg_user_pri = PRI_MIN_IDLE + rtp->prio;
444 		break;
445 	default:
446 		return (EINVAL);
447 	}
448 	sched_class(kg, rtp->type);
449 	if (curthread->td_ksegrp == kg) {
450 		curthread->td_base_pri = kg->kg_user_pri;
451 		sched_prio(curthread, kg->kg_user_pri); /* XXX dubious */
452 	}
453 	return (0);
454 }
455 
456 void
457 pri_to_rtp(struct ksegrp *kg, struct rtprio *rtp)
458 {
459 
460 	mtx_assert(&sched_lock, MA_OWNED);
461 	switch (PRI_BASE(kg->kg_pri_class)) {
462 	case PRI_REALTIME:
463 		rtp->prio = kg->kg_user_pri - PRI_MIN_REALTIME;
464 		break;
465 	case PRI_TIMESHARE:
466 		rtp->prio = kg->kg_user_pri - PRI_MIN_TIMESHARE;
467 		break;
468 	case PRI_IDLE:
469 		rtp->prio = kg->kg_user_pri - PRI_MIN_IDLE;
470 		break;
471 	default:
472 		break;
473 	}
474 	rtp->type = kg->kg_pri_class;
475 }
476 
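/*
 * Illustrative mapping (a sketch, not an additional code path): a userland
 * rtprio(2) call such as
 *
 *	struct rtprio rtp = { RTP_PRIO_REALTIME, 0 };
 *	rtprio(RTP_SET, 0, &rtp);
 *
 * reaches rtp_to_pri() above (given sufficient privilege), which sets the
 * calling thread's ksegrp to kg_user_pri = PRI_MIN_REALTIME + 0, while
 * pri_to_rtp() performs the inverse conversion for RTP_LOOKUP.
 */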
477 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
478 #ifndef _SYS_SYSPROTO_H_
479 struct osetrlimit_args {
480 	u_int	which;
481 	struct	orlimit *rlp;
482 };
483 #endif
484 /*
485  * MPSAFE
486  */
487 int
488 osetrlimit(td, uap)
489 	struct thread *td;
490 	register struct osetrlimit_args *uap;
491 {
492 	struct orlimit olim;
493 	struct rlimit lim;
494 	int error;
495 
496 	if ((error = copyin(uap->rlp, &olim, sizeof(struct orlimit))))
497 		return (error);
498 	lim.rlim_cur = olim.rlim_cur;
499 	lim.rlim_max = olim.rlim_max;
500 	error = kern_setrlimit(td, uap->which, &lim);
501 	return (error);
502 }
503 
504 #ifndef _SYS_SYSPROTO_H_
505 struct ogetrlimit_args {
506 	u_int	which;
507 	struct	orlimit *rlp;
508 };
509 #endif
510 /*
511  * MPSAFE
512  */
513 int
514 ogetrlimit(td, uap)
515 	struct thread *td;
516 	register struct ogetrlimit_args *uap;
517 {
518 	struct orlimit olim;
519 	struct rlimit rl;
520 	struct proc *p;
521 	int error;
522 
523 	if (uap->which >= RLIM_NLIMITS)
524 		return (EINVAL);
525 	p = td->td_proc;
526 	PROC_LOCK(p);
527 	lim_rlimit(p, uap->which, &rl);
528 	PROC_UNLOCK(p);
529 
530 	/*
531 	 * XXX would be more correct to convert only RLIM_INFINITY to the
532 	 * old RLIM_INFINITY and fail with EOVERFLOW for other larger
533  * values.  Most 64->32 and 32->16 conversions, including some
534  * quite important ones for uids, are even more broken than what
535  * we do here (they blindly truncate).  We don't do this correctly
536 	 * here since we have little experience with EOVERFLOW yet.
537 	 * Elsewhere, getuid() can't fail...
538 	 */
539 	olim.rlim_cur = rl.rlim_cur > 0x7fffffff ? 0x7fffffff : rl.rlim_cur;
540 	olim.rlim_max = rl.rlim_max > 0x7fffffff ? 0x7fffffff : rl.rlim_max;
541 	error = copyout(&olim, uap->rlp, sizeof(olim));
542 	return (error);
543 }
544 #endif /* COMPAT_43 || COMPAT_SUNOS */
545 
546 #ifndef _SYS_SYSPROTO_H_
547 struct __setrlimit_args {
548 	u_int	which;
549 	struct	rlimit *rlp;
550 };
551 #endif
552 /*
553  * MPSAFE
554  */
555 int
556 setrlimit(td, uap)
557 	struct thread *td;
558 	register struct __setrlimit_args *uap;
559 {
560 	struct rlimit alim;
561 	int error;
562 
563 	if ((error = copyin(uap->rlp, &alim, sizeof(struct rlimit))))
564 		return (error);
565 	error = kern_setrlimit(td, uap->which, &alim);
566 	return (error);
567 }
568 
569 int
570 kern_setrlimit(td, which, limp)
571 	struct thread *td;
572 	u_int which;
573 	struct rlimit *limp;
574 {
575 	struct plimit *newlim, *oldlim;
576 	struct proc *p;
577 	register struct rlimit *alimp;
578 	rlim_t oldssiz;
579 	int error;
580 
581 	if (which >= RLIM_NLIMITS)
582 		return (EINVAL);
583 
584 	/*
585 	 * Preserve historical bugs by treating negative limits as unsigned.
586 	 */
587 	if (limp->rlim_cur < 0)
588 		limp->rlim_cur = RLIM_INFINITY;
589 	if (limp->rlim_max < 0)
590 		limp->rlim_max = RLIM_INFINITY;
591 
592 	oldssiz = 0;
593 	p = td->td_proc;
594 	newlim = lim_alloc();
595 	PROC_LOCK(p);
596 	oldlim = p->p_limit;
597 	alimp = &oldlim->pl_rlimit[which];
598 	if (limp->rlim_cur > alimp->rlim_max ||
599 	    limp->rlim_max > alimp->rlim_max)
600 		if ((error = suser_cred(td->td_ucred, PRISON_ROOT))) {
601 			PROC_UNLOCK(p);
602 			lim_free(newlim);
603 			return (error);
604 		}
605 	if (limp->rlim_cur > limp->rlim_max)
606 		limp->rlim_cur = limp->rlim_max;
607 	lim_copy(newlim, oldlim);
608 	alimp = &newlim->pl_rlimit[which];
609 
610 	switch (which) {
611 
612 	case RLIMIT_CPU:
613 		mtx_lock_spin(&sched_lock);
614 		p->p_cpulimit = limp->rlim_cur;
615 		mtx_unlock_spin(&sched_lock);
616 		break;
617 	case RLIMIT_DATA:
618 		if (limp->rlim_cur > maxdsiz)
619 			limp->rlim_cur = maxdsiz;
620 		if (limp->rlim_max > maxdsiz)
621 			limp->rlim_max = maxdsiz;
622 		break;
623 
624 	case RLIMIT_STACK:
625 		if (limp->rlim_cur > maxssiz)
626 			limp->rlim_cur = maxssiz;
627 		if (limp->rlim_max > maxssiz)
628 			limp->rlim_max = maxssiz;
629 		oldssiz = alimp->rlim_cur;
630 		break;
631 
632 	case RLIMIT_NOFILE:
633 		if (limp->rlim_cur > maxfilesperproc)
634 			limp->rlim_cur = maxfilesperproc;
635 		if (limp->rlim_max > maxfilesperproc)
636 			limp->rlim_max = maxfilesperproc;
637 		break;
638 
639 	case RLIMIT_NPROC:
640 		if (limp->rlim_cur > maxprocperuid)
641 			limp->rlim_cur = maxprocperuid;
642 		if (limp->rlim_max > maxprocperuid)
643 			limp->rlim_max = maxprocperuid;
644 		if (limp->rlim_cur < 1)
645 			limp->rlim_cur = 1;
646 		if (limp->rlim_max < 1)
647 			limp->rlim_max = 1;
648 		break;
649 	}
650 	*alimp = *limp;
651 	p->p_limit = newlim;
652 	PROC_UNLOCK(p);
653 	lim_free(oldlim);
654 
655 	if (which == RLIMIT_STACK) {
656 		/*
657 		 * Stack is allocated to the max at exec time with only
658 		 * "rlim_cur" bytes accessible.  If the stack limit is raised,
659 		 * make more accessible; if it is lowered, make the excess inaccessible.
660 		 */
661 		if (limp->rlim_cur != oldssiz) {
662 			vm_offset_t addr;
663 			vm_size_t size;
664 			vm_prot_t prot;
665 
666 			mtx_lock(&Giant);
667 			if (limp->rlim_cur > oldssiz) {
668 				prot = p->p_sysent->sv_stackprot;
669 				size = limp->rlim_cur - oldssiz;
670 				addr = p->p_sysent->sv_usrstack -
671 				    limp->rlim_cur;
672 			} else {
673 				prot = VM_PROT_NONE;
674 				size = oldssiz - limp->rlim_cur;
675 				addr = p->p_sysent->sv_usrstack -
676 				    oldssiz;
677 			}
678 			addr = trunc_page(addr);
679 			size = round_page(size);
680 			(void) vm_map_protect(&p->p_vmspace->vm_map,
681 					      addr, addr+size, prot, FALSE);
682 			mtx_unlock(&Giant);
683 		}
684 	}
685 	return (0);
686 }
687 
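/*
 * Worked example of the RLIMIT_STACK adjustment above (illustrative sizes,
 * assuming sv_usrstack == USRSTACK): lowering rlim_cur from 16 MB to 8 MB
 * maps the range [USRSTACK - 16MB, USRSTACK - 8MB) with VM_PROT_NONE;
 * raising it back restores sv_stackprot on that same range.
 */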
688 #ifndef _SYS_SYSPROTO_H_
689 struct __getrlimit_args {
690 	u_int	which;
691 	struct	rlimit *rlp;
692 };
693 #endif
694 /*
695  * MPSAFE
696  */
697 /* ARGSUSED */
698 int
699 getrlimit(td, uap)
700 	struct thread *td;
701 	register struct __getrlimit_args *uap;
702 {
703 	struct rlimit rlim;
704 	struct proc *p;
705 	int error;
706 
707 	if (uap->which >= RLIM_NLIMITS)
708 		return (EINVAL);
709 	p = td->td_proc;
710 	PROC_LOCK(p);
711 	lim_rlimit(p, uap->which, &rlim);
712 	PROC_UNLOCK(p);
713 	error = copyout(&rlim, uap->rlp, sizeof(struct rlimit));
714 	return(error);
715 }
716 
717 /*
718  * Transform the running time and tick information in proc p into user,
719  * system, and interrupt time usage.
720  */
721 void
722 calcru(p, up, sp, ip)
723 	struct proc *p;
724 	struct timeval *up;
725 	struct timeval *sp;
726 	struct timeval *ip;
727 {
728 	struct bintime bt;
729 	struct timeval tv;
730 	/* {user, system, interrupt, total} x {ticks, usec}; ptu = previous total usec. */
731 	u_int64_t ut, uu, st, su, it, iu, tt, tu, ptu;
732 
733 	mtx_assert(&sched_lock, MA_OWNED);
734 	/* XXX: why spl-protect?  Worst case is an off-by-one report. */
735 
736 	ut = p->p_uticks;
737 	st = p->p_sticks;
738 	it = p->p_iticks;
739 
740 	tt = ut + st + it;
741 	if (tt == 0) {
742 		st = 1;
743 		tt = 1;
744 	}
745 	if (p == curthread->td_proc) {
746 		/*
747 		 * Adjust for the current time slice.  This is actually fairly
748 		 * important since the error here is on the order of a time
749 		 * quantum, which is much greater than the sampling error.
750 		 * XXXKSE use a different test due to threads on other
751 		 * processors also being 'current'.
752 		 */
753 		binuptime(&bt);
754 		bintime_sub(&bt, PCPU_PTR(switchtime));
755 		bintime_add(&bt, &p->p_runtime);
756 	} else
757 		bt = p->p_runtime;
758 	bintime2timeval(&bt, &tv);
759 	tu = (u_int64_t)tv.tv_sec * 1000000 + tv.tv_usec;
760 	ptu = p->p_uu + p->p_su + p->p_iu;
761 	if (tu < ptu || (int64_t)tu < 0) {
762 		printf("calcru: negative time of %jd usec for pid %d (%s)\n",
763 		    (intmax_t)tu, p->p_pid, p->p_comm);
764 		tu = ptu;
765 	}
766 
767 	/* Subdivide tu. */
768 	uu = (tu * ut) / tt;
769 	su = (tu * st) / tt;
770 	iu = tu - uu - su;
771 
772 	/* Enforce monotonicity. */
773 	if (uu < p->p_uu || su < p->p_su || iu < p->p_iu) {
774 		if (uu < p->p_uu)
775 			uu = p->p_uu;
776 		else if (uu + p->p_su + p->p_iu > tu)
777 			uu = tu - p->p_su - p->p_iu;
778 		if (st == 0)
779 			su = p->p_su;
780 		else {
781 			su = ((tu - uu) * st) / (st + it);
782 			if (su < p->p_su)
783 				su = p->p_su;
784 			else if (uu + su + p->p_iu > tu)
785 				su = tu - uu - p->p_iu;
786 		}
787 		KASSERT(uu + su + p->p_iu <= tu,
788 		    ("calcru: monotonisation botch 1"));
789 		iu = tu - uu - su;
790 		KASSERT(iu >= p->p_iu,
791 		    ("calcru: monotonisation botch 2"));
792 	}
793 	p->p_uu = uu;
794 	p->p_su = su;
795 	p->p_iu = iu;
796 
797 	up->tv_sec = uu / 1000000;
798 	up->tv_usec = uu % 1000000;
799 	sp->tv_sec = su / 1000000;
800 	sp->tv_usec = su % 1000000;
801 	if (ip != NULL) {
802 		ip->tv_sec = iu / 1000000;
803 		ip->tv_usec = iu % 1000000;
804 	}
805 }
806 
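/*
 * Worked example of the proration in calcru() (illustrative numbers): with
 * tu = 10,000,000 usec of run time and ut = 600, st = 300, it = 100
 * statclock ticks (tt = 1000), the split is uu = 6,000,000, su = 3,000,000
 * and iu = 1,000,000 usec of user, system and interrupt time respectively.
 */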
807 #ifndef _SYS_SYSPROTO_H_
808 struct getrusage_args {
809 	int	who;
810 	struct	rusage *rusage;
811 };
812 #endif
813 /*
814  * MPSAFE
815  */
816 /* ARGSUSED */
817 int
818 getrusage(td, uap)
819 	register struct thread *td;
820 	register struct getrusage_args *uap;
821 {
822 	struct rusage ru;
823 	struct proc *p;
824 
825 	p = td->td_proc;
826 	switch (uap->who) {
827 
828 	case RUSAGE_SELF:
829 		mtx_lock(&Giant);
830 		mtx_lock_spin(&sched_lock);
831 		calcru(p, &p->p_stats->p_ru.ru_utime, &p->p_stats->p_ru.ru_stime,
832 		    NULL);
833 		mtx_unlock_spin(&sched_lock);
834 		ru = p->p_stats->p_ru;
835 		mtx_unlock(&Giant);
836 		break;
837 
838 	case RUSAGE_CHILDREN:
839 		mtx_lock(&Giant);
840 		ru = p->p_stats->p_cru;
841 		mtx_unlock(&Giant);
842 		break;
843 
844 	default:
845 		return (EINVAL);
846 		break;
847 	}
848 	return (copyout(&ru, uap->rusage, sizeof(struct rusage)));
849 }
850 
851 void
852 ruadd(ru, ru2)
853 	register struct rusage *ru, *ru2;
854 {
855 	register long *ip, *ip2;
856 	register int i;
857 
858 	timevaladd(&ru->ru_utime, &ru2->ru_utime);
859 	timevaladd(&ru->ru_stime, &ru2->ru_stime);
860 	if (ru->ru_maxrss < ru2->ru_maxrss)
861 		ru->ru_maxrss = ru2->ru_maxrss;
862 	ip = &ru->ru_first; ip2 = &ru2->ru_first;
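	/*
	 * ru_first and ru_last bracket the remaining long-valued counters
	 * of struct rusage; walk both structures pairwise and accumulate
	 * each counter into *ru.
	 */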
863 	for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--)
864 		*ip++ += *ip2++;
865 }
866 
867 /*
868  * Allocate a new resource limits structure and initialize its
869  * reference count and mutex pointer.
870  */
871 struct plimit *
872 lim_alloc()
873 {
874 	struct plimit *limp;
875 
876 	limp = (struct plimit *)malloc(sizeof(struct plimit), M_PLIMIT,
877 	    M_WAITOK);
878 	limp->pl_refcnt = 1;
879 	limp->pl_mtx = mtx_pool_alloc(mtxpool_sleep);
880 	return (limp);
881 }
882 
883 struct plimit *
884 lim_hold(limp)
885 	struct plimit *limp;
886 {
887 
888 	LIM_LOCK(limp);
889 	limp->pl_refcnt++;
890 	LIM_UNLOCK(limp);
891 	return (limp);
892 }
893 
894 void
895 lim_free(limp)
896 	struct plimit *limp;
897 {
898 
899 	LIM_LOCK(limp);
900 	KASSERT(limp->pl_refcnt > 0, ("plimit refcnt underflow"));
901 	if (--limp->pl_refcnt == 0) {
902 		LIM_UNLOCK(limp);
903 		free((void *)limp, M_PLIMIT);
904 		return;
905 	}
906 	LIM_UNLOCK(limp);
907 }
908 
909 /*
910  * Make a copy of the plimit structure.
911  * We share these structures copy-on-write after fork.
912  */
913 void
914 lim_copy(dst, src)
915 	struct plimit *dst, *src;
916 {
917 
918 	KASSERT(dst->pl_refcnt == 1, ("lim_copy to shared limit"));
919 	bcopy(src->pl_rlimit, dst->pl_rlimit, sizeof(src->pl_rlimit));
920 }
921 
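/*
 * Typical plimit life cycle (a sketch of how the routines above combine;
 * locking is the caller's responsibility, as in kern_setrlimit() above):
 *
 *	newlim = lim_alloc();
 *	PROC_LOCK(p);
 *	oldlim = p->p_limit;
 *	lim_copy(newlim, oldlim);
 *	newlim->pl_rlimit[which] = ...;		(adjust the private copy)
 *	p->p_limit = newlim;
 *	PROC_UNLOCK(p);
 *	lim_free(oldlim);
 *
 * fork(), by contrast, simply shares the parent's structure via lim_hold().
 */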
922 /*
923  * Return the hard limit for a particular system resource.  The
924  * which parameter specifies the index into the rlimit array.
925  */
926 rlim_t
927 lim_max(struct proc *p, int which)
928 {
929 	struct rlimit rl;
930 
931 	lim_rlimit(p, which, &rl);
932 	return (rl.rlim_max);
933 }
934 
935 /*
936  * Return the current (soft) limit for a particular system resource.
937  * The which parameter specifies the index into the rlimit array.
938  */
939 rlim_t
940 lim_cur(struct proc *p, int which)
941 {
942 	struct rlimit rl;
943 
944 	lim_rlimit(p, which, &rl);
945 	return (rl.rlim_cur);
946 }
947 
948 /*
949  * Return a copy of the entire rlimit structure for the system limit
950  * specified by 'which' in the rlimit structure pointed to by 'rlp'.
951  */
952 void
953 lim_rlimit(struct proc *p, int which, struct rlimit *rlp)
954 {
955 
956 	PROC_LOCK_ASSERT(p, MA_OWNED);
957 	KASSERT(which >= 0 && which < RLIM_NLIMITS,
958 	    ("request for invalid resource limit"));
959 	*rlp = p->p_limit->pl_rlimit[which];
960 }
961 
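/*
 * All three accessors above read p->p_limit, and lim_rlimit() asserts that
 * the process lock is held; a typical call therefore looks like
 * (illustrative):
 *
 *	PROC_LOCK(p);
 *	nofile = lim_cur(p, RLIMIT_NOFILE);
 *	PROC_UNLOCK(p);
 */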
962 /*
963  * Find the uidinfo structure for a uid.  This structure is used to
964  * track the total resource consumption (process count, socket buffer
965  * size, etc.) for the uid and impose limits.
966  */
967 void
968 uihashinit()
969 {
970 
971 	uihashtbl = hashinit(maxproc / 16, M_UIDINFO, &uihash);
972 	mtx_init(&uihashtbl_mtx, "uidinfo hash", NULL, MTX_DEF);
973 }
974 
975 /*
976  * Look up a uidinfo struct for the parameter uid.
977  * uihashtbl_mtx must be locked.
978  */
979 static struct uidinfo *
980 uilookup(uid)
981 	uid_t uid;
982 {
983 	struct uihashhead *uipp;
984 	struct uidinfo *uip;
985 
986 	mtx_assert(&uihashtbl_mtx, MA_OWNED);
987 	uipp = UIHASH(uid);
988 	LIST_FOREACH(uip, uipp, ui_hash)
989 		if (uip->ui_uid == uid)
990 			break;
991 
992 	return (uip);
993 }
994 
995 /*
996  * Find or allocate a struct uidinfo for a particular uid.
997  * The returned uidinfo has its reference count increased; callers
998  * must drop that reference with uifree() when they are done with it.
999  */
1000 struct uidinfo *
1001 uifind(uid)
1002 	uid_t uid;
1003 {
1004 	struct uidinfo *old_uip, *uip;
1005 
1006 	mtx_lock(&uihashtbl_mtx);
1007 	uip = uilookup(uid);
1008 	if (uip == NULL) {
1009 		mtx_unlock(&uihashtbl_mtx);
1010 		uip = malloc(sizeof(*uip), M_UIDINFO, M_WAITOK | M_ZERO);
1011 		mtx_lock(&uihashtbl_mtx);
1012 		/*
1013 		 * There's a chance someone created our uidinfo while we
1014 		 * were in malloc and not holding the lock, so we have to
1015 		 * make sure we don't insert a duplicate uidinfo.
1016 		 */
1017 		if ((old_uip = uilookup(uid)) != NULL) {
1018 			/* Someone else beat us to it. */
1019 			free(uip, M_UIDINFO);
1020 			uip = old_uip;
1021 		} else {
1022 			uip->ui_mtxp = mtx_pool_alloc(mtxpool_sleep);
1023 			uip->ui_uid = uid;
1024 			LIST_INSERT_HEAD(UIHASH(uid), uip, ui_hash);
1025 		}
1026 	}
1027 	uihold(uip);
1028 	mtx_unlock(&uihashtbl_mtx);
1029 	return (uip);
1030 }
1031 
1032 /*
1033  * Place another refcount on a uidinfo struct.
1034  */
1035 void
1036 uihold(uip)
1037 	struct uidinfo *uip;
1038 {
1039 
1040 	UIDINFO_LOCK(uip);
1041 	uip->ui_ref++;
1042 	UIDINFO_UNLOCK(uip);
1043 }
1044 
1045 /*-
1046  * Since uidinfo structs have a long lifetime, we use an
1047  * opportunistic refcounting scheme to avoid locking the lookup hash
1048  * for each release.
1049  *
1050  * If the refcount hits 0, we need to free the structure,
1051  * which means we need to lock the hash.
1052  * Optimal case:
1053  *   After locking the struct and lowering the refcount, if we find
1054  *   that we don't need to free, simply unlock and return.
1055  * Suboptimal case:
1056  *   If lowering the refcount leaves it at zero (so we need to free),
1057  *   bump the count back up, drop the lock and acquire the locks in
1058  *   the proper order to try again.
1059  */
1060 void
1061 uifree(uip)
1062 	struct uidinfo *uip;
1063 {
1064 
1065 	/* Prepare for optimal case. */
1066 	UIDINFO_LOCK(uip);
1067 
1068 	if (--uip->ui_ref != 0) {
1069 		UIDINFO_UNLOCK(uip);
1070 		return;
1071 	}
1072 
1073 	/* Prepare for suboptimal case. */
1074 	uip->ui_ref++;
1075 	UIDINFO_UNLOCK(uip);
1076 	mtx_lock(&uihashtbl_mtx);
1077 	UIDINFO_LOCK(uip);
1078 
1079 	/*
1080 	 * We must subtract one from the count again because we backed out
1081 	 * our initial subtraction before dropping the lock.
1082 	 * Since another thread may have added a reference after we dropped the
1083 	 * initial lock we have to test for zero again.
1084 	 */
1085 	if (--uip->ui_ref == 0) {
1086 		LIST_REMOVE(uip, ui_hash);
1087 		mtx_unlock(&uihashtbl_mtx);
1088 		if (uip->ui_sbsize != 0)
1089 			printf("freeing uidinfo: uid = %d, sbsize = %jd\n",
1090 			    uip->ui_uid, (intmax_t)uip->ui_sbsize);
1091 		if (uip->ui_proccnt != 0)
1092 			printf("freeing uidinfo: uid = %d, proccnt = %ld\n",
1093 			    uip->ui_uid, uip->ui_proccnt);
1094 		UIDINFO_UNLOCK(uip);
1095 		free(uip, M_UIDINFO);
1096 		return;
1097 	}
1098 
1099 	mtx_unlock(&uihashtbl_mtx);
1100 	UIDINFO_UNLOCK(uip);
1101 }
1102 
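/*
 * Illustrative uidinfo life cycle (a sketch; the credential code is the
 * usual caller): a reference is obtained with uifind(), additional
 * references may be taken with uihold(), and every reference is eventually
 * dropped with uifree():
 *
 *	uip = uifind(uid);
 *	...use uip, e.g. chgproccnt(uip, 1, max)...
 *	uifree(uip);
 */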
1103 /*
1104  * Change the count associated with number of processes
1105  * Change the count of processes a given user is running.
1106  * When 'max' is 0, don't enforce a limit.
1107 int
1108 chgproccnt(uip, diff, max)
1109 	struct	uidinfo	*uip;
1110 	int	diff;
1111 	int	max;
1112 {
1113 
1114 	UIDINFO_LOCK(uip);
1115 	/* Don't allow them to exceed max, but allow subtraction. */
1116 	if (diff > 0 && uip->ui_proccnt + diff > max && max != 0) {
1117 		UIDINFO_UNLOCK(uip);
1118 		return (0);
1119 	}
1120 	uip->ui_proccnt += diff;
1121 	if (uip->ui_proccnt < 0)
1122 		printf("negative proccnt for uid = %d\n", uip->ui_uid);
1123 	UIDINFO_UNLOCK(uip);
1124 	return (1);
1125 }
1126 
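/*
 * Illustrative caller of chgproccnt() (a sketch; fork1() performs roughly
 * this check, passing 0 for max when the user is root so that no limit is
 * enforced):
 *
 *	if (!chgproccnt(uip, 1, lim_cur(p, RLIMIT_NPROC)))
 *		return (EAGAIN);
 */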
1127 /*
1128  * Change the total socket buffer size a user has used.
1129  */
1130 int
1131 chgsbsize(uip, hiwat, to, max)
1132 	struct	uidinfo	*uip;
1133 	u_int  *hiwat;
1134 	u_int	to;
1135 	rlim_t	max;
1136 {
1137 	rlim_t new;
1138 	int s;
1139 
1140 	s = splnet();
1141 	UIDINFO_LOCK(uip);
1142 	new = uip->ui_sbsize + to - *hiwat;
1143 	/* Don't allow them to exceed max, but allow subtraction. */
1144 	if (to > *hiwat && new > max) {
1145 		splx(s);
1146 		UIDINFO_UNLOCK(uip);
1147 		return (0);
1148 	}
1149 	uip->ui_sbsize = new;
1150 	*hiwat = to;
1151 	if (uip->ui_sbsize < 0)
1152 		printf("negative sbsize for uid = %d\n", uip->ui_uid);
1153 	splx(s);
1154 	UIDINFO_UNLOCK(uip);
1155 	return (1);
1156 }
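/*
 * Illustrative caller of chgsbsize() (a sketch; sbreserve() in the socket
 * code performs roughly this check when changing a socket buffer's
 * high-water mark):
 *
 *	if (!chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, cc,
 *	    lim_cur(td->td_proc, RLIMIT_SBSIZE)))
 *		return (0);
 */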
1157