xref: /freebsd/sys/kern/kern_resource.c (revision 7f3dea244c40159a41ab22da77a434d7c5b5e85a)
/*-
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_resource.c	8.5 (Berkeley) 1/21/94
 * $Id: kern_resource.c,v 1.47 1999/04/27 12:21:07 phk Exp $
 */

#include "opt_compat.h"
#include "opt_rlimit.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/kernel.h>
#include <sys/file.h>
#include <sys/resourcevar.h>
#include <sys/malloc.h>
#include <sys/proc.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_prot.h>
#include <sys/lock.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>

static int donice __P((struct proc *curp, struct proc *chgp, int n));
/* dosetrlimit non-static:  Needed by SysVR4 emulator */
int dosetrlimit __P((struct proc *p, u_int which, struct rlimit *limp));

/*
 * Resource controls and accounting.
 */

#ifndef _SYS_SYSPROTO_H_
struct getpriority_args {
	int	which;
	int	who;
};
#endif
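/*
 * Return the lowest (most favorable) nice value found among the
 * processes selected by "which" and "who".
 */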
int
getpriority(curp, uap)
	struct proc *curp;
	register struct getpriority_args *uap;
{
	register struct proc *p;
	register int low = PRIO_MAX + 1;

	switch (uap->which) {

	case PRIO_PROCESS:
		if (uap->who == 0)
			p = curp;
		else
			p = pfind(uap->who);
		if (p == 0)
			break;
		low = p->p_nice;
		break;

	case PRIO_PGRP: {
		register struct pgrp *pg;

		if (uap->who == 0)
			pg = curp->p_pgrp;
		else if ((pg = pgfind(uap->who)) == NULL)
			break;
		for (p = pg->pg_members.lh_first; p != 0;
		     p = p->p_pglist.le_next) {
			if (p->p_nice < low)
				low = p->p_nice;
		}
		break;
	}

	case PRIO_USER:
		if (uap->who == 0)
			uap->who = curp->p_ucred->cr_uid;
		for (p = allproc.lh_first; p != 0; p = p->p_list.le_next)
			if (p->p_ucred->cr_uid == uap->who &&
			    p->p_nice < low)
				low = p->p_nice;
		break;

	default:
		return (EINVAL);
	}
	if (low == PRIO_MAX + 1)
		return (ESRCH);
	curp->p_retval[0] = low;
	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct setpriority_args {
	int	which;
	int	who;
	int	prio;
};
#endif
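/*
 * Apply the requested nice value, via donice(), to every process
 * selected by "which" and "who".  ESRCH is returned if no matching
 * process was found.
 */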
/* ARGSUSED */
int
setpriority(curp, uap)
	struct proc *curp;
	register struct setpriority_args *uap;
{
	register struct proc *p;
	int found = 0, error = 0;

	switch (uap->which) {

	case PRIO_PROCESS:
		if (uap->who == 0)
			p = curp;
		else
			p = pfind(uap->who);
		if (p == 0)
			break;
		error = donice(curp, p, uap->prio);
		found++;
		break;

	case PRIO_PGRP: {
		register struct pgrp *pg;

		if (uap->who == 0)
			pg = curp->p_pgrp;
		else if ((pg = pgfind(uap->who)) == NULL)
			break;
		for (p = pg->pg_members.lh_first; p != 0;
		    p = p->p_pglist.le_next) {
			error = donice(curp, p, uap->prio);
			found++;
		}
		break;
	}

	case PRIO_USER:
		if (uap->who == 0)
			uap->who = curp->p_ucred->cr_uid;
		for (p = allproc.lh_first; p != 0; p = p->p_list.le_next)
			if (p->p_ucred->cr_uid == uap->who) {
				error = donice(curp, p, uap->prio);
				found++;
			}
		break;

	default:
		return (EINVAL);
	}
	if (found == 0)
		return (ESRCH);
	return (error);
}

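/*
 * Change one process's nice value.  Callers whose effective and real
 * uids are both non-zero must match the target's effective uid; the
 * value is clamped to [PRIO_MIN, PRIO_MAX], and lowering a nice value
 * requires superuser privilege.
 */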
static int
donice(curp, chgp, n)
	register struct proc *curp, *chgp;
	register int n;
{
	register struct pcred *pcred = curp->p_cred;

	if (pcred->pc_ucred->cr_uid && pcred->p_ruid &&
	    pcred->pc_ucred->cr_uid != chgp->p_ucred->cr_uid &&
	    pcred->p_ruid != chgp->p_ucred->cr_uid)
		return (EPERM);
	if (n > PRIO_MAX)
		n = PRIO_MAX;
	if (n < PRIO_MIN)
		n = PRIO_MIN;
	if (n < chgp->p_nice && suser(curp))
		return (EACCES);
	chgp->p_nice = n;
	(void)resetpriority(chgp);
	return (0);
}

/* rtprio system call */
#ifndef _SYS_SYSPROTO_H_
struct rtprio_args {
	int		function;
	pid_t		pid;
	struct rtprio	*rtp;
};
#endif

/*
 * Set or look up realtime priority.
 */

/* ARGSUSED */
int
rtprio(curp, uap)
	struct proc *curp;
	register struct rtprio_args *uap;
{
	register struct proc *p;
	register struct pcred *pcred = curp->p_cred;
	struct rtprio rtp;
	int error;

	error = copyin(uap->rtp, &rtp, sizeof(struct rtprio));
	if (error)
		return (error);

	if (uap->pid == 0)
		p = curp;
	else
		p = pfind(uap->pid);

	if (p == 0)
		return (ESRCH);

	switch (uap->function) {
	case RTP_LOOKUP:
		return (copyout(&p->p_rtprio, uap->rtp, sizeof(struct rtprio)));
	case RTP_SET:
		if (pcred->pc_ucred->cr_uid && pcred->p_ruid &&
		    pcred->pc_ucred->cr_uid != p->p_ucred->cr_uid &&
		    pcred->p_ruid != p->p_ucred->cr_uid)
		        return (EPERM);
		/* disallow setting rtprio in most cases if not superuser */
		if (suser(curp)) {
			/* can't set someone else's */
			if (uap->pid)
				return (EPERM);
			/* can't set realtime priority */
/*
 * Realtime priority has to be restricted for reasons which should be
 * obvious. However, for idle priority, there is a potential for
 * system deadlock if an idleprio process gains a lock on a resource
 * that other processes need (and the idleprio process can't run
 * due to a CPU-bound normal process). Fix me! XXX
 */
#if 0
 			if (RTP_PRIO_IS_REALTIME(rtp.type))
#endif
			if (rtp.type != RTP_PRIO_NORMAL)
				return (EPERM);
		}
		switch (rtp.type) {
#ifdef RTP_PRIO_FIFO
		case RTP_PRIO_FIFO:
#endif
		case RTP_PRIO_REALTIME:
		case RTP_PRIO_NORMAL:
		case RTP_PRIO_IDLE:
			if (rtp.prio > RTP_PRIO_MAX)
				return (EINVAL);
			p->p_rtprio = rtp;
			return (0);
		default:
			return (EINVAL);
		}

	default:
		return (EINVAL);
	}
}

#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
#ifndef _SYS_SYSPROTO_H_
struct osetrlimit_args {
	u_int	which;
	struct	orlimit *rlp;
};
#endif
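/*
 * Compatibility setrlimit (COMPAT_43/COMPAT_SUNOS):  convert the
 * old-style orlimit supplied by the application into a struct rlimit
 * and hand it to dosetrlimit().
 */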
/* ARGSUSED */
int
osetrlimit(p, uap)
	struct proc *p;
	register struct osetrlimit_args *uap;
{
	struct orlimit olim;
	struct rlimit lim;
	int error;

	if ((error =
	    copyin((caddr_t)uap->rlp, (caddr_t)&olim, sizeof(struct orlimit))))
		return (error);
	lim.rlim_cur = olim.rlim_cur;
	lim.rlim_max = olim.rlim_max;
	return (dosetrlimit(p, uap->which, &lim));
}

#ifndef _SYS_SYSPROTO_H_
struct ogetrlimit_args {
	u_int	which;
	struct	orlimit *rlp;
};
#endif
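/*
 * Compatibility getrlimit:  report the current limits in the
 * old-style orlimit format; limits that narrow to -1 (such as
 * RLIM_INFINITY) are reported as 0x7fffffff instead.
 */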
/* ARGSUSED */
int
ogetrlimit(p, uap)
	struct proc *p;
	register struct ogetrlimit_args *uap;
{
	struct orlimit olim;

	if (uap->which >= RLIM_NLIMITS)
		return (EINVAL);
	olim.rlim_cur = p->p_rlimit[uap->which].rlim_cur;
	if (olim.rlim_cur == -1)
		olim.rlim_cur = 0x7fffffff;
	olim.rlim_max = p->p_rlimit[uap->which].rlim_max;
	if (olim.rlim_max == -1)
		olim.rlim_max = 0x7fffffff;
	return (copyout((caddr_t)&olim, (caddr_t)uap->rlp, sizeof(olim)));
}
#endif /* COMPAT_43 || COMPAT_SUNOS */

#ifndef _SYS_SYSPROTO_H_
struct __setrlimit_args {
	u_int	which;
	struct	rlimit *rlp;
};
#endif
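/*
 * Copy the new limit in from the application and apply it with
 * dosetrlimit().
 */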
/* ARGSUSED */
int
setrlimit(p, uap)
	struct proc *p;
	register struct __setrlimit_args *uap;
{
	struct rlimit alim;
	int error;

	if ((error =
	    copyin((caddr_t)uap->rlp, (caddr_t)&alim, sizeof (struct rlimit))))
		return (error);
	return (dosetrlimit(p, uap->which, &alim));
}

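/*
 * Apply a new resource limit to process p.  Raising either the soft
 * or the hard limit above the current hard limit requires superuser
 * privilege; several limits are silently capped at system-wide
 * maxima, and for RLIMIT_STACK the accessible portion of the stack
 * is grown or shrunk to match the new soft limit.
 */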
int
dosetrlimit(p, which, limp)
	struct proc *p;
	u_int which;
	struct rlimit *limp;
{
	register struct rlimit *alimp;
	int error;

	if (which >= RLIM_NLIMITS)
		return (EINVAL);
	alimp = &p->p_rlimit[which];

	/*
	 * Preserve historical bugs by treating negative limits as unsigned.
	 */
	if (limp->rlim_cur < 0)
		limp->rlim_cur = RLIM_INFINITY;
	if (limp->rlim_max < 0)
		limp->rlim_max = RLIM_INFINITY;

	if (limp->rlim_cur > alimp->rlim_max ||
	    limp->rlim_max > alimp->rlim_max)
		if ((error = suser_xxx(0, p, PRISON_ROOT)))
			return (error);
	if (limp->rlim_cur > limp->rlim_max)
		limp->rlim_cur = limp->rlim_max;
	if (p->p_limit->p_refcnt > 1 &&
	    (p->p_limit->p_lflags & PL_SHAREMOD) == 0) {
		p->p_limit->p_refcnt--;
		p->p_limit = limcopy(p->p_limit);
		alimp = &p->p_rlimit[which];
	}

	switch (which) {

	case RLIMIT_CPU:
		if (limp->rlim_cur > RLIM_INFINITY / (rlim_t)1000000)
			p->p_limit->p_cpulimit = RLIM_INFINITY;
		else
			p->p_limit->p_cpulimit =
			    (rlim_t)1000000 * limp->rlim_cur;
		break;
	case RLIMIT_DATA:
		if (limp->rlim_cur > MAXDSIZ)
			limp->rlim_cur = MAXDSIZ;
		if (limp->rlim_max > MAXDSIZ)
			limp->rlim_max = MAXDSIZ;
		break;

	case RLIMIT_STACK:
		if (limp->rlim_cur > MAXSSIZ)
			limp->rlim_cur = MAXSSIZ;
		if (limp->rlim_max > MAXSSIZ)
			limp->rlim_max = MAXSSIZ;
		/*
		 * Stack is allocated to the max at exec time with only
		 * "rlim_cur" bytes accessible.  If stack limit is going
		 * up make more accessible, if going down make inaccessible.
		 */
		if (limp->rlim_cur != alimp->rlim_cur) {
			vm_offset_t addr;
			vm_size_t size;
			vm_prot_t prot;

			if (limp->rlim_cur > alimp->rlim_cur) {
				prot = VM_PROT_ALL;
				size = limp->rlim_cur - alimp->rlim_cur;
				addr = USRSTACK - limp->rlim_cur;
			} else {
				prot = VM_PROT_NONE;
				size = alimp->rlim_cur - limp->rlim_cur;
				addr = USRSTACK - alimp->rlim_cur;
			}
			addr = trunc_page(addr);
			size = round_page(size);
			(void) vm_map_protect(&p->p_vmspace->vm_map,
					      addr, addr+size, prot, FALSE);
		}
		break;

	case RLIMIT_NOFILE:
		if (limp->rlim_cur > maxfilesperproc)
			limp->rlim_cur = maxfilesperproc;
		if (limp->rlim_max > maxfilesperproc)
			limp->rlim_max = maxfilesperproc;
		break;

	case RLIMIT_NPROC:
		if (limp->rlim_cur > maxprocperuid)
			limp->rlim_cur = maxprocperuid;
		if (limp->rlim_max > maxprocperuid)
			limp->rlim_max = maxprocperuid;
		break;
	}
	*alimp = *limp;
	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct __getrlimit_args {
	u_int	which;
	struct	rlimit *rlp;
};
#endif
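/*
 * Copy the requested resource limit out to the application.
 */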
/* ARGSUSED */
int
getrlimit(p, uap)
	struct proc *p;
	register struct __getrlimit_args *uap;
{

	if (uap->which >= RLIM_NLIMITS)
		return (EINVAL);
	return (copyout((caddr_t)&p->p_rlimit[uap->which], (caddr_t)uap->rlp,
	    sizeof (struct rlimit)));
}

/*
 * Transform the running time and tick information in proc p into user,
 * system, and interrupt time usage.
 */
void
calcru(p, up, sp, ip)
	struct proc *p;
	struct timeval *up;
	struct timeval *sp;
	struct timeval *ip;
{
	/* {user, system, interrupt, total} {ticks, usec}; previous tu: */
	u_int64_t ut, uu, st, su, it, iu, tt, tu, ptu;
	int s;
	struct timeval tv;

	/* XXX: why spl-protect ?  worst case is an off-by-one report */
	s = splstatclock();
	ut = p->p_uticks;
	st = p->p_sticks;
	it = p->p_iticks;
	splx(s);

	tt = ut + st + it;
	if (tt == 0) {
		st = 1;
		tt = 1;
	}

	tu = p->p_runtime;
#ifdef SMP
	if (p->p_oncpu != 0xff) {
#else
	if (p == curproc) {
#endif
		/*
		 * Adjust for the current time slice.  This is actually fairly
		 * important since the error here is on the order of a time
		 * quantum, which is much greater than the sampling error.
		 */
		microuptime(&tv);
		tu += (tv.tv_usec - switchtime.tv_usec) +
		    (tv.tv_sec - switchtime.tv_sec) * (int64_t)1000000;
	}
	ptu = p->p_stats->p_uu + p->p_stats->p_su + p->p_stats->p_iu;
	if (tu < ptu || (int64_t)tu < 0) {
		/* XXX no %qd in kernel.  Truncate. */
		printf("calcru: negative time of %ld usec for pid %d (%s)\n",
		       (long)tu, p->p_pid, p->p_comm);
		tu = ptu;
	}

	/* Subdivide tu. */
	uu = (tu * ut) / tt;
	su = (tu * st) / tt;
	iu = tu - uu - su;

	/* Enforce monotonicity. */
	if (uu < p->p_stats->p_uu || su < p->p_stats->p_su ||
	    iu < p->p_stats->p_iu) {
		if (uu < p->p_stats->p_uu)
			uu = p->p_stats->p_uu;
		else if (uu + p->p_stats->p_su + p->p_stats->p_iu > tu)
			uu = tu - p->p_stats->p_su - p->p_stats->p_iu;
		if (st == 0)
			su = p->p_stats->p_su;
		else {
			su = ((tu - uu) * st) / (st + it);
			if (su < p->p_stats->p_su)
				su = p->p_stats->p_su;
			else if (uu + su + p->p_stats->p_iu > tu)
				su = tu - uu - p->p_stats->p_iu;
		}
		KASSERT(uu + su + p->p_stats->p_iu <= tu,
		    ("calcru: monotonisation botch 1"));
		iu = tu - uu - su;
		KASSERT(iu >= p->p_stats->p_iu,
		    ("calcru: monotonisation botch 2"));
	}
	p->p_stats->p_uu = uu;
	p->p_stats->p_su = su;
	p->p_stats->p_iu = iu;

	up->tv_sec = uu / 1000000;
	up->tv_usec = uu % 1000000;
	sp->tv_sec = su / 1000000;
	sp->tv_usec = su % 1000000;
	if (ip != NULL) {
		ip->tv_sec = iu / 1000000;
		ip->tv_usec = iu % 1000000;
	}
}

#ifndef _SYS_SYSPROTO_H_
struct getrusage_args {
	int	who;
	struct	rusage *rusage;
};
#endif
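/*
 * Report resource usage for the calling process (recomputed via
 * calcru()) or the usage accumulated for its children.
 */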
/* ARGSUSED */
int
getrusage(p, uap)
	register struct proc *p;
	register struct getrusage_args *uap;
{
	register struct rusage *rup;

	switch (uap->who) {

	case RUSAGE_SELF:
		rup = &p->p_stats->p_ru;
		calcru(p, &rup->ru_utime, &rup->ru_stime, NULL);
		break;

	case RUSAGE_CHILDREN:
		rup = &p->p_stats->p_cru;
		break;

	default:
		return (EINVAL);
	}
	return (copyout((caddr_t)rup, (caddr_t)uap->rusage,
	    sizeof (struct rusage)));
}

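/*
 * Accumulate the totals in ru2 into ru:  the user and system times
 * are added, ru_maxrss takes the larger of the two values, and the
 * remaining fields are summed member by member.
 */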
void
ruadd(ru, ru2)
	register struct rusage *ru, *ru2;
{
	register long *ip, *ip2;
	register int i;

	timevaladd(&ru->ru_utime, &ru2->ru_utime);
	timevaladd(&ru->ru_stime, &ru2->ru_stime);
	if (ru->ru_maxrss < ru2->ru_maxrss)
		ru->ru_maxrss = ru2->ru_maxrss;
	ip = &ru->ru_first; ip2 = &ru2->ru_first;
	for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--)
		*ip++ += *ip2++;
}

/*
 * Make a copy of the plimit structure.
 * We share these structures copy-on-write after fork,
 * and copy when a limit is changed.
 */
struct plimit *
limcopy(lim)
	struct plimit *lim;
{
	register struct plimit *copy;

	MALLOC(copy, struct plimit *, sizeof(struct plimit),
	    M_SUBPROC, M_WAITOK);
	bcopy(lim->pl_rlimit, copy->pl_rlimit, sizeof(struct plimit));
	copy->p_lflags = 0;
	copy->p_refcnt = 1;
	return (copy);
}