xref: /freebsd/sys/kern/kern_resource.c (revision daf1cffce2e07931f27c6c6998652e90df6ba87e)
1 /*-
2  * Copyright (c) 1982, 1986, 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the University of
21  *	California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  *	@(#)kern_resource.c	8.5 (Berkeley) 1/21/94
39  * $FreeBSD$
40  */
41 
42 #include "opt_compat.h"
43 #include "opt_rlimit.h"
44 
45 #include <sys/param.h>
46 #include <sys/systm.h>
47 #include <sys/sysproto.h>
48 #include <sys/file.h>
49 #include <sys/resourcevar.h>
50 #include <sys/malloc.h>
51 #include <sys/proc.h>
52 #include <sys/time.h>
53 
54 #include <vm/vm.h>
55 #include <vm/vm_param.h>
56 #include <sys/lock.h>
57 #include <vm/pmap.h>
58 #include <vm/vm_map.h>
59 
60 static int donice __P((struct proc *curp, struct proc *chgp, int n));
61 /* dosetrlimit non-static:  Needed by SysVR4 emulator */
62 int dosetrlimit __P((struct proc *p, u_int which, struct rlimit *limp));
63 
64 /*
65  * Resource controls and accounting.
66  */
67 
68 #ifndef _SYS_SYSPROTO_H_
69 struct getpriority_args {
70 	int	which;
71 	int	who;
72 };
73 #endif
74 int
75 getpriority(curp, uap)
76 	struct proc *curp;
77 	register struct getpriority_args *uap;
78 {
79 	register struct proc *p;
80 	register int low = PRIO_MAX + 1;
81 
82 	switch (uap->which) {
83 
84 	case PRIO_PROCESS:
85 		if (uap->who == 0)
86 			p = curp;
87 		else
88 			p = pfind(uap->who);
89 		if (p == 0)
90 			break;
91 		low = p->p_nice;
92 		break;
93 
94 	case PRIO_PGRP: {
95 		register struct pgrp *pg;
96 
97 		if (uap->who == 0)
98 			pg = curp->p_pgrp;
99 		else if ((pg = pgfind(uap->who)) == NULL)
100 			break;
101 		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
102 			if (p->p_nice < low)
103 				low = p->p_nice;
104 		}
105 		break;
106 	}
107 
108 	case PRIO_USER:
109 		if (uap->who == 0)
110 			uap->who = curp->p_ucred->cr_uid;
111 		LIST_FOREACH(p, &allproc, p_list)
112 			if (p->p_ucred->cr_uid == uap->who &&
113 			    p->p_nice < low)
114 				low = p->p_nice;
115 		break;
116 
117 	default:
118 		return (EINVAL);
119 	}
120 	if (low == PRIO_MAX + 1)
121 		return (ESRCH);
122 	curp->p_retval[0] = low;
123 	return (0);
124 }
125 
126 #ifndef _SYS_SYSPROTO_H_
127 struct setpriority_args {
128 	int	which;
129 	int	who;
130 	int	prio;
131 };
132 #endif
133 /* ARGSUSED */
134 int
135 setpriority(curp, uap)
136 	struct proc *curp;
137 	register struct setpriority_args *uap;
138 {
139 	register struct proc *p;
140 	int found = 0, error = 0;
141 
142 	switch (uap->which) {
143 
144 	case PRIO_PROCESS:
145 		if (uap->who == 0)
146 			p = curp;
147 		else
148 			p = pfind(uap->who);
149 		if (p == 0)
150 			break;
151 		error = donice(curp, p, uap->prio);
152 		found++;
153 		break;
154 
155 	case PRIO_PGRP: {
156 		register struct pgrp *pg;
157 
158 		if (uap->who == 0)
159 			pg = curp->p_pgrp;
160 		else if ((pg = pgfind(uap->who)) == NULL)
161 			break;
162 		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
163 			error = donice(curp, p, uap->prio);
164 			found++;
165 		}
166 		break;
167 	}
168 
169 	case PRIO_USER:
170 		if (uap->who == 0)
171 			uap->who = curp->p_ucred->cr_uid;
172 		LIST_FOREACH(p, &allproc, p_list)
173 			if (p->p_ucred->cr_uid == uap->who) {
174 				error = donice(curp, p, uap->prio);
175 				found++;
176 			}
177 		break;
178 
179 	default:
180 		return (EINVAL);
181 	}
182 	if (found == 0)
183 		return (ESRCH);
184 	return (error);
185 }
186 
187 static int
188 donice(curp, chgp, n)
189 	register struct proc *curp, *chgp;
190 	register int n;
191 {
192 	register struct pcred *pcred = curp->p_cred;
193 
194 	if (pcred->pc_ucred->cr_uid && pcred->p_ruid &&
195 	    pcred->pc_ucred->cr_uid != chgp->p_ucred->cr_uid &&
196 	    pcred->p_ruid != chgp->p_ucred->cr_uid)
197 		return (EPERM);
198 	if (n > PRIO_MAX)
199 		n = PRIO_MAX;
200 	if (n < PRIO_MIN)
201 		n = PRIO_MIN;
202 	if (n < chgp->p_nice && suser(curp))
203 		return (EACCES);
204 	chgp->p_nice = n;
205 	(void)resetpriority(chgp);
206 	return (0);
207 }
208 
209 /* rtprio system call */
210 #ifndef _SYS_SYSPROTO_H_
211 struct rtprio_args {
212 	int		function;
213 	pid_t		pid;
214 	struct rtprio	*rtp;
215 };
216 #endif
217 
218 /*
219  * Set realtime priority
220  */
221 
222 /* ARGSUSED */
223 int
224 rtprio(curp, uap)
225 	struct proc *curp;
226 	register struct rtprio_args *uap;
227 {
228 	register struct proc *p;
229 	register struct pcred *pcred = curp->p_cred;
230 	struct rtprio rtp;
231 	int error;
232 
233 	error = copyin(uap->rtp, &rtp, sizeof(struct rtprio));
234 	if (error)
235 		return (error);
236 
237 	if (uap->pid == 0)
238 		p = curp;
239 	else
240 		p = pfind(uap->pid);
241 
242 	if (p == 0)
243 		return (ESRCH);
244 
245 	switch (uap->function) {
246 	case RTP_LOOKUP:
247 		return (copyout(&p->p_rtprio, uap->rtp, sizeof(struct rtprio)));
248 	case RTP_SET:
249 		if (pcred->pc_ucred->cr_uid && pcred->p_ruid &&
250 		    pcred->pc_ucred->cr_uid != p->p_ucred->cr_uid &&
251 		    pcred->p_ruid != p->p_ucred->cr_uid)
252 		        return (EPERM);
253 		/* disallow setting rtprio in most cases if not superuser */
254 		if (suser(curp)) {
255 			/* can't set someone else's */
256 			if (uap->pid)
257 				return (EPERM);
258 			/* can't set realtime priority */
259 /*
260  * Realtime priority has to be restricted for reasons which should be
261  * obvious. However, for idle priority, there is a potential for
262  * system deadlock if an idleprio process gains a lock on a resource
263  * that other processes need (and the idleprio process can't run
264  * due to a CPU-bound normal process). Fix me! XXX
265  */
266 #if 0
267  			if (RTP_PRIO_IS_REALTIME(rtp.type))
268 #endif
269 			if (rtp.type != RTP_PRIO_NORMAL)
270 				return (EPERM);
271 		}
272 		switch (rtp.type) {
273 #ifdef RTP_PRIO_FIFO
274 		case RTP_PRIO_FIFO:
275 #endif
276 		case RTP_PRIO_REALTIME:
277 		case RTP_PRIO_NORMAL:
278 		case RTP_PRIO_IDLE:
279 			if (rtp.prio > RTP_PRIO_MAX)
280 				return (EINVAL);
281 			p->p_rtprio = rtp;
282 			return (0);
283 		default:
284 			return (EINVAL);
285 		}
286 
287 	default:
288 		return (EINVAL);
289 	}
290 }
291 
292 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
293 #ifndef _SYS_SYSPROTO_H_
294 struct osetrlimit_args {
295 	u_int	which;
296 	struct	orlimit *rlp;
297 };
298 #endif
299 /* ARGSUSED */
300 int
301 osetrlimit(p, uap)
302 	struct proc *p;
303 	register struct osetrlimit_args *uap;
304 {
305 	struct orlimit olim;
306 	struct rlimit lim;
307 	int error;
308 
309 	if ((error =
310 	    copyin((caddr_t)uap->rlp, (caddr_t)&olim, sizeof(struct orlimit))))
311 		return (error);
312 	lim.rlim_cur = olim.rlim_cur;
313 	lim.rlim_max = olim.rlim_max;
314 	return (dosetrlimit(p, uap->which, &lim));
315 }
316 
317 #ifndef _SYS_SYSPROTO_H_
318 struct ogetrlimit_args {
319 	u_int	which;
320 	struct	orlimit *rlp;
321 };
322 #endif
323 /* ARGSUSED */
324 int
325 ogetrlimit(p, uap)
326 	struct proc *p;
327 	register struct ogetrlimit_args *uap;
328 {
329 	struct orlimit olim;
330 
331 	if (uap->which >= RLIM_NLIMITS)
332 		return (EINVAL);
333 	olim.rlim_cur = p->p_rlimit[uap->which].rlim_cur;
334 	if (olim.rlim_cur == -1)
335 		olim.rlim_cur = 0x7fffffff;
336 	olim.rlim_max = p->p_rlimit[uap->which].rlim_max;
337 	if (olim.rlim_max == -1)
338 		olim.rlim_max = 0x7fffffff;
339 	return (copyout((caddr_t)&olim, (caddr_t)uap->rlp, sizeof(olim)));
340 }
341 #endif /* COMPAT_43 || COMPAT_SUNOS */
342 
343 #ifndef _SYS_SYSPROTO_H_
344 struct __setrlimit_args {
345 	u_int	which;
346 	struct	rlimit *rlp;
347 };
348 #endif
349 /* ARGSUSED */
350 int
351 setrlimit(p, uap)
352 	struct proc *p;
353 	register struct __setrlimit_args *uap;
354 {
355 	struct rlimit alim;
356 	int error;
357 
358 	if ((error =
359 	    copyin((caddr_t)uap->rlp, (caddr_t)&alim, sizeof (struct rlimit))))
360 		return (error);
361 	return (dosetrlimit(p, uap->which, &alim));
362 }
363 
364 int
365 dosetrlimit(p, which, limp)
366 	struct proc *p;
367 	u_int which;
368 	struct rlimit *limp;
369 {
370 	register struct rlimit *alimp;
371 	int error;
372 
373 	if (which >= RLIM_NLIMITS)
374 		return (EINVAL);
375 	alimp = &p->p_rlimit[which];
376 
377 	/*
378 	 * Preserve historical bugs by treating negative limits as unsigned.
379 	 */
380 	if (limp->rlim_cur < 0)
381 		limp->rlim_cur = RLIM_INFINITY;
382 	if (limp->rlim_max < 0)
383 		limp->rlim_max = RLIM_INFINITY;
384 
385 	if (limp->rlim_cur > alimp->rlim_max ||
386 	    limp->rlim_max > alimp->rlim_max)
387 		if ((error = suser_xxx(0, p, PRISON_ROOT)))
388 			return (error);
389 	if (limp->rlim_cur > limp->rlim_max)
390 		limp->rlim_cur = limp->rlim_max;
391 	if (p->p_limit->p_refcnt > 1 &&
392 	    (p->p_limit->p_lflags & PL_SHAREMOD) == 0) {
393 		p->p_limit->p_refcnt--;
394 		p->p_limit = limcopy(p->p_limit);
395 		alimp = &p->p_rlimit[which];
396 	}
397 
398 	switch (which) {
399 
400 	case RLIMIT_CPU:
401 		if (limp->rlim_cur > RLIM_INFINITY / (rlim_t)1000000)
402 			p->p_limit->p_cpulimit = RLIM_INFINITY;
403 		else
404 			p->p_limit->p_cpulimit =
405 			    (rlim_t)1000000 * limp->rlim_cur;
406 		break;
407 	case RLIMIT_DATA:
408 		if (limp->rlim_cur > MAXDSIZ)
409 			limp->rlim_cur = MAXDSIZ;
410 		if (limp->rlim_max > MAXDSIZ)
411 			limp->rlim_max = MAXDSIZ;
412 		break;
413 
414 	case RLIMIT_STACK:
415 		if (limp->rlim_cur > MAXSSIZ)
416 			limp->rlim_cur = MAXSSIZ;
417 		if (limp->rlim_max > MAXSSIZ)
418 			limp->rlim_max = MAXSSIZ;
419 		/*
420 		 * Stack is allocated to the max at exec time with only
421 		 * "rlim_cur" bytes accessible.  If stack limit is going
422 		 * up make more accessible, if going down make inaccessible.
423 		 */
424 		if (limp->rlim_cur != alimp->rlim_cur) {
425 			vm_offset_t addr;
426 			vm_size_t size;
427 			vm_prot_t prot;
428 
429 			if (limp->rlim_cur > alimp->rlim_cur) {
430 				prot = VM_PROT_ALL;
431 				size = limp->rlim_cur - alimp->rlim_cur;
432 				addr = USRSTACK - limp->rlim_cur;
433 			} else {
434 				prot = VM_PROT_NONE;
435 				size = alimp->rlim_cur - limp->rlim_cur;
436 				addr = USRSTACK - alimp->rlim_cur;
437 			}
438 			addr = trunc_page(addr);
439 			size = round_page(size);
440 			(void) vm_map_protect(&p->p_vmspace->vm_map,
441 					      addr, addr+size, prot, FALSE);
442 		}
443 		break;
444 
445 	case RLIMIT_NOFILE:
446 		if (limp->rlim_cur > maxfilesperproc)
447 			limp->rlim_cur = maxfilesperproc;
448 		if (limp->rlim_max > maxfilesperproc)
449 			limp->rlim_max = maxfilesperproc;
450 		break;
451 
452 	case RLIMIT_NPROC:
453 		if (limp->rlim_cur > maxprocperuid)
454 			limp->rlim_cur = maxprocperuid;
455 		if (limp->rlim_max > maxprocperuid)
456 			limp->rlim_max = maxprocperuid;
457 		break;
458 	}
459 	*alimp = *limp;
460 	return (0);
461 }
462 
463 #ifndef _SYS_SYSPROTO_H_
464 struct __getrlimit_args {
465 	u_int	which;
466 	struct	rlimit *rlp;
467 };
468 #endif
469 /* ARGSUSED */
470 int
471 getrlimit(p, uap)
472 	struct proc *p;
473 	register struct __getrlimit_args *uap;
474 {
475 
476 	if (uap->which >= RLIM_NLIMITS)
477 		return (EINVAL);
478 	return (copyout((caddr_t)&p->p_rlimit[uap->which], (caddr_t)uap->rlp,
479 	    sizeof (struct rlimit)));
480 }
481 
482 /*
483  * Transform the running time and tick information in proc p into user,
484  * system, and interrupt time usage.
485  */
486 void
487 calcru(p, up, sp, ip)
488 	struct proc *p;
489 	struct timeval *up;
490 	struct timeval *sp;
491 	struct timeval *ip;
492 {
493 	/* {user, system, interrupt, total} {ticks, usec}; previous tu: */
494 	u_int64_t ut, uu, st, su, it, iu, tt, tu, ptu;
495 	int s;
496 	struct timeval tv;
497 
498 	/* XXX: why spl-protect ?  worst case is an off-by-one report */
499 	s = splstatclock();
500 	ut = p->p_uticks;
501 	st = p->p_sticks;
502 	it = p->p_iticks;
503 	splx(s);
504 
505 	tt = ut + st + it;
506 	if (tt == 0) {
507 		st = 1;
508 		tt = 1;
509 	}
510 
511 	tu = p->p_runtime;
512 	if (p == curproc) {
513 		/*
514 		 * Adjust for the current time slice.  This is actually fairly
515 		 * important since the error here is on the order of a time
516 		 * quantum, which is much greater than the sampling error.
517 		 */
518 		microuptime(&tv);
519 		if (timevalcmp(&tv, &switchtime, <))
520 			printf("microuptime() went backwards (%ld.%06ld -> %ld,%06ld)\n",
521 			    switchtime.tv_sec, switchtime.tv_usec,
522 			    tv.tv_sec, tv.tv_usec);
523 		else
524 			tu += (tv.tv_usec - switchtime.tv_usec) +
525 			    (tv.tv_sec - switchtime.tv_sec) * (int64_t)1000000;
526 	}
527 	ptu = p->p_uu + p->p_su + p->p_iu;
528 	if (tu < ptu || (int64_t)tu < 0) {
529 		/* XXX no %qd in kernel.  Truncate. */
530 		printf("calcru: negative time of %ld usec for pid %d (%s)\n",
531 		       (long)tu, p->p_pid, p->p_comm);
532 		tu = ptu;
533 	}
534 
535 	/* Subdivide tu. */
536 	uu = (tu * ut) / tt;
537 	su = (tu * st) / tt;
538 	iu = tu - uu - su;
539 
540 	/* Enforce monotonicity. */
541 	if (uu < p->p_uu || su < p->p_su || iu < p->p_iu) {
542 		if (uu < p->p_uu)
543 			uu = p->p_uu;
544 		else if (uu + p->p_su + p->p_iu > tu)
545 			uu = tu - p->p_su - p->p_iu;
546 		if (st == 0)
547 			su = p->p_su;
548 		else {
549 			su = ((tu - uu) * st) / (st + it);
550 			if (su < p->p_su)
551 				su = p->p_su;
552 			else if (uu + su + p->p_iu > tu)
553 				su = tu - uu - p->p_iu;
554 		}
555 		KASSERT(uu + su + p->p_iu <= tu,
556 		    ("calcru: monotonisation botch 1"));
557 		iu = tu - uu - su;
558 		KASSERT(iu >= p->p_iu,
559 		    ("calcru: monotonisation botch 2"));
560 	}
561 	p->p_uu = uu;
562 	p->p_su = su;
563 	p->p_iu = iu;
564 
565 	up->tv_sec = uu / 1000000;
566 	up->tv_usec = uu % 1000000;
567 	sp->tv_sec = su / 1000000;
568 	sp->tv_usec = su % 1000000;
569 	if (ip != NULL) {
570 		ip->tv_sec = iu / 1000000;
571 		ip->tv_usec = iu % 1000000;
572 	}
573 }
574 
575 #ifndef _SYS_SYSPROTO_H_
576 struct getrusage_args {
577 	int	who;
578 	struct	rusage *rusage;
579 };
580 #endif
581 /* ARGSUSED */
582 int
583 getrusage(p, uap)
584 	register struct proc *p;
585 	register struct getrusage_args *uap;
586 {
587 	register struct rusage *rup;
588 
589 	switch (uap->who) {
590 
591 	case RUSAGE_SELF:
592 		rup = &p->p_stats->p_ru;
593 		calcru(p, &rup->ru_utime, &rup->ru_stime, NULL);
594 		break;
595 
596 	case RUSAGE_CHILDREN:
597 		rup = &p->p_stats->p_cru;
598 		break;
599 
600 	default:
601 		return (EINVAL);
602 	}
603 	return (copyout((caddr_t)rup, (caddr_t)uap->rusage,
604 	    sizeof (struct rusage)));
605 }
606 
607 void
608 ruadd(ru, ru2)
609 	register struct rusage *ru, *ru2;
610 {
611 	register long *ip, *ip2;
612 	register int i;
613 
614 	timevaladd(&ru->ru_utime, &ru2->ru_utime);
615 	timevaladd(&ru->ru_stime, &ru2->ru_stime);
616 	if (ru->ru_maxrss < ru2->ru_maxrss)
617 		ru->ru_maxrss = ru2->ru_maxrss;
618 	ip = &ru->ru_first; ip2 = &ru2->ru_first;
619 	for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--)
620 		*ip++ += *ip2++;
621 }
622 
623 /*
624  * Make a copy of the plimit structure.
625  * We share these structures copy-on-write after fork,
626  * and copy when a limit is changed.
627  */
628 struct plimit *
629 limcopy(lim)
630 	struct plimit *lim;
631 {
632 	register struct plimit *copy;
633 
634 	MALLOC(copy, struct plimit *, sizeof(struct plimit),
635 	    M_SUBPROC, M_WAITOK);
636 	bcopy(lim->pl_rlimit, copy->pl_rlimit, sizeof(struct plimit));
637 	copy->p_lflags = 0;
638 	copy->p_refcnt = 1;
639 	return (copy);
640 }
641