xref: /freebsd/sys/kern/kern_resource.c (revision b601c69bdbe8755d26570261d7fd4c02ee4eff74)
1 /*-
2  * Copyright (c) 1982, 1986, 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the University of
21  *	California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  *	@(#)kern_resource.c	8.5 (Berkeley) 1/21/94
39  * $FreeBSD$
40  */
41 
42 #include "opt_compat.h"
43 #include "opt_rlimit.h"
44 
45 #include <sys/param.h>
46 #include <sys/systm.h>
47 #include <sys/sysproto.h>
48 #include <sys/file.h>
49 #include <sys/resourcevar.h>
50 #include <sys/malloc.h>
51 #include <sys/proc.h>
52 #include <sys/time.h>
53 
54 #include <vm/vm.h>
55 #include <vm/vm_param.h>
56 #include <sys/lock.h>
57 #include <vm/pmap.h>
58 #include <vm/vm_map.h>
59 
60 static int donice __P((struct proc *curp, struct proc *chgp, int n));
61 /* dosetrlimit non-static:  Needed by SysVR4 emulator */
62 int dosetrlimit __P((struct proc *p, u_int which, struct rlimit *limp));
63 
64 /*
65  * Resource controls and accounting.
66  */
67 
68 #ifndef _SYS_SYSPROTO_H_
69 struct getpriority_args {
70 	int	which;
71 	int	who;
72 };
73 #endif
74 int
75 getpriority(curp, uap)
76 	struct proc *curp;
77 	register struct getpriority_args *uap;
78 {
79 	register struct proc *p;
80 	register int low = PRIO_MAX + 1;
81 
82 	switch (uap->which) {
83 
84 	case PRIO_PROCESS:
85 		if (uap->who == 0)
86 			p = curp;
87 		else
88 			p = pfind(uap->who);
89 		if (p == 0)
90 			break;
91 		if (!PRISON_CHECK(curp, p))
92 			break;
93 		low = p->p_nice;
94 		break;
95 
96 	case PRIO_PGRP: {
97 		register struct pgrp *pg;
98 
99 		if (uap->who == 0)
100 			pg = curp->p_pgrp;
101 		else if ((pg = pgfind(uap->who)) == NULL)
102 			break;
103 		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
104 			if ((PRISON_CHECK(curp, p) && p->p_nice < low))
105 				low = p->p_nice;
106 		}
107 		break;
108 	}
109 
110 	case PRIO_USER:
111 		if (uap->who == 0)
112 			uap->who = curp->p_ucred->cr_uid;
113 		LIST_FOREACH(p, &allproc, p_list)
114 			if (PRISON_CHECK(curp, p) &&
115 			    p->p_ucred->cr_uid == uap->who &&
116 			    p->p_nice < low)
117 				low = p->p_nice;
118 		break;
119 
120 	default:
121 		return (EINVAL);
122 	}
123 	if (low == PRIO_MAX + 1)
124 		return (ESRCH);
125 	curp->p_retval[0] = low;
126 	return (0);
127 }
128 
129 #ifndef _SYS_SYSPROTO_H_
130 struct setpriority_args {
131 	int	which;
132 	int	who;
133 	int	prio;
134 };
135 #endif
136 /* ARGSUSED */
137 int
138 setpriority(curp, uap)
139 	struct proc *curp;
140 	register struct setpriority_args *uap;
141 {
142 	register struct proc *p;
143 	int found = 0, error = 0;
144 
145 	switch (uap->which) {
146 
147 	case PRIO_PROCESS:
148 		if (uap->who == 0)
149 			p = curp;
150 		else
151 			p = pfind(uap->who);
152 		if (p == 0)
153 			break;
154 		if (!PRISON_CHECK(curp, p))
155 			break;
156 		error = donice(curp, p, uap->prio);
157 		found++;
158 		break;
159 
160 	case PRIO_PGRP: {
161 		register struct pgrp *pg;
162 
163 		if (uap->who == 0)
164 			pg = curp->p_pgrp;
165 		else if ((pg = pgfind(uap->who)) == NULL)
166 			break;
167 		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
168 			if (PRISON_CHECK(curp, p)) {
169 				error = donice(curp, p, uap->prio);
170 				found++;
171 			}
172 		}
173 		break;
174 	}
175 
176 	case PRIO_USER:
177 		if (uap->who == 0)
178 			uap->who = curp->p_ucred->cr_uid;
179 		LIST_FOREACH(p, &allproc, p_list)
180 			if (p->p_ucred->cr_uid == uap->who &&
181 			    PRISON_CHECK(curp, p)) {
182 				error = donice(curp, p, uap->prio);
183 				found++;
184 			}
185 		break;
186 
187 	default:
188 		return (EINVAL);
189 	}
190 	if (found == 0)
191 		return (ESRCH);
192 	return (error);
193 }
194 
195 static int
196 donice(curp, chgp, n)
197 	register struct proc *curp, *chgp;
198 	register int n;
199 {
200 	register struct pcred *pcred = curp->p_cred;
201 
202 	if (pcred->pc_ucred->cr_uid && pcred->p_ruid &&
203 	    pcred->pc_ucred->cr_uid != chgp->p_ucred->cr_uid &&
204 	    pcred->p_ruid != chgp->p_ucred->cr_uid)
205 		return (EPERM);
206 	if (n > PRIO_MAX)
207 		n = PRIO_MAX;
208 	if (n < PRIO_MIN)
209 		n = PRIO_MIN;
210 	if (n < chgp->p_nice && suser(curp))
211 		return (EACCES);
212 	chgp->p_nice = n;
213 	(void)resetpriority(chgp);
214 	return (0);
215 }
216 
217 /* rtprio system call */
218 #ifndef _SYS_SYSPROTO_H_
219 struct rtprio_args {
220 	int		function;
221 	pid_t		pid;
222 	struct rtprio	*rtp;
223 };
224 #endif
225 
226 /*
227  * Set realtime priority
228  */
229 
230 /* ARGSUSED */
231 int
232 rtprio(curp, uap)
233 	struct proc *curp;
234 	register struct rtprio_args *uap;
235 {
236 	register struct proc *p;
237 	register struct pcred *pcred = curp->p_cred;
238 	struct rtprio rtp;
239 	int error;
240 
241 	error = copyin(uap->rtp, &rtp, sizeof(struct rtprio));
242 	if (error)
243 		return (error);
244 
245 	if (uap->pid == 0)
246 		p = curp;
247 	else
248 		p = pfind(uap->pid);
249 
250 	if (p == 0)
251 		return (ESRCH);
252 
253 	switch (uap->function) {
254 	case RTP_LOOKUP:
255 		return (copyout(&p->p_rtprio, uap->rtp, sizeof(struct rtprio)));
256 	case RTP_SET:
257 		if (pcred->pc_ucred->cr_uid && pcred->p_ruid &&
258 		    pcred->pc_ucred->cr_uid != p->p_ucred->cr_uid &&
259 		    pcred->p_ruid != p->p_ucred->cr_uid)
260 		        return (EPERM);
261 		/* disallow setting rtprio in most cases if not superuser */
262 		if (suser(curp)) {
263 			/* can't set someone else's */
264 			if (uap->pid)
265 				return (EPERM);
266 			/* can't set realtime priority */
267 /*
268  * Realtime priority has to be restricted for reasons which should be
269  * obvious. However, for idle priority, there is a potential for
270  * system deadlock if an idleprio process gains a lock on a resource
271  * that other processes need (and the idleprio process can't run
272  * due to a CPU-bound normal process). Fix me! XXX
273  */
274 #if 0
275  			if (RTP_PRIO_IS_REALTIME(rtp.type))
276 #endif
277 			if (rtp.type != RTP_PRIO_NORMAL)
278 				return (EPERM);
279 		}
280 		switch (rtp.type) {
281 #ifdef RTP_PRIO_FIFO
282 		case RTP_PRIO_FIFO:
283 #endif
284 		case RTP_PRIO_REALTIME:
285 		case RTP_PRIO_NORMAL:
286 		case RTP_PRIO_IDLE:
287 			if (rtp.prio > RTP_PRIO_MAX)
288 				return (EINVAL);
289 			p->p_rtprio = rtp;
290 			return (0);
291 		default:
292 			return (EINVAL);
293 		}
294 
295 	default:
296 		return (EINVAL);
297 	}
298 }
299 
300 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
301 #ifndef _SYS_SYSPROTO_H_
302 struct osetrlimit_args {
303 	u_int	which;
304 	struct	orlimit *rlp;
305 };
306 #endif
307 /* ARGSUSED */
308 int
309 osetrlimit(p, uap)
310 	struct proc *p;
311 	register struct osetrlimit_args *uap;
312 {
313 	struct orlimit olim;
314 	struct rlimit lim;
315 	int error;
316 
317 	if ((error =
318 	    copyin((caddr_t)uap->rlp, (caddr_t)&olim, sizeof(struct orlimit))))
319 		return (error);
320 	lim.rlim_cur = olim.rlim_cur;
321 	lim.rlim_max = olim.rlim_max;
322 	return (dosetrlimit(p, uap->which, &lim));
323 }
324 
325 #ifndef _SYS_SYSPROTO_H_
326 struct ogetrlimit_args {
327 	u_int	which;
328 	struct	orlimit *rlp;
329 };
330 #endif
331 /* ARGSUSED */
332 int
333 ogetrlimit(p, uap)
334 	struct proc *p;
335 	register struct ogetrlimit_args *uap;
336 {
337 	struct orlimit olim;
338 
339 	if (uap->which >= RLIM_NLIMITS)
340 		return (EINVAL);
341 	olim.rlim_cur = p->p_rlimit[uap->which].rlim_cur;
342 	if (olim.rlim_cur == -1)
343 		olim.rlim_cur = 0x7fffffff;
344 	olim.rlim_max = p->p_rlimit[uap->which].rlim_max;
345 	if (olim.rlim_max == -1)
346 		olim.rlim_max = 0x7fffffff;
347 	return (copyout((caddr_t)&olim, (caddr_t)uap->rlp, sizeof(olim)));
348 }
349 #endif /* COMPAT_43 || COMPAT_SUNOS */
350 
351 #ifndef _SYS_SYSPROTO_H_
352 struct __setrlimit_args {
353 	u_int	which;
354 	struct	rlimit *rlp;
355 };
356 #endif
357 /* ARGSUSED */
358 int
359 setrlimit(p, uap)
360 	struct proc *p;
361 	register struct __setrlimit_args *uap;
362 {
363 	struct rlimit alim;
364 	int error;
365 
366 	if ((error =
367 	    copyin((caddr_t)uap->rlp, (caddr_t)&alim, sizeof (struct rlimit))))
368 		return (error);
369 	return (dosetrlimit(p, uap->which, &alim));
370 }
371 
372 int
373 dosetrlimit(p, which, limp)
374 	struct proc *p;
375 	u_int which;
376 	struct rlimit *limp;
377 {
378 	register struct rlimit *alimp;
379 	int error;
380 
381 	if (which >= RLIM_NLIMITS)
382 		return (EINVAL);
383 	alimp = &p->p_rlimit[which];
384 
385 	/*
386 	 * Preserve historical bugs by treating negative limits as unsigned.
387 	 */
388 	if (limp->rlim_cur < 0)
389 		limp->rlim_cur = RLIM_INFINITY;
390 	if (limp->rlim_max < 0)
391 		limp->rlim_max = RLIM_INFINITY;
392 
393 	if (limp->rlim_cur > alimp->rlim_max ||
394 	    limp->rlim_max > alimp->rlim_max)
395 		if ((error = suser_xxx(0, p, PRISON_ROOT)))
396 			return (error);
397 	if (limp->rlim_cur > limp->rlim_max)
398 		limp->rlim_cur = limp->rlim_max;
399 	if (p->p_limit->p_refcnt > 1 &&
400 	    (p->p_limit->p_lflags & PL_SHAREMOD) == 0) {
401 		p->p_limit->p_refcnt--;
402 		p->p_limit = limcopy(p->p_limit);
403 		alimp = &p->p_rlimit[which];
404 	}
405 
406 	switch (which) {
407 
408 	case RLIMIT_CPU:
409 		if (limp->rlim_cur > RLIM_INFINITY / (rlim_t)1000000)
410 			p->p_limit->p_cpulimit = RLIM_INFINITY;
411 		else
412 			p->p_limit->p_cpulimit =
413 			    (rlim_t)1000000 * limp->rlim_cur;
414 		break;
415 	case RLIMIT_DATA:
416 		if (limp->rlim_cur > MAXDSIZ)
417 			limp->rlim_cur = MAXDSIZ;
418 		if (limp->rlim_max > MAXDSIZ)
419 			limp->rlim_max = MAXDSIZ;
420 		break;
421 
422 	case RLIMIT_STACK:
423 		if (limp->rlim_cur > MAXSSIZ)
424 			limp->rlim_cur = MAXSSIZ;
425 		if (limp->rlim_max > MAXSSIZ)
426 			limp->rlim_max = MAXSSIZ;
427 		/*
428 		 * Stack is allocated to the max at exec time with only
429 		 * "rlim_cur" bytes accessible.  If stack limit is going
430 		 * up make more accessible, if going down make inaccessible.
431 		 */
432 		if (limp->rlim_cur != alimp->rlim_cur) {
433 			vm_offset_t addr;
434 			vm_size_t size;
435 			vm_prot_t prot;
436 
437 			if (limp->rlim_cur > alimp->rlim_cur) {
438 				prot = VM_PROT_ALL;
439 				size = limp->rlim_cur - alimp->rlim_cur;
440 				addr = USRSTACK - limp->rlim_cur;
441 			} else {
442 				prot = VM_PROT_NONE;
443 				size = alimp->rlim_cur - limp->rlim_cur;
444 				addr = USRSTACK - alimp->rlim_cur;
445 			}
446 			addr = trunc_page(addr);
447 			size = round_page(size);
448 			(void) vm_map_protect(&p->p_vmspace->vm_map,
449 					      addr, addr+size, prot, FALSE);
450 		}
451 		break;
452 
453 	case RLIMIT_NOFILE:
454 		if (limp->rlim_cur > maxfilesperproc)
455 			limp->rlim_cur = maxfilesperproc;
456 		if (limp->rlim_max > maxfilesperproc)
457 			limp->rlim_max = maxfilesperproc;
458 		break;
459 
460 	case RLIMIT_NPROC:
461 		if (limp->rlim_cur > maxprocperuid)
462 			limp->rlim_cur = maxprocperuid;
463 		if (limp->rlim_max > maxprocperuid)
464 			limp->rlim_max = maxprocperuid;
465 		break;
466 	}
467 	*alimp = *limp;
468 	return (0);
469 }
470 
471 #ifndef _SYS_SYSPROTO_H_
472 struct __getrlimit_args {
473 	u_int	which;
474 	struct	rlimit *rlp;
475 };
476 #endif
477 /* ARGSUSED */
478 int
479 getrlimit(p, uap)
480 	struct proc *p;
481 	register struct __getrlimit_args *uap;
482 {
483 
484 	if (uap->which >= RLIM_NLIMITS)
485 		return (EINVAL);
486 	return (copyout((caddr_t)&p->p_rlimit[uap->which], (caddr_t)uap->rlp,
487 	    sizeof (struct rlimit)));
488 }
489 
490 /*
491  * Transform the running time and tick information in proc p into user,
492  * system, and interrupt time usage.
493  */
494 void
495 calcru(p, up, sp, ip)
496 	struct proc *p;
497 	struct timeval *up;
498 	struct timeval *sp;
499 	struct timeval *ip;
500 {
501 	/* {user, system, interrupt, total} {ticks, usec}; previous tu: */
502 	u_int64_t ut, uu, st, su, it, iu, tt, tu, ptu;
503 	int s;
504 	struct timeval tv;
505 
506 	/* XXX: why spl-protect ?  worst case is an off-by-one report */
507 	s = splstatclock();
508 	ut = p->p_uticks;
509 	st = p->p_sticks;
510 	it = p->p_iticks;
511 	splx(s);
512 
513 	tt = ut + st + it;
514 	if (tt == 0) {
515 		st = 1;
516 		tt = 1;
517 	}
518 
519 	tu = p->p_runtime;
520 	if (p == curproc) {
521 		/*
522 		 * Adjust for the current time slice.  This is actually fairly
523 		 * important since the error here is on the order of a time
524 		 * quantum, which is much greater than the sampling error.
525 		 */
526 		microuptime(&tv);
527 		if (timevalcmp(&tv, &switchtime, <))
528 			printf("microuptime() went backwards (%ld.%06ld -> %ld.%06ld)\n",
529 			    switchtime.tv_sec, switchtime.tv_usec,
530 			    tv.tv_sec, tv.tv_usec);
531 		else
532 			tu += (tv.tv_usec - switchtime.tv_usec) +
533 			    (tv.tv_sec - switchtime.tv_sec) * (int64_t)1000000;
534 	}
535 	ptu = p->p_uu + p->p_su + p->p_iu;
536 	if (tu < ptu || (int64_t)tu < 0) {
537 		/* XXX no %qd in kernel.  Truncate. */
538 		printf("calcru: negative time of %ld usec for pid %d (%s)\n",
539 		       (long)tu, p->p_pid, p->p_comm);
540 		tu = ptu;
541 	}
542 
543 	/* Subdivide tu. */
544 	uu = (tu * ut) / tt;
545 	su = (tu * st) / tt;
546 	iu = tu - uu - su;
547 
548 	/* Enforce monotonicity. */
549 	if (uu < p->p_uu || su < p->p_su || iu < p->p_iu) {
550 		if (uu < p->p_uu)
551 			uu = p->p_uu;
552 		else if (uu + p->p_su + p->p_iu > tu)
553 			uu = tu - p->p_su - p->p_iu;
554 		if (st == 0)
555 			su = p->p_su;
556 		else {
557 			su = ((tu - uu) * st) / (st + it);
558 			if (su < p->p_su)
559 				su = p->p_su;
560 			else if (uu + su + p->p_iu > tu)
561 				su = tu - uu - p->p_iu;
562 		}
563 		KASSERT(uu + su + p->p_iu <= tu,
564 		    ("calcru: monotonisation botch 1"));
565 		iu = tu - uu - su;
566 		KASSERT(iu >= p->p_iu,
567 		    ("calcru: monotonisation botch 2"));
568 	}
569 	p->p_uu = uu;
570 	p->p_su = su;
571 	p->p_iu = iu;
572 
573 	up->tv_sec = uu / 1000000;
574 	up->tv_usec = uu % 1000000;
575 	sp->tv_sec = su / 1000000;
576 	sp->tv_usec = su % 1000000;
577 	if (ip != NULL) {
578 		ip->tv_sec = iu / 1000000;
579 		ip->tv_usec = iu % 1000000;
580 	}
581 }
582 
583 #ifndef _SYS_SYSPROTO_H_
584 struct getrusage_args {
585 	int	who;
586 	struct	rusage *rusage;
587 };
588 #endif
589 /* ARGSUSED */
590 int
591 getrusage(p, uap)
592 	register struct proc *p;
593 	register struct getrusage_args *uap;
594 {
595 	register struct rusage *rup;
596 
597 	switch (uap->who) {
598 
599 	case RUSAGE_SELF:
600 		rup = &p->p_stats->p_ru;
601 		calcru(p, &rup->ru_utime, &rup->ru_stime, NULL);
602 		break;
603 
604 	case RUSAGE_CHILDREN:
605 		rup = &p->p_stats->p_cru;
606 		break;
607 
608 	default:
609 		return (EINVAL);
610 	}
611 	return (copyout((caddr_t)rup, (caddr_t)uap->rusage,
612 	    sizeof (struct rusage)));
613 }
614 
615 void
616 ruadd(ru, ru2)
617 	register struct rusage *ru, *ru2;
618 {
619 	register long *ip, *ip2;
620 	register int i;
621 
622 	timevaladd(&ru->ru_utime, &ru2->ru_utime);
623 	timevaladd(&ru->ru_stime, &ru2->ru_stime);
624 	if (ru->ru_maxrss < ru2->ru_maxrss)
625 		ru->ru_maxrss = ru2->ru_maxrss;
626 	ip = &ru->ru_first; ip2 = &ru2->ru_first;
627 	for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--)
628 		*ip++ += *ip2++;
629 }
630 
631 /*
632  * Make a copy of the plimit structure.
633  * We share these structures copy-on-write after fork,
634  * and copy when a limit is changed.
635  */
636 struct plimit *
637 limcopy(lim)
638 	struct plimit *lim;
639 {
640 	register struct plimit *copy;
641 
642 	MALLOC(copy, struct plimit *, sizeof(struct plimit),
643 	    M_SUBPROC, M_WAITOK);
644 	bcopy(lim->pl_rlimit, copy->pl_rlimit, sizeof(struct plimit));
645 	copy->p_lflags = 0;
646 	copy->p_refcnt = 1;
647 	return (copy);
648 }
649