xref: /freebsd/sys/kern/kern_resource.c (revision 2cdbd5eec4e32beddb3adcca014dda56debc6f5b)
1 /*-
2  * Copyright (c) 1982, 1986, 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the University of
21  *	California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  *	@(#)kern_resource.c	8.5 (Berkeley) 1/21/94
39  * $FreeBSD$
40  */
41 
42 #include "opt_compat.h"
43 #include "opt_rlimit.h"
44 
45 #include <sys/param.h>
46 #include <sys/systm.h>
47 #include <sys/sysproto.h>
48 #include <sys/file.h>
49 #include <sys/resourcevar.h>
50 #include <sys/malloc.h>
51 #include <sys/proc.h>
52 #include <sys/time.h>
53 
54 #include <vm/vm.h>
55 #include <vm/vm_param.h>
56 #include <sys/lock.h>
57 #include <vm/pmap.h>
58 #include <vm/vm_map.h>
59 
60 static int donice __P((struct proc *curp, struct proc *chgp, int n));
61 /* dosetrlimit non-static:  Needed by SysVR4 emulator */
62 int dosetrlimit __P((struct proc *p, u_int which, struct rlimit *limp));
63 
64 /*
65  * Resource controls and accounting.
66  */
67 
68 #ifndef _SYS_SYSPROTO_H_
69 struct getpriority_args {
70 	int	which;
71 	int	who;
72 };
73 #endif
74 int
75 getpriority(curp, uap)
76 	struct proc *curp;
77 	register struct getpriority_args *uap;
78 {
79 	register struct proc *p;
80 	register int low = PRIO_MAX + 1;
81 
82 	switch (uap->which) {
83 
84 	case PRIO_PROCESS:
85 		if (uap->who == 0)
86 			p = curp;
87 		else
88 			p = pfind(uap->who);
89 		if (p == 0)
90 			break;
91 		if (!PRISON_CHECK(curp, p))
92 			break;
93 		low = p->p_nice;
94 		break;
95 
96 	case PRIO_PGRP: {
97 		register struct pgrp *pg;
98 
99 		if (uap->who == 0)
100 			pg = curp->p_pgrp;
101 		else if ((pg = pgfind(uap->who)) == NULL)
102 			break;
103 		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
104 			if ((PRISON_CHECK(curp, p) && p->p_nice < low))
105 				low = p->p_nice;
106 		}
107 		break;
108 	}
109 
110 	case PRIO_USER:
111 		if (uap->who == 0)
112 			uap->who = curp->p_ucred->cr_uid;
113 		LIST_FOREACH(p, &allproc, p_list)
114 			if (PRISON_CHECK(curp, p) &&
115 			    p->p_ucred->cr_uid == uap->who &&
116 			    p->p_nice < low)
117 				low = p->p_nice;
118 		break;
119 
120 	default:
121 		return (EINVAL);
122 	}
123 	if (low == PRIO_MAX + 1)
124 		return (ESRCH);
125 	curp->p_retval[0] = low;
126 	return (0);
127 }
128 
129 #ifndef _SYS_SYSPROTO_H_
130 struct setpriority_args {
131 	int	which;
132 	int	who;
133 	int	prio;
134 };
135 #endif
136 /* ARGSUSED */
137 int
138 setpriority(curp, uap)
139 	struct proc *curp;
140 	register struct setpriority_args *uap;
141 {
142 	register struct proc *p;
143 	int found = 0, error = 0;
144 
145 	switch (uap->which) {
146 
147 	case PRIO_PROCESS:
148 		if (uap->who == 0)
149 			p = curp;
150 		else
151 			p = pfind(uap->who);
152 		if (p == 0)
153 			break;
154 		if (!PRISON_CHECK(curp, p))
155 			break;
156 		error = donice(curp, p, uap->prio);
157 		found++;
158 		break;
159 
160 	case PRIO_PGRP: {
161 		register struct pgrp *pg;
162 
163 		if (uap->who == 0)
164 			pg = curp->p_pgrp;
165 		else if ((pg = pgfind(uap->who)) == NULL)
166 			break;
167 		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
168 			if (PRISON_CHECK(curp, p)) {
169 				error = donice(curp, p, uap->prio);
170 				found++;
171 			}
172 		}
173 		break;
174 	}
175 
176 	case PRIO_USER:
177 		if (uap->who == 0)
178 			uap->who = curp->p_ucred->cr_uid;
179 		LIST_FOREACH(p, &allproc, p_list)
180 			if (p->p_ucred->cr_uid == uap->who &&
181 			    PRISON_CHECK(curp, p)) {
182 				error = donice(curp, p, uap->prio);
183 				found++;
184 			}
185 		break;
186 
187 	default:
188 		return (EINVAL);
189 	}
190 	if (found == 0)
191 		return (ESRCH);
192 	return (error);
193 }
194 
195 static int
196 donice(curp, chgp, n)
197 	register struct proc *curp, *chgp;
198 	register int n;
199 {
200 
201 	if (p_trespass(curp, chgp) != 0)
202 		return (EPERM);
203 	if (n > PRIO_MAX)
204 		n = PRIO_MAX;
205 	if (n < PRIO_MIN)
206 		n = PRIO_MIN;
207 	if (n < chgp->p_nice && suser(curp))
208 		return (EACCES);
209 	chgp->p_nice = n;
210 	(void)resetpriority(chgp);
211 	return (0);
212 }
213 
214 /* rtprio system call */
215 #ifndef _SYS_SYSPROTO_H_
216 struct rtprio_args {
217 	int		function;
218 	pid_t		pid;
219 	struct rtprio	*rtp;
220 };
221 #endif
222 
223 /*
224  * Set realtime priority
225  */
226 
227 /* ARGSUSED */
228 int
229 rtprio(curp, uap)
230 	struct proc *curp;
231 	register struct rtprio_args *uap;
232 {
233 	register struct proc *p;
234 	struct rtprio rtp;
235 	int error;
236 
237 	error = copyin(uap->rtp, &rtp, sizeof(struct rtprio));
238 	if (error)
239 		return (error);
240 
241 	if (uap->pid == 0)
242 		p = curp;
243 	else
244 		p = pfind(uap->pid);
245 
246 	if (p == 0)
247 		return (ESRCH);
248 
249 	switch (uap->function) {
250 	case RTP_LOOKUP:
251 		return (copyout(&p->p_rtprio, uap->rtp, sizeof(struct rtprio)));
252 	case RTP_SET:
253 		if (p_trespass(curp, p) != 0)
254 		        return (EPERM);
255 		/* disallow setting rtprio in most cases if not superuser */
256 		if (suser_xxx(NULL, curp, PRISON_ROOT) != 0) {
257 			/* can't set someone else's */
258 			if (uap->pid)
259 				return (EPERM);
260 			/* can't set realtime priority */
261 /*
262  * Realtime priority has to be restricted for reasons which should be
263  * obvious. However, for idle priority, there is a potential for
264  * system deadlock if an idleprio process gains a lock on a resource
265  * that other processes need (and the idleprio process can't run
266  * due to a CPU-bound normal process). Fix me! XXX
267  */
268 #if 0
269  			if (RTP_PRIO_IS_REALTIME(rtp.type))
270 #endif
271 			if (rtp.type != RTP_PRIO_NORMAL)
272 				return (EPERM);
273 		}
274 		switch (rtp.type) {
275 #ifdef RTP_PRIO_FIFO
276 		case RTP_PRIO_FIFO:
277 #endif
278 		case RTP_PRIO_REALTIME:
279 		case RTP_PRIO_NORMAL:
280 		case RTP_PRIO_IDLE:
281 			if (rtp.prio > RTP_PRIO_MAX)
282 				return (EINVAL);
283 			p->p_rtprio = rtp;
284 			return (0);
285 		default:
286 			return (EINVAL);
287 		}
288 
289 	default:
290 		return (EINVAL);
291 	}
292 }
293 
294 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
295 #ifndef _SYS_SYSPROTO_H_
296 struct osetrlimit_args {
297 	u_int	which;
298 	struct	orlimit *rlp;
299 };
300 #endif
301 /* ARGSUSED */
302 int
303 osetrlimit(p, uap)
304 	struct proc *p;
305 	register struct osetrlimit_args *uap;
306 {
307 	struct orlimit olim;
308 	struct rlimit lim;
309 	int error;
310 
311 	if ((error =
312 	    copyin((caddr_t)uap->rlp, (caddr_t)&olim, sizeof(struct orlimit))))
313 		return (error);
314 	lim.rlim_cur = olim.rlim_cur;
315 	lim.rlim_max = olim.rlim_max;
316 	return (dosetrlimit(p, uap->which, &lim));
317 }
318 
319 #ifndef _SYS_SYSPROTO_H_
320 struct ogetrlimit_args {
321 	u_int	which;
322 	struct	orlimit *rlp;
323 };
324 #endif
325 /* ARGSUSED */
326 int
327 ogetrlimit(p, uap)
328 	struct proc *p;
329 	register struct ogetrlimit_args *uap;
330 {
331 	struct orlimit olim;
332 
333 	if (uap->which >= RLIM_NLIMITS)
334 		return (EINVAL);
335 	olim.rlim_cur = p->p_rlimit[uap->which].rlim_cur;
336 	if (olim.rlim_cur == -1)
337 		olim.rlim_cur = 0x7fffffff;
338 	olim.rlim_max = p->p_rlimit[uap->which].rlim_max;
339 	if (olim.rlim_max == -1)
340 		olim.rlim_max = 0x7fffffff;
341 	return (copyout((caddr_t)&olim, (caddr_t)uap->rlp, sizeof(olim)));
342 }
343 #endif /* COMPAT_43 || COMPAT_SUNOS */
344 
345 #ifndef _SYS_SYSPROTO_H_
346 struct __setrlimit_args {
347 	u_int	which;
348 	struct	rlimit *rlp;
349 };
350 #endif
351 /* ARGSUSED */
352 int
353 setrlimit(p, uap)
354 	struct proc *p;
355 	register struct __setrlimit_args *uap;
356 {
357 	struct rlimit alim;
358 	int error;
359 
360 	if ((error =
361 	    copyin((caddr_t)uap->rlp, (caddr_t)&alim, sizeof (struct rlimit))))
362 		return (error);
363 	return (dosetrlimit(p, uap->which, &alim));
364 }
365 
366 int
367 dosetrlimit(p, which, limp)
368 	struct proc *p;
369 	u_int which;
370 	struct rlimit *limp;
371 {
372 	register struct rlimit *alimp;
373 	int error;
374 
375 	if (which >= RLIM_NLIMITS)
376 		return (EINVAL);
377 	alimp = &p->p_rlimit[which];
378 
379 	/*
380 	 * Preserve historical bugs by treating negative limits as unsigned.
381 	 */
382 	if (limp->rlim_cur < 0)
383 		limp->rlim_cur = RLIM_INFINITY;
384 	if (limp->rlim_max < 0)
385 		limp->rlim_max = RLIM_INFINITY;
386 
387 	if (limp->rlim_cur > alimp->rlim_max ||
388 	    limp->rlim_max > alimp->rlim_max)
389 		if ((error = suser_xxx(0, p, PRISON_ROOT)))
390 			return (error);
391 	if (limp->rlim_cur > limp->rlim_max)
392 		limp->rlim_cur = limp->rlim_max;
393 	if (p->p_limit->p_refcnt > 1 &&
394 	    (p->p_limit->p_lflags & PL_SHAREMOD) == 0) {
395 		p->p_limit->p_refcnt--;
396 		p->p_limit = limcopy(p->p_limit);
397 		alimp = &p->p_rlimit[which];
398 	}
399 
400 	switch (which) {
401 
402 	case RLIMIT_CPU:
403 		if (limp->rlim_cur > RLIM_INFINITY / (rlim_t)1000000)
404 			p->p_limit->p_cpulimit = RLIM_INFINITY;
405 		else
406 			p->p_limit->p_cpulimit =
407 			    (rlim_t)1000000 * limp->rlim_cur;
408 		break;
409 	case RLIMIT_DATA:
410 		if (limp->rlim_cur > MAXDSIZ)
411 			limp->rlim_cur = MAXDSIZ;
412 		if (limp->rlim_max > MAXDSIZ)
413 			limp->rlim_max = MAXDSIZ;
414 		break;
415 
416 	case RLIMIT_STACK:
417 		if (limp->rlim_cur > MAXSSIZ)
418 			limp->rlim_cur = MAXSSIZ;
419 		if (limp->rlim_max > MAXSSIZ)
420 			limp->rlim_max = MAXSSIZ;
421 		/*
422 		 * Stack is allocated to the max at exec time with only
423 		 * "rlim_cur" bytes accessible.  If stack limit is going
424 		 * up make more accessible, if going down make inaccessible.
425 		 */
426 		if (limp->rlim_cur != alimp->rlim_cur) {
427 			vm_offset_t addr;
428 			vm_size_t size;
429 			vm_prot_t prot;
430 
431 			if (limp->rlim_cur > alimp->rlim_cur) {
432 				prot = VM_PROT_ALL;
433 				size = limp->rlim_cur - alimp->rlim_cur;
434 				addr = USRSTACK - limp->rlim_cur;
435 			} else {
436 				prot = VM_PROT_NONE;
437 				size = alimp->rlim_cur - limp->rlim_cur;
438 				addr = USRSTACK - alimp->rlim_cur;
439 			}
440 			addr = trunc_page(addr);
441 			size = round_page(size);
442 			(void) vm_map_protect(&p->p_vmspace->vm_map,
443 					      addr, addr+size, prot, FALSE);
444 		}
445 		break;
446 
447 	case RLIMIT_NOFILE:
448 		if (limp->rlim_cur > maxfilesperproc)
449 			limp->rlim_cur = maxfilesperproc;
450 		if (limp->rlim_max > maxfilesperproc)
451 			limp->rlim_max = maxfilesperproc;
452 		break;
453 
454 	case RLIMIT_NPROC:
455 		if (limp->rlim_cur > maxprocperuid)
456 			limp->rlim_cur = maxprocperuid;
457 		if (limp->rlim_max > maxprocperuid)
458 			limp->rlim_max = maxprocperuid;
459 		break;
460 	}
461 	*alimp = *limp;
462 	return (0);
463 }
464 
465 #ifndef _SYS_SYSPROTO_H_
466 struct __getrlimit_args {
467 	u_int	which;
468 	struct	rlimit *rlp;
469 };
470 #endif
471 /* ARGSUSED */
472 int
473 getrlimit(p, uap)
474 	struct proc *p;
475 	register struct __getrlimit_args *uap;
476 {
477 
478 	if (uap->which >= RLIM_NLIMITS)
479 		return (EINVAL);
480 	return (copyout((caddr_t)&p->p_rlimit[uap->which], (caddr_t)uap->rlp,
481 	    sizeof (struct rlimit)));
482 }
483 
484 /*
485  * Transform the running time and tick information in proc p into user,
486  * system, and interrupt time usage.
487  */
488 void
489 calcru(p, up, sp, ip)
490 	struct proc *p;
491 	struct timeval *up;
492 	struct timeval *sp;
493 	struct timeval *ip;
494 {
495 	/* {user, system, interrupt, total} {ticks, usec}; previous tu: */
496 	u_int64_t ut, uu, st, su, it, iu, tt, tu, ptu;
497 	int s;
498 	struct timeval tv;
499 
500 	/* XXX: why spl-protect ?  worst case is an off-by-one report */
501 	s = splstatclock();
502 	ut = p->p_uticks;
503 	st = p->p_sticks;
504 	it = p->p_iticks;
505 	splx(s);
506 
507 	tt = ut + st + it;
508 	if (tt == 0) {
509 		st = 1;
510 		tt = 1;
511 	}
512 
513 	tu = p->p_runtime;
514 	if (p == curproc) {
515 		/*
516 		 * Adjust for the current time slice.  This is actually fairly
517 		 * important since the error here is on the order of a time
518 		 * quantum, which is much greater than the sampling error.
519 		 */
520 		microuptime(&tv);
521 		if (timevalcmp(&tv, &switchtime, <))
522 			printf("microuptime() went backwards (%ld.%06ld -> %ld.%06ld)\n",
523 			    switchtime.tv_sec, switchtime.tv_usec,
524 			    tv.tv_sec, tv.tv_usec);
525 		else
526 			tu += (tv.tv_usec - switchtime.tv_usec) +
527 			    (tv.tv_sec - switchtime.tv_sec) * (int64_t)1000000;
528 	}
529 	ptu = p->p_uu + p->p_su + p->p_iu;
530 	if (tu < ptu || (int64_t)tu < 0) {
531 		/* XXX no %qd in kernel.  Truncate. */
532 		printf("calcru: negative time of %ld usec for pid %d (%s)\n",
533 		       (long)tu, p->p_pid, p->p_comm);
534 		tu = ptu;
535 	}
536 
537 	/* Subdivide tu. */
538 	uu = (tu * ut) / tt;
539 	su = (tu * st) / tt;
540 	iu = tu - uu - su;
541 
542 	/* Enforce monotonicity. */
543 	if (uu < p->p_uu || su < p->p_su || iu < p->p_iu) {
544 		if (uu < p->p_uu)
545 			uu = p->p_uu;
546 		else if (uu + p->p_su + p->p_iu > tu)
547 			uu = tu - p->p_su - p->p_iu;
548 		if (st == 0)
549 			su = p->p_su;
550 		else {
551 			su = ((tu - uu) * st) / (st + it);
552 			if (su < p->p_su)
553 				su = p->p_su;
554 			else if (uu + su + p->p_iu > tu)
555 				su = tu - uu - p->p_iu;
556 		}
557 		KASSERT(uu + su + p->p_iu <= tu,
558 		    ("calcru: monotonisation botch 1"));
559 		iu = tu - uu - su;
560 		KASSERT(iu >= p->p_iu,
561 		    ("calcru: monotonisation botch 2"));
562 	}
563 	p->p_uu = uu;
564 	p->p_su = su;
565 	p->p_iu = iu;
566 
567 	up->tv_sec = uu / 1000000;
568 	up->tv_usec = uu % 1000000;
569 	sp->tv_sec = su / 1000000;
570 	sp->tv_usec = su % 1000000;
571 	if (ip != NULL) {
572 		ip->tv_sec = iu / 1000000;
573 		ip->tv_usec = iu % 1000000;
574 	}
575 }
576 
577 #ifndef _SYS_SYSPROTO_H_
578 struct getrusage_args {
579 	int	who;
580 	struct	rusage *rusage;
581 };
582 #endif
583 /* ARGSUSED */
584 int
585 getrusage(p, uap)
586 	register struct proc *p;
587 	register struct getrusage_args *uap;
588 {
589 	register struct rusage *rup;
590 
591 	switch (uap->who) {
592 
593 	case RUSAGE_SELF:
594 		rup = &p->p_stats->p_ru;
595 		calcru(p, &rup->ru_utime, &rup->ru_stime, NULL);
596 		break;
597 
598 	case RUSAGE_CHILDREN:
599 		rup = &p->p_stats->p_cru;
600 		break;
601 
602 	default:
603 		return (EINVAL);
604 	}
605 	return (copyout((caddr_t)rup, (caddr_t)uap->rusage,
606 	    sizeof (struct rusage)));
607 }
608 
609 void
610 ruadd(ru, ru2)
611 	register struct rusage *ru, *ru2;
612 {
613 	register long *ip, *ip2;
614 	register int i;
615 
616 	timevaladd(&ru->ru_utime, &ru2->ru_utime);
617 	timevaladd(&ru->ru_stime, &ru2->ru_stime);
618 	if (ru->ru_maxrss < ru2->ru_maxrss)
619 		ru->ru_maxrss = ru2->ru_maxrss;
620 	ip = &ru->ru_first; ip2 = &ru2->ru_first;
621 	for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--)
622 		*ip++ += *ip2++;
623 }
624 
625 /*
626  * Make a copy of the plimit structure.
627  * We share these structures copy-on-write after fork,
628  * and copy when a limit is changed.
629  */
630 struct plimit *
631 limcopy(lim)
632 	struct plimit *lim;
633 {
634 	register struct plimit *copy;
635 
636 	MALLOC(copy, struct plimit *, sizeof(struct plimit),
637 	    M_SUBPROC, M_WAITOK);
638 	bcopy(lim->pl_rlimit, copy->pl_rlimit, sizeof(struct plimit));
639 	copy->p_lflags = 0;
640 	copy->p_refcnt = 1;
641 	return (copy);
642 }
643