xref: /freebsd/sys/kern/kern_resource.c (revision a14a0223ae1b172e96dd2a1d849e22026a98b692)
1 /*-
2  * Copyright (c) 1982, 1986, 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the University of
21  *	California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  *	@(#)kern_resource.c	8.5 (Berkeley) 1/21/94
39  * $FreeBSD$
40  */
41 
42 #include "opt_compat.h"
43 #include "opt_rlimit.h"
44 
45 #include <sys/param.h>
46 #include <sys/systm.h>
47 #include <sys/sysproto.h>
48 #include <sys/file.h>
49 #include <sys/resourcevar.h>
50 #include <sys/malloc.h>
51 #include <sys/proc.h>
52 
53 #include <vm/vm.h>
54 #include <vm/vm_param.h>
55 #include <sys/lock.h>
56 #include <vm/pmap.h>
57 #include <vm/vm_map.h>
58 
59 static int donice __P((struct proc *curp, struct proc *chgp, int n));
60 /* dosetrlimit non-static:  Needed by SysVR4 emulator */
61 int dosetrlimit __P((struct proc *p, u_int which, struct rlimit *limp));
62 
63 /*
64  * Resource controls and accounting.
65  */
66 
67 #ifndef _SYS_SYSPROTO_H_
68 struct getpriority_args {
69 	int	which;
70 	int	who;
71 };
72 #endif
73 int
74 getpriority(curp, uap)
75 	struct proc *curp;
76 	register struct getpriority_args *uap;
77 {
78 	register struct proc *p;
79 	register int low = PRIO_MAX + 1;
80 
81 	switch (uap->which) {
82 
83 	case PRIO_PROCESS:
84 		if (uap->who == 0)
85 			p = curp;
86 		else
87 			p = pfind(uap->who);
88 		if (p == 0)
89 			break;
90 		low = p->p_nice;
91 		break;
92 
93 	case PRIO_PGRP: {
94 		register struct pgrp *pg;
95 
96 		if (uap->who == 0)
97 			pg = curp->p_pgrp;
98 		else if ((pg = pgfind(uap->who)) == NULL)
99 			break;
100 		for (p = pg->pg_members.lh_first; p != 0;
101 		     p = p->p_pglist.le_next) {
102 			if (p->p_nice < low)
103 				low = p->p_nice;
104 		}
105 		break;
106 	}
107 
108 	case PRIO_USER:
109 		if (uap->who == 0)
110 			uap->who = curp->p_ucred->cr_uid;
111 		for (p = allproc.lh_first; p != 0; p = p->p_list.le_next)
112 			if (p->p_ucred->cr_uid == uap->who &&
113 			    p->p_nice < low)
114 				low = p->p_nice;
115 		break;
116 
117 	default:
118 		return (EINVAL);
119 	}
120 	if (low == PRIO_MAX + 1)
121 		return (ESRCH);
122 	curp->p_retval[0] = low;
123 	return (0);
124 }
125 
126 #ifndef _SYS_SYSPROTO_H_
127 struct setpriority_args {
128 	int	which;
129 	int	who;
130 	int	prio;
131 };
132 #endif
133 /* ARGSUSED */
134 int
135 setpriority(curp, uap)
136 	struct proc *curp;
137 	register struct setpriority_args *uap;
138 {
139 	register struct proc *p;
140 	int found = 0, error = 0;
141 
142 	switch (uap->which) {
143 
144 	case PRIO_PROCESS:
145 		if (uap->who == 0)
146 			p = curp;
147 		else
148 			p = pfind(uap->who);
149 		if (p == 0)
150 			break;
151 		error = donice(curp, p, uap->prio);
152 		found++;
153 		break;
154 
155 	case PRIO_PGRP: {
156 		register struct pgrp *pg;
157 
158 		if (uap->who == 0)
159 			pg = curp->p_pgrp;
160 		else if ((pg = pgfind(uap->who)) == NULL)
161 			break;
162 		for (p = pg->pg_members.lh_first; p != 0;
163 		    p = p->p_pglist.le_next) {
164 			error = donice(curp, p, uap->prio);
165 			found++;
166 		}
167 		break;
168 	}
169 
170 	case PRIO_USER:
171 		if (uap->who == 0)
172 			uap->who = curp->p_ucred->cr_uid;
173 		for (p = allproc.lh_first; p != 0; p = p->p_list.le_next)
174 			if (p->p_ucred->cr_uid == uap->who) {
175 				error = donice(curp, p, uap->prio);
176 				found++;
177 			}
178 		break;
179 
180 	default:
181 		return (EINVAL);
182 	}
183 	if (found == 0)
184 		return (ESRCH);
185 	return (error);
186 }
187 
188 static int
189 donice(curp, chgp, n)
190 	register struct proc *curp, *chgp;
191 	register int n;
192 {
193 	register struct pcred *pcred = curp->p_cred;
194 
195 	if (pcred->pc_ucred->cr_uid && pcred->p_ruid &&
196 	    pcred->pc_ucred->cr_uid != chgp->p_ucred->cr_uid &&
197 	    pcred->p_ruid != chgp->p_ucred->cr_uid)
198 		return (EPERM);
199 	if (n > PRIO_MAX)
200 		n = PRIO_MAX;
201 	if (n < PRIO_MIN)
202 		n = PRIO_MIN;
203 	if (n < chgp->p_nice && suser(curp))
204 		return (EACCES);
205 	chgp->p_nice = n;
206 	(void)resetpriority(chgp);
207 	return (0);
208 }
209 
210 /* rtprio system call */
211 #ifndef _SYS_SYSPROTO_H_
212 struct rtprio_args {
213 	int		function;
214 	pid_t		pid;
215 	struct rtprio	*rtp;
216 };
217 #endif
218 
219 /*
220  * Set realtime priority
221  */
222 
223 /* ARGSUSED */
224 int
225 rtprio(curp, uap)
226 	struct proc *curp;
227 	register struct rtprio_args *uap;
228 {
229 	register struct proc *p;
230 	register struct pcred *pcred = curp->p_cred;
231 	struct rtprio rtp;
232 	int error;
233 
234 	error = copyin(uap->rtp, &rtp, sizeof(struct rtprio));
235 	if (error)
236 		return (error);
237 
238 	if (uap->pid == 0)
239 		p = curp;
240 	else
241 		p = pfind(uap->pid);
242 
243 	if (p == 0)
244 		return (ESRCH);
245 
246 	switch (uap->function) {
247 	case RTP_LOOKUP:
248 		return (copyout(&p->p_rtprio, uap->rtp, sizeof(struct rtprio)));
249 	case RTP_SET:
250 		if (pcred->pc_ucred->cr_uid && pcred->p_ruid &&
251 		    pcred->pc_ucred->cr_uid != p->p_ucred->cr_uid &&
252 		    pcred->p_ruid != p->p_ucred->cr_uid)
253 		        return (EPERM);
254 		/* disallow setting rtprio in most cases if not superuser */
255 		if (suser(curp)) {
256 			/* can't set someone else's */
257 			if (uap->pid)
258 				return (EPERM);
259 			/* can't set realtime priority */
260 /*
261  * Realtime priority has to be restricted for reasons which should be
262  * obvious. However, for idle priority, there is a potential for
263  * system deadlock if an idleprio process gains a lock on a resource
264  * that other processes need (and the idleprio process can't run
265  * due to a CPU-bound normal process). Fix me! XXX
266  */
267 #if 0
268  			if (RTP_PRIO_IS_REALTIME(rtp.type))
269 #endif
270 			if (rtp.type != RTP_PRIO_NORMAL)
271 				return (EPERM);
272 		}
273 		switch (rtp.type) {
274 #ifdef RTP_PRIO_FIFO
275 		case RTP_PRIO_FIFO:
276 #endif
277 		case RTP_PRIO_REALTIME:
278 		case RTP_PRIO_NORMAL:
279 		case RTP_PRIO_IDLE:
280 			if (rtp.prio > RTP_PRIO_MAX)
281 				return (EINVAL);
282 			p->p_rtprio = rtp;
283 			return (0);
284 		default:
285 			return (EINVAL);
286 		}
287 
288 	default:
289 		return (EINVAL);
290 	}
291 }
292 
293 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
294 #ifndef _SYS_SYSPROTO_H_
295 struct osetrlimit_args {
296 	u_int	which;
297 	struct	orlimit *rlp;
298 };
299 #endif
300 /* ARGSUSED */
301 int
302 osetrlimit(p, uap)
303 	struct proc *p;
304 	register struct osetrlimit_args *uap;
305 {
306 	struct orlimit olim;
307 	struct rlimit lim;
308 	int error;
309 
310 	if ((error =
311 	    copyin((caddr_t)uap->rlp, (caddr_t)&olim, sizeof(struct orlimit))))
312 		return (error);
313 	lim.rlim_cur = olim.rlim_cur;
314 	lim.rlim_max = olim.rlim_max;
315 	return (dosetrlimit(p, uap->which, &lim));
316 }
317 
318 #ifndef _SYS_SYSPROTO_H_
319 struct ogetrlimit_args {
320 	u_int	which;
321 	struct	orlimit *rlp;
322 };
323 #endif
324 /* ARGSUSED */
325 int
326 ogetrlimit(p, uap)
327 	struct proc *p;
328 	register struct ogetrlimit_args *uap;
329 {
330 	struct orlimit olim;
331 
332 	if (uap->which >= RLIM_NLIMITS)
333 		return (EINVAL);
334 	olim.rlim_cur = p->p_rlimit[uap->which].rlim_cur;
335 	if (olim.rlim_cur == -1)
336 		olim.rlim_cur = 0x7fffffff;
337 	olim.rlim_max = p->p_rlimit[uap->which].rlim_max;
338 	if (olim.rlim_max == -1)
339 		olim.rlim_max = 0x7fffffff;
340 	return (copyout((caddr_t)&olim, (caddr_t)uap->rlp, sizeof(olim)));
341 }
342 #endif /* COMPAT_43 || COMPAT_SUNOS */
343 
344 #ifndef _SYS_SYSPROTO_H_
345 struct __setrlimit_args {
346 	u_int	which;
347 	struct	rlimit *rlp;
348 };
349 #endif
350 /* ARGSUSED */
351 int
352 setrlimit(p, uap)
353 	struct proc *p;
354 	register struct __setrlimit_args *uap;
355 {
356 	struct rlimit alim;
357 	int error;
358 
359 	if ((error =
360 	    copyin((caddr_t)uap->rlp, (caddr_t)&alim, sizeof (struct rlimit))))
361 		return (error);
362 	return (dosetrlimit(p, uap->which, &alim));
363 }
364 
365 int
366 dosetrlimit(p, which, limp)
367 	struct proc *p;
368 	u_int which;
369 	struct rlimit *limp;
370 {
371 	register struct rlimit *alimp;
372 	int error;
373 
374 	if (which >= RLIM_NLIMITS)
375 		return (EINVAL);
376 	alimp = &p->p_rlimit[which];
377 
378 	/*
379 	 * Preserve historical bugs by treating negative limits as unsigned.
380 	 */
381 	if (limp->rlim_cur < 0)
382 		limp->rlim_cur = RLIM_INFINITY;
383 	if (limp->rlim_max < 0)
384 		limp->rlim_max = RLIM_INFINITY;
385 
386 	if (limp->rlim_cur > alimp->rlim_max ||
387 	    limp->rlim_max > alimp->rlim_max)
388 		if ((error = suser_xxx(0, p, PRISON_ROOT)))
389 			return (error);
390 	if (limp->rlim_cur > limp->rlim_max)
391 		limp->rlim_cur = limp->rlim_max;
392 	if (p->p_limit->p_refcnt > 1 &&
393 	    (p->p_limit->p_lflags & PL_SHAREMOD) == 0) {
394 		p->p_limit->p_refcnt--;
395 		p->p_limit = limcopy(p->p_limit);
396 		alimp = &p->p_rlimit[which];
397 	}
398 
399 	switch (which) {
400 
401 	case RLIMIT_CPU:
402 		if (limp->rlim_cur > RLIM_INFINITY / (rlim_t)1000000)
403 			p->p_limit->p_cpulimit = RLIM_INFINITY;
404 		else
405 			p->p_limit->p_cpulimit =
406 			    (rlim_t)1000000 * limp->rlim_cur;
407 		break;
408 	case RLIMIT_DATA:
409 		if (limp->rlim_cur > MAXDSIZ)
410 			limp->rlim_cur = MAXDSIZ;
411 		if (limp->rlim_max > MAXDSIZ)
412 			limp->rlim_max = MAXDSIZ;
413 		break;
414 
415 	case RLIMIT_STACK:
416 		if (limp->rlim_cur > MAXSSIZ)
417 			limp->rlim_cur = MAXSSIZ;
418 		if (limp->rlim_max > MAXSSIZ)
419 			limp->rlim_max = MAXSSIZ;
420 		/*
421 		 * Stack is allocated to the max at exec time with only
422 		 * "rlim_cur" bytes accessible.  If stack limit is going
423 		 * up make more accessible, if going down make inaccessible.
424 		 */
425 		if (limp->rlim_cur != alimp->rlim_cur) {
426 			vm_offset_t addr;
427 			vm_size_t size;
428 			vm_prot_t prot;
429 
430 			if (limp->rlim_cur > alimp->rlim_cur) {
431 				prot = VM_PROT_ALL;
432 				size = limp->rlim_cur - alimp->rlim_cur;
433 				addr = USRSTACK - limp->rlim_cur;
434 			} else {
435 				prot = VM_PROT_NONE;
436 				size = alimp->rlim_cur - limp->rlim_cur;
437 				addr = USRSTACK - alimp->rlim_cur;
438 			}
439 			addr = trunc_page(addr);
440 			size = round_page(size);
441 			(void) vm_map_protect(&p->p_vmspace->vm_map,
442 					      addr, addr+size, prot, FALSE);
443 		}
444 		break;
445 
446 	case RLIMIT_NOFILE:
447 		if (limp->rlim_cur > maxfilesperproc)
448 			limp->rlim_cur = maxfilesperproc;
449 		if (limp->rlim_max > maxfilesperproc)
450 			limp->rlim_max = maxfilesperproc;
451 		break;
452 
453 	case RLIMIT_NPROC:
454 		if (limp->rlim_cur > maxprocperuid)
455 			limp->rlim_cur = maxprocperuid;
456 		if (limp->rlim_max > maxprocperuid)
457 			limp->rlim_max = maxprocperuid;
458 		break;
459 	}
460 	*alimp = *limp;
461 	return (0);
462 }
463 
464 #ifndef _SYS_SYSPROTO_H_
465 struct __getrlimit_args {
466 	u_int	which;
467 	struct	rlimit *rlp;
468 };
469 #endif
470 /* ARGSUSED */
471 int
472 getrlimit(p, uap)
473 	struct proc *p;
474 	register struct __getrlimit_args *uap;
475 {
476 
477 	if (uap->which >= RLIM_NLIMITS)
478 		return (EINVAL);
479 	return (copyout((caddr_t)&p->p_rlimit[uap->which], (caddr_t)uap->rlp,
480 	    sizeof (struct rlimit)));
481 }
482 
483 /*
484  * Transform the running time and tick information in proc p into user,
485  * system, and interrupt time usage.
486  */
487 void
488 calcru(p, up, sp, ip)
489 	struct proc *p;
490 	struct timeval *up;
491 	struct timeval *sp;
492 	struct timeval *ip;
493 {
494 	/* {user, system, interrupt, total} {ticks, usec}; previous tu: */
495 	u_int64_t ut, uu, st, su, it, iu, tt, tu, ptu;
496 	int s;
497 	struct timeval tv;
498 
499 	/* XXX: why spl-protect ?  worst case is an off-by-one report */
500 	s = splstatclock();
501 	ut = p->p_uticks;
502 	st = p->p_sticks;
503 	it = p->p_iticks;
504 	splx(s);
505 
506 	tt = ut + st + it;
507 	if (tt == 0) {
508 		st = 1;
509 		tt = 1;
510 	}
511 
512 	tu = p->p_runtime;
513 #ifdef SMP
514 	if (p->p_oncpu != 0xff) {
515 #else
516 	if (p == curproc) {
517 #endif
518 		/*
519 		 * Adjust for the current time slice.  This is actually fairly
520 		 * important since the error here is on the order of a time
521 		 * quantum, which is much greater than the sampling error.
522 		 */
523 		microuptime(&tv);
524 		tu += (tv.tv_usec - switchtime.tv_usec) +
525 		    (tv.tv_sec - switchtime.tv_sec) * (int64_t)1000000;
526 	}
527 	ptu = p->p_stats->p_uu + p->p_stats->p_su + p->p_stats->p_iu;
528 	if (tu < ptu || (int64_t)tu < 0) {
529 		/* XXX no %qd in kernel.  Truncate. */
530 		printf("calcru: negative time of %ld usec for pid %d (%s)\n",
531 		       (long)tu, p->p_pid, p->p_comm);
532 		tu = ptu;
533 	}
534 
535 	/* Subdivide tu. */
536 	uu = (tu * ut) / tt;
537 	su = (tu * st) / tt;
538 	iu = tu - uu - su;
539 
540 	/* Enforce monotonicity. */
541 	if (uu < p->p_stats->p_uu || su < p->p_stats->p_su ||
542 	    iu < p->p_stats->p_iu) {
543 		if (uu < p->p_stats->p_uu)
544 			uu = p->p_stats->p_uu;
545 		else if (uu + p->p_stats->p_su + p->p_stats->p_iu > tu)
546 			uu = tu - p->p_stats->p_su - p->p_stats->p_iu;
547 		if (st == 0)
548 			su = p->p_stats->p_su;
549 		else {
550 			su = ((tu - uu) * st) / (st + it);
551 			if (su < p->p_stats->p_su)
552 				su = p->p_stats->p_su;
553 			else if (uu + su + p->p_stats->p_iu > tu)
554 				su = tu - uu - p->p_stats->p_iu;
555 		}
556 		KASSERT(uu + su + p->p_stats->p_iu <= tu,
557 		    ("calcru: monotonisation botch 1"));
558 		iu = tu - uu - su;
559 		KASSERT(iu >= p->p_stats->p_iu,
560 		    ("calcru: monotonisation botch 2"));
561 	}
562 	p->p_stats->p_uu = uu;
563 	p->p_stats->p_su = su;
564 	p->p_stats->p_iu = iu;
565 
566 	up->tv_sec = uu / 1000000;
567 	up->tv_usec = uu % 1000000;
568 	sp->tv_sec = su / 1000000;
569 	sp->tv_usec = su % 1000000;
570 	if (ip != NULL) {
571 		ip->tv_sec = iu / 1000000;
572 		ip->tv_usec = iu % 1000000;
573 	}
574 }
575 
576 #ifndef _SYS_SYSPROTO_H_
577 struct getrusage_args {
578 	int	who;
579 	struct	rusage *rusage;
580 };
581 #endif
582 /* ARGSUSED */
583 int
584 getrusage(p, uap)
585 	register struct proc *p;
586 	register struct getrusage_args *uap;
587 {
588 	register struct rusage *rup;
589 
590 	switch (uap->who) {
591 
592 	case RUSAGE_SELF:
593 		rup = &p->p_stats->p_ru;
594 		calcru(p, &rup->ru_utime, &rup->ru_stime, NULL);
595 		break;
596 
597 	case RUSAGE_CHILDREN:
598 		rup = &p->p_stats->p_cru;
599 		break;
600 
601 	default:
602 		return (EINVAL);
603 	}
604 	return (copyout((caddr_t)rup, (caddr_t)uap->rusage,
605 	    sizeof (struct rusage)));
606 }
607 
608 void
609 ruadd(ru, ru2)
610 	register struct rusage *ru, *ru2;
611 {
612 	register long *ip, *ip2;
613 	register int i;
614 
615 	timevaladd(&ru->ru_utime, &ru2->ru_utime);
616 	timevaladd(&ru->ru_stime, &ru2->ru_stime);
617 	if (ru->ru_maxrss < ru2->ru_maxrss)
618 		ru->ru_maxrss = ru2->ru_maxrss;
619 	ip = &ru->ru_first; ip2 = &ru2->ru_first;
620 	for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--)
621 		*ip++ += *ip2++;
622 }
623 
624 /*
625  * Make a copy of the plimit structure.
626  * We share these structures copy-on-write after fork,
627  * and copy when a limit is changed.
628  */
629 struct plimit *
630 limcopy(lim)
631 	struct plimit *lim;
632 {
633 	register struct plimit *copy;
634 
635 	MALLOC(copy, struct plimit *, sizeof(struct plimit),
636 	    M_SUBPROC, M_WAITOK);
637 	bcopy(lim->pl_rlimit, copy->pl_rlimit, sizeof(struct plimit));
638 	copy->p_lflags = 0;
639 	copy->p_refcnt = 1;
640 	return (copy);
641 }
642