xref: /freebsd/sys/kern/kern_resource.c (revision 1a2cdef4962b47be5057809ce730a733b7f3c27c)
/*-
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_resource.c	8.5 (Berkeley) 1/21/94
 * $FreeBSD$
 */

#include "opt_compat.h"
#include "opt_rlimit.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/file.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/sx.h>
#include <sys/time.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>

static int donice __P((struct proc *curp, struct proc *chgp, int n));
/* dosetrlimit non-static:  Needed by SysVR4 emulator */
int dosetrlimit __P((struct proc *p, u_int which, struct rlimit *limp));

static MALLOC_DEFINE(M_UIDINFO, "uidinfo", "uidinfo structures");
#define	UIHASH(uid)	(&uihashtbl[(uid) & uihash])
static struct mtx uihashtbl_mtx;
static LIST_HEAD(uihashhead, uidinfo) *uihashtbl;
static u_long uihash;		/* size of hash table - 1 */

static struct uidinfo	*uilookup __P((uid_t uid));

/*
 * Resource controls and accounting.
 */

#ifndef _SYS_SYSPROTO_H_
struct getpriority_args {
	int	which;
	int	who;
};
#endif
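/*
 * Return the lowest nice value among the processes selected by "which"
 * and "who" (a single process, a process group, or all processes owned
 * by a uid).  Targets the caller cannot see are skipped; if nothing
 * matches, the PRIO_MAX + 1 sentinel survives and ESRCH is returned.
 */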
int
getpriority(curp, uap)
	struct proc *curp;
	register struct getpriority_args *uap;
{
	register struct proc *p;
	register int low = PRIO_MAX + 1;

	switch (uap->which) {

	case PRIO_PROCESS:
		if (uap->who == 0)
			p = curp;
		else
			p = pfind(uap->who);
		if (p == 0)
			break;
		if (p_can(curp, p, P_CAN_SEE, NULL))
			break;
		low = p->p_nice;
		break;

	case PRIO_PGRP: {
		register struct pgrp *pg;

		if (uap->who == 0)
			pg = curp->p_pgrp;
		else if ((pg = pgfind(uap->who)) == NULL)
			break;
		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			if (!p_can(curp, p, P_CAN_SEE, NULL) && p->p_nice < low)
				low = p->p_nice;
		}
		break;
	}

	case PRIO_USER:
		if (uap->who == 0)
			uap->who = curp->p_ucred->cr_uid;
		sx_slock(&allproc_lock);
		LIST_FOREACH(p, &allproc, p_list)
			if (!p_can(curp, p, P_CAN_SEE, NULL) &&
			    p->p_ucred->cr_uid == uap->who &&
			    p->p_nice < low)
				low = p->p_nice;
		sx_sunlock(&allproc_lock);
		break;

	default:
		return (EINVAL);
	}
	if (low == PRIO_MAX + 1)
		return (ESRCH);
	curp->p_retval[0] = low;
	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct setpriority_args {
	int	which;
	int	who;
	int	prio;
};
#endif
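/*
 * Apply donice() to every process selected by "which" and "who" that
 * the caller is allowed to see.  Returns ESRCH if no process matched;
 * otherwise the error (if any) from the last donice() call.
 */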
/* ARGSUSED */
int
setpriority(curp, uap)
	struct proc *curp;
	register struct setpriority_args *uap;
{
	register struct proc *p;
	int found = 0, error = 0;

	switch (uap->which) {

	case PRIO_PROCESS:
		if (uap->who == 0)
			p = curp;
		else
			p = pfind(uap->who);
		if (p == 0)
			break;
		if (p_can(curp, p, P_CAN_SEE, NULL))
			break;
		error = donice(curp, p, uap->prio);
		found++;
		break;

	case PRIO_PGRP: {
		register struct pgrp *pg;

		if (uap->who == 0)
			pg = curp->p_pgrp;
		else if ((pg = pgfind(uap->who)) == NULL)
			break;
		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			if (!p_can(curp, p, P_CAN_SEE, NULL)) {
				error = donice(curp, p, uap->prio);
				found++;
			}
		}
		break;
	}

	case PRIO_USER:
		if (uap->who == 0)
			uap->who = curp->p_ucred->cr_uid;
		sx_slock(&allproc_lock);
		LIST_FOREACH(p, &allproc, p_list)
			if (p->p_ucred->cr_uid == uap->who &&
			    !p_can(curp, p, P_CAN_SEE, NULL)) {
				error = donice(curp, p, uap->prio);
				found++;
			}
		sx_sunlock(&allproc_lock);
		break;

	default:
		return (EINVAL);
	}
	if (found == 0)
		return (ESRCH);
	return (error);
}

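/*
 * Set the nice value of chgp, clamping it to [PRIO_MIN, PRIO_MAX].
 * Only the superuser may lower a process's nice value.
 */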
static int
donice(curp, chgp, n)
	register struct proc *curp, *chgp;
	register int n;
{
	int	error;

	if ((error = p_can(curp, chgp, P_CAN_SCHED, NULL)))
		return (error);
	if (n > PRIO_MAX)
		n = PRIO_MAX;
	if (n < PRIO_MIN)
		n = PRIO_MIN;
	if (n < chgp->p_nice && suser(curp))
		return (EACCES);
	chgp->p_nice = n;
	(void)resetpriority(chgp);
	return (0);
}

/* rtprio system call */
#ifndef _SYS_SYSPROTO_H_
struct rtprio_args {
	int		function;
	pid_t		pid;
	struct rtprio	*rtp;
};
#endif

/*
 * Set or look up realtime priority.
 */

/* ARGSUSED */
int
rtprio(curp, uap)
	struct proc *curp;
	register struct rtprio_args *uap;
{
	register struct proc *p;
	struct rtprio rtp;
	int error;

	error = copyin(uap->rtp, &rtp, sizeof(struct rtprio));
	if (error)
		return (error);

	if (uap->pid == 0)
		p = curp;
	else
		p = pfind(uap->pid);

	if (p == 0)
		return (ESRCH);

	switch (uap->function) {
	case RTP_LOOKUP:
		if ((error = p_can(curp, p, P_CAN_SEE, NULL)))
			return (error);
		pri_to_rtp(&p->p_pri, &rtp);
		return (copyout(&rtp, uap->rtp, sizeof(struct rtprio)));
	case RTP_SET:
		if ((error = p_can(curp, p, P_CAN_SCHED, NULL)))
			return (error);
		/* disallow setting rtprio in most cases if not superuser */
		if (suser(curp) != 0) {
			/* can't set someone else's */
			if (uap->pid)
				return (EPERM);
			/* can't set realtime priority */
/*
 * Realtime priority has to be restricted for reasons which should be
 * obvious. However, for idle priority, there is a potential for
 * system deadlock if an idleprio process gains a lock on a resource
 * that other processes need (and the idleprio process can't run
 * due to a CPU-bound normal process). Fix me! XXX
 */
#if 0
			if (RTP_PRIO_IS_REALTIME(rtp.type))
#endif
			if (rtp.type != RTP_PRIO_NORMAL)
				return (EPERM);
		}
		if (rtp_to_pri(&rtp, &p->p_pri) == 0)
			return (0);
		return (EINVAL);
	default:
		return (EINVAL);
	}
}

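/*
 * Convert between the userland rtprio representation (class plus an
 * offset within the class) and the kernel's struct priority.  Each
 * rtprio class maps onto a contiguous range of priority levels
 * starting at the class's PRI_MIN_* value.
 */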
int
rtp_to_pri(struct rtprio *rtp, struct priority *pri)
{

	if (rtp->prio > RTP_PRIO_MAX)
		return (-1);
	switch (RTP_PRIO_BASE(rtp->type)) {
	case RTP_PRIO_REALTIME:
		pri->pri_level = PRI_MIN_REALTIME + rtp->prio;
		break;
	case RTP_PRIO_NORMAL:
		pri->pri_level = PRI_MIN_TIMESHARE + rtp->prio;
		break;
	case RTP_PRIO_IDLE:
		pri->pri_level = PRI_MIN_IDLE + rtp->prio;
		break;
	default:
		return (-1);
	}
	pri->pri_class = rtp->type;
	pri->pri_native = pri->pri_level;
	pri->pri_user = pri->pri_level;
	return (0);
}

void
pri_to_rtp(struct priority *pri, struct rtprio *rtp)
{

	switch (PRI_BASE(pri->pri_class)) {
	case PRI_REALTIME:
		rtp->prio = pri->pri_level - PRI_MIN_REALTIME;
		break;
	case PRI_TIMESHARE:
		rtp->prio = pri->pri_level - PRI_MIN_TIMESHARE;
		break;
	case PRI_IDLE:
		rtp->prio = pri->pri_level - PRI_MIN_IDLE;
		break;
	default:
		break;
	}
	rtp->type = pri->pri_class;
}

#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
#ifndef _SYS_SYSPROTO_H_
struct osetrlimit_args {
	u_int	which;
	struct	orlimit *rlp;
};
#endif
/* ARGSUSED */
int
osetrlimit(p, uap)
	struct proc *p;
	register struct osetrlimit_args *uap;
{
	struct orlimit olim;
	struct rlimit lim;
	int error;

	if ((error =
	    copyin((caddr_t)uap->rlp, (caddr_t)&olim, sizeof(struct orlimit))))
		return (error);
	lim.rlim_cur = olim.rlim_cur;
	lim.rlim_max = olim.rlim_max;
	return (dosetrlimit(p, uap->which, &lim));
}

#ifndef _SYS_SYSPROTO_H_
struct ogetrlimit_args {
	u_int	which;
	struct	orlimit *rlp;
};
#endif
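/*
 * Old binaries use 32-bit limits, so clamp RLIM_INFINITY (-1) to the
 * largest value representable in an orlimit field.
 */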
/* ARGSUSED */
int
ogetrlimit(p, uap)
	struct proc *p;
	register struct ogetrlimit_args *uap;
{
	struct orlimit olim;

	if (uap->which >= RLIM_NLIMITS)
		return (EINVAL);
	olim.rlim_cur = p->p_rlimit[uap->which].rlim_cur;
	if (olim.rlim_cur == -1)
		olim.rlim_cur = 0x7fffffff;
	olim.rlim_max = p->p_rlimit[uap->which].rlim_max;
	if (olim.rlim_max == -1)
		olim.rlim_max = 0x7fffffff;
	return (copyout((caddr_t)&olim, (caddr_t)uap->rlp, sizeof(olim)));
}
#endif /* COMPAT_43 || COMPAT_SUNOS */

#ifndef _SYS_SYSPROTO_H_
struct __setrlimit_args {
	u_int	which;
	struct	rlimit *rlp;
};
#endif
/* ARGSUSED */
int
setrlimit(p, uap)
	struct proc *p;
	register struct __setrlimit_args *uap;
{
	struct rlimit alim;
	int error;

	if ((error =
	    copyin((caddr_t)uap->rlp, (caddr_t)&alim, sizeof (struct rlimit))))
		return (error);
	return (dosetrlimit(p, uap->which, &alim));
}

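/*
 * Validate and install a new resource limit.  Raising either limit
 * above the current hard limit requires superuser privilege, values
 * are clamped to the system maxima, and the plimit structure is
 * copied first if it is still shared copy-on-write with another
 * process.
 */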
int
dosetrlimit(p, which, limp)
	struct proc *p;
	u_int which;
	struct rlimit *limp;
{
	register struct rlimit *alimp;
	int error;

	if (which >= RLIM_NLIMITS)
		return (EINVAL);
	alimp = &p->p_rlimit[which];

	/*
	 * Preserve historical bugs by treating negative limits as unsigned.
	 */
	if (limp->rlim_cur < 0)
		limp->rlim_cur = RLIM_INFINITY;
	if (limp->rlim_max < 0)
		limp->rlim_max = RLIM_INFINITY;

	if (limp->rlim_cur > alimp->rlim_max ||
	    limp->rlim_max > alimp->rlim_max)
		if ((error = suser_xxx(0, p, PRISON_ROOT)))
			return (error);
	if (limp->rlim_cur > limp->rlim_max)
		limp->rlim_cur = limp->rlim_max;
	if (p->p_limit->p_refcnt > 1 &&
	    (p->p_limit->p_lflags & PL_SHAREMOD) == 0) {
		p->p_limit->p_refcnt--;
		p->p_limit = limcopy(p->p_limit);
		alimp = &p->p_rlimit[which];
	}

	switch (which) {

	case RLIMIT_CPU:
		if (limp->rlim_cur > RLIM_INFINITY / (rlim_t)1000000)
			p->p_limit->p_cpulimit = RLIM_INFINITY;
		else
			p->p_limit->p_cpulimit =
			    (rlim_t)1000000 * limp->rlim_cur;
		break;
	case RLIMIT_DATA:
		if (limp->rlim_cur > MAXDSIZ)
			limp->rlim_cur = MAXDSIZ;
		if (limp->rlim_max > MAXDSIZ)
			limp->rlim_max = MAXDSIZ;
		break;

	case RLIMIT_STACK:
		if (limp->rlim_cur > MAXSSIZ)
			limp->rlim_cur = MAXSSIZ;
		if (limp->rlim_max > MAXSSIZ)
			limp->rlim_max = MAXSSIZ;
		/*
		 * Stack is allocated to the max at exec time with only
		 * "rlim_cur" bytes accessible.  If the stack limit is
		 * going up, make more bytes accessible; if it is going
		 * down, make those bytes inaccessible.
		 */
		if (limp->rlim_cur != alimp->rlim_cur) {
			vm_offset_t addr;
			vm_size_t size;
			vm_prot_t prot;

			if (limp->rlim_cur > alimp->rlim_cur) {
				prot = VM_PROT_ALL;
				size = limp->rlim_cur - alimp->rlim_cur;
				addr = USRSTACK - limp->rlim_cur;
			} else {
				prot = VM_PROT_NONE;
				size = alimp->rlim_cur - limp->rlim_cur;
				addr = USRSTACK - alimp->rlim_cur;
			}
			addr = trunc_page(addr);
			size = round_page(size);
			(void) vm_map_protect(&p->p_vmspace->vm_map,
					      addr, addr+size, prot, FALSE);
		}
		break;

	case RLIMIT_NOFILE:
		if (limp->rlim_cur > maxfilesperproc)
			limp->rlim_cur = maxfilesperproc;
		if (limp->rlim_max > maxfilesperproc)
			limp->rlim_max = maxfilesperproc;
		break;

	case RLIMIT_NPROC:
		if (limp->rlim_cur > maxprocperuid)
			limp->rlim_cur = maxprocperuid;
		if (limp->rlim_max > maxprocperuid)
			limp->rlim_max = maxprocperuid;
		if (limp->rlim_cur < 1)
			limp->rlim_cur = 1;
		if (limp->rlim_max < 1)
			limp->rlim_max = 1;
		break;
	}
	*alimp = *limp;
	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct __getrlimit_args {
	u_int	which;
	struct	rlimit *rlp;
};
#endif
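/*
 * Copy out the current soft and hard limits for the given resource.
 */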
/* ARGSUSED */
int
getrlimit(p, uap)
	struct proc *p;
	register struct __getrlimit_args *uap;
{

	if (uap->which >= RLIM_NLIMITS)
		return (EINVAL);
	return (copyout((caddr_t)&p->p_rlimit[uap->which], (caddr_t)uap->rlp,
	    sizeof (struct rlimit)));
}

/*
 * Transform the running time and tick information in proc p into user,
 * system, and interrupt time usage.  The accumulated runtime is split
 * among the three categories in proportion to the statclock ticks
 * charged to each.
 */
void
calcru(p, up, sp, ip)
	struct proc *p;
	struct timeval *up;
	struct timeval *sp;
	struct timeval *ip;
{
	/* {user, system, interrupt, total} {ticks, usec}; previous tu: */
	u_int64_t ut, uu, st, su, it, iu, tt, tu, ptu;
	int s;
	struct timeval tv;

	mtx_assert(&sched_lock, MA_OWNED);
	/* XXX: why spl-protect ?  worst case is an off-by-one report */
	s = splstatclock();
	ut = p->p_uticks;
	st = p->p_sticks;
	it = p->p_iticks;
	splx(s);

	tt = ut + st + it;
	if (tt == 0) {
		st = 1;
		tt = 1;
	}

	tu = p->p_runtime;
	if (p == curproc) {
		/*
		 * Adjust for the current time slice.  This is actually fairly
		 * important since the error here is on the order of a time
		 * quantum, which is much greater than the sampling error.
		 */
		microuptime(&tv);
		if (timevalcmp(&tv, PCPU_PTR(switchtime), <))
			printf("microuptime() went backwards (%ld.%06ld -> %ld.%06ld)\n",
			    PCPU_GET(switchtime.tv_sec), PCPU_GET(switchtime.tv_usec),
			    tv.tv_sec, tv.tv_usec);
		else
			tu += (tv.tv_usec - PCPU_GET(switchtime.tv_usec)) +
			    (tv.tv_sec - PCPU_GET(switchtime.tv_sec)) *
			    (int64_t)1000000;
	}
	ptu = p->p_uu + p->p_su + p->p_iu;
	if (tu < ptu || (int64_t)tu < 0) {
		/* XXX no %qd in kernel.  Truncate. */
		printf("calcru: negative time of %ld usec for pid %d (%s)\n",
		       (long)tu, p->p_pid, p->p_comm);
		tu = ptu;
	}

	/* Subdivide tu. */
	uu = (tu * ut) / tt;
	su = (tu * st) / tt;
	iu = tu - uu - su;

	/* Enforce monotonicity. */
	if (uu < p->p_uu || su < p->p_su || iu < p->p_iu) {
		if (uu < p->p_uu)
			uu = p->p_uu;
		else if (uu + p->p_su + p->p_iu > tu)
			uu = tu - p->p_su - p->p_iu;
		if (st == 0)
			su = p->p_su;
		else {
			su = ((tu - uu) * st) / (st + it);
			if (su < p->p_su)
				su = p->p_su;
			else if (uu + su + p->p_iu > tu)
				su = tu - uu - p->p_iu;
		}
		KASSERT(uu + su + p->p_iu <= tu,
		    ("calcru: monotonisation botch 1"));
		iu = tu - uu - su;
		KASSERT(iu >= p->p_iu,
		    ("calcru: monotonisation botch 2"));
	}
	p->p_uu = uu;
	p->p_su = su;
	p->p_iu = iu;

	up->tv_sec = uu / 1000000;
	up->tv_usec = uu % 1000000;
	sp->tv_sec = su / 1000000;
	sp->tv_usec = su % 1000000;
	if (ip != NULL) {
		ip->tv_sec = iu / 1000000;
		ip->tv_usec = iu % 1000000;
	}
}

#ifndef _SYS_SYSPROTO_H_
struct getrusage_args {
	int	who;
	struct	rusage *rusage;
};
#endif
/* ARGSUSED */
int
getrusage(p, uap)
	register struct proc *p;
	register struct getrusage_args *uap;
{
	register struct rusage *rup;

	switch (uap->who) {

	case RUSAGE_SELF:
		rup = &p->p_stats->p_ru;
		mtx_lock_spin(&sched_lock);
		calcru(p, &rup->ru_utime, &rup->ru_stime, NULL);
		mtx_unlock_spin(&sched_lock);
		break;

	case RUSAGE_CHILDREN:
		rup = &p->p_stats->p_cru;
		break;

	default:
		return (EINVAL);
	}
	return (copyout((caddr_t)rup, (caddr_t)uap->rusage,
	    sizeof (struct rusage)));
}

void
ruadd(ru, ru2)
	register struct rusage *ru, *ru2;
{
	register long *ip, *ip2;
	register int i;

	timevaladd(&ru->ru_utime, &ru2->ru_utime);
	timevaladd(&ru->ru_stime, &ru2->ru_stime);
	if (ru->ru_maxrss < ru2->ru_maxrss)
		ru->ru_maxrss = ru2->ru_maxrss;
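	/*
	 * Sum the remaining fields pairwise; ru_first and ru_last
	 * bracket the block of longs in struct rusage.
	 */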
	ip = &ru->ru_first; ip2 = &ru2->ru_first;
	for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--)
		*ip++ += *ip2++;
}

/*
 * Make a copy of the plimit structure.
 * We share these structures copy-on-write after fork,
 * and copy when a limit is changed.
 */
struct plimit *
limcopy(lim)
	struct plimit *lim;
{
	register struct plimit *copy;

	MALLOC(copy, struct plimit *, sizeof(struct plimit),
	    M_SUBPROC, M_WAITOK);
	bcopy(lim->pl_rlimit, copy->pl_rlimit, sizeof(struct plimit));
	copy->p_lflags = 0;
	copy->p_refcnt = 1;
	return (copy);
}

/*
 * Find the uidinfo structure for a uid.  This structure is used to
 * track the total resource consumption (process count, socket buffer
 * size, etc.) for the uid and impose limits.
 */
void
uihashinit()
{

	uihashtbl = hashinit(maxproc / 16, M_UIDINFO, &uihash);
	mtx_init(&uihashtbl_mtx, "uidinfo hash", MTX_DEF);
}

/*
 * Look up a uidinfo struct for the parameter uid.
 * uihashtbl_mtx must be locked.
 */
static struct uidinfo *
uilookup(uid)
	uid_t uid;
{
	struct	uihashhead *uipp;
	struct	uidinfo *uip;

	mtx_assert(&uihashtbl_mtx, MA_OWNED);
	uipp = UIHASH(uid);
	LIST_FOREACH(uip, uipp, ui_hash)
		if (uip->ui_uid == uid)
			break;

	return (uip);
}

/*
 * Find or allocate a struct uidinfo for a particular uid.
 * Increase refcount on uidinfo struct returned.
 * uifree() should be called on a struct uidinfo when released.
 */
struct uidinfo *
uifind(uid)
	uid_t uid;
{
	struct	uidinfo *uip;

	mtx_lock(&uihashtbl_mtx);
	uip = uilookup(uid);
	if (uip == NULL) {
		struct  uidinfo *old_uip;

		mtx_unlock(&uihashtbl_mtx);
		uip = malloc(sizeof(*uip), M_UIDINFO, M_WAITOK | M_ZERO);
		mtx_lock(&uihashtbl_mtx);
		/*
		 * There's a chance someone created our uidinfo while we
		 * were in malloc and not holding the lock, so we have to
		 * make sure we don't insert a duplicate uidinfo.
		 */
		if ((old_uip = uilookup(uid)) != NULL) {
			/* someone else beat us to it */
			free(uip, M_UIDINFO);
			uip = old_uip;
		} else {
			mtx_init(&uip->ui_mtx, "uidinfo struct", MTX_DEF);
			uip->ui_uid = uid;
			LIST_INSERT_HEAD(UIHASH(uid), uip, ui_hash);
		}
	}
	uihold(uip);
	mtx_unlock(&uihashtbl_mtx);
	return (uip);
}

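/*
 * Illustrative usage of the uidinfo interface (a sketch, not part of
 * the original source):
 *
 *	uip = uifind(uid);		-- returns a referenced uidinfo
 *	chgproccnt(uip, 1, max);	-- charge resources against it
 *	uifree(uip);			-- drop the reference when done
 */
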
/*
 * Place another refcount on a uidinfo struct.
 */
void
uihold(uip)
	struct uidinfo *uip;
{

	mtx_lock(&uip->ui_mtx);
	uip->ui_ref++;
	mtx_unlock(&uip->ui_mtx);
}

/*-
 * Since uidinfo structs have a long lifetime, we use an
 * opportunistic refcounting scheme to avoid locking the lookup hash
 * for each release.
 *
 * If the refcount hits 0, we need to free the structure,
 * which means we need to lock the hash.
 * Optimal case:
 *   After locking the struct and lowering the refcount, if we find
 *   that we don't need to free, simply unlock and return.
 * Suboptimal case:
 *   If refcount lowering results in need to free, bump the count
 *   back up, lose the lock, and acquire the locks in the proper
 *   order to try again.
 */
void
uifree(uip)
	struct uidinfo *uip;
{

	/* Prepare for optimal case. */
	mtx_lock(&uip->ui_mtx);

	if (--uip->ui_ref != 0) {
		mtx_unlock(&uip->ui_mtx);
		return;
	}

	/* Prepare for suboptimal case. */
	uip->ui_ref++;
	mtx_unlock(&uip->ui_mtx);
	mtx_lock(&uihashtbl_mtx);
	mtx_lock(&uip->ui_mtx);

	/*
	 * We must subtract one from the count again because we backed out
	 * our initial subtraction before dropping the lock.
	 * Since another thread may have added a reference after we dropped the
	 * initial lock we have to test for zero again.
	 */
	if (--uip->ui_ref == 0) {
		LIST_REMOVE(uip, ui_hash);
		mtx_unlock(&uihashtbl_mtx);
		if (uip->ui_sbsize != 0)
			/* XXX no %qd in kernel.  Truncate. */
			printf("freeing uidinfo: uid = %d, sbsize = %ld\n",
			    uip->ui_uid, (long)uip->ui_sbsize);
		if (uip->ui_proccnt != 0)
			printf("freeing uidinfo: uid = %d, proccnt = %ld\n",
			    uip->ui_uid, uip->ui_proccnt);
		mtx_destroy(&uip->ui_mtx);
		FREE(uip, M_UIDINFO);
		return;
	}

	mtx_unlock(&uihashtbl_mtx);
	mtx_unlock(&uip->ui_mtx);
}

/*
 * Change the count associated with number of processes
 * a given user is using.  When 'max' is 0, don't enforce a limit.
 * Returns 0 if the increment would push the count over 'max',
 * 1 otherwise.
 */
int
chgproccnt(uip, diff, max)
	struct	uidinfo	*uip;
	int	diff;
	int	max;
{

	mtx_lock(&uip->ui_mtx);
	/* don't allow them to exceed max, but allow subtraction */
	if (diff > 0 && uip->ui_proccnt + diff > max && max != 0) {
		mtx_unlock(&uip->ui_mtx);
		return (0);
	}
	uip->ui_proccnt += diff;
	if (uip->ui_proccnt < 0)
		printf("negative proccnt for uid = %d\n", uip->ui_uid);
	mtx_unlock(&uip->ui_mtx);
	return (1);
}

/*
 * Change the total socket buffer size a user has used.
 * Returns 0 if the change would push the total past 'max',
 * 1 otherwise.
 */
int
chgsbsize(uip, hiwat, to, max)
	struct	uidinfo	*uip;
	u_long *hiwat;
	u_long	to;
	rlim_t	max;
{
	rlim_t new;
	int s;

	s = splnet();
	mtx_lock(&uip->ui_mtx);
	new = uip->ui_sbsize + to - *hiwat;
	/* don't allow them to exceed max, but allow subtraction */
	if (to > *hiwat && new > max) {
		splx(s);
		mtx_unlock(&uip->ui_mtx);
		return (0);
	}
	uip->ui_sbsize = new;
	*hiwat = to;
	if (uip->ui_sbsize < 0)
		printf("negative sbsize for uid = %d\n", uip->ui_uid);
	splx(s);
	mtx_unlock(&uip->ui_mtx);
	return (1);
}