/*-
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_resource.c	8.5 (Berkeley) 1/21/94
 * $FreeBSD$
 */

#include "opt_compat.h"
#include "opt_rlimit.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/file.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/sx.h>
#include <sys/time.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>

static int donice __P((struct proc *curp, struct proc *chgp, int n));
/* dosetrlimit non-static:  Needed by SysVR4 emulator */
int dosetrlimit __P((struct proc *p, u_int which, struct rlimit *limp));

static MALLOC_DEFINE(M_UIDINFO, "uidinfo", "uidinfo structures");
#define	UIHASH(uid)	(&uihashtbl[(uid) & uihash])
static struct mtx uihashtbl_mtx;
static LIST_HEAD(uihashhead, uidinfo) *uihashtbl;
static u_long uihash;		/* size of hash table - 1 */

static struct uidinfo	*uilookup __P((uid_t uid));
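
/*
 * Illustrative note (not part of the original file): hashinit() sizes
 * the table to a power of two, so uihash is a power of two minus one
 * and UIHASH() is a simple mask.  For example, with uihash == 63,
 * uid 1234 hashes to bucket 1234 & 63 == 18.
 */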

/*
 * Resource controls and accounting.
 */

#ifndef _SYS_SYSPROTO_H_
struct getpriority_args {
	int	which;
	int	who;
};
#endif
int
getpriority(curp, uap)
	struct proc *curp;
	register struct getpriority_args *uap;
{
	register struct proc *p;
	register int low = PRIO_MAX + 1;

	switch (uap->which) {

	case PRIO_PROCESS:
		if (uap->who == 0)
			p = curp;
		else
			p = pfind(uap->who);
		if (p == 0)
			break;
		if (p_can(curp, p, P_CAN_SEE, NULL))
			break;
		low = p->p_nice;
		break;

	case PRIO_PGRP: {
		register struct pgrp *pg;

		if (uap->who == 0)
			pg = curp->p_pgrp;
		else if ((pg = pgfind(uap->who)) == NULL)
			break;
		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			if (!p_can(curp, p, P_CAN_SEE, NULL) && p->p_nice < low)
				low = p->p_nice;
		}
		break;
	}

	case PRIO_USER:
		if (uap->who == 0)
			uap->who = curp->p_ucred->cr_uid;
		sx_slock(&allproc_lock);
		LIST_FOREACH(p, &allproc, p_list)
			if (!p_can(curp, p, P_CAN_SEE, NULL) &&
			    p->p_ucred->cr_uid == uap->who &&
			    p->p_nice < low)
				low = p->p_nice;
		sx_sunlock(&allproc_lock);
		break;

	default:
		return (EINVAL);
	}
	if (low == PRIO_MAX + 1)
		return (ESRCH);
	curp->p_retval[0] = low;
	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct setpriority_args {
	int	which;
	int	who;
	int	prio;
};
#endif
/* ARGSUSED */
int
setpriority(curp, uap)
	struct proc *curp;
	register struct setpriority_args *uap;
{
	register struct proc *p;
	int found = 0, error = 0;

	switch (uap->which) {

	case PRIO_PROCESS:
		if (uap->who == 0)
			p = curp;
		else
			p = pfind(uap->who);
		if (p == 0)
			break;
		if (p_can(curp, p, P_CAN_SEE, NULL))
			break;
		error = donice(curp, p, uap->prio);
		found++;
		break;

	case PRIO_PGRP: {
		register struct pgrp *pg;

		if (uap->who == 0)
			pg = curp->p_pgrp;
		else if ((pg = pgfind(uap->who)) == NULL)
			break;
		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			if (!p_can(curp, p, P_CAN_SEE, NULL)) {
				error = donice(curp, p, uap->prio);
				found++;
			}
		}
		break;
	}

	case PRIO_USER:
		if (uap->who == 0)
			uap->who = curp->p_ucred->cr_uid;
		sx_slock(&allproc_lock);
		LIST_FOREACH(p, &allproc, p_list)
			if (p->p_ucred->cr_uid == uap->who &&
			    !p_can(curp, p, P_CAN_SEE, NULL)) {
				error = donice(curp, p, uap->prio);
				found++;
			}
		sx_sunlock(&allproc_lock);
		break;

	default:
		return (EINVAL);
	}
	if (found == 0)
		return (ESRCH);
	return (error);
}
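
/*
 * Usage sketch (illustrative, userland view of the two syscalls
 * implemented above, not part of this file): query and then adjust
 * the nice value of the calling process.  renice_self() is a
 * hypothetical helper name.
 */
#if 0
#include <sys/resource.h>
#include <errno.h>

int
renice_self(int prio)
{

	/* getpriority() may legitimately return -1, so check errno. */
	errno = 0;
	if (getpriority(PRIO_PROCESS, 0) == -1 && errno != 0)
		return (-1);
	return (setpriority(PRIO_PROCESS, 0, prio));
}
#endif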

static int
donice(curp, chgp, n)
	register struct proc *curp, *chgp;
	register int n;
{
	int	error;

	if ((error = p_can(curp, chgp, P_CAN_SCHED, NULL)))
		return (error);
	if (n > PRIO_MAX)
		n = PRIO_MAX;
	if (n < PRIO_MIN)
		n = PRIO_MIN;
	if (n < chgp->p_nice && suser(curp))
		return (EACCES);
	chgp->p_nice = n;
	(void)resetpriority(chgp);
	return (0);
}

/* rtprio system call */
#ifndef _SYS_SYSPROTO_H_
struct rtprio_args {
	int		function;
	pid_t		pid;
	struct rtprio	*rtp;
};
#endif

/*
 * Set realtime priority
 */

/* ARGSUSED */
int
rtprio(curp, uap)
	struct proc *curp;
	register struct rtprio_args *uap;
{
	register struct proc *p;
	struct rtprio rtp;
	int error;

	error = copyin(uap->rtp, &rtp, sizeof(struct rtprio));
	if (error)
		return (error);

	if (uap->pid == 0)
		p = curp;
	else
		p = pfind(uap->pid);

	if (p == 0)
		return (ESRCH);

	switch (uap->function) {
	case RTP_LOOKUP:
		pri_to_rtp(&p->p_pri, &rtp);
		return (copyout(&rtp, uap->rtp, sizeof(struct rtprio)));
	case RTP_SET:
		if ((error = p_can(curp, p, P_CAN_SCHED, NULL)))
			return (error);
		/* disallow setting rtprio in most cases if not superuser */
		if (suser(curp) != 0) {
			/* can't set someone else's */
			if (uap->pid)
				return (EPERM);
			/* can't set realtime priority */
/*
 * Realtime priority has to be restricted for reasons which should be
 * obvious. However, for idle priority, there is a potential for
 * system deadlock if an idleprio process gains a lock on a resource
 * that other processes need (and the idleprio process can't run
 * due to a CPU-bound normal process). Fix me! XXX
 */
#if 0
			if (RTP_PRIO_IS_REALTIME(rtp.type))
#endif
			if (rtp.type != RTP_PRIO_NORMAL)
				return (EPERM);
		}
		if (rtp_to_pri(&rtp, &p->p_pri) == 0)
			return (0);
		return (EINVAL);
	default:
		return (EINVAL);
	}
}
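
/*
 * Usage sketch (illustrative, userland view of rtprio(2), not part of
 * this file): look up the current scheduling class of the calling
 * process and move it into the idle class.  idle_self() is a
 * hypothetical helper name; as enforced above, RTP_SET to a class
 * other than RTP_PRIO_NORMAL requires superuser privilege.
 */
#if 0
#include <sys/types.h>
#include <sys/rtprio.h>

int
idle_self(void)
{
	struct rtprio rtp;

	if (rtprio(RTP_LOOKUP, 0, &rtp) != 0)
		return (-1);
	rtp.type = RTP_PRIO_IDLE;
	rtp.prio = RTP_PRIO_MAX;
	return (rtprio(RTP_SET, 0, &rtp));
}
#endif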

int
rtp_to_pri(struct rtprio *rtp, struct priority *pri)
{

	if (rtp->prio > RTP_PRIO_MAX)
		return (-1);
	switch (RTP_PRIO_BASE(rtp->type)) {
	case RTP_PRIO_REALTIME:
		pri->pri_level = PRI_MIN_REALTIME + rtp->prio;
		break;
	case RTP_PRIO_NORMAL:
		pri->pri_level = PRI_MIN_TIMESHARE + rtp->prio;
		break;
	case RTP_PRIO_IDLE:
		pri->pri_level = PRI_MIN_IDLE + rtp->prio;
		break;
	default:
		return (-1);
	}
	pri->pri_class = rtp->type;
	pri->pri_native = pri->pri_level;
	pri->pri_user = pri->pri_level;
	return (0);
}

void
pri_to_rtp(struct priority *pri, struct rtprio *rtp)
{

	switch (PRI_BASE(pri->pri_class)) {
	case PRI_REALTIME:
		rtp->prio = pri->pri_level - PRI_MIN_REALTIME;
		break;
	case PRI_TIMESHARE:
		rtp->prio = pri->pri_level - PRI_MIN_TIMESHARE;
		break;
	case PRI_IDLE:
		rtp->prio = pri->pri_level - PRI_MIN_IDLE;
		break;
	default:
		break;
	}
	rtp->type = pri->pri_class;
}
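
/*
 * Worked example (illustrative): an rtprio of { RTP_PRIO_REALTIME, 5 }
 * maps to pri_level == PRI_MIN_REALTIME + 5 in rtp_to_pri(), and
 * pri_to_rtp() inverts the mapping by subtracting the same per-class
 * base, so the two conversions round-trip within each class.
 */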

#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
#ifndef _SYS_SYSPROTO_H_
struct osetrlimit_args {
	u_int	which;
	struct	orlimit *rlp;
};
#endif
/* ARGSUSED */
int
osetrlimit(p, uap)
	struct proc *p;
	register struct osetrlimit_args *uap;
{
	struct orlimit olim;
	struct rlimit lim;
	int error;

	if ((error =
	    copyin((caddr_t)uap->rlp, (caddr_t)&olim, sizeof(struct orlimit))))
		return (error);
	lim.rlim_cur = olim.rlim_cur;
	lim.rlim_max = olim.rlim_max;
	return (dosetrlimit(p, uap->which, &lim));
}

#ifndef _SYS_SYSPROTO_H_
struct ogetrlimit_args {
	u_int	which;
	struct	orlimit *rlp;
};
#endif
/* ARGSUSED */
int
ogetrlimit(p, uap)
	struct proc *p;
	register struct ogetrlimit_args *uap;
{
	struct orlimit olim;

	if (uap->which >= RLIM_NLIMITS)
		return (EINVAL);
	olim.rlim_cur = p->p_rlimit[uap->which].rlim_cur;
	if (olim.rlim_cur == -1)
		olim.rlim_cur = 0x7fffffff;
	olim.rlim_max = p->p_rlimit[uap->which].rlim_max;
	if (olim.rlim_max == -1)
		olim.rlim_max = 0x7fffffff;
	return (copyout((caddr_t)&olim, (caddr_t)uap->rlp, sizeof(olim)));
}
#endif /* COMPAT_43 || COMPAT_SUNOS */

#ifndef _SYS_SYSPROTO_H_
struct __setrlimit_args {
	u_int	which;
	struct	rlimit *rlp;
};
#endif
/* ARGSUSED */
int
setrlimit(p, uap)
	struct proc *p;
	register struct __setrlimit_args *uap;
{
	struct rlimit alim;
	int error;

	if ((error =
	    copyin((caddr_t)uap->rlp, (caddr_t)&alim, sizeof (struct rlimit))))
		return (error);
	return (dosetrlimit(p, uap->which, &alim));
}

int
dosetrlimit(p, which, limp)
	struct proc *p;
	u_int which;
	struct rlimit *limp;
{
	register struct rlimit *alimp;
	int error;

	if (which >= RLIM_NLIMITS)
		return (EINVAL);
	alimp = &p->p_rlimit[which];

	/*
	 * Preserve historical bugs by treating negative limits as unsigned.
	 */
	if (limp->rlim_cur < 0)
		limp->rlim_cur = RLIM_INFINITY;
	if (limp->rlim_max < 0)
		limp->rlim_max = RLIM_INFINITY;

	if (limp->rlim_cur > alimp->rlim_max ||
	    limp->rlim_max > alimp->rlim_max)
		if ((error = suser_xxx(0, p, PRISON_ROOT)))
			return (error);
	if (limp->rlim_cur > limp->rlim_max)
		limp->rlim_cur = limp->rlim_max;
	if (p->p_limit->p_refcnt > 1 &&
	    (p->p_limit->p_lflags & PL_SHAREMOD) == 0) {
		p->p_limit->p_refcnt--;
		p->p_limit = limcopy(p->p_limit);
		alimp = &p->p_rlimit[which];
	}

	switch (which) {

	case RLIMIT_CPU:
		if (limp->rlim_cur > RLIM_INFINITY / (rlim_t)1000000)
			p->p_limit->p_cpulimit = RLIM_INFINITY;
		else
			p->p_limit->p_cpulimit =
			    (rlim_t)1000000 * limp->rlim_cur;
		break;
	case RLIMIT_DATA:
		if (limp->rlim_cur > MAXDSIZ)
			limp->rlim_cur = MAXDSIZ;
		if (limp->rlim_max > MAXDSIZ)
			limp->rlim_max = MAXDSIZ;
		break;

	case RLIMIT_STACK:
		if (limp->rlim_cur > MAXSSIZ)
			limp->rlim_cur = MAXSSIZ;
		if (limp->rlim_max > MAXSSIZ)
			limp->rlim_max = MAXSSIZ;
		/*
		 * The stack is allocated to its maximum size at exec time,
		 * with only "rlim_cur" bytes accessible.  If the limit is
		 * going up, make more of the stack accessible; if it is
		 * going down, make the difference inaccessible.
		 */
		if (limp->rlim_cur != alimp->rlim_cur) {
			vm_offset_t addr;
			vm_size_t size;
			vm_prot_t prot;

			if (limp->rlim_cur > alimp->rlim_cur) {
				prot = VM_PROT_ALL;
				size = limp->rlim_cur - alimp->rlim_cur;
				addr = USRSTACK - limp->rlim_cur;
			} else {
				prot = VM_PROT_NONE;
				size = alimp->rlim_cur - limp->rlim_cur;
				addr = USRSTACK - alimp->rlim_cur;
			}
			addr = trunc_page(addr);
			size = round_page(size);
			(void) vm_map_protect(&p->p_vmspace->vm_map,
					      addr, addr+size, prot, FALSE);
		}
		break;

	case RLIMIT_NOFILE:
		if (limp->rlim_cur > maxfilesperproc)
			limp->rlim_cur = maxfilesperproc;
		if (limp->rlim_max > maxfilesperproc)
			limp->rlim_max = maxfilesperproc;
		break;

	case RLIMIT_NPROC:
		if (limp->rlim_cur > maxprocperuid)
			limp->rlim_cur = maxprocperuid;
		if (limp->rlim_max > maxprocperuid)
			limp->rlim_max = maxprocperuid;
		if (limp->rlim_cur < 1)
			limp->rlim_cur = 1;
		if (limp->rlim_max < 1)
			limp->rlim_max = 1;
		break;
	}
	*alimp = *limp;
	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct __getrlimit_args {
	u_int	which;
	struct	rlimit *rlp;
};
#endif
/* ARGSUSED */
int
getrlimit(p, uap)
	struct proc *p;
	register struct __getrlimit_args *uap;
{

	if (uap->which >= RLIM_NLIMITS)
		return (EINVAL);
	return (copyout((caddr_t)&p->p_rlimit[uap->which], (caddr_t)uap->rlp,
	    sizeof (struct rlimit)));
}
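
/*
 * Usage sketch (illustrative, userland view of the syscalls implemented
 * above, not part of this file): raise the soft file-descriptor limit
 * to the hard limit.  max_out_nofile() is a hypothetical helper name;
 * dosetrlimit() above will still clamp both values to maxfilesperproc.
 */
#if 0
#include <sys/types.h>
#include <sys/resource.h>

int
max_out_nofile(void)
{
	struct rlimit rl;

	if (getrlimit(RLIMIT_NOFILE, &rl) != 0)
		return (-1);
	rl.rlim_cur = rl.rlim_max;
	return (setrlimit(RLIMIT_NOFILE, &rl));
}
#endif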

/*
 * Transform the running time and tick information in proc p into user,
 * system, and interrupt time usage.
 */
void
calcru(p, up, sp, ip)
	struct proc *p;
	struct timeval *up;
	struct timeval *sp;
	struct timeval *ip;
{
	/*
	 * {user, system, interrupt, total} {ticks, usec}: ut/uu, st/su,
	 * it/iu and tt/tu; ptu holds the previous total in usec.
	 */
	u_int64_t ut, uu, st, su, it, iu, tt, tu, ptu;
	int s;
	struct timeval tv;

	mtx_assert(&sched_lock, MA_OWNED);
	/* XXX: why spl-protect?  worst case is an off-by-one report */
	s = splstatclock();
	ut = p->p_uticks;
	st = p->p_sticks;
	it = p->p_iticks;
	splx(s);

	tt = ut + st + it;
	if (tt == 0) {
		st = 1;
		tt = 1;
	}

	tu = p->p_runtime;
	if (p == curproc) {
		/*
		 * Adjust for the current time slice.  This is actually fairly
		 * important since the error here is on the order of a time
		 * quantum, which is much greater than the sampling error.
		 */
		microuptime(&tv);
		if (timevalcmp(&tv, PCPU_PTR(switchtime), <))
			printf("microuptime() went backwards (%ld.%06ld -> %ld.%06ld)\n",
			    PCPU_GET(switchtime.tv_sec), PCPU_GET(switchtime.tv_usec),
			    tv.tv_sec, tv.tv_usec);
		else
			tu += (tv.tv_usec - PCPU_GET(switchtime.tv_usec)) +
			    (tv.tv_sec - PCPU_GET(switchtime.tv_sec)) *
			    (int64_t)1000000;
	}
	ptu = p->p_uu + p->p_su + p->p_iu;
	if (tu < ptu || (int64_t)tu < 0) {
		/* XXX no %qd in kernel.  Truncate. */
		printf("calcru: negative time of %ld usec for pid %d (%s)\n",
		       (long)tu, p->p_pid, p->p_comm);
		tu = ptu;
	}

	/* Subdivide tu. */
	uu = (tu * ut) / tt;
	su = (tu * st) / tt;
	iu = tu - uu - su;

	/* Enforce monotonicity. */
	if (uu < p->p_uu || su < p->p_su || iu < p->p_iu) {
		if (uu < p->p_uu)
			uu = p->p_uu;
		else if (uu + p->p_su + p->p_iu > tu)
			uu = tu - p->p_su - p->p_iu;
		if (st == 0)
			su = p->p_su;
		else {
			su = ((tu - uu) * st) / (st + it);
			if (su < p->p_su)
				su = p->p_su;
			else if (uu + su + p->p_iu > tu)
				su = tu - uu - p->p_iu;
		}
		KASSERT(uu + su + p->p_iu <= tu,
		    ("calcru: monotonisation botch 1"));
		iu = tu - uu - su;
		KASSERT(iu >= p->p_iu,
		    ("calcru: monotonisation botch 2"));
	}
	p->p_uu = uu;
	p->p_su = su;
	p->p_iu = iu;

	up->tv_sec = uu / 1000000;
	up->tv_usec = uu % 1000000;
	sp->tv_sec = su / 1000000;
	sp->tv_usec = su % 1000000;
	if (ip != NULL) {
		ip->tv_sec = iu / 1000000;
		ip->tv_usec = iu % 1000000;
	}
}
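
/*
 * Worked example (illustrative): if the measured runtime is
 * tu == 1000000 usec and the tick counts are ut == 60, st == 30 and
 * it == 10 (so tt == 100), the subdivision above yields uu == 600000,
 * su == 300000 and iu == 100000 usec.  The statclock ticks only
 * apportion the precisely measured total; they are never converted
 * to time directly.
 */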

#ifndef _SYS_SYSPROTO_H_
struct getrusage_args {
	int	who;
	struct	rusage *rusage;
};
#endif
/* ARGSUSED */
int
getrusage(p, uap)
	register struct proc *p;
	register struct getrusage_args *uap;
{
	register struct rusage *rup;

	switch (uap->who) {

	case RUSAGE_SELF:
		rup = &p->p_stats->p_ru;
		mtx_lock_spin(&sched_lock);
		calcru(p, &rup->ru_utime, &rup->ru_stime, NULL);
		mtx_unlock_spin(&sched_lock);
		break;

	case RUSAGE_CHILDREN:
		rup = &p->p_stats->p_cru;
		break;

	default:
		return (EINVAL);
	}
	return (copyout((caddr_t)rup, (caddr_t)uap->rusage,
	    sizeof (struct rusage)));
}

void
ruadd(ru, ru2)
	register struct rusage *ru, *ru2;
{
	register long *ip, *ip2;
	register int i;

	timevaladd(&ru->ru_utime, &ru2->ru_utime);
	timevaladd(&ru->ru_stime, &ru2->ru_stime);
	if (ru->ru_maxrss < ru2->ru_maxrss)
		ru->ru_maxrss = ru2->ru_maxrss;
	ip = &ru->ru_first; ip2 = &ru2->ru_first;
	for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--)
		*ip++ += *ip2++;
}

/*
 * Make a copy of the plimit structure.
 * We share these structures copy-on-write after fork,
 * and copy when a limit is changed.
 */
struct plimit *
limcopy(lim)
	struct plimit *lim;
{
	register struct plimit *copy;

	MALLOC(copy, struct plimit *, sizeof(struct plimit),
	    M_SUBPROC, M_WAITOK);
	bcopy(lim->pl_rlimit, copy->pl_rlimit, sizeof(struct plimit));
	copy->p_lflags = 0;
	copy->p_refcnt = 1;
	return (copy);
}

/*
 * Find the uidinfo structure for a uid.  This structure is used to
 * track the total resource consumption (process count, socket buffer
 * size, etc.) for the uid and impose limits.
 */
void
uihashinit()
{

	uihashtbl = hashinit(maxproc / 16, M_UIDINFO, &uihash);
	mtx_init(&uihashtbl_mtx, "uidinfo hash", MTX_DEF);
}
/*
 * Look up a uidinfo struct for the given uid.
 * The caller must hold uihashtbl_mtx.
 */
static struct uidinfo *
uilookup(uid)
	uid_t uid;
{
	struct	uihashhead *uipp;
	struct	uidinfo *uip;

	mtx_assert(&uihashtbl_mtx, MA_OWNED);
	uipp = UIHASH(uid);
	LIST_FOREACH(uip, uipp, ui_hash)
		if (uip->ui_uid == uid)
			break;

	return (uip);
}
/*
 * Find or allocate a struct uidinfo for a particular uid.
 * The returned struct has an extra reference held on it;
 * release it with uifree() when done.
 */
struct uidinfo *
uifind(uid)
	uid_t uid;
{
	struct	uidinfo *uip;

	mtx_lock(&uihashtbl_mtx);
	uip = uilookup(uid);
	if (uip == NULL) {
		struct  uidinfo *old_uip;

		mtx_unlock(&uihashtbl_mtx);
		uip = malloc(sizeof(*uip), M_UIDINFO, M_WAITOK | M_ZERO);
		mtx_lock(&uihashtbl_mtx);
		/*
		 * There's a chance someone created our uidinfo while we
		 * were in malloc and not holding the lock, so we have to
		 * make sure we don't insert a duplicate uidinfo.
		 */
		if ((old_uip = uilookup(uid)) != NULL) {
			/* Someone else beat us to it. */
			free(uip, M_UIDINFO);
			uip = old_uip;
		} else {
			mtx_init(&uip->ui_mtx, "uidinfo struct", MTX_DEF);
			uip->ui_uid = uid;
			LIST_INSERT_HEAD(UIHASH(uid), uip, ui_hash);
		}
	}
	uihold(uip);
	mtx_unlock(&uihashtbl_mtx);
	return (uip);
}
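
/*
 * Usage sketch (illustrative, hypothetical caller, not part of this
 * file): consumers pair uifind() with uifree(), and take extra
 * references with uihold() when copying the pointer.
 */
#if 0
static void
uidinfo_lifecycle(uid_t uid)
{
	struct uidinfo *uip;

	uip = uifind(uid);		/* +1 ref; allocates if needed */
	(void)chgproccnt(uip, 1, 0);	/* account for a new process */
	(void)chgproccnt(uip, -1, 0);
	uifree(uip);			/* -1 ref; frees on zero */
}
#endif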

/*
 * Place another refcount on a uidinfo struct.
 */
void
uihold(uip)
	struct uidinfo *uip;
{

	mtx_lock(&uip->ui_mtx);
	uip->ui_ref++;
	mtx_unlock(&uip->ui_mtx);
}
/*-
 * Since uidinfo structs have a long lifetime, we use an
 * opportunistic refcounting scheme to avoid locking the lookup hash
 * for each release.
 *
 * If the refcount hits 0, we need to free the structure,
 * which means we need to lock the hash.
 * Optimal case:
 *   After locking the struct and lowering the refcount, if we find
 *   that we don't need to free, simply unlock and return.
 * Suboptimal case:
 *   If lowering the refcount leaves it at 0, bump the count
 *   back up, drop the lock and acquire the locks in the proper
 *   order to try again.
 */
void
uifree(uip)
	struct uidinfo *uip;
{

	/* Prepare for optimal case. */
	mtx_lock(&uip->ui_mtx);

	if (--uip->ui_ref != 0) {
		mtx_unlock(&uip->ui_mtx);
		return;
	}

	/* Prepare for suboptimal case. */
	uip->ui_ref++;
	mtx_unlock(&uip->ui_mtx);
	mtx_lock(&uihashtbl_mtx);
	mtx_lock(&uip->ui_mtx);

	/*
	 * We must subtract one from the count again because we backed out
	 * our initial subtraction before dropping the lock.
	 * Since another thread may have added a reference after we dropped the
	 * initial lock we have to test for zero again.
	 */
	if (--uip->ui_ref == 0) {
		LIST_REMOVE(uip, ui_hash);
		mtx_unlock(&uihashtbl_mtx);
		if (uip->ui_sbsize != 0)
			/* XXX no %qd in kernel.  Truncate. */
			printf("freeing uidinfo: uid = %d, sbsize = %ld\n",
			    uip->ui_uid, (long)uip->ui_sbsize);
		if (uip->ui_proccnt != 0)
			printf("freeing uidinfo: uid = %d, proccnt = %ld\n",
			    uip->ui_uid, uip->ui_proccnt);
		mtx_destroy(&uip->ui_mtx);
		FREE(uip, M_UIDINFO);
		return;
	}

	mtx_unlock(&uihashtbl_mtx);
	mtx_unlock(&uip->ui_mtx);
}

/*
 * Change the count of processes a given user is running.
 * When 'max' is 0, don't enforce a limit.
 */
int
chgproccnt(uip, diff, max)
	struct	uidinfo	*uip;
	int	diff;
	int	max;
{

	mtx_lock(&uip->ui_mtx);
	/* Don't allow them to exceed max, but allow subtraction. */
	if (diff > 0 && uip->ui_proccnt + diff > max && max != 0) {
		mtx_unlock(&uip->ui_mtx);
		return (0);
	}
	uip->ui_proccnt += diff;
	if (uip->ui_proccnt < 0)
		printf("negative proccnt for uid = %d\n", uip->ui_uid);
	mtx_unlock(&uip->ui_mtx);
	return (1);
}
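
/*
 * Usage sketch (illustrative): the fork path is the natural caller;
 * a return value of 0 means the user is at its process limit and the
 * fork should fail.  This is a hypothetical condensation of such a
 * check, not a verbatim quote from kern_fork.c.
 */
#if 0
static int
fork_proccnt_check(struct proc *p, struct uidinfo *uip)
{

	if (!chgproccnt(uip, 1, p->p_rlimit[RLIMIT_NPROC].rlim_cur))
		return (EAGAIN);
	return (0);
}
#endif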

/*
 * Change the total socket buffer size a user has used.
 */
int
chgsbsize(uip, hiwat, to, max)
	struct	uidinfo	*uip;
	u_long *hiwat;
	u_long	to;
	rlim_t	max;
{
	rlim_t new;
	int s;

	s = splnet();
	mtx_lock(&uip->ui_mtx);
	new = uip->ui_sbsize + to - *hiwat;
	/* Don't allow them to exceed max, but allow subtraction. */
	if (to > *hiwat && new > max) {
		splx(s);
		mtx_unlock(&uip->ui_mtx);
		return (0);
	}
	uip->ui_sbsize = new;
	*hiwat = to;
	if (uip->ui_sbsize < 0)
		printf("negative sbsize for uid = %d\n", uip->ui_uid);
	splx(s);
	mtx_unlock(&uip->ui_mtx);
	return (1);
}
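
/*
 * Usage sketch (illustrative): socket buffer reservation is the
 * expected caller; a return value of 0 leaves *hiwat untouched and the
 * reservation is refused.  This is a hypothetical sbreserve()-style
 * call, not a verbatim quote from the socket code.
 */
#if 0
static int
sbreserve_sketch(struct sockbuf *sb, u_long cc, struct socket *so,
    struct proc *p)
{

	if (!chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, cc,
	    p->p_rlimit[RLIMIT_SBSIZE].rlim_cur))
		return (0);
	return (1);
}
#endif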
900