xref: /freebsd/sys/kern/kern_resource.c (revision 5aa839c9e2c373275091b8bf529c1311d0b84d76)
1  /*-
2   * SPDX-License-Identifier: BSD-3-Clause
3   *
4   * Copyright (c) 1982, 1986, 1991, 1993
5   *	The Regents of the University of California.  All rights reserved.
6   * (c) UNIX System Laboratories, Inc.
7   * All or some portions of this file are derived from material licensed
8   * to the University of California by American Telephone and Telegraph
9   * Co. or Unix System Laboratories, Inc. and are reproduced herein with
10   * the permission of UNIX System Laboratories, Inc.
11   *
12   * Redistribution and use in source and binary forms, with or without
13   * modification, are permitted provided that the following conditions
14   * are met:
15   * 1. Redistributions of source code must retain the above copyright
16   *    notice, this list of conditions and the following disclaimer.
17   * 2. Redistributions in binary form must reproduce the above copyright
18   *    notice, this list of conditions and the following disclaimer in the
19   *    documentation and/or other materials provided with the distribution.
20   * 3. Neither the name of the University nor the names of its contributors
21   *    may be used to endorse or promote products derived from this software
22   *    without specific prior written permission.
23   *
24   * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25   * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26   * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27   * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28   * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29   * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30   * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31   * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32   * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33   * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34   * SUCH DAMAGE.
35   *
36   *	@(#)kern_resource.c	8.5 (Berkeley) 1/21/94
37   */
38  
39  #include <sys/cdefs.h>
40  __FBSDID("$FreeBSD$");
41  
42  #include <sys/param.h>
43  #include <sys/systm.h>
44  #include <sys/sysproto.h>
45  #include <sys/file.h>
46  #include <sys/kernel.h>
47  #include <sys/lock.h>
48  #include <sys/malloc.h>
49  #include <sys/mutex.h>
50  #include <sys/priv.h>
51  #include <sys/proc.h>
52  #include <sys/refcount.h>
53  #include <sys/racct.h>
54  #include <sys/resourcevar.h>
55  #include <sys/rwlock.h>
56  #include <sys/sched.h>
57  #include <sys/sx.h>
58  #include <sys/syscallsubr.h>
59  #include <sys/sysctl.h>
60  #include <sys/sysent.h>
61  #include <sys/time.h>
62  #include <sys/umtxvar.h>
63  
64  #include <vm/vm.h>
65  #include <vm/vm_param.h>
66  #include <vm/pmap.h>
67  #include <vm/vm_map.h>
68  
69  static MALLOC_DEFINE(M_PLIMIT, "plimit", "plimit structures");
70  static MALLOC_DEFINE(M_UIDINFO, "uidinfo", "uidinfo structures");
71  #define	UIHASH(uid)	(&uihashtbl[(uid) & uihash])
72  static struct rwlock uihashtbl_lock;
73  static LIST_HEAD(uihashhead, uidinfo) *uihashtbl;
74  static u_long uihash;		/* size of hash table - 1 */
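/*
 * A hedged illustration of the bucket math: hashinit() sizes uihashtbl to
 * a power of two (driven by maxproc at boot) and stores size - 1 in uihash,
 * so UIHASH() is a plain mask of the uid.  With a 128-bucket table
 * (uihash == 127), uid 1001 lands in bucket 1001 & 127 == 105.
 */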
75  
76  static void	calcru1(struct proc *p, struct rusage_ext *ruxp,
77  		    struct timeval *up, struct timeval *sp);
78  static int	donice(struct thread *td, struct proc *chgp, int n);
79  static struct uidinfo *uilookup(uid_t uid);
80  static void	ruxagg_ext_locked(struct rusage_ext *rux, struct thread *td);
81  
82  /*
83   * Resource controls and accounting.
84   */
85  #ifndef _SYS_SYSPROTO_H_
86  struct getpriority_args {
87  	int	which;
88  	int	who;
89  };
90  #endif
91  int
92  sys_getpriority(struct thread *td, struct getpriority_args *uap)
93  {
94  
95  	return (kern_getpriority(td, uap->which, uap->who));
96  }
97  
98  int
99  kern_getpriority(struct thread *td, int which, int who)
100  {
101  	struct proc *p;
102  	struct pgrp *pg;
103  	int error, low;
104  
105  	error = 0;
106  	low = PRIO_MAX + 1;
107  	switch (which) {
108  	case PRIO_PROCESS:
109  		if (who == 0)
110  			low = td->td_proc->p_nice;
111  		else {
112  			p = pfind(who);
113  			if (p == NULL)
114  				break;
115  			if (p_cansee(td, p) == 0)
116  				low = p->p_nice;
117  			PROC_UNLOCK(p);
118  		}
119  		break;
120  
121  	case PRIO_PGRP:
122  		sx_slock(&proctree_lock);
123  		if (who == 0) {
124  			pg = td->td_proc->p_pgrp;
125  			PGRP_LOCK(pg);
126  		} else {
127  			pg = pgfind(who);
128  			if (pg == NULL) {
129  				sx_sunlock(&proctree_lock);
130  				break;
131  			}
132  		}
133  		sx_sunlock(&proctree_lock);
134  		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
135  			PROC_LOCK(p);
136  			if (p->p_state == PRS_NORMAL &&
137  			    p_cansee(td, p) == 0) {
138  				if (p->p_nice < low)
139  					low = p->p_nice;
140  			}
141  			PROC_UNLOCK(p);
142  		}
143  		PGRP_UNLOCK(pg);
144  		break;
145  
146  	case PRIO_USER:
147  		if (who == 0)
148  			who = td->td_ucred->cr_uid;
149  		sx_slock(&allproc_lock);
150  		FOREACH_PROC_IN_SYSTEM(p) {
151  			PROC_LOCK(p);
152  			if (p->p_state == PRS_NORMAL &&
153  			    p_cansee(td, p) == 0 &&
154  			    p->p_ucred->cr_uid == who) {
155  				if (p->p_nice < low)
156  					low = p->p_nice;
157  			}
158  			PROC_UNLOCK(p);
159  		}
160  		sx_sunlock(&allproc_lock);
161  		break;
162  
163  	default:
164  		error = EINVAL;
165  		break;
166  	}
167  	if (low == PRIO_MAX + 1 && error == 0)
168  		error = ESRCH;
169  	td->td_retval[0] = low;
170  	return (error);
171  }
172  
173  #ifndef _SYS_SYSPROTO_H_
174  struct setpriority_args {
175  	int	which;
176  	int	who;
177  	int	prio;
178  };
179  #endif
180  int
181  sys_setpriority(struct thread *td, struct setpriority_args *uap)
182  {
183  
184  	return (kern_setpriority(td, uap->which, uap->who, uap->prio));
185  }
186  
187  int
188  kern_setpriority(struct thread *td, int which, int who, int prio)
189  {
190  	struct proc *curp, *p;
191  	struct pgrp *pg;
192  	int found = 0, error = 0;
193  
194  	curp = td->td_proc;
195  	switch (which) {
196  	case PRIO_PROCESS:
197  		if (who == 0) {
198  			PROC_LOCK(curp);
199  			error = donice(td, curp, prio);
200  			PROC_UNLOCK(curp);
201  		} else {
202  			p = pfind(who);
203  			if (p == NULL)
204  				break;
205  			error = p_cansee(td, p);
206  			if (error == 0)
207  				error = donice(td, p, prio);
208  			PROC_UNLOCK(p);
209  		}
210  		found++;
211  		break;
212  
213  	case PRIO_PGRP:
214  		sx_slock(&proctree_lock);
215  		if (who == 0) {
216  			pg = curp->p_pgrp;
217  			PGRP_LOCK(pg);
218  		} else {
219  			pg = pgfind(who);
220  			if (pg == NULL) {
221  				sx_sunlock(&proctree_lock);
222  				break;
223  			}
224  		}
225  		sx_sunlock(&proctree_lock);
226  		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
227  			PROC_LOCK(p);
228  			if (p->p_state == PRS_NORMAL &&
229  			    p_cansee(td, p) == 0) {
230  				error = donice(td, p, prio);
231  				found++;
232  			}
233  			PROC_UNLOCK(p);
234  		}
235  		PGRP_UNLOCK(pg);
236  		break;
237  
238  	case PRIO_USER:
239  		if (who == 0)
240  			who = td->td_ucred->cr_uid;
241  		sx_slock(&allproc_lock);
242  		FOREACH_PROC_IN_SYSTEM(p) {
243  			PROC_LOCK(p);
244  			if (p->p_state == PRS_NORMAL &&
245  			    p->p_ucred->cr_uid == who &&
246  			    p_cansee(td, p) == 0) {
247  				error = donice(td, p, prio);
248  				found++;
249  			}
250  			PROC_UNLOCK(p);
251  		}
252  		sx_sunlock(&allproc_lock);
253  		break;
254  
255  	default:
256  		error = EINVAL;
257  		break;
258  	}
259  	if (found == 0 && error == 0)
260  		error = ESRCH;
261  	return (error);
262  }
263  
264  /*
265   * Set "nice" for a (whole) process.
266   */
267  static int
268  donice(struct thread *td, struct proc *p, int n)
269  {
270  	int error;
271  
272  	PROC_LOCK_ASSERT(p, MA_OWNED);
273  	if ((error = p_cansched(td, p)))
274  		return (error);
275  	if (n > PRIO_MAX)
276  		n = PRIO_MAX;
277  	if (n < PRIO_MIN)
278  		n = PRIO_MIN;
279  	if (n < p->p_nice && priv_check(td, PRIV_SCHED_SETPRIORITY) != 0)
280  		return (EACCES);
281  	sched_nice(p, n);
282  	return (0);
283  }
284  
285  static int unprivileged_idprio;
286  SYSCTL_INT(_security_bsd, OID_AUTO, unprivileged_idprio, CTLFLAG_RW,
287      &unprivileged_idprio, 0,
288      "Allow non-root users to set an idle priority (deprecated)");
289  
290  /*
291   * Set realtime priority for LWP.
292   */
293  #ifndef _SYS_SYSPROTO_H_
294  struct rtprio_thread_args {
295  	int		function;
296  	lwpid_t		lwpid;
297  	struct rtprio	*rtp;
298  };
299  #endif
300  int
301  sys_rtprio_thread(struct thread *td, struct rtprio_thread_args *uap)
302  {
303  	struct proc *p;
304  	struct rtprio rtp;
305  	struct thread *td1;
306  	int cierror, error;
307  
308  	/* Perform copyin before acquiring locks if needed. */
309  	if (uap->function == RTP_SET)
310  		cierror = copyin(uap->rtp, &rtp, sizeof(struct rtprio));
311  	else
312  		cierror = 0;
313  
314  	if (uap->lwpid == 0 || uap->lwpid == td->td_tid) {
315  		p = td->td_proc;
316  		td1 = td;
317  		PROC_LOCK(p);
318  	} else {
319  		td1 = tdfind(uap->lwpid, -1);
320  		if (td1 == NULL)
321  			return (ESRCH);
322  		p = td1->td_proc;
323  	}
324  
325  	switch (uap->function) {
326  	case RTP_LOOKUP:
327  		if ((error = p_cansee(td, p)))
328  			break;
329  		pri_to_rtp(td1, &rtp);
330  		PROC_UNLOCK(p);
331  		return (copyout(&rtp, uap->rtp, sizeof(struct rtprio)));
332  	case RTP_SET:
333  		if ((error = p_cansched(td, p)) || (error = cierror))
334  			break;
335  
336  		/* Disallow setting rtprio in most cases if not superuser. */
337  
338  		/*
339  		 * Realtime priority has to be restricted for reasons which
340  		 * should be obvious.  However, for idleprio processes, there is
341  		 * a potential for system deadlock if an idleprio process gains
342  		 * a lock on a resource that other processes need (and the
343  		 * idleprio process can't run due to a CPU-bound normal
344  		 * process).  Fix me!  XXX
345  		 *
346  		 * This problem is not only related to idleprio processes.
347  		 * A user level program can obtain a file lock and hold it
348  		 * indefinitely.  Additionally, without idleprio processes it is
349  		 * still conceivable that a program with low priority will never
350  		 * get to run.  In short, allowing this feature might make it
351  		 * easier to lock a resource indefinitely, but it is not the
352  		 * only thing that makes it possible.
353  		 */
354  		if (RTP_PRIO_BASE(rtp.type) == RTP_PRIO_REALTIME &&
355  		    (error = priv_check(td, PRIV_SCHED_RTPRIO)) != 0)
356  			break;
357  		if (RTP_PRIO_BASE(rtp.type) == RTP_PRIO_IDLE &&
358  		    unprivileged_idprio == 0 &&
359  		    (error = priv_check(td, PRIV_SCHED_IDPRIO)) != 0)
360  			break;
361  		error = rtp_to_pri(&rtp, td1);
362  		break;
363  	default:
364  		error = EINVAL;
365  		break;
366  	}
367  	PROC_UNLOCK(p);
368  	return (error);
369  }
370  
371  /*
372   * Set realtime priority.
373   */
374  #ifndef _SYS_SYSPROTO_H_
375  struct rtprio_args {
376  	int		function;
377  	pid_t		pid;
378  	struct rtprio	*rtp;
379  };
380  #endif
381  int
382  sys_rtprio(struct thread *td, struct rtprio_args *uap)
383  {
384  	struct proc *p;
385  	struct thread *tdp;
386  	struct rtprio rtp;
387  	int cierror, error;
388  
389  	/* Perform copyin before acquiring locks if needed. */
390  	if (uap->function == RTP_SET)
391  		cierror = copyin(uap->rtp, &rtp, sizeof(struct rtprio));
392  	else
393  		cierror = 0;
394  
395  	if (uap->pid == 0) {
396  		p = td->td_proc;
397  		PROC_LOCK(p);
398  	} else {
399  		p = pfind(uap->pid);
400  		if (p == NULL)
401  			return (ESRCH);
402  	}
403  
404  	switch (uap->function) {
405  	case RTP_LOOKUP:
406  		if ((error = p_cansee(td, p)))
407  			break;
408  		/*
409  		 * Return OUR priority if no pid is specified,
410  		 * or if one is, report the highest priority
411  		 * in the process.  There isn't much more you can do as
412  		 * there is only room to return a single priority.
413  		 * Note: specifying our own pid is not the same
414  		 * as leaving it zero.
415  		 */
416  		if (uap->pid == 0) {
417  			pri_to_rtp(td, &rtp);
418  		} else {
419  			struct rtprio rtp2;
420  
421  			rtp.type = RTP_PRIO_IDLE;
422  			rtp.prio = RTP_PRIO_MAX;
423  			FOREACH_THREAD_IN_PROC(p, tdp) {
424  				pri_to_rtp(tdp, &rtp2);
425  				if (rtp2.type <  rtp.type ||
426  				    (rtp2.type == rtp.type &&
427  				    rtp2.prio < rtp.prio)) {
428  					rtp.type = rtp2.type;
429  					rtp.prio = rtp2.prio;
430  				}
431  			}
432  		}
433  		PROC_UNLOCK(p);
434  		return (copyout(&rtp, uap->rtp, sizeof(struct rtprio)));
435  	case RTP_SET:
436  		if ((error = p_cansched(td, p)) || (error = cierror))
437  			break;
438  
439  		/*
440  		 * Disallow setting rtprio in most cases if not superuser.
441  		 * See the comment in sys_rtprio_thread about idprio
442  		 * threads holding a lock.
443  		 */
444  		if (RTP_PRIO_BASE(rtp.type) == RTP_PRIO_REALTIME &&
445  		    (error = priv_check(td, PRIV_SCHED_RTPRIO)) != 0)
446  			break;
447  		if (RTP_PRIO_BASE(rtp.type) == RTP_PRIO_IDLE &&
448  		    unprivileged_idprio == 0 &&
449  		    (error = priv_check(td, PRIV_SCHED_IDPRIO)) != 0)
450  			break;
451  
452  		/*
453  		 * If we are setting our own priority, set just our
454  		 * thread; but if we are targeting another process,
455  		 * set all the threads in that process.  Note that
456  		 * specifying our own pid explicitly takes the latter path.
457  		 */
458  		if (uap->pid == 0) {
459  			error = rtp_to_pri(&rtp, td);
460  		} else {
461  			FOREACH_THREAD_IN_PROC(p, td) {
462  				if ((error = rtp_to_pri(&rtp, td)) != 0)
463  					break;
464  			}
465  		}
466  		break;
467  	default:
468  		error = EINVAL;
469  		break;
470  	}
471  	PROC_UNLOCK(p);
472  	return (error);
473  }
474  
475  int
476  rtp_to_pri(struct rtprio *rtp, struct thread *td)
477  {
478  	u_char  newpri, oldclass, oldpri;
479  
480  	switch (RTP_PRIO_BASE(rtp->type)) {
481  	case RTP_PRIO_REALTIME:
482  		if (rtp->prio > RTP_PRIO_MAX)
483  			return (EINVAL);
484  		newpri = PRI_MIN_REALTIME + rtp->prio;
485  		break;
486  	case RTP_PRIO_NORMAL:
487  		if (rtp->prio > (PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE))
488  			return (EINVAL);
489  		newpri = PRI_MIN_TIMESHARE + rtp->prio;
490  		break;
491  	case RTP_PRIO_IDLE:
492  		if (rtp->prio > RTP_PRIO_MAX)
493  			return (EINVAL);
494  		newpri = PRI_MIN_IDLE + rtp->prio;
495  		break;
496  	default:
497  		return (EINVAL);
498  	}
499  
500  	thread_lock(td);
501  	oldclass = td->td_pri_class;
502  	sched_class(td, rtp->type);	/* XXX fix */
503  	oldpri = td->td_user_pri;
504  	sched_user_prio(td, newpri);
505  	if (td->td_user_pri != oldpri && (oldclass != RTP_PRIO_NORMAL ||
506  	    td->td_pri_class != RTP_PRIO_NORMAL))
507  		sched_prio(td, td->td_user_pri);
508  	if (TD_ON_UPILOCK(td) && oldpri != newpri) {
509  		critical_enter();
510  		thread_unlock(td);
511  		umtx_pi_adjust(td, oldpri);
512  		critical_exit();
513  	} else
514  		thread_unlock(td);
515  	return (0);
516  }
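
/*
 * For example, in terms of the symbolic constants from <sys/rtprio.h> and
 * <sys/priority.h>: a request of { RTP_PRIO_REALTIME, 0 } maps to user
 * priority PRI_MIN_REALTIME, { RTP_PRIO_NORMAL, 0 } to PRI_MIN_TIMESHARE,
 * and { RTP_PRIO_IDLE, RTP_PRIO_MAX } to PRI_MIN_IDLE + RTP_PRIO_MAX.
 * A prio beyond RTP_PRIO_MAX (or beyond the timeshare span for
 * RTP_PRIO_NORMAL) is rejected with EINVAL, as in the switch above.
 */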
517  
518  void
519  pri_to_rtp(struct thread *td, struct rtprio *rtp)
520  {
521  
522  	thread_lock(td);
523  	switch (PRI_BASE(td->td_pri_class)) {
524  	case PRI_REALTIME:
525  		rtp->prio = td->td_base_user_pri - PRI_MIN_REALTIME;
526  		break;
527  	case PRI_TIMESHARE:
528  		rtp->prio = td->td_base_user_pri - PRI_MIN_TIMESHARE;
529  		break;
530  	case PRI_IDLE:
531  		rtp->prio = td->td_base_user_pri - PRI_MIN_IDLE;
532  		break;
533  	default:
534  		break;
535  	}
536  	rtp->type = td->td_pri_class;
537  	thread_unlock(td);
538  }
539  
540  #if defined(COMPAT_43)
541  #ifndef _SYS_SYSPROTO_H_
542  struct osetrlimit_args {
543  	u_int	which;
544  	struct	orlimit *rlp;
545  };
546  #endif
547  int
548  osetrlimit(struct thread *td, struct osetrlimit_args *uap)
549  {
550  	struct orlimit olim;
551  	struct rlimit lim;
552  	int error;
553  
554  	if ((error = copyin(uap->rlp, &olim, sizeof(struct orlimit))))
555  		return (error);
556  	lim.rlim_cur = olim.rlim_cur;
557  	lim.rlim_max = olim.rlim_max;
558  	error = kern_setrlimit(td, uap->which, &lim);
559  	return (error);
560  }
561  
562  #ifndef _SYS_SYSPROTO_H_
563  struct ogetrlimit_args {
564  	u_int	which;
565  	struct	orlimit *rlp;
566  };
567  #endif
568  int
569  ogetrlimit(struct thread *td, struct ogetrlimit_args *uap)
570  {
571  	struct orlimit olim;
572  	struct rlimit rl;
573  	int error;
574  
575  	if (uap->which >= RLIM_NLIMITS)
576  		return (EINVAL);
577  	lim_rlimit(td, uap->which, &rl);
578  
579  	/*
580  	 * XXX would be more correct to convert only RLIM_INFINITY to the
581  	 * old RLIM_INFINITY and fail with EOVERFLOW for other larger
582  	 * values.  Most 64->32 and 32->16 conversions, including important
583  	 * ones such as uids, are even more broken than what we
584  	 * do here (they blindly truncate).  We don't do this correctly
585  	 * here since we have little experience with EOVERFLOW yet.
586  	 * Elsewhere, getuid() can't fail...
587  	 */
588  	olim.rlim_cur = rl.rlim_cur > 0x7fffffff ? 0x7fffffff : rl.rlim_cur;
589  	olim.rlim_max = rl.rlim_max > 0x7fffffff ? 0x7fffffff : rl.rlim_max;
590  	error = copyout(&olim, uap->rlp, sizeof(olim));
591  	return (error);
592  }
593  #endif /* COMPAT_43 */
594  
595  #ifndef _SYS_SYSPROTO_H_
596  struct setrlimit_args {
597  	u_int	which;
598  	struct	rlimit *rlp;
599  };
600  #endif
601  int
602  sys_setrlimit(struct thread *td, struct setrlimit_args *uap)
603  {
604  	struct rlimit alim;
605  	int error;
606  
607  	if ((error = copyin(uap->rlp, &alim, sizeof(struct rlimit))))
608  		return (error);
609  	error = kern_setrlimit(td, uap->which, &alim);
610  	return (error);
611  }
612  
613  static void
614  lim_cb(void *arg)
615  {
616  	struct rlimit rlim;
617  	struct thread *td;
618  	struct proc *p;
619  
620  	p = arg;
621  	PROC_LOCK_ASSERT(p, MA_OWNED);
622  	/*
623  	 * Check if the process exceeds its cpu resource allocation.  If
624  	 * it reaches the max, arrange to kill the process in ast().
625  	 */
626  	if (p->p_cpulimit == RLIM_INFINITY)
627  		return;
628  	PROC_STATLOCK(p);
629  	FOREACH_THREAD_IN_PROC(p, td) {
630  		ruxagg(p, td);
631  	}
632  	PROC_STATUNLOCK(p);
633  	if (p->p_rux.rux_runtime > p->p_cpulimit * cpu_tickrate()) {
634  		lim_rlimit_proc(p, RLIMIT_CPU, &rlim);
635  		if (p->p_rux.rux_runtime >= rlim.rlim_max * cpu_tickrate()) {
636  			killproc(p, "exceeded maximum CPU limit");
637  		} else {
638  			if (p->p_cpulimit < rlim.rlim_max)
639  				p->p_cpulimit += 5;
640  			kern_psignal(p, SIGXCPU);
641  		}
642  	}
643  	if ((p->p_flag & P_WEXIT) == 0)
644  		callout_reset_sbt(&p->p_limco, SBT_1S, 0,
645  		    lim_cb, p, C_PREL(1));
646  }
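
/*
 * In short: lim_cb() re-arms itself once per second while RLIMIT_CPU is
 * finite.  Once accumulated runtime passes p_cpulimit (initially the soft
 * limit), it posts SIGXCPU and advances p_cpulimit by 5 seconds, so the
 * signal repeats roughly every 5 CPU-seconds; reaching the hard limit
 * kills the process instead.
 */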
647  
648  int
649  kern_setrlimit(struct thread *td, u_int which, struct rlimit *limp)
650  {
651  
652  	return (kern_proc_setrlimit(td, td->td_proc, which, limp));
653  }
654  
655  int
656  kern_proc_setrlimit(struct thread *td, struct proc *p, u_int which,
657      struct rlimit *limp)
658  {
659  	struct plimit *newlim, *oldlim, *oldlim_td;
660  	struct rlimit *alimp;
661  	struct rlimit oldssiz;
662  	int error;
663  
664  	if (which >= RLIM_NLIMITS)
665  		return (EINVAL);
666  
667  	/*
668  	 * Preserve historical bugs by treating negative limits as unsigned.
669  	 */
670  	if (limp->rlim_cur < 0)
671  		limp->rlim_cur = RLIM_INFINITY;
672  	if (limp->rlim_max < 0)
673  		limp->rlim_max = RLIM_INFINITY;
674  
675  	oldssiz.rlim_cur = 0;
676  	newlim = lim_alloc();
677  	PROC_LOCK(p);
678  	oldlim = p->p_limit;
679  	alimp = &oldlim->pl_rlimit[which];
680  	if (limp->rlim_cur > alimp->rlim_max ||
681  	    limp->rlim_max > alimp->rlim_max)
682  		if ((error = priv_check(td, PRIV_PROC_SETRLIMIT))) {
683  			PROC_UNLOCK(p);
684  			lim_free(newlim);
685  			return (error);
686  		}
687  	if (limp->rlim_cur > limp->rlim_max)
688  		limp->rlim_cur = limp->rlim_max;
689  	lim_copy(newlim, oldlim);
690  	alimp = &newlim->pl_rlimit[which];
691  
692  	switch (which) {
693  	case RLIMIT_CPU:
694  		if (limp->rlim_cur != RLIM_INFINITY &&
695  		    p->p_cpulimit == RLIM_INFINITY)
696  			callout_reset_sbt(&p->p_limco, SBT_1S, 0,
697  			    lim_cb, p, C_PREL(1));
698  		p->p_cpulimit = limp->rlim_cur;
699  		break;
700  	case RLIMIT_DATA:
701  		if (limp->rlim_cur > maxdsiz)
702  			limp->rlim_cur = maxdsiz;
703  		if (limp->rlim_max > maxdsiz)
704  			limp->rlim_max = maxdsiz;
705  		break;
706  
707  	case RLIMIT_STACK:
708  		if (limp->rlim_cur > maxssiz)
709  			limp->rlim_cur = maxssiz;
710  		if (limp->rlim_max > maxssiz)
711  			limp->rlim_max = maxssiz;
712  		oldssiz = *alimp;
713  		if (p->p_sysent->sv_fixlimit != NULL)
714  			p->p_sysent->sv_fixlimit(&oldssiz,
715  			    RLIMIT_STACK);
716  		break;
717  
718  	case RLIMIT_NOFILE:
719  		if (limp->rlim_cur > maxfilesperproc)
720  			limp->rlim_cur = maxfilesperproc;
721  		if (limp->rlim_max > maxfilesperproc)
722  			limp->rlim_max = maxfilesperproc;
723  		break;
724  
725  	case RLIMIT_NPROC:
726  		if (limp->rlim_cur > maxprocperuid)
727  			limp->rlim_cur = maxprocperuid;
728  		if (limp->rlim_max > maxprocperuid)
729  			limp->rlim_max = maxprocperuid;
730  		if (limp->rlim_cur < 1)
731  			limp->rlim_cur = 1;
732  		if (limp->rlim_max < 1)
733  			limp->rlim_max = 1;
734  		break;
735  	}
736  	if (p->p_sysent->sv_fixlimit != NULL)
737  		p->p_sysent->sv_fixlimit(limp, which);
738  	*alimp = *limp;
739  	p->p_limit = newlim;
740  	PROC_UPDATE_COW(p);
741  	oldlim_td = NULL;
742  	if (td == curthread && PROC_COW_CHANGECOUNT(td, p) == 1) {
743  		oldlim_td = lim_cowsync();
744  		thread_cow_synced(td);
745  	}
746  	PROC_UNLOCK(p);
747  	if (oldlim_td != NULL) {
748  		MPASS(oldlim_td == oldlim);
749  		lim_freen(oldlim, 2);
750  	} else {
751  		lim_free(oldlim);
752  	}
753  
754  	if (which == RLIMIT_STACK &&
755  	    /*
756  	     * Skip calls from exec_new_vmspace(), done when stack is
757  	     * not mapped yet.
758  	     */
759  	    (td != curthread || (p->p_flag & P_INEXEC) == 0)) {
760  		/*
761  		 * Stack is allocated to the max at exec time with only
762  		 * "rlim_cur" bytes accessible.  If the stack limit is going
763  		 * up, make more accessible; if going down, make it inaccessible.
764  		 */
765  		if (limp->rlim_cur != oldssiz.rlim_cur) {
766  			vm_offset_t addr;
767  			vm_size_t size;
768  			vm_prot_t prot;
769  
770  			if (limp->rlim_cur > oldssiz.rlim_cur) {
771  				prot = p->p_sysent->sv_stackprot;
772  				size = limp->rlim_cur - oldssiz.rlim_cur;
773  				addr = round_page(p->p_vmspace->vm_stacktop) -
774  				    limp->rlim_cur;
775  			} else {
776  				prot = VM_PROT_NONE;
777  				size = oldssiz.rlim_cur - limp->rlim_cur;
778  				addr = round_page(p->p_vmspace->vm_stacktop) -
779  				    oldssiz.rlim_cur;
780  			}
781  			addr = trunc_page(addr);
782  			size = round_page(size);
783  			(void)vm_map_protect(&p->p_vmspace->vm_map,
784  			    addr, addr + size, prot, 0,
785  			    VM_MAP_PROTECT_SET_PROT);
786  		}
787  	}
788  
789  	return (0);
790  }
791  
792  #ifndef _SYS_SYSPROTO_H_
793  struct getrlimit_args {
794  	u_int	which;
795  	struct	rlimit *rlp;
796  };
797  #endif
798  /* ARGSUSED */
799  int
800  sys_getrlimit(struct thread *td, struct getrlimit_args *uap)
801  {
802  	struct rlimit rlim;
803  	int error;
804  
805  	if (uap->which >= RLIM_NLIMITS)
806  		return (EINVAL);
807  	lim_rlimit(td, uap->which, &rlim);
808  	error = copyout(&rlim, uap->rlp, sizeof(struct rlimit));
809  	return (error);
810  }
811  
812  /*
813   * Transform the running time and tick information for children of proc p
814   * into user and system time usage.
815   */
816  void
817  calccru(struct proc *p, struct timeval *up, struct timeval *sp)
818  {
819  
820  	PROC_LOCK_ASSERT(p, MA_OWNED);
821  	calcru1(p, &p->p_crux, up, sp);
822  }
823  
824  /*
825   * Transform the running time and tick information in proc p into user
826   * and system time usage.  If appropriate, include the current time slice
827   * on this CPU.
828   */
829  void
830  calcru(struct proc *p, struct timeval *up, struct timeval *sp)
831  {
832  	struct thread *td;
833  	uint64_t runtime, u;
834  
835  	PROC_LOCK_ASSERT(p, MA_OWNED);
836  	PROC_STATLOCK_ASSERT(p, MA_OWNED);
837  	/*
838  	 * If we are getting stats for the current process, then add in the
839  	 * stats that this thread has accumulated in its current time slice.
840  	 * We reset the thread and CPU state as if we had performed a context
841  	 * switch right here.
842  	 */
843  	td = curthread;
844  	if (td->td_proc == p) {
845  		u = cpu_ticks();
846  		runtime = u - PCPU_GET(switchtime);
847  		td->td_runtime += runtime;
848  		td->td_incruntime += runtime;
849  		PCPU_SET(switchtime, u);
850  	}
851  	/* Make sure the per-thread stats are current. */
852  	FOREACH_THREAD_IN_PROC(p, td) {
853  		if (td->td_incruntime == 0)
854  			continue;
855  		ruxagg(p, td);
856  	}
857  	calcru1(p, &p->p_rux, up, sp);
858  }
859  
860  /* Collect resource usage for a single thread. */
861  void
862  rufetchtd(struct thread *td, struct rusage *ru)
863  {
864  	struct proc *p;
865  	uint64_t runtime, u;
866  
867  	p = td->td_proc;
868  	PROC_STATLOCK_ASSERT(p, MA_OWNED);
869  	THREAD_LOCK_ASSERT(td, MA_OWNED);
870  	/*
871  	 * If we are getting stats for the current thread, then add in the
872  	 * stats that this thread has accumulated in its current time slice.
873  	 * We reset the thread and CPU state as if we had performed a context
874  	 * switch right here.
875  	 */
876  	if (td == curthread) {
877  		u = cpu_ticks();
878  		runtime = u - PCPU_GET(switchtime);
879  		td->td_runtime += runtime;
880  		td->td_incruntime += runtime;
881  		PCPU_SET(switchtime, u);
882  	}
883  	ruxagg_locked(p, td);
884  	*ru = td->td_ru;
885  	calcru1(p, &td->td_rux, &ru->ru_utime, &ru->ru_stime);
886  }
887  
888  /* XXX: the MI version is too slow to use: */
889  #ifndef __HAVE_INLINE_FLSLL
890  #define	flsll(x)	(fls((x) >> 32) != 0 ? fls((x) >> 32) + 32 : fls(x))
891  #endif
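
/*
 * Either form of flsll() returns the 1-based index of the most significant
 * set bit, with 0 for a zero argument: flsll(0) == 0, flsll(1) == 1,
 * flsll(1ULL << 40) == 41.  The fallback macro above composes this from
 * 32-bit fls().
 */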
892  
893  static uint64_t
894  mul64_by_fraction(uint64_t a, uint64_t b, uint64_t c)
895  {
896  	uint64_t acc, bh, bl;
897  	int i, s, sa, sb;
898  
899  	/*
900  	 * Calculate (a * b) / c accurately enough without overflowing.  c
901  	 * must be nonzero, and its top bit must be 0.  a or b must be
902  	 * <= c, and the implementation is tuned for b <= c.
903  	 *
904  	 * The comments about times are for use in calcru1() with units of
905  	 * microseconds for 'a' and stathz ticks at 128 Hz for b and c.
906  	 *
907  	 * Let n be the number of top zero bits in c.  Each iteration
908  	 * either returns, or reduces b by right shifting it by at least n.
909  	 * The number of iterations is at most 1 + 64 / n, and the error is
910  	 * at most the number of iterations.
911  	 *
912  	 * It is very unusual to need even 2 iterations.  Previous
913  	 * implementations overflowed essentially by returning early in the
914  	 * first iteration, with n = 38 giving overflow at 105+ hours and
915  	 * n = 32 giving overflow at 388+ days despite a more careful
916  	 * calculation.  388 days is a reasonable uptime, and the calculation
917  	 * needs to work for the uptime times the number of CPUs since 'a'
918  	 * is per-process.
919  	 */
920  	if (a >= (uint64_t)1 << 63)
921  		return (0);		/* Unsupported arg -- can't happen. */
922  	acc = 0;
923  	for (i = 0; i < 128; i++) {
924  		sa = flsll(a);
925  		sb = flsll(b);
926  		if (sa + sb <= 64)
927  			/* Up to 105 hours on first iteration. */
928  			return (acc + (a * b) / c);
929  		if (a >= c) {
930  			/*
931  			 * This reduction is based on a = q * c + r, with the
932  			 * remainder r < c.  'a' may be large to start, and
933  			 * moving bits from b into 'a' at the end of the loop
934  			 * sets the top bit of 'a', so the reduction makes
935  			 * significant progress.
936  			 */
937  			acc += (a / c) * b;
938  			a %= c;
939  			sa = flsll(a);
940  			if (sa + sb <= 64)
941  				/* Up to 388 days on first iteration. */
942  				return (acc + (a * b) / c);
943  		}
944  
945  		/*
946  		 * This step writes a * b as a * ((bh << s) + bl) =
947  		 * a * (bh << s) + a * bl = (a << s) * bh + a * bl.  The 2
948  		 * additive terms are handled separately.  Splitting in
949  		 * this way is linear except for rounding errors.
950  		 *
951  		 * s = 64 - sa is the maximum such that a << s fits in 64
952  		 * bits.  Since a < c and c has at least 1 zero top bit,
953  		 * sa < 64 and s > 0.  Thus this step makes progress by
954  		 * reducing b (it increases 'a', but taking remainders on
955  		 * the next iteration completes the reduction).
956  		 *
957  		 * Finally, the choice for s is just what is needed to keep
958  		 * a * bl from overflowing, so we don't need complications
959  		 * like a recursive call mul64_by_fraction(a, bl, c) to
960  		 * handle the second additive term.
961  		 */
962  		s = 64 - sa;
963  		bh = b >> s;
964  		bl = b - (bh << s);
965  		acc += (a * bl) / c;
966  		a <<= s;
967  		b = bh;
968  	}
969  	return (0);		/* Algorithm failure -- can't happen. */
970  }
971  
972  static void
973  calcru1(struct proc *p, struct rusage_ext *ruxp, struct timeval *up,
974      struct timeval *sp)
975  {
976  	/* {user, system, interrupt, total} {ticks, usec}: */
977  	uint64_t ut, uu, st, su, it, tt, tu;
978  
979  	ut = ruxp->rux_uticks;
980  	st = ruxp->rux_sticks;
981  	it = ruxp->rux_iticks;
982  	tt = ut + st + it;
983  	if (tt == 0) {
984  		/* Avoid divide by zero */
985  		st = 1;
986  		tt = 1;
987  	}
988  	tu = cputick2usec(ruxp->rux_runtime);
989  	if ((int64_t)tu < 0) {
990  		/* XXX: this should be an assert /phk */
991  		printf("calcru: negative runtime of %jd usec for pid %d (%s)\n",
992  		    (intmax_t)tu, p->p_pid, p->p_comm);
993  		tu = ruxp->rux_tu;
994  	}
995  
996  	/* Subdivide tu.  Avoid overflow in the multiplications. */
997  	if (__predict_true(tu <= ((uint64_t)1 << 38) && tt <= (1 << 26))) {
998  		/* Up to 76 hours when stathz is 128. */
999  		uu = (tu * ut) / tt;
1000  		su = (tu * st) / tt;
1001  	} else {
1002  		uu = mul64_by_fraction(tu, ut, tt);
1003  		su = mul64_by_fraction(tu, st, tt);
1004  	}
1005  
1006  	if (tu >= ruxp->rux_tu) {
1007  		/*
1008  		 * The normal case, time increased.
1009  		 * Enforce monotonicity of bucketed numbers.
1010  		 */
1011  		if (uu < ruxp->rux_uu)
1012  			uu = ruxp->rux_uu;
1013  		if (su < ruxp->rux_su)
1014  			su = ruxp->rux_su;
1015  	} else if (tu + 3 > ruxp->rux_tu || 101 * tu > 100 * ruxp->rux_tu) {
1016  		/*
1017  		 * When we calibrate the cputicker, it is not uncommon to
1018  		 * see the presumably fixed frequency increase slightly over
1019  		 * time as a result of thermal stabilization and NTP
1020  		 * discipline (of the reference clock).  We therefore ignore
1021  	 * a bit of backwards slop because we expect to catch up
1022  		 * shortly.  We use a 3 microsecond limit to catch low
1023  		 * counts and a 1% limit for high counts.
1024  		 */
1025  		uu = ruxp->rux_uu;
1026  		su = ruxp->rux_su;
1027  		tu = ruxp->rux_tu;
1028  	} else if (vm_guest == VM_GUEST_NO) {  /* tu < ruxp->rux_tu */
1029  		/*
1030  		 * What happened here was likely that a laptop, which ran at
1031  		 * a reduced clock frequency at boot, kicked into high gear.
1032  		 * The wisdom of spamming this message in that case is
1033  		 * dubious, but it might also be indicative of something
1034  	 * serious, so let's keep it and hope laptops can be made
1035  		 * more truthful about their CPU speed via ACPI.
1036  		 */
1037  		printf("calcru: runtime went backwards from %ju usec "
1038  		    "to %ju usec for pid %d (%s)\n",
1039  		    (uintmax_t)ruxp->rux_tu, (uintmax_t)tu,
1040  		    p->p_pid, p->p_comm);
1041  	}
1042  
1043  	ruxp->rux_uu = uu;
1044  	ruxp->rux_su = su;
1045  	ruxp->rux_tu = tu;
1046  
1047  	up->tv_sec = uu / 1000000;
1048  	up->tv_usec = uu % 1000000;
1049  	sp->tv_sec = su / 1000000;
1050  	sp->tv_usec = su % 1000000;
1051  }
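
/*
 * Worked example: with rux_uticks = 600, rux_sticks = 380, rux_iticks = 20
 * (so tt = 1000) and a total runtime of tu = 10000000 usec, the split gives
 * uu = 6000000 and su = 3800000, i.e. *up = { 6, 0 } and *sp = { 3, 800000 };
 * interrupt time is counted in tu but attributed to neither bucket.
 */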
1052  
1053  #ifndef _SYS_SYSPROTO_H_
1054  struct getrusage_args {
1055  	int	who;
1056  	struct	rusage *rusage;
1057  };
1058  #endif
1059  int
1060  sys_getrusage(struct thread *td, struct getrusage_args *uap)
1061  {
1062  	struct rusage ru;
1063  	int error;
1064  
1065  	error = kern_getrusage(td, uap->who, &ru);
1066  	if (error == 0)
1067  		error = copyout(&ru, uap->rusage, sizeof(struct rusage));
1068  	return (error);
1069  }
1070  
1071  int
1072  kern_getrusage(struct thread *td, int who, struct rusage *rup)
1073  {
1074  	struct proc *p;
1075  	int error;
1076  
1077  	error = 0;
1078  	p = td->td_proc;
1079  	PROC_LOCK(p);
1080  	switch (who) {
1081  	case RUSAGE_SELF:
1082  		rufetchcalc(p, rup, &rup->ru_utime,
1083  		    &rup->ru_stime);
1084  		break;
1085  
1086  	case RUSAGE_CHILDREN:
1087  		*rup = p->p_stats->p_cru;
1088  		calccru(p, &rup->ru_utime, &rup->ru_stime);
1089  		break;
1090  
1091  	case RUSAGE_THREAD:
1092  		PROC_STATLOCK(p);
1093  		thread_lock(td);
1094  		rufetchtd(td, rup);
1095  		thread_unlock(td);
1096  		PROC_STATUNLOCK(p);
1097  		break;
1098  
1099  	default:
1100  		error = EINVAL;
1101  	}
1102  	PROC_UNLOCK(p);
1103  	return (error);
1104  }
1105  
1106  void
1107  rucollect(struct rusage *ru, struct rusage *ru2)
1108  {
1109  	long *ip, *ip2;
1110  	int i;
1111  
1112  	if (ru->ru_maxrss < ru2->ru_maxrss)
1113  		ru->ru_maxrss = ru2->ru_maxrss;
1114  	ip = &ru->ru_first;
1115  	ip2 = &ru2->ru_first;
1116  	for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--)
1117  		*ip++ += *ip2++;
1118  }
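
/*
 * The loop above relies on ru_first/ru_last from <sys/resource.h>, which
 * alias the first and last of the contiguous long counters in struct rusage
 * (ru_ixrss through ru_nivcsw), so everything except ru_maxrss and the two
 * timevals is summed element-wise.
 */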
1119  
1120  void
1121  ruadd(struct rusage *ru, struct rusage_ext *rux, struct rusage *ru2,
1122      struct rusage_ext *rux2)
1123  {
1124  
1125  	rux->rux_runtime += rux2->rux_runtime;
1126  	rux->rux_uticks += rux2->rux_uticks;
1127  	rux->rux_sticks += rux2->rux_sticks;
1128  	rux->rux_iticks += rux2->rux_iticks;
1129  	rux->rux_uu += rux2->rux_uu;
1130  	rux->rux_su += rux2->rux_su;
1131  	rux->rux_tu += rux2->rux_tu;
1132  	rucollect(ru, ru2);
1133  }
1134  
1135  /*
1136   * Aggregate tick counts into the proc's rusage_ext.
1137   */
1138  static void
1139  ruxagg_ext_locked(struct rusage_ext *rux, struct thread *td)
1140  {
1141  
1142  	rux->rux_runtime += td->td_incruntime;
1143  	rux->rux_uticks += td->td_uticks;
1144  	rux->rux_sticks += td->td_sticks;
1145  	rux->rux_iticks += td->td_iticks;
1146  }
1147  
1148  void
1149  ruxagg_locked(struct proc *p, struct thread *td)
1150  {
1151  	THREAD_LOCK_ASSERT(td, MA_OWNED);
1152  	PROC_STATLOCK_ASSERT(td->td_proc, MA_OWNED);
1153  
1154  	ruxagg_ext_locked(&p->p_rux, td);
1155  	ruxagg_ext_locked(&td->td_rux, td);
1156  	td->td_incruntime = 0;
1157  	td->td_uticks = 0;
1158  	td->td_iticks = 0;
1159  	td->td_sticks = 0;
1160  }
1161  
1162  void
1163  ruxagg(struct proc *p, struct thread *td)
1164  {
1165  
1166  	thread_lock(td);
1167  	ruxagg_locked(p, td);
1168  	thread_unlock(td);
1169  }
1170  
1171  /*
1172   * Update the rusage_ext structure and fetch a valid aggregate rusage
1173   * for proc p if storage for one is supplied.
1174   */
1175  void
1176  rufetch(struct proc *p, struct rusage *ru)
1177  {
1178  	struct thread *td;
1179  
1180  	PROC_STATLOCK_ASSERT(p, MA_OWNED);
1181  
1182  	*ru = p->p_ru;
1183  	if (p->p_numthreads > 0)  {
1184  		FOREACH_THREAD_IN_PROC(p, td) {
1185  			ruxagg(p, td);
1186  			rucollect(ru, &td->td_ru);
1187  		}
1188  	}
1189  }
1190  
1191  /*
1192   * Atomically perform a rufetch and a calcru together.
1193   * Consumers can safely assume that calcru() is executed only once
1194   * rufetch() is completed.
1195   */
1196  void
1197  rufetchcalc(struct proc *p, struct rusage *ru, struct timeval *up,
1198      struct timeval *sp)
1199  {
1200  
1201  	PROC_STATLOCK(p);
1202  	rufetch(p, ru);
1203  	calcru(p, up, sp);
1204  	PROC_STATUNLOCK(p);
1205  }
1206  
1207  /*
1208   * Allocate a new resource limits structure and initialize its
1209   * reference count.
1210   */
1211  struct plimit *
1212  lim_alloc()
1213  {
1214  	struct plimit *limp;
1215  
1216  	limp = malloc(sizeof(struct plimit), M_PLIMIT, M_WAITOK);
1217  	refcount_init(&limp->pl_refcnt, 1);
1218  	return (limp);
1219  }
1220  
1221  struct plimit *
1222  lim_hold(struct plimit *limp)
1223  {
1224  
1225  	refcount_acquire(&limp->pl_refcnt);
1226  	return (limp);
1227  }
1228  
1229  struct plimit *
1230  lim_cowsync(void)
1231  {
1232  	struct thread *td;
1233  	struct proc *p;
1234  	struct plimit *oldlimit;
1235  
1236  	td = curthread;
1237  	p = td->td_proc;
1238  	PROC_LOCK_ASSERT(p, MA_OWNED);
1239  
1240  	if (td->td_limit == p->p_limit)
1241  		return (NULL);
1242  
1243  	oldlimit = td->td_limit;
1244  	td->td_limit = lim_hold(p->p_limit);
1245  
1246  	return (oldlimit);
1247  }
1248  
1249  void
1250  lim_fork(struct proc *p1, struct proc *p2)
1251  {
1252  
1253  	PROC_LOCK_ASSERT(p1, MA_OWNED);
1254  	PROC_LOCK_ASSERT(p2, MA_OWNED);
1255  
1256  	p2->p_limit = lim_hold(p1->p_limit);
1257  	callout_init_mtx(&p2->p_limco, &p2->p_mtx, 0);
1258  	if (p1->p_cpulimit != RLIM_INFINITY)
1259  		callout_reset_sbt(&p2->p_limco, SBT_1S, 0,
1260  		    lim_cb, p2, C_PREL(1));
1261  }
1262  
1263  void
1264  lim_free(struct plimit *limp)
1265  {
1266  
1267  	if (refcount_release(&limp->pl_refcnt))
1268  		free((void *)limp, M_PLIMIT);
1269  }
1270  
1271  void
1272  lim_freen(struct plimit *limp, int n)
1273  {
1274  
1275  	if (refcount_releasen(&limp->pl_refcnt, n))
1276  		free((void *)limp, M_PLIMIT);
1277  }
1278  
1279  /*
1280   * Make a copy of the plimit structure.
1281   * We share these structures copy-on-write after fork.
1282   */
1283  void
1284  lim_copy(struct plimit *dst, struct plimit *src)
1285  {
1286  
1287  	KASSERT(dst->pl_refcnt <= 1, ("lim_copy to shared limit"));
1288  	bcopy(src->pl_rlimit, dst->pl_rlimit, sizeof(src->pl_rlimit));
1289  }
1290  
1291  /*
1292   * Return the hard limit for a particular system resource.  The
1293   * which parameter specifies the index into the rlimit array.
1294   */
1295  rlim_t
1296  lim_max(struct thread *td, int which)
1297  {
1298  	struct rlimit rl;
1299  
1300  	lim_rlimit(td, which, &rl);
1301  	return (rl.rlim_max);
1302  }
1303  
1304  rlim_t
1305  lim_max_proc(struct proc *p, int which)
1306  {
1307  	struct rlimit rl;
1308  
1309  	lim_rlimit_proc(p, which, &rl);
1310  	return (rl.rlim_max);
1311  }
1312  
1313  /*
1314   * Return the current (soft) limit for a particular system resource.
1315   * The which parameter specifies the index into the rlimit array.
1316   */
1317  rlim_t
1318  (lim_cur)(struct thread *td, int which)
1319  {
1320  	struct rlimit rl;
1321  
1322  	lim_rlimit(td, which, &rl);
1323  	return (rl.rlim_cur);
1324  }
1325  
1326  rlim_t
1327  lim_cur_proc(struct proc *p, int which)
1328  {
1329  	struct rlimit rl;
1330  
1331  	lim_rlimit_proc(p, which, &rl);
1332  	return (rl.rlim_cur);
1333  }
1334  
1335  /*
1336   * Return a copy of the entire rlimit structure for the system limit
1337   * specified by 'which' in the rlimit structure pointed to by 'rlp'.
1338   */
1339  void
1340  lim_rlimit(struct thread *td, int which, struct rlimit *rlp)
1341  {
1342  	struct proc *p = td->td_proc;
1343  
1344  	MPASS(td == curthread);
1345  	KASSERT(which >= 0 && which < RLIM_NLIMITS,
1346  	    ("request for invalid resource limit"));
1347  	*rlp = td->td_limit->pl_rlimit[which];
1348  	if (p->p_sysent->sv_fixlimit != NULL)
1349  		p->p_sysent->sv_fixlimit(rlp, which);
1350  }
1351  
1352  void
1353  lim_rlimit_proc(struct proc *p, int which, struct rlimit *rlp)
1354  {
1355  
1356  	PROC_LOCK_ASSERT(p, MA_OWNED);
1357  	KASSERT(which >= 0 && which < RLIM_NLIMITS,
1358  	    ("request for invalid resource limit"));
1359  	*rlp = p->p_limit->pl_rlimit[which];
1360  	if (p->p_sysent->sv_fixlimit != NULL)
1361  		p->p_sysent->sv_fixlimit(rlp, which);
1362  }
1363  
1364  void
1365  uihashinit()
1366  {
1367  
1368  	uihashtbl = hashinit(maxproc / 16, M_UIDINFO, &uihash);
1369  	rw_init(&uihashtbl_lock, "uidinfo hash");
1370  }
1371  
1372  /*
1373   * Look up a uidinfo struct for the given uid.
1374   * uihashtbl_lock must be locked.
1375   * Increase the refcount on the uidinfo struct returned, if any.
1376   */
1377  static struct uidinfo *
1378  uilookup(uid_t uid)
1379  {
1380  	struct uihashhead *uipp;
1381  	struct uidinfo *uip;
1382  
1383  	rw_assert(&uihashtbl_lock, RA_LOCKED);
1384  	uipp = UIHASH(uid);
1385  	LIST_FOREACH(uip, uipp, ui_hash)
1386  		if (uip->ui_uid == uid) {
1387  			uihold(uip);
1388  			break;
1389  		}
1390  
1391  	return (uip);
1392  }
1393  
1394  /*
1395   * Find or allocate a struct uidinfo for a particular uid.
1396   * Returns with uidinfo struct referenced.
1397   * uifree() should be called on a struct uidinfo when released.
1398   */
1399  struct uidinfo *
1400  uifind(uid_t uid)
1401  {
1402  	struct uidinfo *new_uip, *uip;
1403  	struct ucred *cred;
1404  
1405  	cred = curthread->td_ucred;
1406  	if (cred->cr_uidinfo->ui_uid == uid) {
1407  		uip = cred->cr_uidinfo;
1408  		uihold(uip);
1409  		return (uip);
1410  	} else if (cred->cr_ruidinfo->ui_uid == uid) {
1411  		uip = cred->cr_ruidinfo;
1412  		uihold(uip);
1413  		return (uip);
1414  	}
1415  
1416  	rw_rlock(&uihashtbl_lock);
1417  	uip = uilookup(uid);
1418  	rw_runlock(&uihashtbl_lock);
1419  	if (uip != NULL)
1420  		return (uip);
1421  
1422  	new_uip = malloc(sizeof(*new_uip), M_UIDINFO, M_WAITOK | M_ZERO);
1423  	racct_create(&new_uip->ui_racct);
1424  	refcount_init(&new_uip->ui_ref, 1);
1425  	new_uip->ui_uid = uid;
1426  
1427  	rw_wlock(&uihashtbl_lock);
1428  	/*
1429  	 * There's a chance someone created our uidinfo while we
1430  	 * were in malloc and not holding the lock, so we have to
1431  	 * make sure we don't insert a duplicate uidinfo.
1432  	 */
1433  	if ((uip = uilookup(uid)) == NULL) {
1434  		LIST_INSERT_HEAD(UIHASH(uid), new_uip, ui_hash);
1435  		rw_wunlock(&uihashtbl_lock);
1436  		uip = new_uip;
1437  	} else {
1438  		rw_wunlock(&uihashtbl_lock);
1439  		racct_destroy(&new_uip->ui_racct);
1440  		free(new_uip, M_UIDINFO);
1441  	}
1442  	return (uip);
1443  }
1444  
1445  /*
1446   * Place another refcount on a uidinfo struct.
1447   */
1448  void
1449  uihold(struct uidinfo *uip)
1450  {
1451  
1452  	refcount_acquire(&uip->ui_ref);
1453  }
1454  
1455  /*-
1456   * Since uidinfo structs have a long lifetime, we use an
1457   * opportunistic refcounting scheme to avoid locking the lookup hash
1458   * for each release.
1459   *
1460   * If the refcount hits 0, we need to free the structure,
1461   * which means we need to lock the hash.
1462   * Optimal case:
1463   *   After locking the struct and lowering the refcount, if we find
1464   *   that we don't need to free, simply unlock and return.
1465   * Suboptimal case:
1466   *   If refcount lowering results in need to free, bump the count
1467   *   back up, lose the lock and acquire the locks in the proper
1468   *   order to try again.
1469   */
1470  void
1471  uifree(struct uidinfo *uip)
1472  {
1473  
1474  	if (refcount_release_if_not_last(&uip->ui_ref))
1475  		return;
1476  
1477  	rw_wlock(&uihashtbl_lock);
1478  	if (refcount_release(&uip->ui_ref) == 0) {
1479  		rw_wunlock(&uihashtbl_lock);
1480  		return;
1481  	}
1482  
1483  	racct_destroy(&uip->ui_racct);
1484  	LIST_REMOVE(uip, ui_hash);
1485  	rw_wunlock(&uihashtbl_lock);
1486  
1487  	if (uip->ui_sbsize != 0)
1488  		printf("freeing uidinfo: uid = %d, sbsize = %ld\n",
1489  		    uip->ui_uid, uip->ui_sbsize);
1490  	if (uip->ui_proccnt != 0)
1491  		printf("freeing uidinfo: uid = %d, proccnt = %ld\n",
1492  		    uip->ui_uid, uip->ui_proccnt);
1493  	if (uip->ui_vmsize != 0)
1494  		printf("freeing uidinfo: uid = %d, swapuse = %lld\n",
1495  		    uip->ui_uid, (unsigned long long)uip->ui_vmsize);
1496  	free(uip, M_UIDINFO);
1497  }
1498  
1499  #ifdef RACCT
1500  void
1501  ui_racct_foreach(void (*callback)(struct racct *racct,
1502      void *arg2, void *arg3), void (*pre)(void), void (*post)(void),
1503      void *arg2, void *arg3)
1504  {
1505  	struct uidinfo *uip;
1506  	struct uihashhead *uih;
1507  
1508  	rw_rlock(&uihashtbl_lock);
1509  	if (pre != NULL)
1510  		(pre)();
1511  	for (uih = &uihashtbl[uihash]; uih >= uihashtbl; uih--) {
1512  		LIST_FOREACH(uip, uih, ui_hash) {
1513  			(callback)(uip->ui_racct, arg2, arg3);
1514  		}
1515  	}
1516  	if (post != NULL)
1517  		(post)();
1518  	rw_runlock(&uihashtbl_lock);
1519  }
1520  #endif
1521  
1522  static inline int
1523  chglimit(struct uidinfo *uip, long *limit, int diff, rlim_t max, const char *name)
1524  {
1525  	long new;
1526  
1527  	/* Don't allow them to exceed max, but allow subtraction. */
1528  	new = atomic_fetchadd_long(limit, (long)diff) + diff;
1529  	if (diff > 0 && max != 0) {
1530  		if (new < 0 || new > max) {
1531  			atomic_subtract_long(limit, (long)diff);
1532  			return (0);
1533  		}
1534  	} else if (new < 0)
1535  		printf("negative %s for uid = %d\n", name, uip->ui_uid);
1536  	return (1);
1537  }
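
/*
 * Callers of the chg*cnt() wrappers below treat a 0 return as "limit hit".
 * For instance, a fork path might do something like
 * chgproccnt(uip, 1, lim_cur(td, RLIMIT_NPROC)) and back it out with
 * chgproccnt(uip, -1, 0) on failure; decrements and a max of 0 always
 * succeed.
 */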
1538  
1539  /*
1540   * Change the count associated with the number of processes
1541   * a given user is using.  When 'max' is 0, don't enforce a limit.
1542   */
1543  int
1544  chgproccnt(struct uidinfo *uip, int diff, rlim_t max)
1545  {
1546  
1547  	return (chglimit(uip, &uip->ui_proccnt, diff, max, "proccnt"));
1548  }
1549  
1550  /*
1551   * Change the total socket buffer size a user has used.
1552   */
1553  int
1554  chgsbsize(struct uidinfo *uip, u_int *hiwat, u_int to, rlim_t max)
1555  {
1556  	int diff, rv;
1557  
1558  	diff = to - *hiwat;
1559  	if (diff > 0 && max == 0) {
1560  		rv = 0;
1561  	} else {
1562  		rv = chglimit(uip, &uip->ui_sbsize, diff, max, "sbsize");
1563  		if (rv != 0)
1564  			*hiwat = to;
1565  	}
1566  	return (rv);
1567  }
1568  
1569  /*
1570   * Change the count associated with the number of pseudo-terminals
1571   * a given user is using.  When 'max' is 0, don't enforce a limit.
1572   */
1573  int
1574  chgptscnt(struct uidinfo *uip, int diff, rlim_t max)
1575  {
1576  
1577  	return (chglimit(uip, &uip->ui_ptscnt, diff, max, "ptscnt"));
1578  }
1579  
1580  int
1581  chgkqcnt(struct uidinfo *uip, int diff, rlim_t max)
1582  {
1583  
1584  	return (chglimit(uip, &uip->ui_kqcnt, diff, max, "kqcnt"));
1585  }
1586  
1587  int
1588  chgumtxcnt(struct uidinfo *uip, int diff, rlim_t max)
1589  {
1590  
1591  	return (chglimit(uip, &uip->ui_umtxcnt, diff, max, "umtxcnt"));
1592  }
1593