xref: /illumos-gate/usr/src/uts/common/os/pid.c (revision 82b7b979be13234985096762083f39b829dbd03f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright 2019 Joyent, Inc.
25  * Copyright 2025 Oxide Computer Company
26  */
27 
28 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
29 /*	  All Rights Reserved	*/
30 
31 #include <sys/types.h>
32 #include <sys/param.h>
33 #include <sys/sysmacros.h>
34 #include <sys/proc.h>
35 #include <sys/kmem.h>
36 #include <sys/tuneable.h>
37 #include <sys/var.h>
38 #include <sys/cred.h>
39 #include <sys/systm.h>
40 #include <sys/prsystm.h>
41 #include <sys/vnode.h>
42 #include <sys/session.h>
43 #include <sys/cpuvar.h>
44 #include <sys/cmn_err.h>
45 #include <sys/bitmap.h>
46 #include <sys/debug.h>
47 #include <c2/audit.h>
48 #include <sys/project.h>
49 #include <sys/task.h>
50 #include <sys/zone.h>
51 
/*
 * Directory entries for /proc.  Each procdir slot is either in use, with
 * pe_proc pointing at the slot's process, or on the procentfree list,
 * threaded through pe_next.  The two states share storage; pid_entry()
 * relies on this aliasing to tell them apart.
 */
union procent {
	proc_t *pe_proc;
	union procent *pe_next;
};
57 
58 struct pid pid0 = {
59 	0,		/* pid_prinactive */
60 	1,		/* pid_pgorphaned */
61 	0,		/* pid_padding	*/
62 	0,		/* pid_prslot	*/
63 	0,		/* pid_id	*/
64 	NULL,		/* pid_pglink	*/
65 	NULL,		/* pid_pgtail	*/
66 	NULL,		/* pid_link	*/
67 	3		/* pid_ref	*/
68 };
69 
70 static int pid_hashlen = 4;	/* desired average hash chain length */
71 static int pid_hashsz;		/* number of buckets in the hash table */
72 
73 #define	HASHPID(pid)	(pidhash[((pid)&(pid_hashsz-1))])
74 
75 extern uint_t nproc;
76 extern struct kmem_cache *process_cache;
77 static void	upcount_init(void);
78 
79 kmutex_t	pidlock;	/* global process lock */
80 kmutex_t	pr_pidlock;	/* /proc global process lock */
81 kcondvar_t	*pr_pid_cv;	/* for /proc, one per process slot */
82 struct plock	*proc_lock;	/* persistent array of p_lock's */
83 
84 /*
85  * See the comment above pid_getlockslot() for a detailed explanation of this
86  * constant.  Note that a PLOCK_SHIFT of 3 implies 64-byte coherence
87  * granularity; if the coherence granularity is ever changed, this constant
88  * should be modified to reflect the change to minimize proc_lock false
89  * sharing (correctness, however, is guaranteed regardless of the coherence
90  * granularity).
91  */
92 #define	PLOCK_SHIFT	3
93 
94 static kmutex_t	pidlinklock;
95 static struct pid **pidhash;
96 static pid_t minpid;
97 static pid_t mpid = FAMOUS_PIDS;	/* one more than the last famous pid */
98 static union procent *procdir;
99 static union procent *procentfree;
100 
101 static struct pid *
pid_lookup(pid_t pid)102 pid_lookup(pid_t pid)
103 {
104 	struct pid *pidp;
105 
106 	ASSERT(MUTEX_HELD(&pidlinklock));
107 
108 	for (pidp = HASHPID(pid); pidp; pidp = pidp->pid_link) {
109 		if (pidp->pid_id == pid) {
110 			ASSERT(pidp->pid_ref > 0);
111 			break;
112 		}
113 	}
114 	return (pidp);
115 }
116 
117 void
pid_setmin(void)118 pid_setmin(void)
119 {
120 	if (jump_pid && jump_pid > mpid)
121 		minpid = mpid = jump_pid;
122 	else
123 		minpid = mpid;
124 }
125 
126 /*
127  * When prslots are simply used as an index to determine a process' p_lock,
128  * adjacent prslots share adjacent p_locks.  On machines where the size
129  * of a mutex is smaller than that of a cache line (which, as of this writing,
130  * is true for all machines on which Solaris runs), this can potentially
131  * induce false sharing.  The standard solution for false sharing is to pad
132  * out one's data structures (in this case, struct plock).  However,
133  * given the size and (generally) sparse use of the proc_lock array, this
134  * is suboptimal.  We therefore stride through the proc_lock array with
135  * a stride of PLOCK_SHIFT.  PLOCK_SHIFT should be defined as:
136  *
137  *   log_2 (coherence_granularity / sizeof (kmutex_t))
138  *
139  * Under this scheme, false sharing is still possible -- but only when
140  * the number of active processes is very large.  Note that the one-to-one
141  * mapping between prslots and lockslots is maintained.
142  */
143 static int
pid_getlockslot(int prslot)144 pid_getlockslot(int prslot)
145 {
146 	int even = (v.v_proc >> PLOCK_SHIFT) << PLOCK_SHIFT;
147 	int perlap = even >> PLOCK_SHIFT;
148 
149 	if (prslot >= even)
150 		return (prslot);
151 
152 	return (((prslot % perlap) << PLOCK_SHIFT) + (prslot / perlap));
153 }
154 
155 /*
156  * This function allocates a pid structure, a free pid, and optionally a
157  * slot in the proc table for it.
158  *
159  * pid_allocate() returns the new pid on success, -1 on failure.
160  */
161 pid_t
pid_allocate(proc_t * prp,pid_t pid,int flags)162 pid_allocate(proc_t *prp, pid_t pid, int flags)
163 {
164 	struct pid *pidp;
165 	union procent *pep;
166 	pid_t newpid, startpid;
167 
168 	pidp = kmem_zalloc(sizeof (struct pid), KM_SLEEP);
169 
170 	mutex_enter(&pidlinklock);
171 	pep = procentfree;
172 	if ((flags & PID_ALLOC_PROC) && pep == NULL) {
173 		/*
174 		 * ran out of /proc directory entries
175 		 */
176 		goto failed;
177 	}
178 
179 	if (pid != 0) {
180 		VERIFY(minpid == 0);
181 		VERIFY3P(pid, <, mpid);
182 		VERIFY3P(pid_lookup(pid), ==, NULL);
183 		newpid = pid;
184 	} else {
185 		/*
186 		 * Allocate a pid
187 		 */
188 		ASSERT(minpid <= mpid && mpid < maxpid);
189 
190 		startpid = mpid;
191 		for (;;) {
192 			newpid = mpid;
193 			if (++mpid == maxpid)
194 				mpid = minpid;
195 
196 			if (pid_lookup(newpid) == NULL)
197 				break;
198 
199 			if (mpid == startpid)
200 				goto failed;
201 		}
202 	}
203 
204 	/*
205 	 * Put pid into the pid hash table.
206 	 */
207 	pidp->pid_link = HASHPID(newpid);
208 	HASHPID(newpid) = pidp;
209 	pidp->pid_ref = 1;
210 	pidp->pid_id = newpid;
211 
212 	if (flags & PID_ALLOC_PROC) {
213 		procentfree = pep->pe_next;
214 		pidp->pid_prslot = pep - procdir;
215 		pep->pe_proc = prp;
216 		prp->p_pidp = pidp;
217 		prp->p_lockp = &proc_lock[pid_getlockslot(pidp->pid_prslot)];
218 	} else {
219 		pidp->pid_prslot = 0;
220 	}
221 
222 	mutex_exit(&pidlinklock);
223 
224 	return (newpid);
225 
226 failed:
227 	mutex_exit(&pidlinklock);
228 	kmem_free(pidp, sizeof (struct pid));
229 	return (-1);
230 }
231 
232 /*
233  * decrement the reference count for pid
234  */
235 int
pid_rele(struct pid * pidp)236 pid_rele(struct pid *pidp)
237 {
238 	struct pid **pidpp;
239 
240 	mutex_enter(&pidlinklock);
241 	ASSERT(pidp != &pid0);
242 
243 	pidpp = &HASHPID(pidp->pid_id);
244 	for (;;) {
245 		ASSERT(*pidpp != NULL);
246 		if (*pidpp == pidp)
247 			break;
248 		pidpp = &(*pidpp)->pid_link;
249 	}
250 
251 	*pidpp = pidp->pid_link;
252 	mutex_exit(&pidlinklock);
253 
254 	kmem_free(pidp, sizeof (*pidp));
255 	return (0);
256 }
257 
258 void
proc_entry_free(struct pid * pidp)259 proc_entry_free(struct pid *pidp)
260 {
261 	mutex_enter(&pidlinklock);
262 	pidp->pid_prinactive = 1;
263 	procdir[pidp->pid_prslot].pe_next = procentfree;
264 	procentfree = &procdir[pidp->pid_prslot];
265 	mutex_exit(&pidlinklock);
266 }
267 
/*
 * Final teardown of an exiting process: leave its process group and
 * session, release its /proc slot and pid, unlink it from practive, free
 * the proc structure, and decrement the per-task/project/zone process
 * counts.  Called with pidlock held.
 *
 * The original task needs to be passed in since the process has already been
 * detached from the task at this point in time.
 */
void
pid_exit(proc_t *prp, struct task *tk)
{
	struct pid *pidp;
	zone_t	*zone = prp->p_zone;

	ASSERT(MUTEX_HELD(&pidlock));

	/*
	 * Exit process group.  If it is NULL, it's because fork failed
	 * before calling pgjoin().
	 */
	ASSERT(prp->p_pgidp != NULL || prp->p_stat == SIDL);
	if (prp->p_pgidp != NULL)
		pgexit(prp);

	sess_rele(prp->p_sessp, B_TRUE);

	pidp = prp->p_pidp;

	/* Give the /proc directory slot back to the free list. */
	proc_entry_free(pidp);

	if (audit_active)
		audit_pfree(prp);

	/* Unlink prp from the doubly-linked practive list. */
	if (practive == prp) {
		practive = prp->p_next;
	}

	if (prp->p_next) {
		prp->p_next->p_prev = prp->p_prev;
	}
	if (prp->p_prev) {
		prp->p_prev->p_next = prp->p_next;
	}

	/* Drop the process' own hold on its pid structure. */
	PID_RELE(pidp);

	mutex_destroy(&prp->p_crlock);
	kmem_cache_free(process_cache, prp);
	nproc--;

	/*
	 * Decrement the process counts of the original task, project and zone.
	 */
	mutex_enter(&zone->zone_nlwps_lock);
	tk->tk_nprocs--;
	tk->tk_proj->kpj_nprocs--;
	zone->zone_nprocs--;
	mutex_exit(&zone->zone_nlwps_lock);
}
323 
324 /*
325  * Find a process visible from the specified zone given its process ID.
326  */
327 proc_t *
prfind_zone(pid_t pid,zoneid_t zoneid)328 prfind_zone(pid_t pid, zoneid_t zoneid)
329 {
330 	struct pid *pidp;
331 	proc_t *p;
332 
333 	ASSERT(MUTEX_HELD(&pidlock));
334 
335 	mutex_enter(&pidlinklock);
336 	pidp = pid_lookup(pid);
337 	mutex_exit(&pidlinklock);
338 	if (pidp != NULL && pidp->pid_prinactive == 0) {
339 		p = procdir[pidp->pid_prslot].pe_proc;
340 		if (zoneid == ALL_ZONES || p->p_zone->zone_id == zoneid)
341 			return (p);
342 	}
343 	return (NULL);
344 }
345 
346 /*
347  * Find a process given its process ID.  This obeys zone restrictions,
348  * so if the caller is in a non-global zone it won't find processes
349  * associated with other zones.  Use prfind_zone(pid, ALL_ZONES) to
350  * bypass this restriction.
351  */
352 proc_t *
prfind(pid_t pid)353 prfind(pid_t pid)
354 {
355 	zoneid_t zoneid;
356 
357 	if (INGLOBALZONE(curproc))
358 		zoneid = ALL_ZONES;
359 	else
360 		zoneid = getzoneid();
361 	return (prfind_zone(pid, zoneid));
362 }
363 
364 proc_t *
pgfind_zone(pid_t pgid,zoneid_t zoneid)365 pgfind_zone(pid_t pgid, zoneid_t zoneid)
366 {
367 	struct pid *pidp;
368 
369 	ASSERT(MUTEX_HELD(&pidlock));
370 
371 	mutex_enter(&pidlinklock);
372 	pidp = pid_lookup(pgid);
373 	mutex_exit(&pidlinklock);
374 	if (pidp != NULL) {
375 		proc_t *p = pidp->pid_pglink;
376 
377 		if (zoneid == ALL_ZONES || pgid == 0 || p == NULL ||
378 		    p->p_zone->zone_id == zoneid)
379 			return (p);
380 	}
381 	return (NULL);
382 }
383 
384 /*
385  * return the head of the list of processes whose process group ID is 'pgid',
386  * or NULL, if no such process group
387  */
388 proc_t *
pgfind(pid_t pgid)389 pgfind(pid_t pgid)
390 {
391 	zoneid_t zoneid;
392 
393 	if (INGLOBALZONE(curproc))
394 		zoneid = ALL_ZONES;
395 	else
396 		zoneid = getzoneid();
397 	return (pgfind_zone(pgid, zoneid));
398 }
399 
400 /*
401  * Sets P_PR_LOCK on a non-system process.  Process must be fully created
402  * and not exiting to succeed.
403  *
404  * Returns 0 on success.
405  * Returns 1 if P_PR_LOCK is set.
406  * Returns -1 if proc is in invalid state.
407  */
408 int
sprtrylock_proc(proc_t * p)409 sprtrylock_proc(proc_t *p)
410 {
411 	ASSERT(MUTEX_HELD(&p->p_lock));
412 
413 	/* skip system and incomplete processes */
414 	if (p->p_stat == SIDL || p->p_stat == SZOMB ||
415 	    (p->p_flag & (SSYS | SEXITING | SEXITLWPS))) {
416 		return (-1);
417 	}
418 
419 	if (p->p_proc_flag & P_PR_LOCK)
420 		return (1);
421 
422 	p->p_proc_flag |= P_PR_LOCK;
423 
424 	return (0);
425 }
426 
427 /*
428  * Wait for P_PR_LOCK to become clear.  Returns with p_lock dropped,
429  * and the proc pointer no longer valid, as the proc may have exited.
430  */
431 void
sprwaitlock_proc(proc_t * p)432 sprwaitlock_proc(proc_t *p)
433 {
434 	kmutex_t *mp;
435 
436 	ASSERT(MUTEX_HELD(&p->p_lock));
437 	ASSERT(p->p_proc_flag & P_PR_LOCK);
438 
439 	/*
440 	 * p_lock is persistent, but p itself is not -- it could
441 	 * vanish during cv_wait().  Load p->p_lock now so we can
442 	 * drop it after cv_wait() without referencing p.
443 	 */
444 	mp = &p->p_lock;
445 	cv_wait(&pr_pid_cv[p->p_slot], mp);
446 	mutex_exit(mp);
447 }
448 
449 /*
450  * If pid exists, find its proc, acquire its p_lock and mark it P_PR_LOCK.
451  * Returns the proc pointer on success, NULL on failure.  sprlock() is
452  * really just a stripped-down version of pr_p_lock() to allow practive
453  * walkers like dofusers() and dumpsys() to synchronize with /proc.
454  */
455 proc_t *
sprlock_zone(pid_t pid,zoneid_t zoneid)456 sprlock_zone(pid_t pid, zoneid_t zoneid)
457 {
458 	proc_t *p;
459 	int ret;
460 
461 	for (;;) {
462 		mutex_enter(&pidlock);
463 		if ((p = prfind_zone(pid, zoneid)) == NULL) {
464 			mutex_exit(&pidlock);
465 			return (NULL);
466 		}
467 		mutex_enter(&p->p_lock);
468 		mutex_exit(&pidlock);
469 
470 		if (panicstr)
471 			return (p);
472 
473 		ret = sprtrylock_proc(p);
474 		if (ret == -1) {
475 			mutex_exit(&p->p_lock);
476 			return (NULL);
477 		} else if (ret == 0) {
478 			break;
479 		}
480 		sprwaitlock_proc(p);
481 	}
482 	return (p);
483 }
484 
485 proc_t *
sprlock(pid_t pid)486 sprlock(pid_t pid)
487 {
488 	zoneid_t zoneid;
489 
490 	if (INGLOBALZONE(curproc))
491 		zoneid = ALL_ZONES;
492 	else
493 		zoneid = getzoneid();
494 	return (sprlock_zone(pid, zoneid));
495 }
496 
497 void
sprlock_proc(proc_t * p)498 sprlock_proc(proc_t *p)
499 {
500 	ASSERT(MUTEX_HELD(&p->p_lock));
501 
502 	while (p->p_proc_flag & P_PR_LOCK) {
503 		cv_wait(&pr_pid_cv[p->p_slot], &p->p_lock);
504 	}
505 
506 	p->p_proc_flag |= P_PR_LOCK;
507 }
508 
/*
 * Release the /proc hold on 'p': wake any waiter blocked in
 * sprlock_proc()/sprwaitlock_proc(), clear P_PR_LOCK, and drop p_lock.
 * If a SIGKILL was posted while the process was held, restart its
 * stopped lwps first so the kill is actually witnessed.
 */
void
sprunlock(proc_t *p)
{
	/* During panic, skip all bookkeeping and just drop the lock. */
	if (panicstr) {
		mutex_exit(&p->p_lock);
		return;
	}

	ASSERT(p->p_proc_flag & P_PR_LOCK);
	ASSERT(MUTEX_HELD(&p->p_lock));

	if ((p->p_flag & SKILLED) && p->p_tlist != NULL) {
		/*
		 * While P_PR_LOCK was set, this process received a SIGKILL.
		 * The signal was posted in p->p_sig and p->p_extsig, but we
		 * skipped resuming stopped threads because P_PR_LOCK prevented
		 * the process' shape from changing.  If all threads were
		 * stopped by SIGSTOP or /proc PCSTOP, none will run to witness
		 * the SIGKILL and this process will end up stuck.
		 *
		 * While only one thread needs to be runnable to witness the
		 * SIGKILL, set as many running as we can in case there are
		 * mixed scheduler priorities.  It would otherwise be
		 * unfortunate if we set a single low-priority thread runnable
		 * in an otherwise-stopped process and did not promptly notice
		 * the SIGKILL.
		 *
		 * * TS_XSTART undoes the stopping effect of SIGSTOP.
		 * * TS_PSTART undoes the stopping effect of /proc PCSTOP.
		 *
		 * Notably, other TS_* bits are inappropriate here:
		 * * Do not set TS_CSTART or TS_UNPAUSE; lwps may be stopped by
		 *   PR_SUSPEND for many reasons. Some cases, like holdlwps(),
		 *   will resume the process before the corresponding syscall
		 *   returns. Other cases, like dumping core, the suspender
		 *   will tear down the lwps as it completes.
		 * * Do not set TS_RESUME out of caution; not sure about the
		 *   consequences of a process going away during CPR resume and
		 *   CPR should set the process running eventually.
		 * * Do not set TS_CREATE because lwp creation expects threads
		 *   to remain paused until lwp completes.
		 */
		runlwps(p, TS_XSTART | TS_PSTART);
	}

	cv_signal(&pr_pid_cv[p->p_slot]);
	p->p_proc_flag &= ~P_PR_LOCK;
	mutex_exit(&p->p_lock);
}
558 
559 void
pid_init(void)560 pid_init(void)
561 {
562 	int i;
563 
564 	pid_hashsz = 1 << highbit(v.v_proc / pid_hashlen);
565 
566 	pidhash = kmem_zalloc(sizeof (struct pid *) * pid_hashsz, KM_SLEEP);
567 	procdir = kmem_alloc(sizeof (union procent) * v.v_proc, KM_SLEEP);
568 	pr_pid_cv = kmem_zalloc(sizeof (kcondvar_t) * v.v_proc, KM_SLEEP);
569 	proc_lock = kmem_zalloc(sizeof (struct plock) * v.v_proc, KM_SLEEP);
570 
571 	nproc = 1;
572 	practive = proc_sched;
573 	proc_sched->p_next = NULL;
574 	procdir[0].pe_proc = proc_sched;
575 
576 	procentfree = &procdir[1];
577 	for (i = 1; i < v.v_proc - 1; i++)
578 		procdir[i].pe_next = &procdir[i+1];
579 	procdir[i].pe_next = NULL;
580 
581 	HASHPID(0) = &pid0;
582 
583 	upcount_init();
584 }
585 
/*
 * Return the proc_t occupying /proc directory slot 'slot', or NULL if the
 * slot is free or the process in it is still being created (SIDL).
 */
proc_t *
pid_entry(int slot)
{
	union procent *pep;
	proc_t *prp;

	ASSERT(MUTEX_HELD(&pidlock));
	ASSERT(slot >= 0 && slot < v.v_proc);

	/*
	 * Free slots are chained through procdir itself, so a pe_next that
	 * points inside the array means the slot is on the free list.  The
	 * last free entry has pe_next == NULL, which fails this range test,
	 * but then the union's pe_proc alias is also NULL and the NULL is
	 * returned below.
	 */
	pep = procdir[slot].pe_next;
	if (pep >= procdir && pep < &procdir[v.v_proc])
		return (NULL);
	prp = procdir[slot].pe_proc;
	if (prp != 0 && prp->p_stat == SIDL)
		return (NULL);
	return (prp);
}
603 
604 /*
605  * Send the specified signal to all processes whose process group ID is
606  * equal to 'pgid'
607  */
608 
609 void
signal(pid_t pgid,int sig)610 signal(pid_t pgid, int sig)
611 {
612 	struct pid *pidp;
613 	proc_t *prp;
614 
615 	mutex_enter(&pidlock);
616 	mutex_enter(&pidlinklock);
617 	if (pgid == 0 || (pidp = pid_lookup(pgid)) == NULL) {
618 		mutex_exit(&pidlinklock);
619 		mutex_exit(&pidlock);
620 		return;
621 	}
622 	mutex_exit(&pidlinklock);
623 	for (prp = pidp->pid_pglink; prp; prp = prp->p_pglink) {
624 		mutex_enter(&prp->p_lock);
625 		sigtoproc(prp, NULL, sig);
626 		mutex_exit(&prp->p_lock);
627 	}
628 	mutex_exit(&pidlock);
629 }
630 
631 /*
632  * Send the specified signal to the specified process
633  */
634 
635 void
prsignal(struct pid * pidp,int sig)636 prsignal(struct pid *pidp, int sig)
637 {
638 	if (!(pidp->pid_prinactive))
639 		psignal(procdir[pidp->pid_prslot].pe_proc, sig);
640 }
641 
642 #include <sys/sunddi.h>
643 
644 /*
645  * DDI/DKI interfaces for drivers to send signals to processes
646  */
647 
648 /*
649  * obtain an opaque reference to a process for signaling
650  */
651 void *
proc_ref(void)652 proc_ref(void)
653 {
654 	struct pid *pidp;
655 
656 	mutex_enter(&pidlock);
657 	pidp = curproc->p_pidp;
658 	PID_HOLD(pidp);
659 	mutex_exit(&pidlock);
660 
661 	return (pidp);
662 }
663 
664 /*
665  * release a reference to a process
666  * - a process can exit even if a driver has a reference to it
667  * - one proc_unref for every proc_ref
668  */
669 void
proc_unref(void * pref)670 proc_unref(void *pref)
671 {
672 	mutex_enter(&pidlock);
673 	PID_RELE((struct pid *)pref);
674 	mutex_exit(&pidlock);
675 }
676 
677 /*
678  * send a signal to a process
679  *
680  * - send the process the signal
681  * - if the process went away, return a -1
682  * - if the process is still there return 0
683  */
684 int
proc_signal(void * pref,int sig)685 proc_signal(void *pref, int sig)
686 {
687 	struct pid *pidp = pref;
688 
689 	prsignal(pidp, sig);
690 	return (pidp->pid_prinactive ? -1 : 0);
691 }
692 
693 
694 static struct upcount	**upc_hash;	/* a boot time allocated array */
695 static ulong_t		upc_hashmask;
696 #define	UPC_HASH(x, y)	((ulong_t)(x ^ y) & upc_hashmask)
697 
698 /*
699  * Get us off the ground.  Called once at boot.
700  */
701 void
upcount_init(void)702 upcount_init(void)
703 {
704 	ulong_t	upc_hashsize;
705 
706 	/*
707 	 * An entry per MB of memory is our current guess
708 	 */
709 	/*
710 	 * 2^20 is a meg, so shifting right by 20 - PAGESHIFT
711 	 * converts pages to megs (without overflowing a u_int
712 	 * if you have more than 4G of memory, like ptob(physmem)/1M
713 	 * would).
714 	 */
715 	upc_hashsize = (1 << highbit(physmem >> (20 - PAGESHIFT)));
716 	upc_hashmask = upc_hashsize - 1;
717 	upc_hash = kmem_zalloc(upc_hashsize * sizeof (struct upcount *),
718 	    KM_SLEEP);
719 }
720 
/*
 * Increment the number of processes associated with a given uid and zoneid.
 * Called with pidlock held; may temporarily drop it to sleep for memory,
 * in which case the hash chain is re-scanned before anything is changed.
 */
void
upcount_inc(uid_t uid, zoneid_t zoneid)
{
	struct upcount	**upc, **hupc;
	struct upcount	*new;

	ASSERT(MUTEX_HELD(&pidlock));
	new = NULL;
	hupc = &upc_hash[UPC_HASH(uid, zoneid)];
top:
	upc = hupc;
	while ((*upc) != NULL) {
		if ((*upc)->up_uid == uid && (*upc)->up_zoneid == zoneid) {
			(*upc)->up_count++;
			if (new) {
				/*
				 * did not need `new' after all.
				 */
				kmem_free(new, sizeof (*new));
			}
			return;
		}
		upc = &(*upc)->up_next;
	}

	/*
	 * There is no entry for this <uid,zoneid> pair.
	 * Allocate one.  If we have to drop pidlock, check
	 * again.
	 */
	if (new == NULL) {
		/* Try without sleeping first so pidlock can stay held. */
		new = (struct upcount *)kmem_alloc(sizeof (*new), KM_NOSLEEP);
		if (new == NULL) {
			/*
			 * Sleep for the memory with pidlock dropped.  The
			 * chain may have changed while we slept, so restart
			 * the scan from the top.
			 */
			mutex_exit(&pidlock);
			new = (struct upcount *)kmem_alloc(sizeof (*new),
			    KM_SLEEP);
			mutex_enter(&pidlock);
			goto top;
		}
	}


	/*
	 * On the assumption that a new user is going to do some
	 * more forks, put the new upcount structure on the front.
	 */
	upc = hupc;

	new->up_uid = uid;
	new->up_zoneid = zoneid;
	new->up_count = 1;
	new->up_next = *upc;

	*upc = new;
}
779 
780 /*
781  * Decrement the number of processes a given uid and zoneid has.
782  */
783 void
upcount_dec(uid_t uid,zoneid_t zoneid)784 upcount_dec(uid_t uid, zoneid_t zoneid)
785 {
786 	struct	upcount **upc;
787 	struct	upcount *done;
788 
789 	ASSERT(MUTEX_HELD(&pidlock));
790 
791 	upc = &upc_hash[UPC_HASH(uid, zoneid)];
792 	while ((*upc) != NULL) {
793 		if ((*upc)->up_uid == uid && (*upc)->up_zoneid == zoneid) {
794 			(*upc)->up_count--;
795 			if ((*upc)->up_count == 0) {
796 				done = *upc;
797 				*upc = (*upc)->up_next;
798 				kmem_free(done, sizeof (*done));
799 			}
800 			return;
801 		}
802 		upc = &(*upc)->up_next;
803 	}
804 	cmn_err(CE_PANIC, "decr_upcount-off the end");
805 }
806 
807 /*
808  * Returns the number of processes a uid has.
809  * Non-existent uid's are assumed to have no processes.
810  */
811 int
upcount_get(uid_t uid,zoneid_t zoneid)812 upcount_get(uid_t uid, zoneid_t zoneid)
813 {
814 	struct	upcount *upc;
815 
816 	ASSERT(MUTEX_HELD(&pidlock));
817 
818 	upc = upc_hash[UPC_HASH(uid, zoneid)];
819 	while (upc != NULL) {
820 		if (upc->up_uid == uid && upc->up_zoneid == zoneid) {
821 			return (upc->up_count);
822 		}
823 		upc = upc->up_next;
824 	}
825 	return (0);
826 }
827