xref: /freebsd/sys/kern/kern_proc.c (revision a6578a04e440f79f3b913660221caa9cde3e722c)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1982, 1986, 1989, 1991, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  *
31  *	@(#)kern_proc.c	8.7 (Berkeley) 2/14/95
32  */
33 
34 #include <sys/cdefs.h>
35 __FBSDID("$FreeBSD$");
36 
37 #include "opt_ddb.h"
38 #include "opt_ktrace.h"
39 #include "opt_kstack_pages.h"
40 #include "opt_stack.h"
41 
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/elf.h>
45 #include <sys/eventhandler.h>
46 #include <sys/exec.h>
47 #include <sys/jail.h>
48 #include <sys/kernel.h>
49 #include <sys/limits.h>
50 #include <sys/lock.h>
51 #include <sys/loginclass.h>
52 #include <sys/malloc.h>
53 #include <sys/mman.h>
54 #include <sys/mount.h>
55 #include <sys/mutex.h>
56 #include <sys/proc.h>
57 #include <sys/ptrace.h>
58 #include <sys/refcount.h>
59 #include <sys/resourcevar.h>
60 #include <sys/rwlock.h>
61 #include <sys/sbuf.h>
62 #include <sys/sysent.h>
63 #include <sys/sched.h>
64 #include <sys/smp.h>
65 #include <sys/stack.h>
66 #include <sys/stat.h>
67 #include <sys/sysctl.h>
68 #include <sys/filedesc.h>
69 #include <sys/tty.h>
70 #include <sys/signalvar.h>
71 #include <sys/sdt.h>
72 #include <sys/sx.h>
73 #include <sys/user.h>
74 #include <sys/vnode.h>
75 #include <sys/wait.h>
76 
77 #ifdef DDB
78 #include <ddb/ddb.h>
79 #endif
80 
81 #include <vm/vm.h>
82 #include <vm/vm_param.h>
83 #include <vm/vm_extern.h>
84 #include <vm/pmap.h>
85 #include <vm/vm_map.h>
86 #include <vm/vm_object.h>
87 #include <vm/vm_page.h>
88 #include <vm/uma.h>
89 
90 #ifdef COMPAT_FREEBSD32
91 #include <compat/freebsd32/freebsd32.h>
92 #include <compat/freebsd32/freebsd32_util.h>
93 #endif
94 
95 SDT_PROVIDER_DEFINE(proc);
96 
97 MALLOC_DEFINE(M_PGRP, "pgrp", "process group header");
98 MALLOC_DEFINE(M_SESSION, "session", "session header");
99 static MALLOC_DEFINE(M_PROC, "proc", "Proc structures");
100 MALLOC_DEFINE(M_SUBPROC, "subproc", "Proc sub-structures");
101 
102 static void doenterpgrp(struct proc *, struct pgrp *);
103 static void orphanpg(struct pgrp *pg);
104 static void fill_kinfo_aggregate(struct proc *p, struct kinfo_proc *kp);
105 static void fill_kinfo_proc_only(struct proc *p, struct kinfo_proc *kp);
106 static void fill_kinfo_thread(struct thread *td, struct kinfo_proc *kp,
107     int preferthread);
108 static void pgadjustjobc(struct pgrp *pgrp, int entering);
109 static void pgdelete(struct pgrp *);
110 static int proc_ctor(void *mem, int size, void *arg, int flags);
111 static void proc_dtor(void *mem, int size, void *arg);
112 static int proc_init(void *mem, int size, int flags);
113 static void proc_fini(void *mem, int size);
114 static void pargs_free(struct pargs *pa);
115 
116 /*
117  * Other process lists
118  */
119 struct pidhashhead *pidhashtbl;
120 struct sx *pidhashtbl_lock;
121 u_long pidhash;
122 u_long pidhashlock;
123 struct pgrphashhead *pgrphashtbl;
124 u_long pgrphash;
125 struct proclist allproc;
126 struct proclist zombproc;
127 struct sx __exclusive_cache_line allproc_lock;
128 struct sx __exclusive_cache_line zombproc_lock;
129 struct sx __exclusive_cache_line proctree_lock;
130 struct mtx __exclusive_cache_line ppeers_lock;
131 uma_zone_t proc_zone;
132 
133 /*
134  * The offset of various fields in struct proc and struct thread.
135  * These are used by kernel debuggers to enumerate kernel threads and
136  * processes.
137  */
138 const int proc_off_p_pid = offsetof(struct proc, p_pid);
139 const int proc_off_p_comm = offsetof(struct proc, p_comm);
140 const int proc_off_p_list = offsetof(struct proc, p_list);
141 const int proc_off_p_threads = offsetof(struct proc, p_threads);
142 const int thread_off_td_tid = offsetof(struct thread, td_tid);
143 const int thread_off_td_name = offsetof(struct thread, td_name);
144 const int thread_off_td_oncpu = offsetof(struct thread, td_oncpu);
145 const int thread_off_td_pcb = offsetof(struct thread, td_pcb);
146 const int thread_off_td_plist = offsetof(struct thread, td_plist);
147 
148 EVENTHANDLER_LIST_DEFINE(process_ctor);
149 EVENTHANDLER_LIST_DEFINE(process_dtor);
150 EVENTHANDLER_LIST_DEFINE(process_init);
151 EVENTHANDLER_LIST_DEFINE(process_fini);
152 EVENTHANDLER_LIST_DEFINE(process_exit);
153 EVENTHANDLER_LIST_DEFINE(process_fork);
154 EVENTHANDLER_LIST_DEFINE(process_exec);
155 
156 EVENTHANDLER_LIST_DECLARE(thread_ctor);
157 EVENTHANDLER_LIST_DECLARE(thread_dtor);
158 
159 int kstack_pages = KSTACK_PAGES;
160 SYSCTL_INT(_kern, OID_AUTO, kstack_pages, CTLFLAG_RD, &kstack_pages, 0,
161     "Kernel stack size in pages");
162 static int vmmap_skip_res_cnt = 0;
163 SYSCTL_INT(_kern, OID_AUTO, proc_vmmap_skip_resident_count, CTLFLAG_RW,
164     &vmmap_skip_res_cnt, 0,
165     "Skip calculation of the pages resident count in kern.proc.vmmap");
166 
167 CTASSERT(sizeof(struct kinfo_proc) == KINFO_PROC_SIZE);
168 #ifdef COMPAT_FREEBSD32
169 CTASSERT(sizeof(struct kinfo_proc32) == KINFO_PROC32_SIZE);
170 #endif
171 
172 /*
173  * Initialize global process hashing structures.
174  */
175 void
176 procinit(void)
177 {
178 	u_long i;
179 
180 	sx_init(&allproc_lock, "allproc");
181 	sx_init(&zombproc_lock, "zombproc");
182 	sx_init(&proctree_lock, "proctree");
183 	mtx_init(&ppeers_lock, "p_peers", NULL, MTX_DEF);
184 	LIST_INIT(&allproc);
185 	LIST_INIT(&zombproc);
186 	pidhashtbl = hashinit(maxproc / 4, M_PROC, &pidhash);
187 	pidhashlock = (pidhash + 1) / 64;
188 	if (pidhashlock > 0)
189 		pidhashlock--;
190 	pidhashtbl_lock = malloc(sizeof(*pidhashtbl_lock) * (pidhashlock + 1),
191 	    M_PROC, M_WAITOK | M_ZERO);
192 	for (i = 0; i < pidhashlock + 1; i++)
193 		sx_init(&pidhashtbl_lock[i], "pidhash");
194 	pgrphashtbl = hashinit(maxproc / 4, M_PROC, &pgrphash);
195 	proc_zone = uma_zcreate("PROC", sched_sizeof_proc(),
196 	    proc_ctor, proc_dtor, proc_init, proc_fini,
197 	    UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
198 	uihashinit();
199 }
200 
201 /*
202  * Prepare a proc for use.
203  */
204 static int
205 proc_ctor(void *mem, int size, void *arg, int flags)
206 {
207 	struct proc *p;
208 	struct thread *td;
209 
210 	p = (struct proc *)mem;
211 	EVENTHANDLER_DIRECT_INVOKE(process_ctor, p);
212 	td = FIRST_THREAD_IN_PROC(p);
213 	if (td != NULL) {
214 		/* Make sure all thread constructors are executed */
215 		EVENTHANDLER_DIRECT_INVOKE(thread_ctor, td);
216 	}
217 	return (0);
218 }
219 
220 /*
221  * Reclaim a proc after use.
222  */
223 static void
224 proc_dtor(void *mem, int size, void *arg)
225 {
226 	struct proc *p;
227 	struct thread *td;
228 
229 	/* INVARIANTS checks go here */
230 	p = (struct proc *)mem;
231 	td = FIRST_THREAD_IN_PROC(p);
232 	if (td != NULL) {
233 #ifdef INVARIANTS
234 		KASSERT((p->p_numthreads == 1),
235 		    ("bad number of threads in exiting process"));
236 		KASSERT(STAILQ_EMPTY(&p->p_ktr), ("proc_dtor: non-empty p_ktr"));
237 #endif
238 		/* Free all OSD associated to this thread. */
239 		osd_thread_exit(td);
240 		td_softdep_cleanup(td);
241 		MPASS(td->td_su == NULL);
242 
243 		/* Make sure all thread destructors are executed */
244 		EVENTHANDLER_DIRECT_INVOKE(thread_dtor, td);
245 	}
246 	EVENTHANDLER_DIRECT_INVOKE(process_dtor, p);
247 	if (p->p_ksi != NULL)
248 		KASSERT(! KSI_ONQ(p->p_ksi), ("SIGCHLD queue"));
249 }
250 
251 /*
252  * Initialize type-stable parts of a proc (when newly created).
253  */
254 static int
255 proc_init(void *mem, int size, int flags)
256 {
257 	struct proc *p;
258 
259 	p = (struct proc *)mem;
260 	mtx_init(&p->p_mtx, "process lock", NULL, MTX_DEF | MTX_DUPOK | MTX_NEW);
261 	mtx_init(&p->p_slock, "process slock", NULL, MTX_SPIN | MTX_NEW);
262 	mtx_init(&p->p_statmtx, "pstatl", NULL, MTX_SPIN | MTX_NEW);
263 	mtx_init(&p->p_itimmtx, "pitiml", NULL, MTX_SPIN | MTX_NEW);
264 	mtx_init(&p->p_profmtx, "pprofl", NULL, MTX_SPIN | MTX_NEW);
265 	cv_init(&p->p_pwait, "ppwait");
266 	TAILQ_INIT(&p->p_threads);	     /* all threads in proc */
267 	EVENTHANDLER_DIRECT_INVOKE(process_init, p);
268 	p->p_stats = pstats_alloc();
269 	p->p_pgrp = NULL;
270 	return (0);
271 }
272 
/*
 * UMA fini hook.  The proc zone is created with UMA_ZONE_NOFREE, so
 * UMA is expected never to call this; releasing a struct proc would
 * break type stability.  Unless "notnow" is defined, reaching this
 * function panics.
 */
static void
proc_fini(void *mem, int size)
{
#ifdef notnow
	struct proc *p = mem;

	EVENTHANDLER_DIRECT_INVOKE(process_fini, p);
	pstats_free(p->p_stats);
	thread_free(FIRST_THREAD_IN_PROC(p));
	mtx_destroy(&p->p_mtx);
	if (p->p_ksi != NULL)
		ksiginfo_free(p->p_ksi);
#else
	panic("proc reclaimed");
#endif
}
294 
295 /*
296  * Is p an inferior of the current process?
297  */
298 int
299 inferior(struct proc *p)
300 {
301 
302 	sx_assert(&proctree_lock, SX_LOCKED);
303 	PROC_LOCK_ASSERT(p, MA_OWNED);
304 	for (; p != curproc; p = proc_realparent(p)) {
305 		if (p->p_pid == 0)
306 			return (0);
307 	}
308 	return (1);
309 }
310 
311 /*
312  * Locate a process by number.
313  *
314  * By not returning processes in the PRS_NEW state, we allow callers to avoid
315  * testing for that condition to avoid dereferencing p_ucred, et al.
316  */
317 static __always_inline struct proc *
318 _pfind(pid_t pid, bool zombie)
319 {
320 	struct proc *p;
321 
322 	p = curproc;
323 	if (p->p_pid == pid) {
324 		PROC_LOCK(p);
325 		return (p);
326 	}
327 	sx_slock(PIDHASHLOCK(pid));
328 	LIST_FOREACH(p, PIDHASH(pid), p_hash) {
329 		if (p->p_pid == pid) {
330 			PROC_LOCK(p);
331 			if (p->p_state == PRS_NEW ||
332 			    (zombie && p->p_state == PRS_ZOMBIE)) {
333 				PROC_UNLOCK(p);
334 				p = NULL;
335 			}
336 			break;
337 		}
338 	}
339 	sx_sunlock(PIDHASHLOCK(pid));
340 	return (p);
341 }
342 
343 struct proc *
344 pfind(pid_t pid)
345 {
346 
347 	return (_pfind(pid, false));
348 }
349 
350 /*
351  * Same as pfind but allow zombies.
352  */
353 struct proc *
354 pfind_any(pid_t pid)
355 {
356 
357 	return (_pfind(pid, true));
358 }
359 
360 static struct proc *
361 pfind_tid(pid_t tid)
362 {
363 	struct proc *p;
364 	struct thread *td;
365 
366 	sx_slock(&allproc_lock);
367 	FOREACH_PROC_IN_SYSTEM(p) {
368 		PROC_LOCK(p);
369 		if (p->p_state == PRS_NEW) {
370 			PROC_UNLOCK(p);
371 			continue;
372 		}
373 		FOREACH_THREAD_IN_PROC(p, td) {
374 			if (td->td_tid == tid)
375 				goto found;
376 		}
377 		PROC_UNLOCK(p);
378 	}
379 found:
380 	sx_sunlock(&allproc_lock);
381 	return (p);
382 }
383 
384 /*
385  * Locate a process group by number.
386  * The caller must hold proctree_lock.
387  */
388 struct pgrp *
389 pgfind(pid_t pgid)
390 {
391 	struct pgrp *pgrp;
392 
393 	sx_assert(&proctree_lock, SX_LOCKED);
394 
395 	LIST_FOREACH(pgrp, PGRPHASH(pgid), pg_hash) {
396 		if (pgrp->pg_id == pgid) {
397 			PGRP_LOCK(pgrp);
398 			return (pgrp);
399 		}
400 	}
401 	return (NULL);
402 }
403 
404 /*
405  * Locate process and do additional manipulations, depending on flags.
406  */
407 int
408 pget(pid_t pid, int flags, struct proc **pp)
409 {
410 	struct proc *p;
411 	int error;
412 
413 	p = curproc;
414 	if (p->p_pid == pid) {
415 		PROC_LOCK(p);
416 	} else {
417 		p = NULL;
418 		if (pid <= PID_MAX) {
419 			if ((flags & PGET_NOTWEXIT) == 0)
420 				p = pfind_any(pid);
421 			else
422 				p = pfind(pid);
423 		} else if ((flags & PGET_NOTID) == 0) {
424 			p = pfind_tid(pid);
425 		}
426 		if (p == NULL)
427 			return (ESRCH);
428 		if ((flags & PGET_CANSEE) != 0) {
429 			error = p_cansee(curthread, p);
430 			if (error != 0)
431 				goto errout;
432 		}
433 	}
434 	if ((flags & PGET_CANDEBUG) != 0) {
435 		error = p_candebug(curthread, p);
436 		if (error != 0)
437 			goto errout;
438 	}
439 	if ((flags & PGET_ISCURRENT) != 0 && curproc != p) {
440 		error = EPERM;
441 		goto errout;
442 	}
443 	if ((flags & PGET_NOTWEXIT) != 0 && (p->p_flag & P_WEXIT) != 0) {
444 		error = ESRCH;
445 		goto errout;
446 	}
447 	if ((flags & PGET_NOTINEXEC) != 0 && (p->p_flag & P_INEXEC) != 0) {
448 		/*
449 		 * XXXRW: Not clear ESRCH is the right error during proc
450 		 * execve().
451 		 */
452 		error = ESRCH;
453 		goto errout;
454 	}
455 	if ((flags & PGET_HOLD) != 0) {
456 		_PHOLD(p);
457 		PROC_UNLOCK(p);
458 	}
459 	*pp = p;
460 	return (0);
461 errout:
462 	PROC_UNLOCK(p);
463 	return (error);
464 }
465 
466 /*
467  * Create a new process group.
468  * pgid must be equal to the pid of p.
469  * Begin a new session if required.
470  */
471 int
472 enterpgrp(struct proc *p, pid_t pgid, struct pgrp *pgrp, struct session *sess)
473 {
474 
475 	sx_assert(&proctree_lock, SX_XLOCKED);
476 
477 	KASSERT(pgrp != NULL, ("enterpgrp: pgrp == NULL"));
478 	KASSERT(p->p_pid == pgid,
479 	    ("enterpgrp: new pgrp and pid != pgid"));
480 	KASSERT(pgfind(pgid) == NULL,
481 	    ("enterpgrp: pgrp with pgid exists"));
482 	KASSERT(!SESS_LEADER(p),
483 	    ("enterpgrp: session leader attempted setpgrp"));
484 
485 	mtx_init(&pgrp->pg_mtx, "process group", NULL, MTX_DEF | MTX_DUPOK);
486 
487 	if (sess != NULL) {
488 		/*
489 		 * new session
490 		 */
491 		mtx_init(&sess->s_mtx, "session", NULL, MTX_DEF);
492 		PROC_LOCK(p);
493 		p->p_flag &= ~P_CONTROLT;
494 		PROC_UNLOCK(p);
495 		PGRP_LOCK(pgrp);
496 		sess->s_leader = p;
497 		sess->s_sid = p->p_pid;
498 		refcount_init(&sess->s_count, 1);
499 		sess->s_ttyvp = NULL;
500 		sess->s_ttydp = NULL;
501 		sess->s_ttyp = NULL;
502 		bcopy(p->p_session->s_login, sess->s_login,
503 			    sizeof(sess->s_login));
504 		pgrp->pg_session = sess;
505 		KASSERT(p == curproc,
506 		    ("enterpgrp: mksession and p != curproc"));
507 	} else {
508 		pgrp->pg_session = p->p_session;
509 		sess_hold(pgrp->pg_session);
510 		PGRP_LOCK(pgrp);
511 	}
512 	pgrp->pg_id = pgid;
513 	LIST_INIT(&pgrp->pg_members);
514 
515 	/*
516 	 * As we have an exclusive lock of proctree_lock,
517 	 * this should not deadlock.
518 	 */
519 	LIST_INSERT_HEAD(PGRPHASH(pgid), pgrp, pg_hash);
520 	pgrp->pg_jobc = 0;
521 	SLIST_INIT(&pgrp->pg_sigiolst);
522 	PGRP_UNLOCK(pgrp);
523 
524 	doenterpgrp(p, pgrp);
525 
526 	return (0);
527 }
528 
529 /*
530  * Move p to an existing process group
531  */
532 int
533 enterthispgrp(struct proc *p, struct pgrp *pgrp)
534 {
535 
536 	sx_assert(&proctree_lock, SX_XLOCKED);
537 	PROC_LOCK_ASSERT(p, MA_NOTOWNED);
538 	PGRP_LOCK_ASSERT(pgrp, MA_NOTOWNED);
539 	PGRP_LOCK_ASSERT(p->p_pgrp, MA_NOTOWNED);
540 	SESS_LOCK_ASSERT(p->p_session, MA_NOTOWNED);
541 	KASSERT(pgrp->pg_session == p->p_session,
542 		("%s: pgrp's session %p, p->p_session %p.\n",
543 		__func__,
544 		pgrp->pg_session,
545 		p->p_session));
546 	KASSERT(pgrp != p->p_pgrp,
547 		("%s: p belongs to pgrp.", __func__));
548 
549 	doenterpgrp(p, pgrp);
550 
551 	return (0);
552 }
553 
554 /*
555  * Move p to a process group
556  */
557 static void
558 doenterpgrp(struct proc *p, struct pgrp *pgrp)
559 {
560 	struct pgrp *savepgrp;
561 
562 	sx_assert(&proctree_lock, SX_XLOCKED);
563 	PROC_LOCK_ASSERT(p, MA_NOTOWNED);
564 	PGRP_LOCK_ASSERT(pgrp, MA_NOTOWNED);
565 	PGRP_LOCK_ASSERT(p->p_pgrp, MA_NOTOWNED);
566 	SESS_LOCK_ASSERT(p->p_session, MA_NOTOWNED);
567 
568 	savepgrp = p->p_pgrp;
569 
570 	/*
571 	 * Adjust eligibility of affected pgrps to participate in job control.
572 	 * Increment eligibility counts before decrementing, otherwise we
573 	 * could reach 0 spuriously during the first call.
574 	 */
575 	fixjobc(p, pgrp, 1);
576 	fixjobc(p, p->p_pgrp, 0);
577 
578 	PGRP_LOCK(pgrp);
579 	PGRP_LOCK(savepgrp);
580 	PROC_LOCK(p);
581 	LIST_REMOVE(p, p_pglist);
582 	p->p_pgrp = pgrp;
583 	PROC_UNLOCK(p);
584 	LIST_INSERT_HEAD(&pgrp->pg_members, p, p_pglist);
585 	PGRP_UNLOCK(savepgrp);
586 	PGRP_UNLOCK(pgrp);
587 	if (LIST_EMPTY(&savepgrp->pg_members))
588 		pgdelete(savepgrp);
589 }
590 
591 /*
592  * remove process from process group
593  */
594 int
595 leavepgrp(struct proc *p)
596 {
597 	struct pgrp *savepgrp;
598 
599 	sx_assert(&proctree_lock, SX_XLOCKED);
600 	savepgrp = p->p_pgrp;
601 	PGRP_LOCK(savepgrp);
602 	PROC_LOCK(p);
603 	LIST_REMOVE(p, p_pglist);
604 	p->p_pgrp = NULL;
605 	PROC_UNLOCK(p);
606 	PGRP_UNLOCK(savepgrp);
607 	if (LIST_EMPTY(&savepgrp->pg_members))
608 		pgdelete(savepgrp);
609 	return (0);
610 }
611 
612 /*
613  * delete a process group
614  */
615 static void
616 pgdelete(struct pgrp *pgrp)
617 {
618 	struct session *savesess;
619 	struct tty *tp;
620 
621 	sx_assert(&proctree_lock, SX_XLOCKED);
622 	PGRP_LOCK_ASSERT(pgrp, MA_NOTOWNED);
623 	SESS_LOCK_ASSERT(pgrp->pg_session, MA_NOTOWNED);
624 
625 	/*
626 	 * Reset any sigio structures pointing to us as a result of
627 	 * F_SETOWN with our pgid.
628 	 */
629 	funsetownlst(&pgrp->pg_sigiolst);
630 
631 	PGRP_LOCK(pgrp);
632 	tp = pgrp->pg_session->s_ttyp;
633 	LIST_REMOVE(pgrp, pg_hash);
634 	savesess = pgrp->pg_session;
635 	PGRP_UNLOCK(pgrp);
636 
637 	/* Remove the reference to the pgrp before deallocating it. */
638 	if (tp != NULL) {
639 		tty_lock(tp);
640 		tty_rel_pgrp(tp, pgrp);
641 	}
642 
643 	mtx_destroy(&pgrp->pg_mtx);
644 	free(pgrp, M_PGRP);
645 	sess_release(savesess);
646 }
647 
648 static void
649 pgadjustjobc(struct pgrp *pgrp, int entering)
650 {
651 
652 	PGRP_LOCK(pgrp);
653 	if (entering)
654 		pgrp->pg_jobc++;
655 	else {
656 		--pgrp->pg_jobc;
657 		if (pgrp->pg_jobc == 0)
658 			orphanpg(pgrp);
659 	}
660 	PGRP_UNLOCK(pgrp);
661 }
662 
663 /*
664  * Adjust pgrp jobc counters when specified process changes process group.
665  * We count the number of processes in each process group that "qualify"
666  * the group for terminal job control (those with a parent in a different
667  * process group of the same session).  If that count reaches zero, the
668  * process group becomes orphaned.  Check both the specified process'
669  * process group and that of its children.
670  * entering == 0 => p is leaving specified group.
671  * entering == 1 => p is entering specified group.
672  */
673 void
674 fixjobc(struct proc *p, struct pgrp *pgrp, int entering)
675 {
676 	struct pgrp *hispgrp;
677 	struct session *mysession;
678 	struct proc *q;
679 
680 	sx_assert(&proctree_lock, SX_LOCKED);
681 	PROC_LOCK_ASSERT(p, MA_NOTOWNED);
682 	PGRP_LOCK_ASSERT(pgrp, MA_NOTOWNED);
683 	SESS_LOCK_ASSERT(pgrp->pg_session, MA_NOTOWNED);
684 
685 	/*
686 	 * Check p's parent to see whether p qualifies its own process
687 	 * group; if so, adjust count for p's process group.
688 	 */
689 	mysession = pgrp->pg_session;
690 	if ((hispgrp = p->p_pptr->p_pgrp) != pgrp &&
691 	    hispgrp->pg_session == mysession)
692 		pgadjustjobc(pgrp, entering);
693 
694 	/*
695 	 * Check this process' children to see whether they qualify
696 	 * their process groups; if so, adjust counts for children's
697 	 * process groups.
698 	 */
699 	LIST_FOREACH(q, &p->p_children, p_sibling) {
700 		hispgrp = q->p_pgrp;
701 		if (hispgrp == pgrp ||
702 		    hispgrp->pg_session != mysession)
703 			continue;
704 		if (q->p_state == PRS_ZOMBIE)
705 			continue;
706 		pgadjustjobc(hispgrp, entering);
707 	}
708 }
709 
710 void
711 killjobc(void)
712 {
713 	struct session *sp;
714 	struct tty *tp;
715 	struct proc *p;
716 	struct vnode *ttyvp;
717 
718 	p = curproc;
719 	MPASS(p->p_flag & P_WEXIT);
720 	/*
721 	 * Do a quick check to see if there is anything to do with the
722 	 * proctree_lock held. pgrp and LIST_EMPTY checks are for fixjobc().
723 	 */
724 	PROC_LOCK(p);
725 	if (!SESS_LEADER(p) &&
726 	    (p->p_pgrp == p->p_pptr->p_pgrp) &&
727 	    LIST_EMPTY(&p->p_children)) {
728 		PROC_UNLOCK(p);
729 		return;
730 	}
731 	PROC_UNLOCK(p);
732 
733 	sx_xlock(&proctree_lock);
734 	if (SESS_LEADER(p)) {
735 		sp = p->p_session;
736 
737 		/*
738 		 * s_ttyp is not zero'd; we use this to indicate that
739 		 * the session once had a controlling terminal. (for
740 		 * logging and informational purposes)
741 		 */
742 		SESS_LOCK(sp);
743 		ttyvp = sp->s_ttyvp;
744 		tp = sp->s_ttyp;
745 		sp->s_ttyvp = NULL;
746 		sp->s_ttydp = NULL;
747 		sp->s_leader = NULL;
748 		SESS_UNLOCK(sp);
749 
750 		/*
751 		 * Signal foreground pgrp and revoke access to
752 		 * controlling terminal if it has not been revoked
753 		 * already.
754 		 *
755 		 * Because the TTY may have been revoked in the mean
756 		 * time and could already have a new session associated
757 		 * with it, make sure we don't send a SIGHUP to a
758 		 * foreground process group that does not belong to this
759 		 * session.
760 		 */
761 
762 		if (tp != NULL) {
763 			tty_lock(tp);
764 			if (tp->t_session == sp)
765 				tty_signal_pgrp(tp, SIGHUP);
766 			tty_unlock(tp);
767 		}
768 
769 		if (ttyvp != NULL) {
770 			sx_xunlock(&proctree_lock);
771 			if (vn_lock(ttyvp, LK_EXCLUSIVE) == 0) {
772 				VOP_REVOKE(ttyvp, REVOKEALL);
773 				VOP_UNLOCK(ttyvp, 0);
774 			}
775 			vrele(ttyvp);
776 			sx_xlock(&proctree_lock);
777 		}
778 	}
779 	fixjobc(p, p->p_pgrp, 0);
780 	sx_xunlock(&proctree_lock);
781 }
782 
783 /*
784  * A process group has become orphaned;
785  * if there are any stopped processes in the group,
786  * hang-up all process in that group.
787  */
788 static void
789 orphanpg(struct pgrp *pg)
790 {
791 	struct proc *p;
792 
793 	PGRP_LOCK_ASSERT(pg, MA_OWNED);
794 
795 	LIST_FOREACH(p, &pg->pg_members, p_pglist) {
796 		PROC_LOCK(p);
797 		if (P_SHOULDSTOP(p) == P_STOPPED_SIG) {
798 			PROC_UNLOCK(p);
799 			LIST_FOREACH(p, &pg->pg_members, p_pglist) {
800 				PROC_LOCK(p);
801 				kern_psignal(p, SIGHUP);
802 				kern_psignal(p, SIGCONT);
803 				PROC_UNLOCK(p);
804 			}
805 			return;
806 		}
807 		PROC_UNLOCK(p);
808 	}
809 }
810 
811 void
812 sess_hold(struct session *s)
813 {
814 
815 	refcount_acquire(&s->s_count);
816 }
817 
818 void
819 sess_release(struct session *s)
820 {
821 
822 	if (refcount_release(&s->s_count)) {
823 		if (s->s_ttyp != NULL) {
824 			tty_lock(s->s_ttyp);
825 			tty_rel_sess(s->s_ttyp, s);
826 		}
827 		mtx_destroy(&s->s_mtx);
828 		free(s, M_SESSION);
829 	}
830 }
831 
#ifdef DDB

/*
 * DDB "show pgrpdump": walk the process group hash table and print,
 * for every non-empty bucket, each process group and its members.
 */
DB_SHOW_COMMAND(pgrpdump, pgrpdump)
{
	struct pgrp *pg;
	struct proc *member;
	int i;

	for (i = 0; i <= pgrphash; i++) {
		if (LIST_EMPTY(&pgrphashtbl[i]))
			continue;
		printf("\tindx %d\n", i);
		LIST_FOREACH(pg, &pgrphashtbl[i], pg_hash) {
			printf(
			    "\tpgrp %p, pgid %ld, sess %p, sesscnt %d, mem %p\n",
			    (void *)pg, (long)pg->pg_id,
			    (void *)pg->pg_session,
			    pg->pg_session->s_count,
			    (void *)LIST_FIRST(&pg->pg_members));
			LIST_FOREACH(member, &pg->pg_members, p_pglist) {
				printf("\t\tpid %ld addr %p pgrp %p\n",
				    (long)member->p_pid, (void *)member,
				    (void *)member->p_pgrp);
			}
		}
	}
}
#endif /* DDB */
860 
861 /*
862  * Calculate the kinfo_proc members which contain process-wide
863  * informations.
864  * Must be called with the target process locked.
865  */
866 static void
867 fill_kinfo_aggregate(struct proc *p, struct kinfo_proc *kp)
868 {
869 	struct thread *td;
870 
871 	PROC_LOCK_ASSERT(p, MA_OWNED);
872 
873 	kp->ki_estcpu = 0;
874 	kp->ki_pctcpu = 0;
875 	FOREACH_THREAD_IN_PROC(p, td) {
876 		thread_lock(td);
877 		kp->ki_pctcpu += sched_pctcpu(td);
878 		kp->ki_estcpu += sched_estcpu(td);
879 		thread_unlock(td);
880 	}
881 }
882 
883 /*
884  * Clear kinfo_proc and fill in any information that is common
885  * to all threads in the process.
886  * Must be called with the target process locked.
887  */
888 static void
889 fill_kinfo_proc_only(struct proc *p, struct kinfo_proc *kp)
890 {
891 	struct thread *td0;
892 	struct tty *tp;
893 	struct session *sp;
894 	struct ucred *cred;
895 	struct sigacts *ps;
896 	struct timeval boottime;
897 
898 	PROC_LOCK_ASSERT(p, MA_OWNED);
899 	bzero(kp, sizeof(*kp));
900 
901 	kp->ki_structsize = sizeof(*kp);
902 	kp->ki_paddr = p;
903 	kp->ki_addr =/* p->p_addr; */0; /* XXX */
904 	kp->ki_args = p->p_args;
905 	kp->ki_textvp = p->p_textvp;
906 #ifdef KTRACE
907 	kp->ki_tracep = p->p_tracevp;
908 	kp->ki_traceflag = p->p_traceflag;
909 #endif
910 	kp->ki_fd = p->p_fd;
911 	kp->ki_vmspace = p->p_vmspace;
912 	kp->ki_flag = p->p_flag;
913 	kp->ki_flag2 = p->p_flag2;
914 	cred = p->p_ucred;
915 	if (cred) {
916 		kp->ki_uid = cred->cr_uid;
917 		kp->ki_ruid = cred->cr_ruid;
918 		kp->ki_svuid = cred->cr_svuid;
919 		kp->ki_cr_flags = 0;
920 		if (cred->cr_flags & CRED_FLAG_CAPMODE)
921 			kp->ki_cr_flags |= KI_CRF_CAPABILITY_MODE;
922 		/* XXX bde doesn't like KI_NGROUPS */
923 		if (cred->cr_ngroups > KI_NGROUPS) {
924 			kp->ki_ngroups = KI_NGROUPS;
925 			kp->ki_cr_flags |= KI_CRF_GRP_OVERFLOW;
926 		} else
927 			kp->ki_ngroups = cred->cr_ngroups;
928 		bcopy(cred->cr_groups, kp->ki_groups,
929 		    kp->ki_ngroups * sizeof(gid_t));
930 		kp->ki_rgid = cred->cr_rgid;
931 		kp->ki_svgid = cred->cr_svgid;
932 		/* If jailed(cred), emulate the old P_JAILED flag. */
933 		if (jailed(cred)) {
934 			kp->ki_flag |= P_JAILED;
935 			/* If inside the jail, use 0 as a jail ID. */
936 			if (cred->cr_prison != curthread->td_ucred->cr_prison)
937 				kp->ki_jid = cred->cr_prison->pr_id;
938 		}
939 		strlcpy(kp->ki_loginclass, cred->cr_loginclass->lc_name,
940 		    sizeof(kp->ki_loginclass));
941 	}
942 	ps = p->p_sigacts;
943 	if (ps) {
944 		mtx_lock(&ps->ps_mtx);
945 		kp->ki_sigignore = ps->ps_sigignore;
946 		kp->ki_sigcatch = ps->ps_sigcatch;
947 		mtx_unlock(&ps->ps_mtx);
948 	}
949 	if (p->p_state != PRS_NEW &&
950 	    p->p_state != PRS_ZOMBIE &&
951 	    p->p_vmspace != NULL) {
952 		struct vmspace *vm = p->p_vmspace;
953 
954 		kp->ki_size = vm->vm_map.size;
955 		kp->ki_rssize = vmspace_resident_count(vm); /*XXX*/
956 		FOREACH_THREAD_IN_PROC(p, td0) {
957 			if (!TD_IS_SWAPPED(td0))
958 				kp->ki_rssize += td0->td_kstack_pages;
959 		}
960 		kp->ki_swrss = vm->vm_swrss;
961 		kp->ki_tsize = vm->vm_tsize;
962 		kp->ki_dsize = vm->vm_dsize;
963 		kp->ki_ssize = vm->vm_ssize;
964 	} else if (p->p_state == PRS_ZOMBIE)
965 		kp->ki_stat = SZOMB;
966 	if (kp->ki_flag & P_INMEM)
967 		kp->ki_sflag = PS_INMEM;
968 	else
969 		kp->ki_sflag = 0;
970 	/* Calculate legacy swtime as seconds since 'swtick'. */
971 	kp->ki_swtime = (ticks - p->p_swtick) / hz;
972 	kp->ki_pid = p->p_pid;
973 	kp->ki_nice = p->p_nice;
974 	kp->ki_fibnum = p->p_fibnum;
975 	kp->ki_start = p->p_stats->p_start;
976 	getboottime(&boottime);
977 	timevaladd(&kp->ki_start, &boottime);
978 	PROC_STATLOCK(p);
979 	rufetch(p, &kp->ki_rusage);
980 	kp->ki_runtime = cputick2usec(p->p_rux.rux_runtime);
981 	calcru(p, &kp->ki_rusage.ru_utime, &kp->ki_rusage.ru_stime);
982 	PROC_STATUNLOCK(p);
983 	calccru(p, &kp->ki_childutime, &kp->ki_childstime);
984 	/* Some callers want child times in a single value. */
985 	kp->ki_childtime = kp->ki_childstime;
986 	timevaladd(&kp->ki_childtime, &kp->ki_childutime);
987 
988 	FOREACH_THREAD_IN_PROC(p, td0)
989 		kp->ki_cow += td0->td_cow;
990 
991 	tp = NULL;
992 	if (p->p_pgrp) {
993 		kp->ki_pgid = p->p_pgrp->pg_id;
994 		kp->ki_jobc = p->p_pgrp->pg_jobc;
995 		sp = p->p_pgrp->pg_session;
996 
997 		if (sp != NULL) {
998 			kp->ki_sid = sp->s_sid;
999 			SESS_LOCK(sp);
1000 			strlcpy(kp->ki_login, sp->s_login,
1001 			    sizeof(kp->ki_login));
1002 			if (sp->s_ttyvp)
1003 				kp->ki_kiflag |= KI_CTTY;
1004 			if (SESS_LEADER(p))
1005 				kp->ki_kiflag |= KI_SLEADER;
1006 			/* XXX proctree_lock */
1007 			tp = sp->s_ttyp;
1008 			SESS_UNLOCK(sp);
1009 		}
1010 	}
1011 	if ((p->p_flag & P_CONTROLT) && tp != NULL) {
1012 		kp->ki_tdev = tty_udev(tp);
1013 		kp->ki_tdev_freebsd11 = kp->ki_tdev; /* truncate */
1014 		kp->ki_tpgid = tp->t_pgrp ? tp->t_pgrp->pg_id : NO_PID;
1015 		if (tp->t_session)
1016 			kp->ki_tsid = tp->t_session->s_sid;
1017 	} else {
1018 		kp->ki_tdev = NODEV;
1019 		kp->ki_tdev_freebsd11 = kp->ki_tdev; /* truncate */
1020 	}
1021 	if (p->p_comm[0] != '\0')
1022 		strlcpy(kp->ki_comm, p->p_comm, sizeof(kp->ki_comm));
1023 	if (p->p_sysent && p->p_sysent->sv_name != NULL &&
1024 	    p->p_sysent->sv_name[0] != '\0')
1025 		strlcpy(kp->ki_emul, p->p_sysent->sv_name, sizeof(kp->ki_emul));
1026 	kp->ki_siglist = p->p_siglist;
1027 	kp->ki_xstat = KW_EXITCODE(p->p_xexit, p->p_xsig);
1028 	kp->ki_acflag = p->p_acflag;
1029 	kp->ki_lock = p->p_lock;
1030 	if (p->p_pptr) {
1031 		kp->ki_ppid = p->p_oppid;
1032 		if (p->p_flag & P_TRACED)
1033 			kp->ki_tracer = p->p_pptr->p_pid;
1034 	}
1035 }
1036 
1037 /*
1038  * Fill in information that is thread specific.  Must be called with
1039  * target process locked.  If 'preferthread' is set, overwrite certain
1040  * process-related fields that are maintained for both threads and
1041  * processes.
1042  */
static void
fill_kinfo_thread(struct thread *td, struct kinfo_proc *kp, int preferthread)
{
	struct proc *p;

	p = td->td_proc;
	kp->ki_tdaddr = td;
	PROC_LOCK_ASSERT(p, MA_OWNED);

	/*
	 * The process stat lock is taken only on the 'preferthread' path
	 * (the only path that reads per-thread usage below).  It is
	 * acquired before the thread lock and released after it.
	 */
	if (preferthread)
		PROC_STATLOCK(p);
	thread_lock(td);
	/* Wait message: copy it, or clear the field if there is none. */
	if (td->td_wmesg != NULL)
		strlcpy(kp->ki_wmesg, td->td_wmesg, sizeof(kp->ki_wmesg));
	else
		bzero(kp->ki_wmesg, sizeof(kp->ki_wmesg));
	/*
	 * strlcpy() returns the length of the source string, so a result
	 * that does not fit in ki_tdname means the name was truncated; the
	 * part that did not fit is stored in ki_moretdname.
	 */
	if (strlcpy(kp->ki_tdname, td->td_name, sizeof(kp->ki_tdname)) >=
	    sizeof(kp->ki_tdname)) {
		strlcpy(kp->ki_moretdname,
		    td->td_name + sizeof(kp->ki_tdname) - 1,
		    sizeof(kp->ki_moretdname));
	} else {
		bzero(kp->ki_moretdname, sizeof(kp->ki_moretdname));
	}
	/* Report the lock name only while the thread is blocked on a lock. */
	if (TD_ON_LOCK(td)) {
		kp->ki_kiflag |= KI_LOCKBLOCK;
		strlcpy(kp->ki_lockname, td->td_lockname,
		    sizeof(kp->ki_lockname));
	} else {
		kp->ki_kiflag &= ~KI_LOCKBLOCK;
		bzero(kp->ki_lockname, sizeof(kp->ki_lockname));
	}

	/*
	 * Derive the reported state from the process state and, for a
	 * normal process, from this thread's state.  The checks are
	 * ordered: runnable wins over stopped, which wins over sleeping.
	 */
	if (p->p_state == PRS_NORMAL) { /* approximate. */
		if (TD_ON_RUNQ(td) ||
		    TD_CAN_RUN(td) ||
		    TD_IS_RUNNING(td)) {
			kp->ki_stat = SRUN;
		} else if (P_SHOULDSTOP(p)) {
			kp->ki_stat = SSTOP;
		} else if (TD_IS_SLEEPING(td)) {
			kp->ki_stat = SSLEEP;
		} else if (TD_ON_LOCK(td)) {
			kp->ki_stat = SLOCK;
		} else {
			kp->ki_stat = SWAIT;
		}
	} else if (p->p_state == PRS_ZOMBIE) {
		kp->ki_stat = SZOMB;
	} else {
		kp->ki_stat = SIDL;
	}

	/* Things in the thread */
	kp->ki_wchan = td->td_wchan;
	kp->ki_pri.pri_level = td->td_priority;
	kp->ki_pri.pri_native = td->td_base_pri;

	/*
	 * Note: legacy fields; clamp at the old NOCPU value and/or
	 * the maximum u_char CPU value.
	 */
	if (td->td_lastcpu == NOCPU)
		kp->ki_lastcpu_old = NOCPU_OLD;
	else if (td->td_lastcpu > MAXCPU_OLD)
		kp->ki_lastcpu_old = MAXCPU_OLD;
	else
		kp->ki_lastcpu_old = td->td_lastcpu;

	if (td->td_oncpu == NOCPU)
		kp->ki_oncpu_old = NOCPU_OLD;
	else if (td->td_oncpu > MAXCPU_OLD)
		kp->ki_oncpu_old = MAXCPU_OLD;
	else
		kp->ki_oncpu_old = td->td_oncpu;

	/* The current fields carry the CPU ids unclamped. */
	kp->ki_lastcpu = td->td_lastcpu;
	kp->ki_oncpu = td->td_oncpu;
	kp->ki_tdflags = td->td_flags;
	kp->ki_tid = td->td_tid;
	kp->ki_numthreads = p->p_numthreads;
	kp->ki_pcb = td->td_pcb;
	kp->ki_kstack = (void *)td->td_kstack;
	/* Ticks elapsed since td_slptick, scaled down by hz. */
	kp->ki_slptime = (ticks - td->td_slptick) / hz;
	kp->ki_pri.pri_class = td->td_pri_class;
	kp->ki_pri.pri_user = td->td_user_pri;

	/*
	 * These fields are only overwritten with per-thread values when
	 * the caller asked for them; otherwise whatever is already in
	 * 'kp' is left alone.
	 */
	if (preferthread) {
		rufetchtd(td, &kp->ki_rusage);
		kp->ki_runtime = cputick2usec(td->td_rux.rux_runtime);
		kp->ki_pctcpu = sched_pctcpu(td);
		kp->ki_estcpu = sched_estcpu(td);
		kp->ki_cow = td->td_cow;
	}

	/* We can't get this anymore but ps etc never used it anyway. */
	kp->ki_rqindex = 0;

	/* The signal mask is always per-thread; the pending list only on request. */
	if (preferthread)
		kp->ki_siglist = td->td_siglist;
	kp->ki_sigmask = td->td_sigmask;
	thread_unlock(td);
	if (preferthread)
		PROC_STATUNLOCK(p);
}
1148 
1149 /*
1150  * Fill in a kinfo_proc structure for the specified process.
1151  * Must be called with the target process locked.
1152  */
1153 void
1154 fill_kinfo_proc(struct proc *p, struct kinfo_proc *kp)
1155 {
1156 
1157 	MPASS(FIRST_THREAD_IN_PROC(p) != NULL);
1158 
1159 	fill_kinfo_proc_only(p, kp);
1160 	fill_kinfo_thread(FIRST_THREAD_IN_PROC(p), kp, 0);
1161 	fill_kinfo_aggregate(p, kp);
1162 }
1163 
1164 struct pstats *
1165 pstats_alloc(void)
1166 {
1167 
1168 	return (malloc(sizeof(struct pstats), M_SUBPROC, M_ZERO|M_WAITOK));
1169 }
1170 
1171 /*
1172  * Copy parts of p_stats; zero the rest of p_stats (statistics).
1173  */
1174 void
1175 pstats_fork(struct pstats *src, struct pstats *dst)
1176 {
1177 
1178 	bzero(&dst->pstat_startzero,
1179 	    __rangeof(struct pstats, pstat_startzero, pstat_endzero));
1180 	bcopy(&src->pstat_startcopy, &dst->pstat_startcopy,
1181 	    __rangeof(struct pstats, pstat_startcopy, pstat_endcopy));
1182 }
1183 
/*
 * Release a pstats structure back to the M_SUBPROC malloc type (the type
 * pstats_alloc() allocates from).
 */
void
pstats_free(struct pstats *ps)
{

	free(ps, M_SUBPROC);
}
1190 
1191 /*
1192  * Locate a zombie process by number
1193  */
1194 struct proc *
1195 zpfind(pid_t pid)
1196 {
1197 	struct proc *p;
1198 
1199 	sx_slock(&zombproc_lock);
1200 	LIST_FOREACH(p, &zombproc, p_list) {
1201 		if (p->p_pid == pid) {
1202 			PROC_LOCK(p);
1203 			break;
1204 		}
1205 	}
1206 	sx_sunlock(&zombproc_lock);
1207 	return (p);
1208 }
1209 
1210 #ifdef COMPAT_FREEBSD32
1211 
/*
 * Narrow a pointer to a 32-bit value.  An address that does not fit in
 * 32 bits is reported as zero instead of being truncated.  This is
 * typically used when copying out kernel addresses, for which zero is an
 * acceptable substitute.
 */
static inline uint32_t
ptr32_trim(void *ptr)
{
	const uintptr_t addr = (uintptr_t)ptr;

	if (addr > UINT_MAX)
		return (0);
	return (addr);
}
1224 
/*
 * Copy the pointer member 'fld' from structure (src) to the same-named
 * member of (dst), passing it through ptr32_trim() on the way.
 */
#define PTRTRIM_CP(src,dst,fld) \
	do { (dst).fld = ptr32_trim((src).fld); } while (0)
1227 
/*
 * Convert a native kinfo_proc record ('ki') into the 32-bit layout
 * ('ki32').
 *
 * The destination is zeroed first, so any member not explicitly copied
 * below stays zero.  Pointer members go through PTRTRIM_CP, scalar members
 * through CP, timevals through TV_CP, rusage through
 * freebsd32_rusage_out(), and the fixed-size name buffers are copied
 * byte-for-byte.
 */
static void
freebsd32_kinfo_proc_out(const struct kinfo_proc *ki, struct kinfo_proc32 *ki32)
{
	int i;

	bzero(ki32, sizeof(struct kinfo_proc32));
	/* The structure size reported to the consumer is that of the 32-bit layout. */
	ki32->ki_structsize = sizeof(struct kinfo_proc32);
	CP(*ki, *ki32, ki_layout);
	PTRTRIM_CP(*ki, *ki32, ki_args);
	PTRTRIM_CP(*ki, *ki32, ki_paddr);
	PTRTRIM_CP(*ki, *ki32, ki_addr);
	PTRTRIM_CP(*ki, *ki32, ki_tracep);
	PTRTRIM_CP(*ki, *ki32, ki_textvp);
	PTRTRIM_CP(*ki, *ki32, ki_fd);
	PTRTRIM_CP(*ki, *ki32, ki_vmspace);
	PTRTRIM_CP(*ki, *ki32, ki_wchan);
	CP(*ki, *ki32, ki_pid);
	CP(*ki, *ki32, ki_ppid);
	CP(*ki, *ki32, ki_pgid);
	CP(*ki, *ki32, ki_tpgid);
	CP(*ki, *ki32, ki_sid);
	CP(*ki, *ki32, ki_tsid);
	CP(*ki, *ki32, ki_jobc);
	CP(*ki, *ki32, ki_tdev);
	CP(*ki, *ki32, ki_tdev_freebsd11);
	CP(*ki, *ki32, ki_siglist);
	CP(*ki, *ki32, ki_sigmask);
	CP(*ki, *ki32, ki_sigignore);
	CP(*ki, *ki32, ki_sigcatch);
	CP(*ki, *ki32, ki_uid);
	CP(*ki, *ki32, ki_ruid);
	CP(*ki, *ki32, ki_svuid);
	CP(*ki, *ki32, ki_rgid);
	CP(*ki, *ki32, ki_svgid);
	CP(*ki, *ki32, ki_ngroups);
	/* All KI_NGROUPS slots are copied, regardless of ki_ngroups. */
	for (i = 0; i < KI_NGROUPS; i++)
		CP(*ki, *ki32, ki_groups[i]);
	CP(*ki, *ki32, ki_size);
	CP(*ki, *ki32, ki_rssize);
	CP(*ki, *ki32, ki_swrss);
	CP(*ki, *ki32, ki_tsize);
	CP(*ki, *ki32, ki_dsize);
	CP(*ki, *ki32, ki_ssize);
	CP(*ki, *ki32, ki_xstat);
	CP(*ki, *ki32, ki_acflag);
	CP(*ki, *ki32, ki_pctcpu);
	CP(*ki, *ki32, ki_estcpu);
	CP(*ki, *ki32, ki_slptime);
	CP(*ki, *ki32, ki_swtime);
	CP(*ki, *ki32, ki_cow);
	CP(*ki, *ki32, ki_runtime);
	TV_CP(*ki, *ki32, ki_start);
	TV_CP(*ki, *ki32, ki_childtime);
	CP(*ki, *ki32, ki_flag);
	CP(*ki, *ki32, ki_kiflag);
	CP(*ki, *ki32, ki_traceflag);
	CP(*ki, *ki32, ki_stat);
	CP(*ki, *ki32, ki_nice);
	CP(*ki, *ki32, ki_lock);
	CP(*ki, *ki32, ki_rqindex);
	CP(*ki, *ki32, ki_oncpu);
	CP(*ki, *ki32, ki_lastcpu);

	/* XXX TODO: wrap cpu value as appropriate */
	CP(*ki, *ki32, ki_oncpu_old);
	CP(*ki, *ki32, ki_lastcpu_old);

	/* Fixed-size string buffers: copy each including its terminator slot. */
	bcopy(ki->ki_tdname, ki32->ki_tdname, TDNAMLEN + 1);
	bcopy(ki->ki_wmesg, ki32->ki_wmesg, WMESGLEN + 1);
	bcopy(ki->ki_login, ki32->ki_login, LOGNAMELEN + 1);
	bcopy(ki->ki_lockname, ki32->ki_lockname, LOCKNAMELEN + 1);
	bcopy(ki->ki_comm, ki32->ki_comm, COMMLEN + 1);
	bcopy(ki->ki_emul, ki32->ki_emul, KI_EMULNAMELEN + 1);
	bcopy(ki->ki_loginclass, ki32->ki_loginclass, LOGINCLASSLEN + 1);
	bcopy(ki->ki_moretdname, ki32->ki_moretdname, MAXCOMLEN - TDNAMLEN + 1);
	CP(*ki, *ki32, ki_tracer);
	CP(*ki, *ki32, ki_flag2);
	CP(*ki, *ki32, ki_fibnum);
	CP(*ki, *ki32, ki_cr_flags);
	CP(*ki, *ki32, ki_jid);
	CP(*ki, *ki32, ki_numthreads);
	CP(*ki, *ki32, ki_tid);
	CP(*ki, *ki32, ki_pri);
	freebsd32_rusage_out(&ki->ki_rusage, &ki32->ki_rusage);
	freebsd32_rusage_out(&ki->ki_rusage_ch, &ki32->ki_rusage_ch);
	PTRTRIM_CP(*ki, *ki32, ki_pcb);
	PTRTRIM_CP(*ki, *ki32, ki_kstack);
	PTRTRIM_CP(*ki, *ki32, ki_udata);
	PTRTRIM_CP(*ki, *ki32, ki_tdaddr);
	CP(*ki, *ki32, ki_sflag);
	CP(*ki, *ki32, ki_tdflags);
}
1320 #endif
1321 
1322 static ssize_t
1323 kern_proc_out_size(struct proc *p, int flags)
1324 {
1325 	ssize_t size = 0;
1326 
1327 	PROC_LOCK_ASSERT(p, MA_OWNED);
1328 
1329 	if ((flags & KERN_PROC_NOTHREADS) != 0) {
1330 #ifdef COMPAT_FREEBSD32
1331 		if ((flags & KERN_PROC_MASK32) != 0) {
1332 			size += sizeof(struct kinfo_proc32);
1333 		} else
1334 #endif
1335 			size += sizeof(struct kinfo_proc);
1336 	} else {
1337 #ifdef COMPAT_FREEBSD32
1338 		if ((flags & KERN_PROC_MASK32) != 0)
1339 			size += sizeof(struct kinfo_proc32) * p->p_numthreads;
1340 		else
1341 #endif
1342 			size += sizeof(struct kinfo_proc) * p->p_numthreads;
1343 	}
1344 	PROC_UNLOCK(p);
1345 	return (size);
1346 }
1347 
1348 int
1349 kern_proc_out(struct proc *p, struct sbuf *sb, int flags)
1350 {
1351 	struct thread *td;
1352 	struct kinfo_proc ki;
1353 #ifdef COMPAT_FREEBSD32
1354 	struct kinfo_proc32 ki32;
1355 #endif
1356 	int error;
1357 
1358 	PROC_LOCK_ASSERT(p, MA_OWNED);
1359 	MPASS(FIRST_THREAD_IN_PROC(p) != NULL);
1360 
1361 	error = 0;
1362 	fill_kinfo_proc(p, &ki);
1363 	if ((flags & KERN_PROC_NOTHREADS) != 0) {
1364 #ifdef COMPAT_FREEBSD32
1365 		if ((flags & KERN_PROC_MASK32) != 0) {
1366 			freebsd32_kinfo_proc_out(&ki, &ki32);
1367 			if (sbuf_bcat(sb, &ki32, sizeof(ki32)) != 0)
1368 				error = ENOMEM;
1369 		} else
1370 #endif
1371 			if (sbuf_bcat(sb, &ki, sizeof(ki)) != 0)
1372 				error = ENOMEM;
1373 	} else {
1374 		FOREACH_THREAD_IN_PROC(p, td) {
1375 			fill_kinfo_thread(td, &ki, 1);
1376 #ifdef COMPAT_FREEBSD32
1377 			if ((flags & KERN_PROC_MASK32) != 0) {
1378 				freebsd32_kinfo_proc_out(&ki, &ki32);
1379 				if (sbuf_bcat(sb, &ki32, sizeof(ki32)) != 0)
1380 					error = ENOMEM;
1381 			} else
1382 #endif
1383 				if (sbuf_bcat(sb, &ki, sizeof(ki)) != 0)
1384 					error = ENOMEM;
1385 			if (error != 0)
1386 				break;
1387 		}
1388 	}
1389 	PROC_UNLOCK(p);
1390 	return (error);
1391 }
1392 
1393 static int
1394 sysctl_out_proc(struct proc *p, struct sysctl_req *req, int flags)
1395 {
1396 	struct sbuf sb;
1397 	struct kinfo_proc ki;
1398 	int error, error2;
1399 
1400 	if (req->oldptr == NULL)
1401 		return (SYSCTL_OUT(req, 0, kern_proc_out_size(p, flags)));
1402 
1403 	sbuf_new_for_sysctl(&sb, (char *)&ki, sizeof(ki), req);
1404 	sbuf_clear_flags(&sb, SBUF_INCLUDENUL);
1405 	error = kern_proc_out(p, &sb, flags);
1406 	error2 = sbuf_finish(&sb);
1407 	sbuf_delete(&sb);
1408 	if (error != 0)
1409 		return (error);
1410 	else if (error2 != 0)
1411 		return (error2);
1412 	return (0);
1413 }
1414 
1415 int
1416 proc_iterate(int (*cb)(struct proc *, void *), void *cbarg)
1417 {
1418 	struct proc *p;
1419 	int error, i, j;
1420 
1421 	for (i = 0; i < pidhashlock + 1; i++) {
1422 		sx_slock(&pidhashtbl_lock[i]);
1423 		for (j = i; j <= pidhash; j += pidhashlock + 1) {
1424 			LIST_FOREACH(p, &pidhashtbl[j], p_hash) {
1425 				if (p->p_state == PRS_NEW)
1426 					continue;
1427 				error = cb(p, cbarg);
1428 				PROC_LOCK_ASSERT(p, MA_NOTOWNED);
1429 				if (error != 0) {
1430 					sx_sunlock(&pidhashtbl_lock[i]);
1431 					return (error);
1432 				}
1433 			}
1434 		}
1435 		sx_sunlock(&pidhashtbl_lock[i]);
1436 	}
1437 	return (0);
1438 }
1439 
/*
 * Arguments handed from sysctl_kern_proc() to its per-process callback,
 * sysctl_kern_proc_iterate(), through proc_iterate()'s opaque pointer.
 */
struct kern_proc_out_args {
	struct sysctl_req *req;	/* sysctl request the records are written to */
	int flags;		/* KERN_PROC_* output flags */
	int oid_number;		/* which KERN_PROC_* selector is being served */
	int *name;		/* name[0] is the value the selector matches on */
};
1446 
/*
 * proc_iterate() callback used by sysctl_kern_proc().
 *
 * Locks the process, drops it from consideration if the current thread
 * may not see it or if it does not match the selector in 'origarg', and
 * otherwise emits its record(s) via sysctl_out_proc().  On every path the
 * process lock has been released by the time this function returns.
 * Skipped processes yield 0 so that the iteration continues.
 */
static int
sysctl_kern_proc_iterate(struct proc *p, void *origarg)
{
	struct kern_proc_out_args *arg = origarg;
	int *name = arg->name;
	int oid_number = arg->oid_number;
	int flags = arg->flags;
	struct sysctl_req *req = arg->req;
	int error = 0;

	PROC_LOCK(p);

	KASSERT(p->p_ucred != NULL,
	    ("process credential is NULL for non-NEW proc"));
	/*
	 * Show a user only appropriate processes.
	 */
	if (p_cansee(curthread, p))
		goto skip;
	/*
	 * TODO - make more efficient (see notes below).
	 * do by session.
	 */
	switch (oid_number) {

	case KERN_PROC_GID:
		if (p->p_ucred->cr_gid != (gid_t)name[0])
			goto skip;
		break;

	case KERN_PROC_PGRP:
		/* could do this by traversing pgrp */
		if (p->p_pgrp == NULL ||
		    p->p_pgrp->pg_id != (pid_t)name[0])
			goto skip;
		break;

	case KERN_PROC_RGID:
		if (p->p_ucred->cr_rgid != (gid_t)name[0])
			goto skip;
		break;

	case KERN_PROC_SESSION:
		if (p->p_session == NULL ||
		    p->p_session->s_sid != (pid_t)name[0])
			goto skip;
		break;

	case KERN_PROC_TTY:
		/* Only processes with a controlling terminal can match. */
		if ((p->p_flag & P_CONTROLT) == 0 ||
		    p->p_session == NULL)
			goto skip;
		/* XXX proctree_lock */
		SESS_LOCK(p->p_session);
		if (p->p_session->s_ttyp == NULL ||
		    tty_udev(p->p_session->s_ttyp) !=
		    (dev_t)name[0]) {
			SESS_UNLOCK(p->p_session);
			goto skip;
		}
		SESS_UNLOCK(p->p_session);
		break;

	case KERN_PROC_UID:
		if (p->p_ucred->cr_uid != (uid_t)name[0])
			goto skip;
		break;

	case KERN_PROC_RUID:
		if (p->p_ucred->cr_ruid != (uid_t)name[0])
			goto skip;
		break;

	case KERN_PROC_PROC:
		break;

	default:
		/* Any other selector applies no filter here. */
		break;

	}
	/* sysctl_out_proc() consumes the process lock taken above. */
	error = sysctl_out_proc(p, req, flags);
	PROC_LOCK_ASSERT(p, MA_NOTOWNED);
	return (error);
skip:
	PROC_UNLOCK(p);
	return (0);
}
1534 
1535 static int
1536 sysctl_kern_proc(SYSCTL_HANDLER_ARGS)
1537 {
1538 	struct kern_proc_out_args iterarg;
1539 	int *name = (int *)arg1;
1540 	u_int namelen = arg2;
1541 	struct proc *p;
1542 	int flags, oid_number;
1543 	int error = 0;
1544 
1545 	oid_number = oidp->oid_number;
1546 	if (oid_number != KERN_PROC_ALL &&
1547 	    (oid_number & KERN_PROC_INC_THREAD) == 0)
1548 		flags = KERN_PROC_NOTHREADS;
1549 	else {
1550 		flags = 0;
1551 		oid_number &= ~KERN_PROC_INC_THREAD;
1552 	}
1553 #ifdef COMPAT_FREEBSD32
1554 	if (req->flags & SCTL_MASK32)
1555 		flags |= KERN_PROC_MASK32;
1556 #endif
1557 	if (oid_number == KERN_PROC_PID) {
1558 		if (namelen != 1)
1559 			return (EINVAL);
1560 		error = sysctl_wire_old_buffer(req, 0);
1561 		if (error)
1562 			return (error);
1563 		error = pget((pid_t)name[0], PGET_CANSEE, &p);
1564 		if (error == 0)
1565 			error = sysctl_out_proc(p, req, flags);
1566 		return (error);
1567 	}
1568 
1569 	switch (oid_number) {
1570 	case KERN_PROC_ALL:
1571 		if (namelen != 0)
1572 			return (EINVAL);
1573 		break;
1574 	case KERN_PROC_PROC:
1575 		if (namelen != 0 && namelen != 1)
1576 			return (EINVAL);
1577 		break;
1578 	default:
1579 		if (namelen != 1)
1580 			return (EINVAL);
1581 		break;
1582 	}
1583 
1584 	if (req->oldptr == NULL) {
1585 		/* overestimate by 5 procs */
1586 		error = SYSCTL_OUT(req, 0, sizeof (struct kinfo_proc) * 5);
1587 		if (error)
1588 			return (error);
1589 	} else {
1590 		error = sysctl_wire_old_buffer(req, 0);
1591 		if (error != 0)
1592 			return (error);
1593 	}
1594 	iterarg.flags = flags;
1595 	iterarg.oid_number = oid_number;
1596 	iterarg.req = req;
1597 	iterarg.name = name;
1598 	error = proc_iterate(sysctl_kern_proc_iterate, &iterarg);
1599 	return (error);
1600 }
1601 
1602 struct pargs *
1603 pargs_alloc(int len)
1604 {
1605 	struct pargs *pa;
1606 
1607 	pa = malloc(sizeof(struct pargs) + len, M_PARGS,
1608 		M_WAITOK);
1609 	refcount_init(&pa->ar_ref, 1);
1610 	pa->ar_length = len;
1611 	return (pa);
1612 }
1613 
/*
 * Release the memory of a pargs structure unconditionally, without
 * looking at its reference count.
 */
static void
pargs_free(struct pargs *pa)
{

	free(pa, M_PARGS);
}
1620 
1621 void
1622 pargs_hold(struct pargs *pa)
1623 {
1624 
1625 	if (pa == NULL)
1626 		return;
1627 	refcount_acquire(&pa->ar_ref);
1628 }
1629 
1630 void
1631 pargs_drop(struct pargs *pa)
1632 {
1633 
1634 	if (pa == NULL)
1635 		return;
1636 	if (refcount_release(&pa->ar_ref))
1637 		pargs_free(pa);
1638 }
1639 
1640 static int
1641 proc_read_string(struct thread *td, struct proc *p, const char *sptr, char *buf,
1642     size_t len)
1643 {
1644 	ssize_t n;
1645 
1646 	/*
1647 	 * This may return a short read if the string is shorter than the chunk
1648 	 * and is aligned at the end of the page, and the following page is not
1649 	 * mapped.
1650 	 */
1651 	n = proc_readmem(td, p, (vm_offset_t)sptr, buf, len);
1652 	if (n <= 0)
1653 		return (ENOMEM);
1654 	return (0);
1655 }
1656 
#define PROC_AUXV_MAX	256	/* Safety limit on auxv size. */

/*
 * Selects which per-process vector get_proc_vector() and its callers
 * operate on.
 */
enum proc_vector_type {
	PROC_ARG,	/* argument vector (ps_argvstr / ps_nargvstr) */
	PROC_ENV,	/* environment vector (ps_envstr / ps_nenvstr) */
	PROC_AUX,	/* ELF auxiliary vector, located just after the env array */
};
1664 
1665 #ifdef COMPAT_FREEBSD32
/*
 * 32-bit counterpart of get_proc_vector(): fetch the argument, environment
 * or auxiliary vector of a process whose ps_strings uses 32-bit pointers.
 *
 * On success *proc_vectorp points to an M_TEMP allocation the caller must
 * free and *vsizep holds the number of entries.  For PROC_ARG and PROC_ENV
 * the result is an array of native 'char *' widened from the 32-bit
 * values; for PROC_AUX it is the raw array of Elf32_Auxinfo entries
 * (including the terminating AT_NULL), merely cast to 'char **'.
 *
 * Returns ENOMEM when target memory cannot be read in full, ENOEXEC when
 * the data looks implausible, and EINVAL for an unknown 'type'.
 */
static int
get_proc_vector32(struct thread *td, struct proc *p, char ***proc_vectorp,
    size_t *vsizep, enum proc_vector_type type)
{
	struct freebsd32_ps_strings pss;
	Elf32_Auxinfo aux;
	vm_offset_t vptr, ptr;
	uint32_t *proc_vector32;
	char **proc_vector;
	size_t vsize, size;
	int i, error;

	error = 0;
	if (proc_readmem(td, p, (vm_offset_t)p->p_sysent->sv_psstrings, &pss,
	    sizeof(pss)) != sizeof(pss))
		return (ENOMEM);
	switch (type) {
	case PROC_ARG:
		vptr = (vm_offset_t)PTRIN(pss.ps_argvstr);
		vsize = pss.ps_nargvstr;
		/* The count comes from the target process; bound it. */
		if (vsize > ARG_MAX)
			return (ENOEXEC);
		size = vsize * sizeof(int32_t);
		break;
	case PROC_ENV:
		vptr = (vm_offset_t)PTRIN(pss.ps_envstr);
		vsize = pss.ps_nenvstr;
		if (vsize > ARG_MAX)
			return (ENOEXEC);
		size = vsize * sizeof(int32_t);
		break;
	case PROC_AUX:
		/* The aux array starts one slot past the end of the env array. */
		vptr = (vm_offset_t)PTRIN(pss.ps_envstr) +
		    (pss.ps_nenvstr + 1) * sizeof(int32_t);
		if (vptr % 4 != 0)
			return (ENOEXEC);
		/* First pass: count entries up to AT_NULL, at most PROC_AUXV_MAX. */
		for (ptr = vptr, i = 0; i < PROC_AUXV_MAX; i++) {
			if (proc_readmem(td, p, ptr, &aux, sizeof(aux)) !=
			    sizeof(aux))
				return (ENOMEM);
			if (aux.a_type == AT_NULL)
				break;
			ptr += sizeof(aux);
		}
		/* No terminator within the limit: treat the data as bogus. */
		if (aux.a_type != AT_NULL)
			return (ENOEXEC);
		vsize = i + 1;
		size = vsize * sizeof(aux);
		break;
	default:
		KASSERT(0, ("Wrong proc vector type: %d", type));
		return (EINVAL);
	}
	/* Second pass: read the whole vector in one go. */
	proc_vector32 = malloc(size, M_TEMP, M_WAITOK);
	if (proc_readmem(td, p, vptr, proc_vector32, size) != size) {
		error = ENOMEM;
		goto done;
	}
	if (type == PROC_AUX) {
		/* Ownership of the raw buffer passes to the caller. */
		*proc_vectorp = (char **)proc_vector32;
		*vsizep = vsize;
		return (0);
	}
	/* Widen each 32-bit pointer; the 32-bit copy is freed below. */
	proc_vector = malloc(vsize * sizeof(char *), M_TEMP, M_WAITOK);
	for (i = 0; i < (int)vsize; i++)
		proc_vector[i] = PTRIN(proc_vector32[i]);
	*proc_vectorp = proc_vector;
	*vsizep = vsize;
done:
	free(proc_vector32, M_TEMP);
	return (error);
}
1738 #endif
1739 
/*
 * Fetch the argument, environment or auxiliary vector of process 'p' by
 * reading its ps_strings structure and then the vector it describes.
 *
 * On success *proc_vectorp points to an M_TEMP allocation the caller must
 * free and *vsizep holds the number of entries.  For PROC_ARG and PROC_ENV
 * the entries are the string pointers as found in the target (addresses in
 * the target's address space); for PROC_AUX the buffer holds Elf_Auxinfo
 * entries including the terminating AT_NULL.
 *
 * Returns ENOMEM when target memory cannot be read in full, ENOEXEC when
 * the data looks implausible, and EINVAL for an unknown 'type'.  On
 * COMPAT_FREEBSD32 kernels, SV_ILP32 processes are handed to
 * get_proc_vector32().
 */
static int
get_proc_vector(struct thread *td, struct proc *p, char ***proc_vectorp,
    size_t *vsizep, enum proc_vector_type type)
{
	struct ps_strings pss;
	Elf_Auxinfo aux;
	vm_offset_t vptr, ptr;
	char **proc_vector;
	size_t vsize, size;
	int i;

#ifdef COMPAT_FREEBSD32
	if (SV_PROC_FLAG(p, SV_ILP32) != 0)
		return (get_proc_vector32(td, p, proc_vectorp, vsizep, type));
#endif
	if (proc_readmem(td, p, (vm_offset_t)p->p_sysent->sv_psstrings, &pss,
	    sizeof(pss)) != sizeof(pss))
		return (ENOMEM);
	switch (type) {
	case PROC_ARG:
		vptr = (vm_offset_t)pss.ps_argvstr;
		vsize = pss.ps_nargvstr;
		/* The count comes from the target process; bound it. */
		if (vsize > ARG_MAX)
			return (ENOEXEC);
		size = vsize * sizeof(char *);
		break;
	case PROC_ENV:
		vptr = (vm_offset_t)pss.ps_envstr;
		vsize = pss.ps_nenvstr;
		if (vsize > ARG_MAX)
			return (ENOEXEC);
		size = vsize * sizeof(char *);
		break;
	case PROC_AUX:
		/*
		 * The aux array is just above env array on the stack. Check
		 * that the address is naturally aligned.
		 */
		vptr = (vm_offset_t)pss.ps_envstr + (pss.ps_nenvstr + 1)
		    * sizeof(char *);
#if __ELF_WORD_SIZE == 64
		if (vptr % sizeof(uint64_t) != 0)
#else
		if (vptr % sizeof(uint32_t) != 0)
#endif
			return (ENOEXEC);
		/*
		 * We count the array size reading the aux vectors from the
		 * stack until AT_NULL vector is returned.  So (to keep the code
		 * simple) we read the process stack twice: the first time here
		 * to find the size and the second time when copying the vectors
		 * to the allocated proc_vector.
		 */
		for (ptr = vptr, i = 0; i < PROC_AUXV_MAX; i++) {
			if (proc_readmem(td, p, ptr, &aux, sizeof(aux)) !=
			    sizeof(aux))
				return (ENOMEM);
			if (aux.a_type == AT_NULL)
				break;
			ptr += sizeof(aux);
		}
		/*
		 * If the PROC_AUXV_MAX entries are iterated over, and we have
		 * not reached AT_NULL, it is most likely we are reading wrong
		 * data: either the process doesn't have auxv array or data has
		 * been modified. Return the error in this case.
		 */
		if (aux.a_type != AT_NULL)
			return (ENOEXEC);
		/* Count the AT_NULL terminator as part of the vector. */
		vsize = i + 1;
		size = vsize * sizeof(aux);
		break;
	default:
		KASSERT(0, ("Wrong proc vector type: %d", type));
		return (EINVAL); /* In case we are built without INVARIANTS. */
	}
	proc_vector = malloc(size, M_TEMP, M_WAITOK);
	if (proc_readmem(td, p, vptr, proc_vector, size) != size) {
		free(proc_vector, M_TEMP);
		return (ENOMEM);
	}
	*proc_vectorp = proc_vector;
	*vsizep = vsize;

	return (0);
}
1826 
#define GET_PS_STRINGS_CHUNK_SZ	256	/* Chunk size (bytes) for ps_strings operations. */

/*
 * Append the strings of the argument or environment vector of process 'p'
 * to 'sb', each followed by a NUL byte.
 *
 * The pointer vector is fetched with get_proc_vector(); each string is
 * then read from the target in GET_PS_STRINGS_CHUNK_SZ-byte chunks.
 * Output stops at the first NULL vector entry, at the end of the vector,
 * or once the byte budget 'nchr' is used up.  The process must be held by
 * the caller (PROC_ASSERT_HELD).
 *
 * Returns 0 on success or the error from fetching the vector or reading a
 * string chunk.  Results of the sbuf_bcat() calls are not checked here.
 */
static int
get_ps_strings(struct thread *td, struct proc *p, struct sbuf *sb,
    enum proc_vector_type type)
{
	size_t done, len, nchr, vsize;
	int error, i;
	char **proc_vector, *sptr;
	char pss_string[GET_PS_STRINGS_CHUNK_SZ];

	PROC_ASSERT_HELD(p);

	/*
	 * We are not going to read more than 2 * (PATH_MAX + ARG_MAX) bytes.
	 */
	nchr = 2 * (PATH_MAX + ARG_MAX);

	error = get_proc_vector(td, p, &proc_vector, &vsize, type);
	if (error != 0)
		return (error);
	/* 'done' counts the bytes accounted for so far, terminators included. */
	for (done = 0, i = 0; i < (int)vsize && done < nchr; i++) {
		/*
		 * The program may have scribbled into its argv array, e.g. to
		 * remove some arguments.  If that has happened, break out
		 * before trying to read from NULL.
		 */
		if (proc_vector[i] == NULL)
			break;
		for (sptr = proc_vector[i]; ; sptr += GET_PS_STRINGS_CHUNK_SZ) {
			error = proc_read_string(td, p, sptr, pss_string,
			    sizeof(pss_string));
			if (error != 0)
				goto done;
			len = strnlen(pss_string, GET_PS_STRINGS_CHUNK_SZ);
			/* Clamp so that this piece plus its NUL fits the budget. */
			if (done + len >= nchr)
				len = nchr - done - 1;
			sbuf_bcat(sb, pss_string, len);
			/*
			 * Anything other than a full chunk means the string
			 * ended inside this chunk or the piece was clamped.
			 */
			if (len != GET_PS_STRINGS_CHUNK_SZ)
				break;
			done += GET_PS_STRINGS_CHUNK_SZ;
		}
		/* Terminate the string and account for its last piece. */
		sbuf_bcat(sb, "", 1);
		done += len + 1;
	}
done:
	free(proc_vector, M_TEMP);
	return (error);
}
1876 
/*
 * Append the NUL-separated argument strings of process 'p' to 'sb'.
 *
 * NOTE(review): the 'td' argument is not used; curthread is passed to
 * get_ps_strings() instead.  Confirm whether that is intentional.
 */
int
proc_getargv(struct thread *td, struct proc *p, struct sbuf *sb)
{

	return (get_ps_strings(curthread, p, sb, PROC_ARG));
}
1883 
/*
 * Append the NUL-separated environment strings of process 'p' to 'sb'.
 *
 * NOTE(review): the 'td' argument is not used; curthread is passed to
 * get_ps_strings() instead.  Confirm whether that is intentional.
 */
int
proc_getenvv(struct thread *td, struct proc *p, struct sbuf *sb)
{

	return (get_ps_strings(curthread, p, sb, PROC_ENV));
}
1890 
1891 int
1892 proc_getauxv(struct thread *td, struct proc *p, struct sbuf *sb)
1893 {
1894 	size_t vsize, size;
1895 	char **auxv;
1896 	int error;
1897 
1898 	error = get_proc_vector(td, p, &auxv, &vsize, PROC_AUX);
1899 	if (error == 0) {
1900 #ifdef COMPAT_FREEBSD32
1901 		if (SV_PROC_FLAG(p, SV_ILP32) != 0)
1902 			size = vsize * sizeof(Elf32_Auxinfo);
1903 		else
1904 #endif
1905 			size = vsize * sizeof(Elf_Auxinfo);
1906 		if (sbuf_bcat(sb, auxv, size) != 0)
1907 			error = ENOMEM;
1908 		free(auxv, M_TEMP);
1909 	}
1910 	return (error);
1911 }
1912 
/*
 * This sysctl allows a process to retrieve the argument list or process
 * title for another process without groping around in the address space
 * of the other process.  It also allows a process to set its own "process
 * title" to a string of its own choice.
 *
 * Read side: the cached p_args are returned when present; otherwise, for a
 * process that is neither exiting nor a system process, the arguments are
 * fetched with proc_getargv().
 * Write side (req->newptr != NULL): a new pargs is installed in place of
 * the old one, or p_args is cleared when the new length is zero.
 */
static int
sysctl_kern_proc_args(SYSCTL_HANDLER_ARGS)
{
	int *name = (int *)arg1;
	u_int namelen = arg2;
	struct pargs *newpa, *pa;
	struct proc *p;
	struct sbuf sb;
	int flags, error = 0, error2;
	pid_t pid;

	if (namelen != 1)
		return (EINVAL);

	pid = (pid_t)name[0];
	/*
	 * If the query is for this process and it is single-threaded, there
	 * is nobody to modify pargs, thus we can just read.
	 */
	p = curproc;
	if (pid == p->p_pid && p->p_numthreads == 1 && req->newptr == NULL &&
	    (pa = p->p_args) != NULL)
		return (SYSCTL_OUT(req, pa->ar_args, pa->ar_length));

	/*
	 * A write request additionally asks pget() for PGET_ISCURRENT
	 * (presumably restricting the target to the calling process, per the
	 * header comment — confirm against pget()).
	 */
	flags = PGET_CANSEE;
	if (req->newptr != NULL)
		flags |= PGET_ISCURRENT;
	error = pget(pid, flags, &p);
	if (error)
		return (error);

	pa = p->p_args;
	if (pa != NULL) {
		/* Keep the cached args alive across the unlocked copyout. */
		pargs_hold(pa);
		PROC_UNLOCK(p);
		error = SYSCTL_OUT(req, pa->ar_args, pa->ar_length);
		pargs_drop(pa);
	} else if ((p->p_flag & (P_WEXIT | P_SYSTEM)) == 0) {
		/* No cache: hold the process and read its arguments. */
		_PHOLD(p);
		PROC_UNLOCK(p);
		sbuf_new_for_sysctl(&sb, NULL, GET_PS_STRINGS_CHUNK_SZ, req);
		sbuf_clear_flags(&sb, SBUF_INCLUDENUL);
		error = proc_getargv(curthread, p, &sb);
		error2 = sbuf_finish(&sb);
		PRELE(p);
		sbuf_delete(&sb);
		if (error == 0 && error2 != 0)
			error = error2;
	} else {
		PROC_UNLOCK(p);
	}
	/* Done unless this is a successful read followed by a write. */
	if (error != 0 || req->newptr == NULL)
		return (error);

	/*
	 * NOTE(review): if ps_arg_cache_limit is unsigned and can be set
	 * below sizeof(struct pargs), this subtraction would wrap and the
	 * limit would not be enforced — confirm.
	 */
	if (req->newlen > ps_arg_cache_limit - sizeof(struct pargs))
		return (ENOMEM);

	if (req->newlen == 0) {
		/*
		 * Clear the argument pointer, so that we'll fetch arguments
		 * with proc_getargv() until further notice.
		 */
		newpa = NULL;
	} else {
		newpa = pargs_alloc(req->newlen);
		error = SYSCTL_IN(req, newpa->ar_args, req->newlen);
		if (error != 0) {
			pargs_free(newpa);
			return (error);
		}
	}
	/* Swap in the new args under the process lock, then drop the old. */
	PROC_LOCK(p);
	pa = p->p_args;
	p->p_args = newpa;
	PROC_UNLOCK(p);
	pargs_drop(pa);
	return (0);
}
1997 
1998 /*
1999  * This sysctl allows a process to retrieve environment of another process.
2000  */
2001 static int
2002 sysctl_kern_proc_env(SYSCTL_HANDLER_ARGS)
2003 {
2004 	int *name = (int *)arg1;
2005 	u_int namelen = arg2;
2006 	struct proc *p;
2007 	struct sbuf sb;
2008 	int error, error2;
2009 
2010 	if (namelen != 1)
2011 		return (EINVAL);
2012 
2013 	error = pget((pid_t)name[0], PGET_WANTREAD, &p);
2014 	if (error != 0)
2015 		return (error);
2016 	if ((p->p_flag & P_SYSTEM) != 0) {
2017 		PRELE(p);
2018 		return (0);
2019 	}
2020 
2021 	sbuf_new_for_sysctl(&sb, NULL, GET_PS_STRINGS_CHUNK_SZ, req);
2022 	sbuf_clear_flags(&sb, SBUF_INCLUDENUL);
2023 	error = proc_getenvv(curthread, p, &sb);
2024 	error2 = sbuf_finish(&sb);
2025 	PRELE(p);
2026 	sbuf_delete(&sb);
2027 	return (error != 0 ? error : error2);
2028 }
2029 
2030 /*
2031  * This sysctl allows a process to retrieve ELF auxiliary vector of
2032  * another process.
2033  */
2034 static int
2035 sysctl_kern_proc_auxv(SYSCTL_HANDLER_ARGS)
2036 {
2037 	int *name = (int *)arg1;
2038 	u_int namelen = arg2;
2039 	struct proc *p;
2040 	struct sbuf sb;
2041 	int error, error2;
2042 
2043 	if (namelen != 1)
2044 		return (EINVAL);
2045 
2046 	error = pget((pid_t)name[0], PGET_WANTREAD, &p);
2047 	if (error != 0)
2048 		return (error);
2049 	if ((p->p_flag & P_SYSTEM) != 0) {
2050 		PRELE(p);
2051 		return (0);
2052 	}
2053 	sbuf_new_for_sysctl(&sb, NULL, GET_PS_STRINGS_CHUNK_SZ, req);
2054 	sbuf_clear_flags(&sb, SBUF_INCLUDENUL);
2055 	error = proc_getauxv(curthread, p, &sb);
2056 	error2 = sbuf_finish(&sb);
2057 	PRELE(p);
2058 	sbuf_delete(&sb);
2059 	return (error != 0 ? error : error2);
2060 }
2061 
2062 /*
2063  * This sysctl allows a process to retrieve the path of the executable for
2064  * itself or another process.
2065  */
2066 static int
2067 sysctl_kern_proc_pathname(SYSCTL_HANDLER_ARGS)
2068 {
2069 	pid_t *pidp = (pid_t *)arg1;
2070 	unsigned int arglen = arg2;
2071 	struct proc *p;
2072 	struct vnode *vp;
2073 	char *retbuf, *freebuf;
2074 	int error;
2075 
2076 	if (arglen != 1)
2077 		return (EINVAL);
2078 	if (*pidp == -1) {	/* -1 means this process */
2079 		p = req->td->td_proc;
2080 	} else {
2081 		error = pget(*pidp, PGET_CANSEE, &p);
2082 		if (error != 0)
2083 			return (error);
2084 	}
2085 
2086 	vp = p->p_textvp;
2087 	if (vp == NULL) {
2088 		if (*pidp != -1)
2089 			PROC_UNLOCK(p);
2090 		return (0);
2091 	}
2092 	vref(vp);
2093 	if (*pidp != -1)
2094 		PROC_UNLOCK(p);
2095 	error = vn_fullpath(req->td, vp, &retbuf, &freebuf);
2096 	vrele(vp);
2097 	if (error)
2098 		return (error);
2099 	error = SYSCTL_OUT(req, retbuf, strlen(retbuf) + 1);
2100 	free(freebuf, M_TEMP);
2101 	return (error);
2102 }
2103 
2104 static int
2105 sysctl_kern_proc_sv_name(SYSCTL_HANDLER_ARGS)
2106 {
2107 	struct proc *p;
2108 	char *sv_name;
2109 	int *name;
2110 	int namelen;
2111 	int error;
2112 
2113 	namelen = arg2;
2114 	if (namelen != 1)
2115 		return (EINVAL);
2116 
2117 	name = (int *)arg1;
2118 	error = pget((pid_t)name[0], PGET_CANSEE, &p);
2119 	if (error != 0)
2120 		return (error);
2121 	sv_name = p->p_sysent->sv_name;
2122 	PROC_UNLOCK(p);
2123 	return (sysctl_handle_string(oidp, sv_name, 0, req));
2124 }
2125 
2126 #ifdef KINFO_OVMENTRY_SIZE
2127 CTASSERT(sizeof(struct kinfo_ovmentry) == KINFO_OVMENTRY_SIZE);
2128 #endif
2129 
2130 #ifdef COMPAT_FREEBSD7
2131 static int
2132 sysctl_kern_proc_ovmmap(SYSCTL_HANDLER_ARGS)
2133 {
2134 	vm_map_entry_t entry, tmp_entry;
2135 	unsigned int last_timestamp;
2136 	char *fullpath, *freepath;
2137 	struct kinfo_ovmentry *kve;
2138 	struct vattr va;
2139 	struct ucred *cred;
2140 	int error, *name;
2141 	struct vnode *vp;
2142 	struct proc *p;
2143 	vm_map_t map;
2144 	struct vmspace *vm;
2145 
2146 	name = (int *)arg1;
2147 	error = pget((pid_t)name[0], PGET_WANTREAD, &p);
2148 	if (error != 0)
2149 		return (error);
2150 	vm = vmspace_acquire_ref(p);
2151 	if (vm == NULL) {
2152 		PRELE(p);
2153 		return (ESRCH);
2154 	}
2155 	kve = malloc(sizeof(*kve), M_TEMP, M_WAITOK);
2156 
2157 	map = &vm->vm_map;
2158 	vm_map_lock_read(map);
2159 	for (entry = map->header.next; entry != &map->header;
2160 	    entry = entry->next) {
2161 		vm_object_t obj, tobj, lobj;
2162 		vm_offset_t addr;
2163 
2164 		if (entry->eflags & MAP_ENTRY_IS_SUB_MAP)
2165 			continue;
2166 
2167 		bzero(kve, sizeof(*kve));
2168 		kve->kve_structsize = sizeof(*kve);
2169 
2170 		kve->kve_private_resident = 0;
2171 		obj = entry->object.vm_object;
2172 		if (obj != NULL) {
2173 			VM_OBJECT_RLOCK(obj);
2174 			if (obj->shadow_count == 1)
2175 				kve->kve_private_resident =
2176 				    obj->resident_page_count;
2177 		}
2178 		kve->kve_resident = 0;
2179 		addr = entry->start;
2180 		while (addr < entry->end) {
2181 			if (pmap_extract(map->pmap, addr))
2182 				kve->kve_resident++;
2183 			addr += PAGE_SIZE;
2184 		}
2185 
2186 		for (lobj = tobj = obj; tobj; tobj = tobj->backing_object) {
2187 			if (tobj != obj) {
2188 				VM_OBJECT_RLOCK(tobj);
2189 				kve->kve_offset += tobj->backing_object_offset;
2190 			}
2191 			if (lobj != obj)
2192 				VM_OBJECT_RUNLOCK(lobj);
2193 			lobj = tobj;
2194 		}
2195 
2196 		kve->kve_start = (void*)entry->start;
2197 		kve->kve_end = (void*)entry->end;
2198 		kve->kve_offset += (off_t)entry->offset;
2199 
2200 		if (entry->protection & VM_PROT_READ)
2201 			kve->kve_protection |= KVME_PROT_READ;
2202 		if (entry->protection & VM_PROT_WRITE)
2203 			kve->kve_protection |= KVME_PROT_WRITE;
2204 		if (entry->protection & VM_PROT_EXECUTE)
2205 			kve->kve_protection |= KVME_PROT_EXEC;
2206 
2207 		if (entry->eflags & MAP_ENTRY_COW)
2208 			kve->kve_flags |= KVME_FLAG_COW;
2209 		if (entry->eflags & MAP_ENTRY_NEEDS_COPY)
2210 			kve->kve_flags |= KVME_FLAG_NEEDS_COPY;
2211 		if (entry->eflags & MAP_ENTRY_NOCOREDUMP)
2212 			kve->kve_flags |= KVME_FLAG_NOCOREDUMP;
2213 
2214 		last_timestamp = map->timestamp;
2215 		vm_map_unlock_read(map);
2216 
2217 		kve->kve_fileid = 0;
2218 		kve->kve_fsid = 0;
2219 		freepath = NULL;
2220 		fullpath = "";
2221 		if (lobj) {
2222 			kve->kve_type = vm_object_kvme_type(lobj, &vp);
2223 			if (kve->kve_type == KVME_TYPE_MGTDEVICE)
2224 				kve->kve_type = KVME_TYPE_UNKNOWN;
2225 			if (vp != NULL)
2226 				vref(vp);
2227 			if (lobj != obj)
2228 				VM_OBJECT_RUNLOCK(lobj);
2229 
2230 			kve->kve_ref_count = obj->ref_count;
2231 			kve->kve_shadow_count = obj->shadow_count;
2232 			VM_OBJECT_RUNLOCK(obj);
2233 			if (vp != NULL) {
2234 				vn_fullpath(curthread, vp, &fullpath,
2235 				    &freepath);
2236 				cred = curthread->td_ucred;
2237 				vn_lock(vp, LK_SHARED | LK_RETRY);
2238 				if (VOP_GETATTR(vp, &va, cred) == 0) {
2239 					kve->kve_fileid = va.va_fileid;
2240 					/* truncate */
2241 					kve->kve_fsid = va.va_fsid;
2242 				}
2243 				vput(vp);
2244 			}
2245 		} else {
2246 			kve->kve_type = KVME_TYPE_NONE;
2247 			kve->kve_ref_count = 0;
2248 			kve->kve_shadow_count = 0;
2249 		}
2250 
2251 		strlcpy(kve->kve_path, fullpath, sizeof(kve->kve_path));
2252 		if (freepath != NULL)
2253 			free(freepath, M_TEMP);
2254 
2255 		error = SYSCTL_OUT(req, kve, sizeof(*kve));
2256 		vm_map_lock_read(map);
2257 		if (error)
2258 			break;
2259 		if (last_timestamp != map->timestamp) {
2260 			vm_map_lookup_entry(map, addr - 1, &tmp_entry);
2261 			entry = tmp_entry;
2262 		}
2263 	}
2264 	vm_map_unlock_read(map);
2265 	vmspace_free(vm);
2266 	PRELE(p);
2267 	free(kve, M_TEMP);
2268 	return (error);
2269 }
2270 #endif	/* COMPAT_FREEBSD7 */
2271 
/*
 * When KINFO_VMENTRY_SIZE is defined, check at compile time that
 * struct kinfo_vmentry has exactly that size.
 */
#if defined(KINFO_VMENTRY_SIZE)
CTASSERT(sizeof(struct kinfo_vmentry) == KINFO_VMENTRY_SIZE);
#endif
2275 
2276 void
2277 kern_proc_vmmap_resident(vm_map_t map, vm_map_entry_t entry,
2278     int *resident_count, bool *super)
2279 {
2280 	vm_object_t obj, tobj;
2281 	vm_page_t m, m_adv;
2282 	vm_offset_t addr;
2283 	vm_paddr_t locked_pa;
2284 	vm_pindex_t pi, pi_adv, pindex;
2285 
2286 	*super = false;
2287 	*resident_count = 0;
2288 	if (vmmap_skip_res_cnt)
2289 		return;
2290 
2291 	locked_pa = 0;
2292 	obj = entry->object.vm_object;
2293 	addr = entry->start;
2294 	m_adv = NULL;
2295 	pi = OFF_TO_IDX(entry->offset);
2296 	for (; addr < entry->end; addr += IDX_TO_OFF(pi_adv), pi += pi_adv) {
2297 		if (m_adv != NULL) {
2298 			m = m_adv;
2299 		} else {
2300 			pi_adv = atop(entry->end - addr);
2301 			pindex = pi;
2302 			for (tobj = obj;; tobj = tobj->backing_object) {
2303 				m = vm_page_find_least(tobj, pindex);
2304 				if (m != NULL) {
2305 					if (m->pindex == pindex)
2306 						break;
2307 					if (pi_adv > m->pindex - pindex) {
2308 						pi_adv = m->pindex - pindex;
2309 						m_adv = m;
2310 					}
2311 				}
2312 				if (tobj->backing_object == NULL)
2313 					goto next;
2314 				pindex += OFF_TO_IDX(tobj->
2315 				    backing_object_offset);
2316 			}
2317 		}
2318 		m_adv = NULL;
2319 		if (m->psind != 0 && addr + pagesizes[1] <= entry->end &&
2320 		    (addr & (pagesizes[1] - 1)) == 0 &&
2321 		    (pmap_mincore(map->pmap, addr, &locked_pa) &
2322 		    MINCORE_SUPER) != 0) {
2323 			*super = true;
2324 			pi_adv = atop(pagesizes[1]);
2325 		} else {
2326 			/*
2327 			 * We do not test the found page on validity.
2328 			 * Either the page is busy and being paged in,
2329 			 * or it was invalidated.  The first case
2330 			 * should be counted as resident, the second
2331 			 * is not so clear; we do account both.
2332 			 */
2333 			pi_adv = 1;
2334 		}
2335 		*resident_count += pi_adv;
2336 next:;
2337 	}
2338 	PA_UNLOCK_COND(locked_pa);
2339 }
2340 
2341 /*
2342  * Must be called with the process locked and will return unlocked.
2343  */
2344 int
2345 kern_proc_vmmap_out(struct proc *p, struct sbuf *sb, ssize_t maxlen, int flags)
2346 {
2347 	vm_map_entry_t entry, tmp_entry;
2348 	struct vattr va;
2349 	vm_map_t map;
2350 	vm_object_t obj, tobj, lobj;
2351 	char *fullpath, *freepath;
2352 	struct kinfo_vmentry *kve;
2353 	struct ucred *cred;
2354 	struct vnode *vp;
2355 	struct vmspace *vm;
2356 	vm_offset_t addr;
2357 	unsigned int last_timestamp;
2358 	int error;
2359 	bool super;
2360 
2361 	PROC_LOCK_ASSERT(p, MA_OWNED);
2362 
2363 	_PHOLD(p);
2364 	PROC_UNLOCK(p);
2365 	vm = vmspace_acquire_ref(p);
2366 	if (vm == NULL) {
2367 		PRELE(p);
2368 		return (ESRCH);
2369 	}
2370 	kve = malloc(sizeof(*kve), M_TEMP, M_WAITOK | M_ZERO);
2371 
2372 	error = 0;
2373 	map = &vm->vm_map;
2374 	vm_map_lock_read(map);
2375 	for (entry = map->header.next; entry != &map->header;
2376 	    entry = entry->next) {
2377 		if (entry->eflags & MAP_ENTRY_IS_SUB_MAP)
2378 			continue;
2379 
2380 		addr = entry->end;
2381 		bzero(kve, sizeof(*kve));
2382 		obj = entry->object.vm_object;
2383 		if (obj != NULL) {
2384 			for (tobj = obj; tobj != NULL;
2385 			    tobj = tobj->backing_object) {
2386 				VM_OBJECT_RLOCK(tobj);
2387 				kve->kve_offset += tobj->backing_object_offset;
2388 				lobj = tobj;
2389 			}
2390 			if (obj->backing_object == NULL)
2391 				kve->kve_private_resident =
2392 				    obj->resident_page_count;
2393 			kern_proc_vmmap_resident(map, entry,
2394 			    &kve->kve_resident, &super);
2395 			if (super)
2396 				kve->kve_flags |= KVME_FLAG_SUPER;
2397 			for (tobj = obj; tobj != NULL;
2398 			    tobj = tobj->backing_object) {
2399 				if (tobj != obj && tobj != lobj)
2400 					VM_OBJECT_RUNLOCK(tobj);
2401 			}
2402 		} else {
2403 			lobj = NULL;
2404 		}
2405 
2406 		kve->kve_start = entry->start;
2407 		kve->kve_end = entry->end;
2408 		kve->kve_offset += entry->offset;
2409 
2410 		if (entry->protection & VM_PROT_READ)
2411 			kve->kve_protection |= KVME_PROT_READ;
2412 		if (entry->protection & VM_PROT_WRITE)
2413 			kve->kve_protection |= KVME_PROT_WRITE;
2414 		if (entry->protection & VM_PROT_EXECUTE)
2415 			kve->kve_protection |= KVME_PROT_EXEC;
2416 
2417 		if (entry->eflags & MAP_ENTRY_COW)
2418 			kve->kve_flags |= KVME_FLAG_COW;
2419 		if (entry->eflags & MAP_ENTRY_NEEDS_COPY)
2420 			kve->kve_flags |= KVME_FLAG_NEEDS_COPY;
2421 		if (entry->eflags & MAP_ENTRY_NOCOREDUMP)
2422 			kve->kve_flags |= KVME_FLAG_NOCOREDUMP;
2423 		if (entry->eflags & MAP_ENTRY_GROWS_UP)
2424 			kve->kve_flags |= KVME_FLAG_GROWS_UP;
2425 		if (entry->eflags & MAP_ENTRY_GROWS_DOWN)
2426 			kve->kve_flags |= KVME_FLAG_GROWS_DOWN;
2427 
2428 		last_timestamp = map->timestamp;
2429 		vm_map_unlock_read(map);
2430 
2431 		freepath = NULL;
2432 		fullpath = "";
2433 		if (lobj != NULL) {
2434 			kve->kve_type = vm_object_kvme_type(lobj, &vp);
2435 			if (vp != NULL)
2436 				vref(vp);
2437 			if (lobj != obj)
2438 				VM_OBJECT_RUNLOCK(lobj);
2439 
2440 			kve->kve_ref_count = obj->ref_count;
2441 			kve->kve_shadow_count = obj->shadow_count;
2442 			VM_OBJECT_RUNLOCK(obj);
2443 			if (vp != NULL) {
2444 				vn_fullpath(curthread, vp, &fullpath,
2445 				    &freepath);
2446 				kve->kve_vn_type = vntype_to_kinfo(vp->v_type);
2447 				cred = curthread->td_ucred;
2448 				vn_lock(vp, LK_SHARED | LK_RETRY);
2449 				if (VOP_GETATTR(vp, &va, cred) == 0) {
2450 					kve->kve_vn_fileid = va.va_fileid;
2451 					kve->kve_vn_fsid = va.va_fsid;
2452 					kve->kve_vn_fsid_freebsd11 =
2453 					    kve->kve_vn_fsid; /* truncate */
2454 					kve->kve_vn_mode =
2455 					    MAKEIMODE(va.va_type, va.va_mode);
2456 					kve->kve_vn_size = va.va_size;
2457 					kve->kve_vn_rdev = va.va_rdev;
2458 					kve->kve_vn_rdev_freebsd11 =
2459 					    kve->kve_vn_rdev; /* truncate */
2460 					kve->kve_status = KF_ATTR_VALID;
2461 				}
2462 				vput(vp);
2463 			}
2464 		} else {
2465 			kve->kve_type = KVME_TYPE_NONE;
2466 			kve->kve_ref_count = 0;
2467 			kve->kve_shadow_count = 0;
2468 		}
2469 
2470 		strlcpy(kve->kve_path, fullpath, sizeof(kve->kve_path));
2471 		if (freepath != NULL)
2472 			free(freepath, M_TEMP);
2473 
2474 		/* Pack record size down */
2475 		if ((flags & KERN_VMMAP_PACK_KINFO) != 0)
2476 			kve->kve_structsize =
2477 			    offsetof(struct kinfo_vmentry, kve_path) +
2478 			    strlen(kve->kve_path) + 1;
2479 		else
2480 			kve->kve_structsize = sizeof(*kve);
2481 		kve->kve_structsize = roundup(kve->kve_structsize,
2482 		    sizeof(uint64_t));
2483 
2484 		/* Halt filling and truncate rather than exceeding maxlen */
2485 		if (maxlen != -1 && maxlen < kve->kve_structsize) {
2486 			error = 0;
2487 			vm_map_lock_read(map);
2488 			break;
2489 		} else if (maxlen != -1)
2490 			maxlen -= kve->kve_structsize;
2491 
2492 		if (sbuf_bcat(sb, kve, kve->kve_structsize) != 0)
2493 			error = ENOMEM;
2494 		vm_map_lock_read(map);
2495 		if (error != 0)
2496 			break;
2497 		if (last_timestamp != map->timestamp) {
2498 			vm_map_lookup_entry(map, addr - 1, &tmp_entry);
2499 			entry = tmp_entry;
2500 		}
2501 	}
2502 	vm_map_unlock_read(map);
2503 	vmspace_free(vm);
2504 	PRELE(p);
2505 	free(kve, M_TEMP);
2506 	return (error);
2507 }
2508 
2509 static int
2510 sysctl_kern_proc_vmmap(SYSCTL_HANDLER_ARGS)
2511 {
2512 	struct proc *p;
2513 	struct sbuf sb;
2514 	int error, error2, *name;
2515 
2516 	name = (int *)arg1;
2517 	sbuf_new_for_sysctl(&sb, NULL, sizeof(struct kinfo_vmentry), req);
2518 	sbuf_clear_flags(&sb, SBUF_INCLUDENUL);
2519 	error = pget((pid_t)name[0], PGET_CANDEBUG | PGET_NOTWEXIT, &p);
2520 	if (error != 0) {
2521 		sbuf_delete(&sb);
2522 		return (error);
2523 	}
2524 	error = kern_proc_vmmap_out(p, &sb, -1, KERN_VMMAP_PACK_KINFO);
2525 	error2 = sbuf_finish(&sb);
2526 	sbuf_delete(&sb);
2527 	return (error != 0 ? error : error2);
2528 }
2529 
#if defined(STACK) || defined(DDB)
/*
 * Sysctl handler that reports one struct kinfo_kstack per thread of the
 * process whose pid is given as the single name component.
 *
 * The thread ids are snapshotted first, then each thread is looked up again
 * by id so that the process lock can be dropped while a record is copied
 * out.  Threads that have gone away in the meantime are skipped.
 *
 * Returns EINVAL unless exactly one name component (the pid) was supplied,
 * the error from pget() if the process cannot be looked up, and otherwise
 * the result of the last SYSCTL_OUT() call (0 if none was made).
 */
static int
sysctl_kern_proc_kstack(SYSCTL_HANDLER_ARGS)
{
	struct kinfo_kstack *kkstp;
	int error, i, *name, numthreads;
	lwpid_t *lwpidarray;
	struct thread *td;
	struct stack *st;
	struct sbuf sb;
	struct proc *p;
	u_int namelen;

	/*
	 * name[0] is read below, so insist on exactly one component, as the
	 * other per-process handlers in this file do.
	 */
	namelen = arg2;
	if (namelen != 1)
		return (EINVAL);

	name = (int *)arg1;
	error = pget((pid_t)name[0], PGET_NOTINEXEC | PGET_WANTREAD, &p);
	if (error != 0)
		return (error);

	kkstp = malloc(sizeof(*kkstp), M_TEMP, M_WAITOK);
	st = stack_create(M_WAITOK);

	/*
	 * Size the tid array.  The allocation is done with the process
	 * unlocked, so retry if the thread count grew in the meantime.
	 */
	lwpidarray = NULL;
	PROC_LOCK(p);
	do {
		if (lwpidarray != NULL) {
			free(lwpidarray, M_TEMP);
			lwpidarray = NULL;
		}
		numthreads = p->p_numthreads;
		PROC_UNLOCK(p);
		lwpidarray = malloc(sizeof(*lwpidarray) * numthreads, M_TEMP,
		    M_WAITOK | M_ZERO);
		PROC_LOCK(p);
	} while (numthreads < p->p_numthreads);

	/*
	 * XXXRW: During the below loop, execve(2) and countless other sorts
	 * of changes could have taken place.  Should we check to see if the
	 * vmspace has been replaced, or the like, in order to prevent
	 * giving a snapshot that spans, say, execve(2), with some threads
	 * before and some after?  Among other things, the credentials could
	 * have changed, in which case the right to extract debug info might
	 * no longer be assured.
	 */
	i = 0;
	FOREACH_THREAD_IN_PROC(p, td) {
		KASSERT(i < numthreads,
		    ("sysctl_kern_proc_kstack: numthreads"));
		lwpidarray[i] = td->td_tid;
		i++;
	}
	numthreads = i;
	for (i = 0; i < numthreads; i++) {
		/* The thread may have exited while the lock was dropped. */
		td = thread_find(p, lwpidarray[i]);
		if (td == NULL) {
			continue;
		}
		bzero(kkstp, sizeof(*kkstp));
		/* Render the trace text directly into the record. */
		(void)sbuf_new(&sb, kkstp->kkst_trace,
		    sizeof(kkstp->kkst_trace), SBUF_FIXEDLEN);
		thread_lock(td);
		kkstp->kkst_tid = td->td_tid;
		if (TD_IS_SWAPPED(td)) {
			kkstp->kkst_state = KKST_STATE_SWAPPED;
		} else if (TD_IS_RUNNING(td)) {
			if (stack_save_td_running(st, td) == 0)
				kkstp->kkst_state = KKST_STATE_STACKOK;
			else
				kkstp->kkst_state = KKST_STATE_RUNNING;
		} else {
			kkstp->kkst_state = KKST_STATE_STACKOK;
			stack_save_td(st, td);
		}
		thread_unlock(td);
		/* Drop the process lock around printing and the copy-out. */
		PROC_UNLOCK(p);
		stack_sbuf_print(&sb, st);
		sbuf_finish(&sb);
		sbuf_delete(&sb);
		error = SYSCTL_OUT(req, kkstp, sizeof(*kkstp));
		PROC_LOCK(p);
		if (error)
			break;
	}
	_PRELE(p);
	PROC_UNLOCK(p);
	if (lwpidarray != NULL)
		free(lwpidarray, M_TEMP);
	stack_destroy(st);
	free(kkstp, M_TEMP);
	return (error);
}
#endif
2621 
2622 /*
2623  * This sysctl allows a process to retrieve the full list of groups from
2624  * itself or another process.
2625  */
2626 static int
2627 sysctl_kern_proc_groups(SYSCTL_HANDLER_ARGS)
2628 {
2629 	pid_t *pidp = (pid_t *)arg1;
2630 	unsigned int arglen = arg2;
2631 	struct proc *p;
2632 	struct ucred *cred;
2633 	int error;
2634 
2635 	if (arglen != 1)
2636 		return (EINVAL);
2637 	if (*pidp == -1) {	/* -1 means this process */
2638 		p = req->td->td_proc;
2639 		PROC_LOCK(p);
2640 	} else {
2641 		error = pget(*pidp, PGET_CANSEE, &p);
2642 		if (error != 0)
2643 			return (error);
2644 	}
2645 
2646 	cred = crhold(p->p_ucred);
2647 	PROC_UNLOCK(p);
2648 
2649 	error = SYSCTL_OUT(req, cred->cr_groups,
2650 	    cred->cr_ngroups * sizeof(gid_t));
2651 	crfree(cred);
2652 	return (error);
2653 }
2654 
2655 /*
2656  * This sysctl allows a process to retrieve or/and set the resource limit for
2657  * another process.
2658  */
2659 static int
2660 sysctl_kern_proc_rlimit(SYSCTL_HANDLER_ARGS)
2661 {
2662 	int *name = (int *)arg1;
2663 	u_int namelen = arg2;
2664 	struct rlimit rlim;
2665 	struct proc *p;
2666 	u_int which;
2667 	int flags, error;
2668 
2669 	if (namelen != 2)
2670 		return (EINVAL);
2671 
2672 	which = (u_int)name[1];
2673 	if (which >= RLIM_NLIMITS)
2674 		return (EINVAL);
2675 
2676 	if (req->newptr != NULL && req->newlen != sizeof(rlim))
2677 		return (EINVAL);
2678 
2679 	flags = PGET_HOLD | PGET_NOTWEXIT;
2680 	if (req->newptr != NULL)
2681 		flags |= PGET_CANDEBUG;
2682 	else
2683 		flags |= PGET_CANSEE;
2684 	error = pget((pid_t)name[0], flags, &p);
2685 	if (error != 0)
2686 		return (error);
2687 
2688 	/*
2689 	 * Retrieve limit.
2690 	 */
2691 	if (req->oldptr != NULL) {
2692 		PROC_LOCK(p);
2693 		lim_rlimit_proc(p, which, &rlim);
2694 		PROC_UNLOCK(p);
2695 	}
2696 	error = SYSCTL_OUT(req, &rlim, sizeof(rlim));
2697 	if (error != 0)
2698 		goto errout;
2699 
2700 	/*
2701 	 * Set limit.
2702 	 */
2703 	if (req->newptr != NULL) {
2704 		error = SYSCTL_IN(req, &rlim, sizeof(rlim));
2705 		if (error == 0)
2706 			error = kern_proc_setrlimit(curthread, p, which, &rlim);
2707 	}
2708 
2709 errout:
2710 	PRELE(p);
2711 	return (error);
2712 }
2713 
2714 /*
2715  * This sysctl allows a process to retrieve ps_strings structure location of
2716  * another process.
2717  */
2718 static int
2719 sysctl_kern_proc_ps_strings(SYSCTL_HANDLER_ARGS)
2720 {
2721 	int *name = (int *)arg1;
2722 	u_int namelen = arg2;
2723 	struct proc *p;
2724 	vm_offset_t ps_strings;
2725 	int error;
2726 #ifdef COMPAT_FREEBSD32
2727 	uint32_t ps_strings32;
2728 #endif
2729 
2730 	if (namelen != 1)
2731 		return (EINVAL);
2732 
2733 	error = pget((pid_t)name[0], PGET_CANDEBUG, &p);
2734 	if (error != 0)
2735 		return (error);
2736 #ifdef COMPAT_FREEBSD32
2737 	if ((req->flags & SCTL_MASK32) != 0) {
2738 		/*
2739 		 * We return 0 if the 32 bit emulation request is for a 64 bit
2740 		 * process.
2741 		 */
2742 		ps_strings32 = SV_PROC_FLAG(p, SV_ILP32) != 0 ?
2743 		    PTROUT(p->p_sysent->sv_psstrings) : 0;
2744 		PROC_UNLOCK(p);
2745 		error = SYSCTL_OUT(req, &ps_strings32, sizeof(ps_strings32));
2746 		return (error);
2747 	}
2748 #endif
2749 	ps_strings = p->p_sysent->sv_psstrings;
2750 	PROC_UNLOCK(p);
2751 	error = SYSCTL_OUT(req, &ps_strings, sizeof(ps_strings));
2752 	return (error);
2753 }
2754 
2755 /*
2756  * This sysctl allows a process to retrieve umask of another process.
2757  */
2758 static int
2759 sysctl_kern_proc_umask(SYSCTL_HANDLER_ARGS)
2760 {
2761 	int *name = (int *)arg1;
2762 	u_int namelen = arg2;
2763 	struct proc *p;
2764 	int error;
2765 	u_short fd_cmask;
2766 	pid_t pid;
2767 
2768 	if (namelen != 1)
2769 		return (EINVAL);
2770 
2771 	pid = (pid_t)name[0];
2772 	p = curproc;
2773 	if (pid == p->p_pid || pid == 0) {
2774 		fd_cmask = p->p_fd->fd_cmask;
2775 		goto out;
2776 	}
2777 
2778 	error = pget(pid, PGET_WANTREAD, &p);
2779 	if (error != 0)
2780 		return (error);
2781 
2782 	fd_cmask = p->p_fd->fd_cmask;
2783 	PRELE(p);
2784 out:
2785 	error = SYSCTL_OUT(req, &fd_cmask, sizeof(fd_cmask));
2786 	return (error);
2787 }
2788 
2789 /*
2790  * This sysctl allows a process to set and retrieve binary osreldate of
2791  * another process.
2792  */
2793 static int
2794 sysctl_kern_proc_osrel(SYSCTL_HANDLER_ARGS)
2795 {
2796 	int *name = (int *)arg1;
2797 	u_int namelen = arg2;
2798 	struct proc *p;
2799 	int flags, error, osrel;
2800 
2801 	if (namelen != 1)
2802 		return (EINVAL);
2803 
2804 	if (req->newptr != NULL && req->newlen != sizeof(osrel))
2805 		return (EINVAL);
2806 
2807 	flags = PGET_HOLD | PGET_NOTWEXIT;
2808 	if (req->newptr != NULL)
2809 		flags |= PGET_CANDEBUG;
2810 	else
2811 		flags |= PGET_CANSEE;
2812 	error = pget((pid_t)name[0], flags, &p);
2813 	if (error != 0)
2814 		return (error);
2815 
2816 	error = SYSCTL_OUT(req, &p->p_osrel, sizeof(p->p_osrel));
2817 	if (error != 0)
2818 		goto errout;
2819 
2820 	if (req->newptr != NULL) {
2821 		error = SYSCTL_IN(req, &osrel, sizeof(osrel));
2822 		if (error != 0)
2823 			goto errout;
2824 		if (osrel < 0) {
2825 			error = EINVAL;
2826 			goto errout;
2827 		}
2828 		p->p_osrel = osrel;
2829 	}
2830 errout:
2831 	PRELE(p);
2832 	return (error);
2833 }
2834 
2835 static int
2836 sysctl_kern_proc_sigtramp(SYSCTL_HANDLER_ARGS)
2837 {
2838 	int *name = (int *)arg1;
2839 	u_int namelen = arg2;
2840 	struct proc *p;
2841 	struct kinfo_sigtramp kst;
2842 	const struct sysentvec *sv;
2843 	int error;
2844 #ifdef COMPAT_FREEBSD32
2845 	struct kinfo_sigtramp32 kst32;
2846 #endif
2847 
2848 	if (namelen != 1)
2849 		return (EINVAL);
2850 
2851 	error = pget((pid_t)name[0], PGET_CANDEBUG, &p);
2852 	if (error != 0)
2853 		return (error);
2854 	sv = p->p_sysent;
2855 #ifdef COMPAT_FREEBSD32
2856 	if ((req->flags & SCTL_MASK32) != 0) {
2857 		bzero(&kst32, sizeof(kst32));
2858 		if (SV_PROC_FLAG(p, SV_ILP32)) {
2859 			if (sv->sv_sigcode_base != 0) {
2860 				kst32.ksigtramp_start = sv->sv_sigcode_base;
2861 				kst32.ksigtramp_end = sv->sv_sigcode_base +
2862 				    *sv->sv_szsigcode;
2863 			} else {
2864 				kst32.ksigtramp_start = sv->sv_psstrings -
2865 				    *sv->sv_szsigcode;
2866 				kst32.ksigtramp_end = sv->sv_psstrings;
2867 			}
2868 		}
2869 		PROC_UNLOCK(p);
2870 		error = SYSCTL_OUT(req, &kst32, sizeof(kst32));
2871 		return (error);
2872 	}
2873 #endif
2874 	bzero(&kst, sizeof(kst));
2875 	if (sv->sv_sigcode_base != 0) {
2876 		kst.ksigtramp_start = (char *)sv->sv_sigcode_base;
2877 		kst.ksigtramp_end = (char *)sv->sv_sigcode_base +
2878 		    *sv->sv_szsigcode;
2879 	} else {
2880 		kst.ksigtramp_start = (char *)sv->sv_psstrings -
2881 		    *sv->sv_szsigcode;
2882 		kst.ksigtramp_end = (char *)sv->sv_psstrings;
2883 	}
2884 	PROC_UNLOCK(p);
2885 	error = SYSCTL_OUT(req, &kst, sizeof(kst));
2886 	return (error);
2887 }
2888 
2889 SYSCTL_NODE(_kern, KERN_PROC, proc, CTLFLAG_RD,  0, "Process table");
2890 
2891 SYSCTL_PROC(_kern_proc, KERN_PROC_ALL, all, CTLFLAG_RD|CTLTYPE_STRUCT|
2892 	CTLFLAG_MPSAFE, 0, 0, sysctl_kern_proc, "S,proc",
2893 	"Return entire process table");
2894 
2895 static SYSCTL_NODE(_kern_proc, KERN_PROC_GID, gid, CTLFLAG_RD | CTLFLAG_MPSAFE,
2896 	sysctl_kern_proc, "Process table");
2897 
2898 static SYSCTL_NODE(_kern_proc, KERN_PROC_PGRP, pgrp, CTLFLAG_RD | CTLFLAG_MPSAFE,
2899 	sysctl_kern_proc, "Process table");
2900 
2901 static SYSCTL_NODE(_kern_proc, KERN_PROC_RGID, rgid, CTLFLAG_RD | CTLFLAG_MPSAFE,
2902 	sysctl_kern_proc, "Process table");
2903 
2904 static SYSCTL_NODE(_kern_proc, KERN_PROC_SESSION, sid, CTLFLAG_RD |
2905 	CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");
2906 
2907 static SYSCTL_NODE(_kern_proc, KERN_PROC_TTY, tty, CTLFLAG_RD | CTLFLAG_MPSAFE,
2908 	sysctl_kern_proc, "Process table");
2909 
2910 static SYSCTL_NODE(_kern_proc, KERN_PROC_UID, uid, CTLFLAG_RD | CTLFLAG_MPSAFE,
2911 	sysctl_kern_proc, "Process table");
2912 
2913 static SYSCTL_NODE(_kern_proc, KERN_PROC_RUID, ruid, CTLFLAG_RD | CTLFLAG_MPSAFE,
2914 	sysctl_kern_proc, "Process table");
2915 
2916 static SYSCTL_NODE(_kern_proc, KERN_PROC_PID, pid, CTLFLAG_RD | CTLFLAG_MPSAFE,
2917 	sysctl_kern_proc, "Process table");
2918 
2919 static SYSCTL_NODE(_kern_proc, KERN_PROC_PROC, proc, CTLFLAG_RD | CTLFLAG_MPSAFE,
2920 	sysctl_kern_proc, "Return process table, no threads");
2921 
2922 static SYSCTL_NODE(_kern_proc, KERN_PROC_ARGS, args,
2923 	CTLFLAG_RW | CTLFLAG_CAPWR | CTLFLAG_ANYBODY | CTLFLAG_MPSAFE,
2924 	sysctl_kern_proc_args, "Process argument list");
2925 
2926 static SYSCTL_NODE(_kern_proc, KERN_PROC_ENV, env, CTLFLAG_RD | CTLFLAG_MPSAFE,
2927 	sysctl_kern_proc_env, "Process environment");
2928 
2929 static SYSCTL_NODE(_kern_proc, KERN_PROC_AUXV, auxv, CTLFLAG_RD |
2930 	CTLFLAG_MPSAFE, sysctl_kern_proc_auxv, "Process ELF auxiliary vector");
2931 
2932 static SYSCTL_NODE(_kern_proc, KERN_PROC_PATHNAME, pathname, CTLFLAG_RD |
2933 	CTLFLAG_MPSAFE, sysctl_kern_proc_pathname, "Process executable path");
2934 
2935 static SYSCTL_NODE(_kern_proc, KERN_PROC_SV_NAME, sv_name, CTLFLAG_RD |
2936 	CTLFLAG_MPSAFE, sysctl_kern_proc_sv_name,
2937 	"Process syscall vector name (ABI type)");
2938 
2939 static SYSCTL_NODE(_kern_proc, (KERN_PROC_GID | KERN_PROC_INC_THREAD), gid_td,
2940 	CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");
2941 
2942 static SYSCTL_NODE(_kern_proc, (KERN_PROC_PGRP | KERN_PROC_INC_THREAD), pgrp_td,
2943 	CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");
2944 
2945 static SYSCTL_NODE(_kern_proc, (KERN_PROC_RGID | KERN_PROC_INC_THREAD), rgid_td,
2946 	CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");
2947 
2948 static SYSCTL_NODE(_kern_proc, (KERN_PROC_SESSION | KERN_PROC_INC_THREAD),
2949 	sid_td, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");
2950 
2951 static SYSCTL_NODE(_kern_proc, (KERN_PROC_TTY | KERN_PROC_INC_THREAD), tty_td,
2952 	CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");
2953 
2954 static SYSCTL_NODE(_kern_proc, (KERN_PROC_UID | KERN_PROC_INC_THREAD), uid_td,
2955 	CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");
2956 
2957 static SYSCTL_NODE(_kern_proc, (KERN_PROC_RUID | KERN_PROC_INC_THREAD), ruid_td,
2958 	CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");
2959 
2960 static SYSCTL_NODE(_kern_proc, (KERN_PROC_PID | KERN_PROC_INC_THREAD), pid_td,
2961 	CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");
2962 
2963 static SYSCTL_NODE(_kern_proc, (KERN_PROC_PROC | KERN_PROC_INC_THREAD), proc_td,
2964 	CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc,
2965 	"Return process table, no threads");
2966 
2967 #ifdef COMPAT_FREEBSD7
2968 static SYSCTL_NODE(_kern_proc, KERN_PROC_OVMMAP, ovmmap, CTLFLAG_RD |
2969 	CTLFLAG_MPSAFE, sysctl_kern_proc_ovmmap, "Old Process vm map entries");
2970 #endif
2971 
2972 static SYSCTL_NODE(_kern_proc, KERN_PROC_VMMAP, vmmap, CTLFLAG_RD |
2973 	CTLFLAG_MPSAFE, sysctl_kern_proc_vmmap, "Process vm map entries");
2974 
2975 #if defined(STACK) || defined(DDB)
2976 static SYSCTL_NODE(_kern_proc, KERN_PROC_KSTACK, kstack, CTLFLAG_RD |
2977 	CTLFLAG_MPSAFE, sysctl_kern_proc_kstack, "Process kernel stacks");
2978 #endif
2979 
2980 static SYSCTL_NODE(_kern_proc, KERN_PROC_GROUPS, groups, CTLFLAG_RD |
2981 	CTLFLAG_MPSAFE, sysctl_kern_proc_groups, "Process groups");
2982 
2983 static SYSCTL_NODE(_kern_proc, KERN_PROC_RLIMIT, rlimit, CTLFLAG_RW |
2984 	CTLFLAG_ANYBODY | CTLFLAG_MPSAFE, sysctl_kern_proc_rlimit,
2985 	"Process resource limits");
2986 
2987 static SYSCTL_NODE(_kern_proc, KERN_PROC_PS_STRINGS, ps_strings, CTLFLAG_RD |
2988 	CTLFLAG_MPSAFE, sysctl_kern_proc_ps_strings,
2989 	"Process ps_strings location");
2990 
2991 static SYSCTL_NODE(_kern_proc, KERN_PROC_UMASK, umask, CTLFLAG_RD |
2992 	CTLFLAG_MPSAFE, sysctl_kern_proc_umask, "Process umask");
2993 
2994 static SYSCTL_NODE(_kern_proc, KERN_PROC_OSREL, osrel, CTLFLAG_RW |
2995 	CTLFLAG_ANYBODY | CTLFLAG_MPSAFE, sysctl_kern_proc_osrel,
2996 	"Process binary osreldate");
2997 
2998 static SYSCTL_NODE(_kern_proc, KERN_PROC_SIGTRAMP, sigtramp, CTLFLAG_RD |
2999 	CTLFLAG_MPSAFE, sysctl_kern_proc_sigtramp,
3000 	"Process signal trampoline location");
3001 
3002 int allproc_gen;
3003 
3004 /*
3005  * stop_all_proc() purpose is to stop all process which have usermode,
3006  * except current process for obvious reasons.  This makes it somewhat
3007  * unreliable when invoked from multithreaded process.  The service
3008  * must not be user-callable anyway.
3009  */
3010 void
3011 stop_all_proc(void)
3012 {
3013 	struct proc *cp, *p;
3014 	int r, gen;
3015 	bool restart, seen_stopped, seen_exiting, stopped_some;
3016 
3017 	cp = curproc;
3018 allproc_loop:
3019 	sx_xlock(&allproc_lock);
3020 	gen = allproc_gen;
3021 	seen_exiting = seen_stopped = stopped_some = restart = false;
3022 	LIST_REMOVE(cp, p_list);
3023 	LIST_INSERT_HEAD(&allproc, cp, p_list);
3024 	for (;;) {
3025 		p = LIST_NEXT(cp, p_list);
3026 		if (p == NULL)
3027 			break;
3028 		LIST_REMOVE(cp, p_list);
3029 		LIST_INSERT_AFTER(p, cp, p_list);
3030 		PROC_LOCK(p);
3031 		if ((p->p_flag & (P_KPROC | P_SYSTEM | P_TOTAL_STOP)) != 0) {
3032 			PROC_UNLOCK(p);
3033 			continue;
3034 		}
3035 		if ((p->p_flag & P_WEXIT) != 0) {
3036 			seen_exiting = true;
3037 			PROC_UNLOCK(p);
3038 			continue;
3039 		}
3040 		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
3041 			/*
3042 			 * Stopped processes are tolerated when there
3043 			 * are no other processes which might continue
3044 			 * them.  P_STOPPED_SINGLE but not
3045 			 * P_TOTAL_STOP process still has at least one
3046 			 * thread running.
3047 			 */
3048 			seen_stopped = true;
3049 			PROC_UNLOCK(p);
3050 			continue;
3051 		}
3052 		_PHOLD(p);
3053 		sx_xunlock(&allproc_lock);
3054 		r = thread_single(p, SINGLE_ALLPROC);
3055 		if (r != 0)
3056 			restart = true;
3057 		else
3058 			stopped_some = true;
3059 		_PRELE(p);
3060 		PROC_UNLOCK(p);
3061 		sx_xlock(&allproc_lock);
3062 	}
3063 	/* Catch forked children we did not see in iteration. */
3064 	if (gen != allproc_gen)
3065 		restart = true;
3066 	sx_xunlock(&allproc_lock);
3067 	if (restart || stopped_some || seen_exiting || seen_stopped) {
3068 		kern_yield(PRI_USER);
3069 		goto allproc_loop;
3070 	}
3071 }
3072 
3073 void
3074 resume_all_proc(void)
3075 {
3076 	struct proc *cp, *p;
3077 
3078 	cp = curproc;
3079 	sx_xlock(&allproc_lock);
3080 again:
3081 	LIST_REMOVE(cp, p_list);
3082 	LIST_INSERT_HEAD(&allproc, cp, p_list);
3083 	for (;;) {
3084 		p = LIST_NEXT(cp, p_list);
3085 		if (p == NULL)
3086 			break;
3087 		LIST_REMOVE(cp, p_list);
3088 		LIST_INSERT_AFTER(p, cp, p_list);
3089 		PROC_LOCK(p);
3090 		if ((p->p_flag & P_TOTAL_STOP) != 0) {
3091 			sx_xunlock(&allproc_lock);
3092 			_PHOLD(p);
3093 			thread_single_end(p, SINGLE_ALLPROC);
3094 			_PRELE(p);
3095 			PROC_UNLOCK(p);
3096 			sx_xlock(&allproc_lock);
3097 		} else {
3098 			PROC_UNLOCK(p);
3099 		}
3100 	}
3101 	/*  Did the loop above missed any stopped process ? */
3102 	FOREACH_PROC_IN_SYSTEM(p) {
3103 		/* No need for proc lock. */
3104 		if ((p->p_flag & P_TOTAL_STOP) != 0)
3105 			goto again;
3106 	}
3107 	sx_xunlock(&allproc_lock);
3108 }
3109 
/* #define	TOTAL_STOP_DEBUG	1 */
#ifdef TOTAL_STOP_DEBUG
/* Set non-zero to release a handler spinning in the stopped state. */
static volatile int ap_resume;
#include <sys/mount.h>

/*
 * Debug handler for debug.stop_all_proc.  Writing a non-zero value stops
 * all processes and suspends the syncer, busy-waits until ap_resume becomes
 * non-zero, then resumes the syncer and all processes.  Every invocation
 * first resets ap_resume to 0.
 */
static int
sysctl_debug_stop_all_proc(SYSCTL_HANDLER_ARGS)
{
	int error, val;

	val = 0;
	ap_resume = 0;
	error = sysctl_handle_int(oidp, &val, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);
	if (val == 0)
		return (0);

	stop_all_proc();
	syncer_suspend();
	/* Spin until ap_resume is set from elsewhere. */
	while (ap_resume == 0)
		;
	syncer_resume();
	resume_all_proc();
	return (0);
}

SYSCTL_PROC(_debug, OID_AUTO, stop_all_proc,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
    __DEVOLATILE(int *, &ap_resume), 0,
    sysctl_debug_stop_all_proc, "I",
    "");
#endif
3141