xref: /illumos-gate/usr/src/uts/common/fs/proc/prsubr.c (revision 82b7b979be13234985096762083f39b829dbd03f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright 2019 Joyent, Inc.
25  * Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
26  * Copyright 2022 MNX Cloud, Inc.
27  * Copyright 2025 Oxide Computer Company
28  */
29 
30 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
31 /*	  All Rights Reserved	*/
32 
33 #include <sys/types.h>
34 #include <sys/t_lock.h>
35 #include <sys/param.h>
36 #include <sys/cmn_err.h>
37 #include <sys/cred.h>
38 #include <sys/priv.h>
39 #include <sys/debug.h>
40 #include <sys/errno.h>
41 #include <sys/inline.h>
42 #include <sys/kmem.h>
43 #include <sys/mman.h>
44 #include <sys/proc.h>
45 #include <sys/brand.h>
46 #include <sys/sobject.h>
47 #include <sys/sysmacros.h>
48 #include <sys/systm.h>
49 #include <sys/uio.h>
50 #include <sys/var.h>
51 #include <sys/vfs.h>
52 #include <sys/vnode.h>
53 #include <sys/session.h>
54 #include <sys/pcb.h>
55 #include <sys/signal.h>
56 #include <sys/user.h>
57 #include <sys/disp.h>
58 #include <sys/class.h>
59 #include <sys/ts.h>
60 #include <sys/bitmap.h>
61 #include <sys/poll.h>
62 #include <sys/shm_impl.h>
63 #include <sys/fault.h>
64 #include <sys/syscall.h>
65 #include <sys/procfs.h>
66 #include <sys/processor.h>
67 #include <sys/cpuvar.h>
68 #include <sys/copyops.h>
69 #include <sys/time.h>
70 #include <sys/msacct.h>
71 #include <sys/flock_impl.h>
72 #include <sys/stropts.h>
73 #include <sys/strsubr.h>
74 #include <sys/pathname.h>
75 #include <sys/mode.h>
76 #include <sys/socketvar.h>
77 #include <sys/autoconf.h>
78 #include <sys/dtrace.h>
79 #include <sys/timod.h>
80 #include <sys/fs/namenode.h>
81 #include <netinet/udp.h>
82 #include <netinet/tcp.h>
83 #include <inet/cc.h>
84 #include <vm/as.h>
85 #include <vm/rm.h>
86 #include <vm/seg.h>
87 #include <vm/seg_vn.h>
88 #include <vm/seg_dev.h>
89 #include <vm/seg_spt.h>
90 #include <vm/page.h>
91 #include <sys/vmparam.h>
92 #include <sys/swap.h>
93 #include <fs/proc/prdata.h>
94 #include <sys/task.h>
95 #include <sys/project.h>
96 #include <sys/contract_impl.h>
97 #include <sys/contract/process.h>
98 #include <sys/contract/process_impl.h>
99 #include <sys/schedctl.h>
100 #include <sys/pool.h>
101 #include <sys/zone.h>
102 #include <sys/atomic.h>
103 #include <sys/sdt.h>
104 
105 #define	MAX_ITERS_SPIN	5
106 
107 typedef struct prpagev {
108 	uint_t *pg_protv;	/* vector of page permissions */
109 	char *pg_incore;	/* vector of incore flags */
110 	size_t pg_npages;	/* number of pages in protv and incore */
111 	ulong_t pg_pnbase;	/* pn within segment of first protv element */
112 } prpagev_t;
113 
114 size_t pagev_lim = 256 * 1024;	/* limit on number of pages in prpagev_t */
115 
116 extern struct seg_ops segdev_ops;	/* needs a header file */
117 extern struct seg_ops segspt_shmops;	/* needs a header file */
118 
119 static	int	set_watched_page(proc_t *, caddr_t, caddr_t, ulong_t, ulong_t);
120 static	void	clear_watched_page(proc_t *, caddr_t, caddr_t, ulong_t);
121 
122 /*
123  * Choose an lwp from the complete set of lwps for the process.
124  * This is called for any operation applied to the process
125  * file descriptor that requires an lwp to operate upon.
126  *
127  * Returns a pointer to the thread for the selected LWP,
128  * and with the dispatcher lock held for the thread.
129  *
130  * The algorithm for choosing an lwp is critical for /proc semantics;
131  * don't touch this code unless you know all of the implications.
132  */
133 kthread_t *
134 prchoose(proc_t *p)
135 {
136 	kthread_t *t;
137 	kthread_t *t_onproc = NULL;	/* running on processor */
138 	kthread_t *t_run = NULL;	/* runnable, on disp queue */
139 	kthread_t *t_sleep = NULL;	/* sleeping */
140 	kthread_t *t_hold = NULL;	/* sleeping, performing hold */
141 	kthread_t *t_susp = NULL;	/* suspended stop */
142 	kthread_t *t_jstop = NULL;	/* jobcontrol stop, w/o directed stop */
143 	kthread_t *t_jdstop = NULL;	/* jobcontrol stop with directed stop */
144 	kthread_t *t_req = NULL;	/* requested stop */
145 	kthread_t *t_istop = NULL;	/* event-of-interest stop */
146 	kthread_t *t_dtrace = NULL;	/* DTrace stop */
147 
148 	ASSERT(MUTEX_HELD(&p->p_lock));
149 
150 	/*
151 	 * If the agent lwp exists, it takes precedence over all others.
152 	 */
153 	if ((t = p->p_agenttp) != NULL) {
154 		thread_lock(t);
155 		return (t);
156 	}
157 
158 	if ((t = p->p_tlist) == NULL)	/* start at the head of the list */
159 		return (t);
160 	do {		/* for each lwp in the process */
161 		if (VSTOPPED(t)) {	/* virtually stopped */
162 			if (t_req == NULL)
163 				t_req = t;
164 			continue;
165 		}
166 
167 		/* If this is a process kernel thread, ignore it. */
168 		if ((t->t_proc_flag & TP_KTHREAD) != 0) {
169 			continue;
170 		}
171 
172 		thread_lock(t);		/* make sure thread is in good state */
173 		switch (t->t_state) {
174 		default:
175 			panic("prchoose: bad thread state %d, thread 0x%p",
176 			    t->t_state, (void *)t);
177 			/*NOTREACHED*/
178 		case TS_SLEEP:
179 			/* this is filthy */
180 			if (t->t_wchan == (caddr_t)&p->p_holdlwps &&
181 			    t->t_wchan0 == NULL) {
182 				if (t_hold == NULL)
183 					t_hold = t;
184 			} else {
185 				if (t_sleep == NULL)
186 					t_sleep = t;
187 			}
188 			break;
189 		case TS_RUN:
190 		case TS_WAIT:
191 			if (t_run == NULL)
192 				t_run = t;
193 			break;
194 		case TS_ONPROC:
195 			if (t_onproc == NULL)
196 				t_onproc = t;
197 			break;
198 		case TS_ZOMB:		/* last possible choice */
199 			break;
200 		case TS_STOPPED:
201 			switch (t->t_whystop) {
202 			case PR_SUSPENDED:
203 				if (t_susp == NULL)
204 					t_susp = t;
205 				break;
206 			case PR_JOBCONTROL:
207 				if (t->t_proc_flag & TP_PRSTOP) {
208 					if (t_jdstop == NULL)
209 						t_jdstop = t;
210 				} else {
211 					if (t_jstop == NULL)
212 						t_jstop = t;
213 				}
214 				break;
215 			case PR_REQUESTED:
216 				if (t->t_dtrace_stop && t_dtrace == NULL)
217 					t_dtrace = t;
218 				else if (t_req == NULL)
219 					t_req = t;
220 				break;
221 			case PR_SYSENTRY:
222 			case PR_SYSEXIT:
223 			case PR_SIGNALLED:
224 			case PR_FAULTED:
225 				/*
226 				 * Make an lwp calling exit() be the
227 				 * last lwp seen in the process.
228 				 */
229 				if (t_istop == NULL ||
230 				    (t_istop->t_whystop == PR_SYSENTRY &&
231 				    t_istop->t_whatstop == SYS_exit))
232 					t_istop = t;
233 				break;
234 			case PR_CHECKPOINT:	/* can't happen? */
235 				break;
236 			default:
237 				panic("prchoose: bad t_whystop %d, thread 0x%p",
238 				    t->t_whystop, (void *)t);
239 				/*NOTREACHED*/
240 			}
241 			break;
242 		}
243 		thread_unlock(t);
244 	} while ((t = t->t_forw) != p->p_tlist);
245 
246 	if (t_onproc)
247 		t = t_onproc;
248 	else if (t_run)
249 		t = t_run;
250 	else if (t_sleep)
251 		t = t_sleep;
252 	else if (t_jstop)
253 		t = t_jstop;
254 	else if (t_jdstop)
255 		t = t_jdstop;
256 	else if (t_istop)
257 		t = t_istop;
258 	else if (t_dtrace)
259 		t = t_dtrace;
260 	else if (t_req)
261 		t = t_req;
262 	else if (t_hold)
263 		t = t_hold;
264 	else if (t_susp)
265 		t = t_susp;
266 	else			/* TS_ZOMB */
267 		t = p->p_tlist;
268 
269 	if (t != NULL)
270 		thread_lock(t);
271 	return (t);
272 }
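/*
 * Illustrative sketch only (not part of the build): the canonical
 * prchoose() caller pattern, as used by prgetstatus() below.  The
 * caller holds p->p_lock; prchoose() returns the representative lwp
 * with its dispatcher lock held, which the caller drops immediately:
 *
 *	kthread_t *t;
 *
 *	ASSERT(MUTEX_HELD(&p->p_lock));
 *	t = prchoose(p);	(returns locked thread)
 *	ASSERT(t != NULL);
 *	thread_unlock(t);
 */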
273 
274 /*
275  * Wake up anyone sleeping on the /proc vnode for the process/lwp to stop.
276  * Also call pollwakeup() if any lwps are waiting in poll() for POLLPRI
277  * on the /proc file descriptor.  Called from stop() when a traced
278  * process stops on an event of interest.  Also called from exit()
279  * and prinvalidate() to indicate POLLHUP and POLLERR respectively.
280  */
281 void
282 prnotify(struct vnode *vp)
283 {
284 	prcommon_t *pcp = VTOP(vp)->pr_common;
285 
286 	mutex_enter(&pcp->prc_mutex);
287 	cv_broadcast(&pcp->prc_wait);
288 	mutex_exit(&pcp->prc_mutex);
289 	if (pcp->prc_flags & PRC_POLL) {
290 		/*
291 		 * We call pollwakeup() with POLLHUP to ensure that
292 		 * the pollers are awakened even if they are polling
293 		 * for nothing (i.e., waiting for the process to exit).
294 		 * This enables the use of the PRC_POLL flag for optimization
295 		 * (we can turn off PRC_POLL only if we know no pollers remain).
296 		 */
297 		pcp->prc_flags &= ~PRC_POLL;
298 		pollwakeup(&pcp->prc_pollhead, POLLHUP);
299 	}
300 }
301 
302 /* called immediately below, in prfree() */
303 static void
304 prfreenotify(vnode_t *vp)
305 {
306 	prnode_t *pnp;
307 	prcommon_t *pcp;
308 
309 	while (vp != NULL) {
310 		pnp = VTOP(vp);
311 		pcp = pnp->pr_common;
312 		ASSERT(pcp->prc_thread == NULL);
313 		pcp->prc_proc = NULL;
314 		/*
315 		 * We can't call prnotify() here because we are holding
316 		 * pidlock.  We assert that there is no need to.
317 		 */
318 		mutex_enter(&pcp->prc_mutex);
319 		cv_broadcast(&pcp->prc_wait);
320 		mutex_exit(&pcp->prc_mutex);
321 		ASSERT(!(pcp->prc_flags & PRC_POLL));
322 
323 		vp = pnp->pr_next;
324 		pnp->pr_next = NULL;
325 	}
326 }
327 
328 /*
329  * Called from a hook in freeproc() when a traced process is removed
330  * from the process table.  The proc-table pointers of all associated
331  * /proc vnodes are cleared to indicate that the process has gone away.
332  */
333 void
334 prfree(proc_t *p)
335 {
336 	uint_t slot = p->p_slot;
337 
338 	ASSERT(MUTEX_HELD(&pidlock));
339 
340 	/*
341 	 * Block the process against /proc so it can be freed.
342 	 * It cannot be freed while locked by some controlling process.
343 	 * Lock ordering:
344 	 *	pidlock -> pr_pidlock -> p->p_lock -> pcp->prc_mutex
345 	 */
346 	mutex_enter(&pr_pidlock);	/* protects pcp->prc_proc */
347 	mutex_enter(&p->p_lock);
348 	while (p->p_proc_flag & P_PR_LOCK) {
349 		mutex_exit(&pr_pidlock);
350 		cv_wait(&pr_pid_cv[slot], &p->p_lock);
351 		mutex_exit(&p->p_lock);
352 		mutex_enter(&pr_pidlock);
353 		mutex_enter(&p->p_lock);
354 	}
355 
356 	ASSERT(p->p_tlist == NULL);
357 
358 	prfreenotify(p->p_plist);
359 	p->p_plist = NULL;
360 
361 	prfreenotify(p->p_trace);
362 	p->p_trace = NULL;
363 
364 	/*
365 	 * We broadcast to wake up everyone waiting for this process.
366 	 * No one can reach this process from this point on.
367 	 */
368 	cv_broadcast(&pr_pid_cv[slot]);
369 
370 	mutex_exit(&p->p_lock);
371 	mutex_exit(&pr_pidlock);
372 }
373 
374 /*
375  * Called from a hook in exit() when a traced process is becoming a zombie.
376  */
377 void
378 prexit(proc_t *p)
379 {
380 	ASSERT(MUTEX_HELD(&p->p_lock));
381 
382 	if (pr_watch_active(p)) {
383 		pr_free_watchpoints(p);
384 		watch_disable(curthread);
385 	}
386 	/* pr_free_watched_pages() is called in exit(), after dropping p_lock */
387 	if (p->p_trace) {
388 		VTOP(p->p_trace)->pr_common->prc_flags |= PRC_DESTROY;
389 		prnotify(p->p_trace);
390 	}
391 	cv_broadcast(&pr_pid_cv[p->p_slot]);	/* pauselwps() */
392 }
393 
394 /*
395  * Called when a thread calls lwp_exit().
396  */
397 void
398 prlwpexit(kthread_t *t)
399 {
400 	vnode_t *vp;
401 	prnode_t *pnp;
402 	prcommon_t *pcp;
403 	proc_t *p = ttoproc(t);
404 	lwpent_t *lep = p->p_lwpdir[t->t_dslot].ld_entry;
405 
406 	ASSERT(t == curthread);
407 	ASSERT(MUTEX_HELD(&p->p_lock));
408 
409 	/*
410 	 * The process must be blocked against /proc to do this safely.
411 	 * The lwp must not disappear while the process is marked P_PR_LOCK.
412 	 * It is the caller's responsibility to have called prbarrier(p).
413 	 */
414 	ASSERT(!(p->p_proc_flag & P_PR_LOCK));
415 
416 	for (vp = p->p_plist; vp != NULL; vp = pnp->pr_next) {
417 		pnp = VTOP(vp);
418 		pcp = pnp->pr_common;
419 		if (pcp->prc_thread == t) {
420 			pcp->prc_thread = NULL;
421 			pcp->prc_flags |= PRC_DESTROY;
422 		}
423 	}
424 
425 	for (vp = lep->le_trace; vp != NULL; vp = pnp->pr_next) {
426 		pnp = VTOP(vp);
427 		pcp = pnp->pr_common;
428 		pcp->prc_thread = NULL;
429 		pcp->prc_flags |= PRC_DESTROY;
430 		prnotify(vp);
431 	}
432 
433 	if (p->p_trace)
434 		prnotify(p->p_trace);
435 }
436 
437 /*
438  * Called when a zombie thread is joined or when a
439  * detached lwp exits.  Called from lwp_hash_out().
440  */
441 void
442 prlwpfree(proc_t *p, lwpent_t *lep)
443 {
444 	vnode_t *vp;
445 	prnode_t *pnp;
446 	prcommon_t *pcp;
447 
448 	ASSERT(MUTEX_HELD(&p->p_lock));
449 
450 	/*
451 	 * The process must be blocked against /proc to do this safely.
452 	 * The lwp must not disappear while the process is marked P_PR_LOCK.
453 	 * It is the caller's responsibility to have called prbarrier(p).
454 	 */
455 	ASSERT(!(p->p_proc_flag & P_PR_LOCK));
456 
457 	vp = lep->le_trace;
458 	lep->le_trace = NULL;
459 	while (vp) {
460 		prnotify(vp);
461 		pnp = VTOP(vp);
462 		pcp = pnp->pr_common;
463 		ASSERT(pcp->prc_thread == NULL &&
464 		    (pcp->prc_flags & PRC_DESTROY));
465 		pcp->prc_tslot = -1;
466 		vp = pnp->pr_next;
467 		pnp->pr_next = NULL;
468 	}
469 
470 	if (p->p_trace)
471 		prnotify(p->p_trace);
472 }
473 
474 /*
475  * Called from a hook in exec() when a thread starts exec().
476  */
477 void
478 prexecstart(void)
479 {
480 	proc_t *p = ttoproc(curthread);
481 	klwp_t *lwp = ttolwp(curthread);
482 
483 	/*
484 	 * The P_PR_EXEC flag blocks /proc operations for
485 	 * the duration of the exec().
486 	 * We can't start exec() while the process is
487 	 * locked by /proc, so we call prbarrier().
488 	 * lwp_nostop keeps the process from being stopped
489 	 * via job control for the duration of the exec().
490 	 */
491 
492 	ASSERT(MUTEX_HELD(&p->p_lock));
493 	prbarrier(p);
494 	lwp->lwp_nostop++;
495 	p->p_proc_flag |= P_PR_EXEC;
496 }
497 
498 /*
499  * Called from a hook in exec() when a thread finishes exec().
500  * The thread may or may not have succeeded.  Some other thread
501  * may have beaten it to the punch.
502  */
503 void
504 prexecend(void)
505 {
506 	proc_t *p = ttoproc(curthread);
507 	klwp_t *lwp = ttolwp(curthread);
508 	vnode_t *vp;
509 	prnode_t *pnp;
510 	prcommon_t *pcp;
511 	model_t model = p->p_model;
512 	id_t tid = curthread->t_tid;
513 	int tslot = curthread->t_dslot;
514 
515 	ASSERT(MUTEX_HELD(&p->p_lock));
516 
517 	lwp->lwp_nostop--;
518 	if (p->p_flag & SEXITLWPS) {
519 		/*
520 		 * We are on our way to exiting because some
521 		 * other thread beat us in the race to exec().
522 		 * Don't clear the P_PR_EXEC flag in this case.
523 		 */
524 		return;
525 	}
526 
527 	/*
528 	 * Wake up anyone waiting in /proc for the process to complete exec().
529 	 */
530 	p->p_proc_flag &= ~P_PR_EXEC;
531 	if ((vp = p->p_trace) != NULL) {
532 		pcp = VTOP(vp)->pr_common;
533 		mutex_enter(&pcp->prc_mutex);
534 		cv_broadcast(&pcp->prc_wait);
535 		mutex_exit(&pcp->prc_mutex);
536 		for (; vp != NULL; vp = pnp->pr_next) {
537 			pnp = VTOP(vp);
538 			pnp->pr_common->prc_datamodel = model;
539 		}
540 	}
541 	if ((vp = p->p_lwpdir[tslot].ld_entry->le_trace) != NULL) {
542 		/*
543 		 * We dealt with the process common above.
544 		 */
545 		ASSERT(p->p_trace != NULL);
546 		pcp = VTOP(vp)->pr_common;
547 		mutex_enter(&pcp->prc_mutex);
548 		cv_broadcast(&pcp->prc_wait);
549 		mutex_exit(&pcp->prc_mutex);
550 		for (; vp != NULL; vp = pnp->pr_next) {
551 			pnp = VTOP(vp);
552 			pcp = pnp->pr_common;
553 			pcp->prc_datamodel = model;
554 			pcp->prc_tid = tid;
555 			pcp->prc_tslot = tslot;
556 		}
557 	}
558 }
559 
560 /*
561  * Called from a hook in relvm() just before freeing the address space.
562  * We free all the watched areas now.
563  */
564 void
565 prrelvm(void)
566 {
567 	proc_t *p = ttoproc(curthread);
568 
569 	mutex_enter(&p->p_lock);
570 	prbarrier(p);	/* block all other /proc operations */
571 	if (pr_watch_active(p)) {
572 		pr_free_watchpoints(p);
573 		watch_disable(curthread);
574 	}
575 	mutex_exit(&p->p_lock);
576 	pr_free_watched_pages(p);
577 }
578 
579 /*
580  * Called from hooks in exec-related code when a traced process
581  * attempts to exec(2) a setuid/setgid program or an unreadable
582  * file.  Rather than fail the exec we invalidate the associated
583  * /proc vnodes so that subsequent attempts to use them will fail.
584  *
585  * All /proc vnodes, except directory vnodes, are retained on a linked
586  * list (rooted at p_plist in the process structure) until last close.
587  *
588  * A controlling process must re-open the /proc files in order to
589  * regain control.
590  */
591 void
592 prinvalidate(struct user *up)
593 {
594 	kthread_t *t = curthread;
595 	proc_t *p = ttoproc(t);
596 	vnode_t *vp;
597 	prnode_t *pnp;
598 	int writers = 0;
599 
600 	mutex_enter(&p->p_lock);
601 	prbarrier(p);	/* block all other /proc operations */
602 
603 	/*
604 	 * At this moment, there can be only one lwp in the process.
605 	 */
606 	ASSERT(p->p_lwpcnt == 1 && p->p_zombcnt == 0);
607 
608 	/*
609 	 * Invalidate any currently active /proc vnodes.
610 	 */
611 	for (vp = p->p_plist; vp != NULL; vp = pnp->pr_next) {
612 		pnp = VTOP(vp);
613 		switch (pnp->pr_type) {
614 		case PR_PSINFO:		/* these files can be read by anyone */
615 		case PR_LPSINFO:
616 		case PR_LWPSINFO:
617 		case PR_LWPDIR:
618 		case PR_LWPIDDIR:
619 		case PR_USAGE:
620 		case PR_LUSAGE:
621 		case PR_LWPUSAGE:
622 			break;
623 		default:
624 			pnp->pr_flags |= PR_INVAL;
625 			break;
626 		}
627 	}
628 	/*
629 	 * Wake up anyone waiting for the process or lwp.
630 	 * p->p_trace is guaranteed to be non-NULL if there
631 	 * are any open /proc files for this process.
632 	 */
633 	if ((vp = p->p_trace) != NULL) {
634 		prcommon_t *pcp = VTOP(vp)->pr_pcommon;
635 
636 		prnotify(vp);
637 		/*
638 		 * Are there any writers?
639 		 */
640 		if ((writers = pcp->prc_writers) != 0) {
641 			/*
642 			 * Clear the exclusive open flag (old /proc interface).
643 			 * Set prc_selfopens equal to prc_writers so that
644 			 * the next O_EXCL|O_WRITE open will succeed
645 			 * even with existing (though invalid) writers.
646 			 * prclose() must decrement prc_selfopens when
647 			 * the invalid files are closed.
648 			 */
649 			pcp->prc_flags &= ~PRC_EXCL;
650 			ASSERT(pcp->prc_selfopens <= writers);
651 			pcp->prc_selfopens = writers;
652 		}
653 	}
654 	vp = p->p_lwpdir[t->t_dslot].ld_entry->le_trace;
655 	while (vp != NULL) {
656 		/*
657 		 * We should not invalidate the lwpiddir vnodes,
658 		 * but the necessities of maintaining the old
659 		 * ioctl()-based version of /proc require it.
660 		 */
661 		pnp = VTOP(vp);
662 		pnp->pr_flags |= PR_INVAL;
663 		prnotify(vp);
664 		vp = pnp->pr_next;
665 	}
666 
667 	/*
668 	 * If any tracing flags are in effect and any vnodes are open for
669 	 * writing then set the requested-stop and run-on-last-close flags.
670 	 * Otherwise, clear all tracing flags.
671 	 */
672 	t->t_proc_flag &= ~TP_PAUSE;
673 	if ((p->p_proc_flag & P_PR_TRACE) && writers) {
674 		t->t_proc_flag |= TP_PRSTOP;
675 		aston(t);		/* so ISSIG will see the flag */
676 		p->p_proc_flag |= P_PR_RUNLCL;
677 	} else {
678 		premptyset(&up->u_entrymask);		/* syscalls */
679 		premptyset(&up->u_exitmask);
680 		up->u_systrap = 0;
681 		premptyset(&p->p_sigmask);		/* signals */
682 		premptyset(&p->p_fltmask);		/* faults */
683 		t->t_proc_flag &= ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
684 		p->p_proc_flag &= ~(P_PR_RUNLCL|P_PR_KILLCL|P_PR_TRACE);
685 		prnostep(ttolwp(t));
686 	}
687 
688 	mutex_exit(&p->p_lock);
689 }
690 
691 /*
692  * Acquire the controlled process's p_lock and mark it P_PR_LOCK.
693  * Return with pr_pidlock held in all cases.
694  * Return with p_lock held if the process still exists.
695  * Return value is the process pointer if the process still exists, else NULL.
696  * If we lock the process, give ourselves kernel priority to avoid deadlocks;
697  * this is undone in prunlock().
698  */
699 proc_t *
700 pr_p_lock(prnode_t *pnp)
701 {
702 	proc_t *p;
703 	prcommon_t *pcp;
704 
705 	mutex_enter(&pr_pidlock);
706 	if ((pcp = pnp->pr_pcommon) == NULL || (p = pcp->prc_proc) == NULL)
707 		return (NULL);
708 	mutex_enter(&p->p_lock);
709 	while (p->p_proc_flag & P_PR_LOCK) {
710 		/*
711 		 * This cv/mutex pair is persistent even if
712 		 * the process disappears while we sleep.
713 		 */
714 		kcondvar_t *cv = &pr_pid_cv[p->p_slot];
715 		kmutex_t *mp = &p->p_lock;
716 
717 		mutex_exit(&pr_pidlock);
718 		cv_wait(cv, mp);
719 		mutex_exit(mp);
720 		mutex_enter(&pr_pidlock);
721 		if (pcp->prc_proc == NULL)
722 			return (NULL);
723 		ASSERT(p == pcp->prc_proc);
724 		mutex_enter(&p->p_lock);
725 	}
726 	p->p_proc_flag |= P_PR_LOCK;
727 	return (p);
728 }
729 
730 /*
731  * Lock the target process by setting P_PR_LOCK and grabbing p->p_lock.
732  * This prevents any lwp of the process from disappearing and
733  * blocks most operations that a process can perform on itself.
734  * Returns 0 on success, a non-zero error number on failure.
735  *
736  * 'zdisp' is ZYES or ZNO to indicate whether prlock() should succeed when
737  * the subject process is a zombie (ZYES) or fail for zombies (ZNO).
738  *
739  * error returns:
740  *	ENOENT: process or lwp has disappeared or process is exiting
741  *		(or has become a zombie and zdisp == ZNO).
742  *	EAGAIN: procfs vnode has become invalid.
743  *	EINTR:  signal arrived while waiting for exec to complete.
744  */
745 int
746 prlock(prnode_t *pnp, int zdisp)
747 {
748 	prcommon_t *pcp;
749 	proc_t *p;
750 
751 again:
752 	pcp = pnp->pr_common;
753 	p = pr_p_lock(pnp);
754 	mutex_exit(&pr_pidlock);
755 
756 	/*
757 	 * Return ENOENT immediately if there is no process.
758 	 */
759 	if (p == NULL)
760 		return (ENOENT);
761 
762 	ASSERT(p == pcp->prc_proc && p->p_stat != 0 && p->p_stat != SIDL);
763 
764 	/*
765 	 * Return ENOENT if process entered zombie state or is exiting
766 	 * and the 'zdisp' flag is set to ZNO indicating not to lock zombies.
767 	 */
768 	if (zdisp == ZNO &&
769 	    ((pcp->prc_flags & PRC_DESTROY) || (p->p_flag & SEXITING))) {
770 		prunlock(pnp);
771 		return (ENOENT);
772 	}
773 
774 	/*
775 	 * If lwp-specific, check to see if lwp has disappeared.
776 	 */
777 	if (pcp->prc_flags & PRC_LWP) {
778 		if ((zdisp == ZNO && (pcp->prc_flags & PRC_DESTROY)) ||
779 		    pcp->prc_tslot == -1) {
780 			prunlock(pnp);
781 			return (ENOENT);
782 		}
783 	}
784 
785 	/*
786 	 * Return EAGAIN if we have encountered a security violation.
787 	 * (The process exec'd a set-id or unreadable executable file.)
788 	 */
789 	if (pnp->pr_flags & PR_INVAL) {
790 		prunlock(pnp);
791 		return (EAGAIN);
792 	}
793 
794 	/*
795 	 * If process is undergoing an exec(), wait for
796 	 * completion and then start all over again.
797 	 */
798 	if (p->p_proc_flag & P_PR_EXEC) {
799 		pcp = pnp->pr_pcommon;	/* Put on the correct sleep queue */
800 		mutex_enter(&pcp->prc_mutex);
801 		prunlock(pnp);
802 		if (!cv_wait_sig(&pcp->prc_wait, &pcp->prc_mutex)) {
803 			mutex_exit(&pcp->prc_mutex);
804 			return (EINTR);
805 		}
806 		mutex_exit(&pcp->prc_mutex);
807 		goto again;
808 	}
809 
810 	/*
811 	 * We return holding p->p_lock.
812 	 */
813 	return (0);
814 }
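/*
 * A minimal, hypothetical caller sketch for prlock()/prunlock(),
 * mirroring the pattern used by procfs vnode operations: lock the
 * target process, operate on it with p->p_lock held, then unlock.
 *
 *	int error;
 *
 *	if ((error = prlock(pnp, ZNO)) != 0)
 *		return (error);	(ENOENT, EAGAIN or EINTR, per above)
 *	...operate on pnp->pr_common->prc_proc with p->p_lock held...
 *	prunlock(pnp);
 */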
815 
816 /*
817  * Undo prlock() and pr_p_lock().
818  * p->p_lock is still held; pr_pidlock is no longer held.
819  *
820  * prunmark() drops the P_PR_LOCK flag and wakes up another thread,
821  * if any, waiting for the flag to be dropped; it retains p->p_lock.
822  *
823  * prunlock() calls prunmark() and then drops p->p_lock.
824  */
825 void
826 prunmark(proc_t *p)
827 {
828 	ASSERT(p->p_proc_flag & P_PR_LOCK);
829 	ASSERT(MUTEX_HELD(&p->p_lock));
830 
831 	cv_signal(&pr_pid_cv[p->p_slot]);
832 	p->p_proc_flag &= ~P_PR_LOCK;
833 }
834 
835 void
836 prunlock(prnode_t *pnp)
837 {
838 	prcommon_t *pcp = pnp->pr_common;
839 	proc_t *p = pcp->prc_proc;
840 
841 	/*
842 	 * If we (or someone) gave it a SIGKILL, and it is not
843 	 * already a zombie, set it running unconditionally.
844 	 */
845 	if ((p->p_flag & SKILLED) &&
846 	    !(p->p_flag & SEXITING) &&
847 	    !(pcp->prc_flags & PRC_DESTROY) &&
848 	    !((pcp->prc_flags & PRC_LWP) && pcp->prc_tslot == -1)) {
849 		int err = pr_setrun(pnp, 0);
850 		/*
851 		 * EBUSY here means either the process was not stopped by /proc
852 		 * or there is an agent lwp.  If there's an agent lwp, we don't
853 		 * need to do anything as it will run and witness the SIGKILL.
854 		 * However, if there's no agent lwp and the process was not
855 		 * stopped by /proc, it may have been stopped by SIGSTOP; try
856 		 * getting lwps running with TS_XSTART to undo SIGSTOP effect.
857 		 *
858 		 * Notably, other TS_* bits are inappropriate here:
859 		 * * Do not set TS_PSTART; pr_setrun() above would have already
860 		 *   set this if it did anything for this process.
861 		 * * Do not set TS_CSTART or TS_UNPAUSE; lwps may be stopped by
862 		 *   PR_SUSPEND for many reasons. Some cases, like holdlwps(),
863 		 *   will resume the process before the corresponding syscall
864 		 *   returns. Other cases, like dumping core, the suspender
865 		 *   will tear down the lwps as it completes.
866 		 * * Do not set TS_RESUME out of caution; not sure about the
867 		 *   consequences of a process going away during CPR resume and
868 		 *   CPR should set the process running eventually.
869 		 * * Do not set TS_CREATE because lwp creation expects threads
870 		 *   to remain paused until lwp completes.
871 		 */
872 		if (err == EBUSY && p->p_agenttp == NULL) {
873 			runlwps(p, TS_XSTART);
874 		}
875 	}
876 	prunmark(p);
877 	mutex_exit(&p->p_lock);
878 }
879 
880 /*
881  * Called while holding p->p_lock to delay until the process is unlocked.
882  * We enter holding p->p_lock; p->p_lock is dropped and reacquired.
883  * The process cannot become locked again until p->p_lock is dropped.
884  */
885 void
886 prbarrier(proc_t *p)
887 {
888 	ASSERT(MUTEX_HELD(&p->p_lock));
889 
890 	if (p->p_proc_flag & P_PR_LOCK) {
891 		/* The process is locked; delay until not locked */
892 		uint_t slot = p->p_slot;
893 
894 		while (p->p_proc_flag & P_PR_LOCK)
895 			cv_wait(&pr_pid_cv[slot], &p->p_lock);
896 		cv_signal(&pr_pid_cv[slot]);
897 	}
898 }
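/*
 * Usage sketch (assumed, based on the callers in this file): code
 * running in the traced process synchronizes with /proc like this
 * before mutating state that a controlling process may be examining:
 *
 *	mutex_enter(&p->p_lock);
 *	prbarrier(p);	(delay until not P_PR_LOCKed)
 *	...modify process or lwp state...
 *	mutex_exit(&p->p_lock);
 */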
899 
900 /*
901  * Return process/lwp status.
902  * The u-block is mapped in by this routine and unmapped at the end.
903  */
904 void
905 prgetstatus(proc_t *p, pstatus_t *sp, zone_t *zp)
906 {
907 	kthread_t *t;
908 
909 	ASSERT(MUTEX_HELD(&p->p_lock));
910 
911 	t = prchoose(p);	/* returns locked thread */
912 	ASSERT(t != NULL);
913 	thread_unlock(t);
914 
915 	/* just bzero the process part, prgetlwpstatus() does the rest */
916 	bzero(sp, sizeof (pstatus_t) - sizeof (lwpstatus_t));
917 	sp->pr_nlwp = p->p_lwpcnt;
918 	sp->pr_nzomb = p->p_zombcnt;
919 	prassignset(&sp->pr_sigpend, &p->p_sig);
920 	sp->pr_brkbase = (uintptr_t)p->p_brkbase;
921 	sp->pr_brksize = p->p_brksize;
922 	sp->pr_stkbase = (uintptr_t)prgetstackbase(p);
923 	sp->pr_stksize = p->p_stksize;
924 	sp->pr_pid = p->p_pid;
925 	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
926 	    (p->p_flag & SZONETOP)) {
927 		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
928 		/*
929 		 * Inside local zones, fake zsched's pid as parent pids for
930 		 * processes which reference processes outside of the zone.
931 		 */
932 		sp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
933 	} else {
934 		sp->pr_ppid = p->p_ppid;
935 	}
936 	sp->pr_pgid  = p->p_pgrp;
937 	sp->pr_sid   = p->p_sessp->s_sid;
938 	sp->pr_taskid = p->p_task->tk_tkid;
939 	sp->pr_projid = p->p_task->tk_proj->kpj_id;
940 	sp->pr_zoneid = p->p_zone->zone_id;
941 	hrt2ts(mstate_aggr_state(p, LMS_USER), &sp->pr_utime);
942 	hrt2ts(mstate_aggr_state(p, LMS_SYSTEM), &sp->pr_stime);
943 	TICK_TO_TIMESTRUC(p->p_cutime, &sp->pr_cutime);
944 	TICK_TO_TIMESTRUC(p->p_cstime, &sp->pr_cstime);
945 	prassignset(&sp->pr_sigtrace, &p->p_sigmask);
946 	prassignset(&sp->pr_flttrace, &p->p_fltmask);
947 	prassignset(&sp->pr_sysentry, &PTOU(p)->u_entrymask);
948 	prassignset(&sp->pr_sysexit, &PTOU(p)->u_exitmask);
949 	switch (p->p_model) {
950 	case DATAMODEL_ILP32:
951 		sp->pr_dmodel = PR_MODEL_ILP32;
952 		break;
953 	case DATAMODEL_LP64:
954 		sp->pr_dmodel = PR_MODEL_LP64;
955 		break;
956 	}
957 	if (p->p_agenttp)
958 		sp->pr_agentid = p->p_agenttp->t_tid;
959 
960 	/* get the chosen lwp's status */
961 	prgetlwpstatus(t, &sp->pr_lwp, zp);
962 
963 	/* replicate the flags */
964 	sp->pr_flags = sp->pr_lwp.pr_flags;
965 }
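/*
 * For illustration only: the pstatus_t assembled above is what a
 * userland consumer receives from a read of /proc/<pid>/status
 * (the pid used below is hypothetical):
 *
 *	pstatus_t ps;
 *	int fd = open("/proc/1234/status", O_RDONLY);
 *
 *	if (fd >= 0 && read(fd, &ps, sizeof (ps)) == sizeof (ps))
 *		(void) printf("%d lwps\n", (int)ps.pr_nlwp);
 */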
966 
967 /*
968  * Query mask of held signals for a given thread.
969  *
970  * This makes use of schedctl_sigblock() to query if userspace has requested
971  * that all maskable signals be held.  While it would be tempting to call
972  * schedctl_finish_sigblock() and apply that update to t->t_hold, it cannot be
973  * done safely without the risk of racing with the thread under consideration.
974  */
975 void
976 prgethold(kthread_t *t, sigset_t *sp)
977 {
978 	k_sigset_t set;
979 
980 	if (schedctl_sigblock(t)) {
981 		set.__sigbits[0] = FILLSET0 & ~CANTMASK0;
982 		set.__sigbits[1] = FILLSET1 & ~CANTMASK1;
983 		set.__sigbits[2] = FILLSET2 & ~CANTMASK2;
984 	} else {
985 		set = t->t_hold;
986 	}
987 	sigktou(&set, sp);
988 }
989 
990 #ifdef _SYSCALL32_IMPL
991 void
992 prgetlwpstatus32(kthread_t *t, lwpstatus32_t *sp, zone_t *zp)
993 {
994 	proc_t *p = ttoproc(t);
995 	klwp_t *lwp = ttolwp(t);
996 	struct mstate *ms = &lwp->lwp_mstate;
997 	hrtime_t usr, sys;
998 	int flags;
999 	ulong_t instr;
1000 
1001 	ASSERT(MUTEX_HELD(&p->p_lock));
1002 
1003 	bzero(sp, sizeof (*sp));
1004 	flags = 0L;
1005 	if (t->t_state == TS_STOPPED) {
1006 		flags |= PR_STOPPED;
1007 		if ((t->t_schedflag & TS_PSTART) == 0)
1008 			flags |= PR_ISTOP;
1009 	} else if (VSTOPPED(t)) {
1010 		flags |= PR_STOPPED|PR_ISTOP;
1011 	}
1012 	if (!(flags & PR_ISTOP) && (t->t_proc_flag & TP_PRSTOP))
1013 		flags |= PR_DSTOP;
1014 	if (lwp->lwp_asleep)
1015 		flags |= PR_ASLEEP;
1016 	if (t == p->p_agenttp)
1017 		flags |= PR_AGENT;
1018 	if (!(t->t_proc_flag & TP_TWAIT))
1019 		flags |= PR_DETACH;
1020 	if (t->t_proc_flag & TP_DAEMON)
1021 		flags |= PR_DAEMON;
1022 	if (p->p_proc_flag & P_PR_FORK)
1023 		flags |= PR_FORK;
1024 	if (p->p_proc_flag & P_PR_RUNLCL)
1025 		flags |= PR_RLC;
1026 	if (p->p_proc_flag & P_PR_KILLCL)
1027 		flags |= PR_KLC;
1028 	if (p->p_proc_flag & P_PR_ASYNC)
1029 		flags |= PR_ASYNC;
1030 	if (p->p_proc_flag & P_PR_BPTADJ)
1031 		flags |= PR_BPTADJ;
1032 	if (p->p_proc_flag & P_PR_PTRACE)
1033 		flags |= PR_PTRACE;
1034 	if (p->p_flag & SMSACCT)
1035 		flags |= PR_MSACCT;
1036 	if (p->p_flag & SMSFORK)
1037 		flags |= PR_MSFORK;
1038 	if (p->p_flag & SVFWAIT)
1039 		flags |= PR_VFORKP;
1040 	sp->pr_flags = flags;
1041 	if (VSTOPPED(t)) {
1042 		sp->pr_why   = PR_REQUESTED;
1043 		sp->pr_what  = 0;
1044 	} else {
1045 		sp->pr_why   = t->t_whystop;
1046 		sp->pr_what  = t->t_whatstop;
1047 	}
1048 	sp->pr_lwpid = t->t_tid;
1049 	sp->pr_cursig  = lwp->lwp_cursig;
1050 	prassignset(&sp->pr_lwppend, &t->t_sig);
1051 	prgethold(t, &sp->pr_lwphold);
1052 	if (t->t_whystop == PR_FAULTED) {
1053 		siginfo_kto32(&lwp->lwp_siginfo, &sp->pr_info);
1054 		if (t->t_whatstop == FLTPAGE)
1055 			sp->pr_info.si_addr =
1056 			    (caddr32_t)(uintptr_t)lwp->lwp_siginfo.si_addr;
1057 	} else if (lwp->lwp_curinfo)
1058 		siginfo_kto32(&lwp->lwp_curinfo->sq_info, &sp->pr_info);
1059 	if (SI_FROMUSER(&lwp->lwp_siginfo) && zp->zone_id != GLOBAL_ZONEID &&
1060 	    sp->pr_info.si_zoneid != zp->zone_id) {
1061 		sp->pr_info.si_pid = zp->zone_zsched->p_pid;
1062 		sp->pr_info.si_uid = 0;
1063 		sp->pr_info.si_ctid = -1;
1064 		sp->pr_info.si_zoneid = zp->zone_id;
1065 	}
1066 	sp->pr_altstack.ss_sp =
1067 	    (caddr32_t)(uintptr_t)lwp->lwp_sigaltstack.ss_sp;
1068 	sp->pr_altstack.ss_size = (size32_t)lwp->lwp_sigaltstack.ss_size;
1069 	sp->pr_altstack.ss_flags = (int32_t)lwp->lwp_sigaltstack.ss_flags;
1070 	prgetaction32(p, PTOU(p), lwp->lwp_cursig, &sp->pr_action);
1071 	sp->pr_oldcontext = (caddr32_t)lwp->lwp_oldcontext;
1072 	sp->pr_ustack = (caddr32_t)lwp->lwp_ustack;
1073 	(void) strncpy(sp->pr_clname, sclass[t->t_cid].cl_name,
1074 	    sizeof (sp->pr_clname) - 1);
1075 	if (flags & PR_STOPPED)
1076 		hrt2ts32(t->t_stoptime, &sp->pr_tstamp);
1077 	usr = ms->ms_acct[LMS_USER];
1078 	sys = ms->ms_acct[LMS_SYSTEM] + ms->ms_acct[LMS_TRAP];
1079 	scalehrtime(&usr);
1080 	scalehrtime(&sys);
1081 	hrt2ts32(usr, &sp->pr_utime);
1082 	hrt2ts32(sys, &sp->pr_stime);
1083 
1084 	/*
1085 	 * Fetch the current instruction, if not a system process.
1086 	 * We don't attempt this unless the lwp is stopped.
1087 	 */
1088 	if ((p->p_flag & SSYS) || p->p_as == &kas)
1089 		sp->pr_flags |= (PR_ISSYS|PR_PCINVAL);
1090 	else if (!(flags & PR_STOPPED))
1091 		sp->pr_flags |= PR_PCINVAL;
1092 	else if (!prfetchinstr(lwp, &instr))
1093 		sp->pr_flags |= PR_PCINVAL;
1094 	else
1095 		sp->pr_instr = (uint32_t)instr;
1096 
1097 	/*
1098 	 * Drop p_lock while touching the lwp's stack.
1099 	 */
1100 	mutex_exit(&p->p_lock);
1101 	if (prisstep(lwp))
1102 		sp->pr_flags |= PR_STEP;
1103 	if ((flags & (PR_STOPPED|PR_ASLEEP)) && t->t_sysnum) {
1104 		int i;
1105 
1106 		sp->pr_syscall = get_syscall32_args(lwp,
1107 		    (int *)sp->pr_sysarg, &i);
1108 		sp->pr_nsysarg = (ushort_t)i;
1109 	}
1110 	if ((flags & PR_STOPPED) || t == curthread)
1111 		prgetprregs32(lwp, sp->pr_reg);
1112 	if ((t->t_state == TS_STOPPED && t->t_whystop == PR_SYSEXIT) ||
1113 	    (flags & PR_VFORKP)) {
1114 		long r1, r2;
1115 		user_t *up;
1116 		auxv_t *auxp;
1117 		int i;
1118 
1119 		sp->pr_errno = prgetrvals(lwp, &r1, &r2);
1120 		if (sp->pr_errno == 0) {
1121 			sp->pr_rval1 = (int32_t)r1;
1122 			sp->pr_rval2 = (int32_t)r2;
1123 			sp->pr_errpriv = PRIV_NONE;
1124 		} else
1125 			sp->pr_errpriv = lwp->lwp_badpriv;
1126 
1127 		if (t->t_sysnum == SYS_execve) {
1128 			up = PTOU(p);
1129 			sp->pr_sysarg[0] = 0;
1130 			sp->pr_sysarg[1] = (caddr32_t)up->u_argv;
1131 			sp->pr_sysarg[2] = (caddr32_t)up->u_envp;
1132 			sp->pr_sysarg[3] = 0;
1133 			for (i = 0, auxp = up->u_auxv;
1134 			    i < sizeof (up->u_auxv) / sizeof (up->u_auxv[0]);
1135 			    i++, auxp++) {
1136 				if (auxp->a_type == AT_SUN_EXECNAME) {
1137 					sp->pr_sysarg[0] =
1138 					    (caddr32_t)
1139 					    (uintptr_t)auxp->a_un.a_ptr;
1140 					break;
1141 				}
1142 			}
1143 		}
1144 	}
1145 	if (prhasfp())
1146 		prgetprfpregs32(lwp, &sp->pr_fpreg);
1147 	mutex_enter(&p->p_lock);
1148 }
1149 
1150 void
1151 prgetstatus32(proc_t *p, pstatus32_t *sp, zone_t *zp)
1152 {
1153 	kthread_t *t;
1154 
1155 	ASSERT(MUTEX_HELD(&p->p_lock));
1156 
1157 	t = prchoose(p);	/* returns locked thread */
1158 	ASSERT(t != NULL);
1159 	thread_unlock(t);
1160 
1161 	/* just bzero the process part, prgetlwpstatus32() does the rest */
1162 	bzero(sp, sizeof (pstatus32_t) - sizeof (lwpstatus32_t));
1163 	sp->pr_nlwp = p->p_lwpcnt;
1164 	sp->pr_nzomb = p->p_zombcnt;
1165 	prassignset(&sp->pr_sigpend, &p->p_sig);
1166 	sp->pr_brkbase = (uint32_t)(uintptr_t)p->p_brkbase;
1167 	sp->pr_brksize = (uint32_t)p->p_brksize;
1168 	sp->pr_stkbase = (uint32_t)(uintptr_t)prgetstackbase(p);
1169 	sp->pr_stksize = (uint32_t)p->p_stksize;
1170 	sp->pr_pid   = p->p_pid;
1171 	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
1172 	    (p->p_flag & SZONETOP)) {
1173 		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
1174 		/*
1175 		 * Inside local zones, fake zsched's pid as parent pids for
1176 		 * processes which reference processes outside of the zone.
1177 		 */
1178 		sp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
1179 	} else {
1180 		sp->pr_ppid = p->p_ppid;
1181 	}
1182 	sp->pr_pgid  = p->p_pgrp;
1183 	sp->pr_sid   = p->p_sessp->s_sid;
1184 	sp->pr_taskid = p->p_task->tk_tkid;
1185 	sp->pr_projid = p->p_task->tk_proj->kpj_id;
1186 	sp->pr_zoneid = p->p_zone->zone_id;
1187 	hrt2ts32(mstate_aggr_state(p, LMS_USER), &sp->pr_utime);
1188 	hrt2ts32(mstate_aggr_state(p, LMS_SYSTEM), &sp->pr_stime);
1189 	TICK_TO_TIMESTRUC32(p->p_cutime, &sp->pr_cutime);
1190 	TICK_TO_TIMESTRUC32(p->p_cstime, &sp->pr_cstime);
1191 	prassignset(&sp->pr_sigtrace, &p->p_sigmask);
1192 	prassignset(&sp->pr_flttrace, &p->p_fltmask);
1193 	prassignset(&sp->pr_sysentry, &PTOU(p)->u_entrymask);
1194 	prassignset(&sp->pr_sysexit, &PTOU(p)->u_exitmask);
1195 	switch (p->p_model) {
1196 	case DATAMODEL_ILP32:
1197 		sp->pr_dmodel = PR_MODEL_ILP32;
1198 		break;
1199 	case DATAMODEL_LP64:
1200 		sp->pr_dmodel = PR_MODEL_LP64;
1201 		break;
1202 	}
1203 	if (p->p_agenttp)
1204 		sp->pr_agentid = p->p_agenttp->t_tid;
1205 
1206 	/* get the chosen lwp's status */
1207 	prgetlwpstatus32(t, &sp->pr_lwp, zp);
1208 
1209 	/* replicate the flags */
1210 	sp->pr_flags = sp->pr_lwp.pr_flags;
1211 }
1212 #endif	/* _SYSCALL32_IMPL */
1213 
1214 /*
1215  * Return lwp status.
1216  */
1217 void
1218 prgetlwpstatus(kthread_t *t, lwpstatus_t *sp, zone_t *zp)
1219 {
1220 	proc_t *p = ttoproc(t);
1221 	klwp_t *lwp = ttolwp(t);
1222 	struct mstate *ms = &lwp->lwp_mstate;
1223 	hrtime_t usr, sys;
1224 	int flags;
1225 	ulong_t instr;
1226 
1227 	ASSERT(MUTEX_HELD(&p->p_lock));
1228 
1229 	bzero(sp, sizeof (*sp));
1230 	flags = 0L;
1231 	if (t->t_state == TS_STOPPED) {
1232 		flags |= PR_STOPPED;
1233 		if ((t->t_schedflag & TS_PSTART) == 0)
1234 			flags |= PR_ISTOP;
1235 	} else if (VSTOPPED(t)) {
1236 		flags |= PR_STOPPED|PR_ISTOP;
1237 	}
1238 	if (!(flags & PR_ISTOP) && (t->t_proc_flag & TP_PRSTOP))
1239 		flags |= PR_DSTOP;
1240 	if (lwp->lwp_asleep)
1241 		flags |= PR_ASLEEP;
1242 	if (t == p->p_agenttp)
1243 		flags |= PR_AGENT;
1244 	if (!(t->t_proc_flag & TP_TWAIT))
1245 		flags |= PR_DETACH;
1246 	if (t->t_proc_flag & TP_DAEMON)
1247 		flags |= PR_DAEMON;
1248 	if (p->p_proc_flag & P_PR_FORK)
1249 		flags |= PR_FORK;
1250 	if (p->p_proc_flag & P_PR_RUNLCL)
1251 		flags |= PR_RLC;
1252 	if (p->p_proc_flag & P_PR_KILLCL)
1253 		flags |= PR_KLC;
1254 	if (p->p_proc_flag & P_PR_ASYNC)
1255 		flags |= PR_ASYNC;
1256 	if (p->p_proc_flag & P_PR_BPTADJ)
1257 		flags |= PR_BPTADJ;
1258 	if (p->p_proc_flag & P_PR_PTRACE)
1259 		flags |= PR_PTRACE;
1260 	if (p->p_flag & SMSACCT)
1261 		flags |= PR_MSACCT;
1262 	if (p->p_flag & SMSFORK)
1263 		flags |= PR_MSFORK;
1264 	if (p->p_flag & SVFWAIT)
1265 		flags |= PR_VFORKP;
1266 	if (p->p_pgidp->pid_pgorphaned)
1267 		flags |= PR_ORPHAN;
1268 	if (p->p_pidflag & CLDNOSIGCHLD)
1269 		flags |= PR_NOSIGCHLD;
1270 	if (p->p_pidflag & CLDWAITPID)
1271 		flags |= PR_WAITPID;
1272 	sp->pr_flags = flags;
1273 	if (VSTOPPED(t)) {
1274 		sp->pr_why   = PR_REQUESTED;
1275 		sp->pr_what  = 0;
1276 	} else {
1277 		sp->pr_why   = t->t_whystop;
1278 		sp->pr_what  = t->t_whatstop;
1279 	}
1280 	sp->pr_lwpid = t->t_tid;
1281 	sp->pr_cursig  = lwp->lwp_cursig;
1282 	prassignset(&sp->pr_lwppend, &t->t_sig);
1283 	prgethold(t, &sp->pr_lwphold);
1284 	if (t->t_whystop == PR_FAULTED)
1285 		bcopy(&lwp->lwp_siginfo,
1286 		    &sp->pr_info, sizeof (k_siginfo_t));
1287 	else if (lwp->lwp_curinfo)
1288 		bcopy(&lwp->lwp_curinfo->sq_info,
1289 		    &sp->pr_info, sizeof (k_siginfo_t));
1290 	if (SI_FROMUSER(&lwp->lwp_siginfo) && zp->zone_id != GLOBAL_ZONEID &&
1291 	    sp->pr_info.si_zoneid != zp->zone_id) {
1292 		sp->pr_info.si_pid = zp->zone_zsched->p_pid;
1293 		sp->pr_info.si_uid = 0;
1294 		sp->pr_info.si_ctid = -1;
1295 		sp->pr_info.si_zoneid = zp->zone_id;
1296 	}
1297 	sp->pr_altstack = lwp->lwp_sigaltstack;
1298 	prgetaction(p, PTOU(p), lwp->lwp_cursig, &sp->pr_action);
1299 	sp->pr_oldcontext = (uintptr_t)lwp->lwp_oldcontext;
1300 	sp->pr_ustack = lwp->lwp_ustack;
1301 	(void) strncpy(sp->pr_clname, sclass[t->t_cid].cl_name,
1302 	    sizeof (sp->pr_clname) - 1);
1303 	if (flags & PR_STOPPED)
1304 		hrt2ts(t->t_stoptime, &sp->pr_tstamp);
1305 	usr = ms->ms_acct[LMS_USER];
1306 	sys = ms->ms_acct[LMS_SYSTEM] + ms->ms_acct[LMS_TRAP];
1307 	scalehrtime(&usr);
1308 	scalehrtime(&sys);
1309 	hrt2ts(usr, &sp->pr_utime);
1310 	hrt2ts(sys, &sp->pr_stime);
1311 
1312 	/*
1313 	 * Fetch the current instruction, if not a system process.
1314 	 * We don't attempt this unless the lwp is stopped.
1315 	 */
1316 	if ((p->p_flag & SSYS) || p->p_as == &kas)
1317 		sp->pr_flags |= (PR_ISSYS|PR_PCINVAL);
1318 	else if (!(flags & PR_STOPPED))
1319 		sp->pr_flags |= PR_PCINVAL;
1320 	else if (!prfetchinstr(lwp, &instr))
1321 		sp->pr_flags |= PR_PCINVAL;
1322 	else
1323 		sp->pr_instr = instr;
1324 
1325 	/*
1326 	 * Drop p_lock while touching the lwp's stack.
1327 	 */
1328 	mutex_exit(&p->p_lock);
1329 	if (prisstep(lwp))
1330 		sp->pr_flags |= PR_STEP;
1331 	if ((flags & (PR_STOPPED|PR_ASLEEP)) && t->t_sysnum) {
1332 		int i;
1333 
1334 		sp->pr_syscall = get_syscall_args(lwp,
1335 		    (long *)sp->pr_sysarg, &i);
1336 		sp->pr_nsysarg = (ushort_t)i;
1337 	}
1338 	if ((flags & PR_STOPPED) || t == curthread)
1339 		prgetprregs(lwp, sp->pr_reg);
1340 	if ((t->t_state == TS_STOPPED && t->t_whystop == PR_SYSEXIT) ||
1341 	    (flags & PR_VFORKP)) {
1342 		user_t *up;
1343 		auxv_t *auxp;
1344 		int i;
1345 
1346 		sp->pr_errno = prgetrvals(lwp, &sp->pr_rval1, &sp->pr_rval2);
1347 		if (sp->pr_errno == 0)
1348 			sp->pr_errpriv = PRIV_NONE;
1349 		else
1350 			sp->pr_errpriv = lwp->lwp_badpriv;
1351 
1352 		if (t->t_sysnum == SYS_execve) {
1353 			up = PTOU(p);
1354 			sp->pr_sysarg[0] = 0;
1355 			sp->pr_sysarg[1] = (uintptr_t)up->u_argv;
1356 			sp->pr_sysarg[2] = (uintptr_t)up->u_envp;
1357 			sp->pr_sysarg[3] = 0;
1358 			for (i = 0, auxp = up->u_auxv;
1359 			    i < sizeof (up->u_auxv) / sizeof (up->u_auxv[0]);
1360 			    i++, auxp++) {
1361 				if (auxp->a_type == AT_SUN_EXECNAME) {
1362 					sp->pr_sysarg[0] =
1363 					    (uintptr_t)auxp->a_un.a_ptr;
1364 					break;
1365 				}
1366 			}
1367 		}
1368 	}
1369 	if (prhasfp())
1370 		prgetprfpregs(lwp, &sp->pr_fpreg);
1371 	mutex_enter(&p->p_lock);
1372 }
1373 
1374 /*
1375  * Get the sigaction structure for the specified signal.  The u-block
1376  * must already have been mapped in by the caller.
1377  */
1378 void
1379 prgetaction(proc_t *p, user_t *up, uint_t sig, struct sigaction *sp)
1380 {
1381 	int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;
1382 
1383 	bzero(sp, sizeof (*sp));
1384 
1385 	if (sig != 0 && (unsigned)sig < nsig) {
1386 		sp->sa_handler = up->u_signal[sig-1];
1387 		prassignset(&sp->sa_mask, &up->u_sigmask[sig-1]);
1388 		if (sigismember(&up->u_sigonstack, sig))
1389 			sp->sa_flags |= SA_ONSTACK;
1390 		if (sigismember(&up->u_sigresethand, sig))
1391 			sp->sa_flags |= SA_RESETHAND;
1392 		if (sigismember(&up->u_sigrestart, sig))
1393 			sp->sa_flags |= SA_RESTART;
1394 		if (sigismember(&p->p_siginfo, sig))
1395 			sp->sa_flags |= SA_SIGINFO;
1396 		if (sigismember(&up->u_signodefer, sig))
1397 			sp->sa_flags |= SA_NODEFER;
1398 		if (sig == SIGCLD) {
1399 			if (p->p_flag & SNOWAIT)
1400 				sp->sa_flags |= SA_NOCLDWAIT;
1401 			if ((p->p_flag & SJCTL) == 0)
1402 				sp->sa_flags |= SA_NOCLDSTOP;
1403 		}
1404 	}
1405 }
1406 
1407 #ifdef _SYSCALL32_IMPL
1408 void
1409 prgetaction32(proc_t *p, user_t *up, uint_t sig, struct sigaction32 *sp)
1410 {
1411 	int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;
1412 
1413 	bzero(sp, sizeof (*sp));
1414 
1415 	if (sig != 0 && (unsigned)sig < nsig) {
1416 		sp->sa_handler = (caddr32_t)(uintptr_t)up->u_signal[sig-1];
1417 		prassignset(&sp->sa_mask, &up->u_sigmask[sig-1]);
1418 		if (sigismember(&up->u_sigonstack, sig))
1419 			sp->sa_flags |= SA_ONSTACK;
1420 		if (sigismember(&up->u_sigresethand, sig))
1421 			sp->sa_flags |= SA_RESETHAND;
1422 		if (sigismember(&up->u_sigrestart, sig))
1423 			sp->sa_flags |= SA_RESTART;
1424 		if (sigismember(&p->p_siginfo, sig))
1425 			sp->sa_flags |= SA_SIGINFO;
1426 		if (sigismember(&up->u_signodefer, sig))
1427 			sp->sa_flags |= SA_NODEFER;
1428 		if (sig == SIGCLD) {
1429 			if (p->p_flag & SNOWAIT)
1430 				sp->sa_flags |= SA_NOCLDWAIT;
1431 			if ((p->p_flag & SJCTL) == 0)
1432 				sp->sa_flags |= SA_NOCLDSTOP;
1433 		}
1434 	}
1435 }
1436 #endif	/* _SYSCALL32_IMPL */
1437 
1438 /*
1439  * Count the number of segments in this process's address space.
1440  */
1441 uint_t
1442 prnsegs(struct as *as, int reserved)
1443 {
1444 	uint_t n = 0;
1445 	struct seg *seg;
1446 
1447 	ASSERT(as != &kas && AS_WRITE_HELD(as));
1448 
1449 	for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
1450 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
1451 		caddr_t saddr, naddr;
1452 		void *tmp = NULL;
1453 
1454 		if ((seg->s_flags & S_HOLE) != 0) {
1455 			continue;
1456 		}
1457 
1458 		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1459 			(void) pr_getprot(seg, reserved, &tmp,
1460 			    &saddr, &naddr, eaddr);
1461 			if (saddr != naddr) {
1462 				n++;
1463 				/*
1464 				 * prnsegs() was formerly designated to return
1465 				 * an 'int' despite having no ability or use
1466 				 * for negative results.  As part of changing
1467 				 * it to 'uint_t', keep the old effective limit
1468 				 * of INT_MAX in place.
1469 				 */
1470 				if (n == INT_MAX) {
1471 					pr_getprot_done(&tmp);
1472 					ASSERT(tmp == NULL);
1473 					return (n);
1474 				}
1475 			}
1476 		}
1477 
1478 		ASSERT(tmp == NULL);
1479 	}
1480 
1481 	return (n);
1482 }
1483 
1484 /*
1485  * Convert uint32_t to decimal string w/o leading zeros.
1486  * Add trailing null characters if 'len' is greater than string length.
1487  * Return the string length.
1488  */
1489 int
1490 pr_u32tos(uint32_t n, char *s, int len)
1491 {
1492 	char cbuf[11];		/* 32-bit unsigned integer fits in 10 digits */
1493 	char *cp = cbuf;
1494 	char *end = s + len;
1495 
1496 	do {
1497 		*cp++ = (char)(n % 10 + '0');
1498 		n /= 10;
1499 	} while (n);
1500 
1501 	len = (int)(cp - cbuf);
1502 
1503 	do {
1504 		*s++ = *--cp;
1505 	} while (cp > cbuf);
1506 
1507 	while (s < end)		/* optional pad */
1508 		*s++ = '\0';
1509 
1510 	return (len);
1511 }
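/*
 * Worked example (illustrative): pr_u32tos(305, buf, 8) writes "305"
 * followed by five NUL pad bytes and returns 3; pr_u32tos(305, buf, 0)
 * writes just "305" with no padding, as pr_object_name() below relies
 * upon, and likewise returns 3.
 */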
1512 
1513 /*
1514  * Convert uint64_t to decimal string w/o leading zeros.
1515  * Return the string length.
1516  */
1517 static int
1518 pr_u64tos(uint64_t n, char *s)
1519 {
1520 	char cbuf[21];		/* 64-bit unsigned integer fits in 20 digits */
1521 	char *cp = cbuf;
1522 	int len;
1523 
1524 	do {
1525 		*cp++ = (char)(n % 10 + '0');
1526 		n /= 10;
1527 	} while (n);
1528 
1529 	len = (int)(cp - cbuf);
1530 
1531 	do {
1532 		*s++ = *--cp;
1533 	} while (cp > cbuf);
1534 
1535 	return (len);
1536 }
1537 
1538 /*
1539  * Similar to getf() / getf_gen(), but for the specified process.  On success,
1540  * returns the fp with fp->f_count incremented.  The caller MUST call
1541  * pr_releasef(fp) on the returned fp after completing any actions using it.
1542  * We return a reference-held (fp->f_count bumped) file_t so no other closef()
1543  * can invoke destructive VOP_CLOSE actions while we're inspecting the
1544  * process's FD.
1545  *
1546  * Returns NULL for errors: either an empty process-table slot post-fi_lock
1547  * and UF_ENTER, or too many mutex_tryenter() failures on the file_t's f_tlock.
1548  * Both failure modes have DTrace probes.
1549  *
1550  * The current design of the procfs "close" code path uses the following lock
1551  * order of:
1552  *
1553  *   1: (file_t) f_tlock
1554  *   2: (proc_t) p_lock AND setting p->p_proc_flag's P_PR_LOCK
1555  *
1556  * That happens because closef() holds f_tlock while calling fop_close(),
1557  * which can be prclose(), which currently waits on and sets P_PR_LOCK at its
1558  * beginning.
1559  *
1560  * That lock order creates a challenge for pr_getf, which needs to take those
1561  * locks in the opposite order when the fd points to a procfs file descriptor.
1562  * The solution chosen here is to use mutex_tryenter on f_tlock and retry some
1563  * (limited) number of times, failing if we don't get both locks.
1564  *
1565  * The cases where this can fail are rare, and all involve a procfs caller
1566  * asking for info (eg. FDINFO) on another procfs FD.  In these cases,
1567  * returning EBADF (which results from a NULL return from pr_getf()) is
1568  * acceptable.
1569  *
1570  * One can increase the number of tries in pr_getf_maxtries if one is worried
1571  * about the contentious case.
1572  */
1573 
1574 uint64_t pr_getf_tryfails; /* Bumped for statistical purposes. */
1575 int pr_getf_maxtries = 3;  /* So you can tune it from /etc/system */
1576 
1577 file_t *
1578 pr_getf(proc_t *p, uint_t fd, short *flag)
1579 {
1580 	uf_entry_t *ufp;
1581 	uf_info_t *fip;
1582 	file_t *fp;
1583 	int tries = 0;
1584 
1585 	ASSERT(MUTEX_HELD(&p->p_lock) && (p->p_proc_flag & P_PR_LOCK));
1586 
1587 retry:
1588 	fip = P_FINFO(p);
1589 
1590 	if (fd >= fip->fi_nfiles)
1591 		return (NULL);
1592 
1593 	mutex_exit(&p->p_lock);
1594 	mutex_enter(&fip->fi_lock);
1595 	UF_ENTER(ufp, fip, fd);
1596 	if ((fp = ufp->uf_file) != NULL && fp->f_count > 0) {
1597 		if (mutex_tryenter(&fp->f_tlock)) {
1598 			ASSERT(fp->f_count > 0);
1599 			fp->f_count++;
1600 			mutex_exit(&fp->f_tlock);
1601 			if (flag != NULL)
1602 				*flag = ufp->uf_flag;
1603 		} else {
1604 			/*
1605 			 * Note the number of mutex_tryenter() attempts.
1606 			 *
1607 			 * The exit path will catch this and try again if we
1608 			 * are below the retry threshold (pr_getf_maxtries).
1609 			 */
1610 			tries++;
1611 			pr_getf_tryfails++;
1612 			/*
1613 			 * If we hit pr_getf_maxtries, we'll return NULL.
1614 			 * DTrace scripts looking for this sort of failure
1615 			 * should check when arg1 is pr_getf_maxtries.
1616 			 */
1617 			DTRACE_PROBE2(pr_getf_tryfail, file_t *, fp, int,
1618 			    tries);
1619 			fp = NULL;
1620 		}
1621 	} else {
1622 		fp = NULL;
1623 		/* If we fail here, someone else closed this FD. */
1624 		DTRACE_PROBE1(pr_getf_emptyslot, int, tries);
1625 		tries = pr_getf_maxtries; /* Don't bother retrying. */
1626 	}
1627 	UF_EXIT(ufp);
1628 	mutex_exit(&fip->fi_lock);
1629 	mutex_enter(&p->p_lock);
1630 
1631 	/* Use goto instead of tail-recursion so we can keep "tries" around. */
1632 	if (fp == NULL) {
1633 		/* "tries" starts at 1. */
1634 		if (tries < pr_getf_maxtries)
1635 			goto retry;
1636 	} else {
1637 		/*
1638 		 * Probes here will detect successes after arg1's number of
1639 		 * mutex_tryenter() calls.
1640 		 */
1641 		DTRACE_PROBE2(pr_getf_trysuccess, file_t *, fp, int, tries + 1);
1642 	}
1643 
1644 	return (fp);
1645 }
1646 
1647 
1648 /*
1649  * Just as pr_getf() is a little unusual in how it goes about making the file_t
1650  * safe for procfs consumers to access, so too is pr_releasef() for safely
1651  * releasing that "hold".  The "hold" is unlike normal file descriptor activity
1652  * -- procfs is just an interloper here, wanting access to the vnode_t without
1653  * risk of a racing close() disrupting the state.  Just as pr_getf() avoids some
1654  * of the typical file_t behavior (such as auditing) when establishing its hold,
1655  * so too should pr_releasef().  It should not go through the motions of
1656  * closef() (since it is not a true close()) unless racing activity causes it to
1657  * be the last actor holding the refcount above zero.
1658  *
1659  * Under normal circumstances, we expect to find file_t`f_count > 1 after
1660  * the successful pr_getf() call.  We are, after all, accessing a resource
1661  * already held by the process in question.  We would also expect to rarely race
1662  * with a close() of the underlying fd, meaning that file_t`f_count > 1 would
1663  * still hold at pr_releasef() time.  That would mean we only need to decrement
1664  * f_count, leaving it to the process to later close the fd (thus triggering
1665  * VOP_CLOSE(), etc).
1666  *
1667  * It is only when that process manages to close() the fd while we have it
1668  * "held" in procfs that we must make a trip through the traditional closef()
1669  * logic to ensure proper tear-down of the file_t.
1670  */
1671 void
1672 pr_releasef(file_t *fp)
1673 {
1674 	mutex_enter(&fp->f_tlock);
1675 	if (fp->f_count > 1) {
1676 		/*
1677 		 * This is the most common case: The file is still held open by
1678 		 * the process, and we simply need to release our hold by
1679 		 * decrementing f_count
1680 		 */
1681 		fp->f_count--;
1682 		mutex_exit(&fp->f_tlock);
1683 	} else {
1684 		/*
1685 		 * A rare occasion: The process snuck a close() of this file
1686 		 * while we were doing our business in procfs.  Given that
1687 		 * f_count == 1, we are the only one with a reference to the
1688 		 * file_t and need to take a trip through closef() to free it.
1689 		 */
1690 		mutex_exit(&fp->f_tlock);
1691 		(void) closef(fp);
1692 	}
1693 }
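/*
 * A hedged sketch of the intended pr_getf()/pr_releasef() pairing,
 * with the process locked (P_PR_LOCK) as pr_getf() asserts:
 *
 *	file_t *fp;
 *	short flag;
 *
 *	if ((fp = pr_getf(p, fd, &flag)) == NULL)
 *		return (EBADF);
 *	...inspect fp->f_vnode, fp->f_flag, etc...
 *	pr_releasef(fp);
 */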
1694 
1695 void
1696 pr_object_name(char *name, vnode_t *vp, struct vattr *vattr)
1697 {
1698 	char *s = name;
1699 	struct vfs *vfsp;
1700 	struct vfssw *vfsswp;
1701 
1702 	if ((vfsp = vp->v_vfsp) != NULL &&
1703 	    ((vfsswp = vfssw + vfsp->vfs_fstype), vfsswp->vsw_name) &&
1704 	    *vfsswp->vsw_name) {
1705 		(void) strcpy(s, vfsswp->vsw_name);
1706 		s += strlen(s);
1707 		*s++ = '.';
1708 	}
1709 	s += pr_u32tos(getmajor(vattr->va_fsid), s, 0);
1710 	*s++ = '.';
1711 	s += pr_u32tos(getminor(vattr->va_fsid), s, 0);
1712 	*s++ = '.';
1713 	s += pr_u64tos(vattr->va_nodeid, s);
1714 	*s++ = '\0';
1715 }
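/*
 * Illustrative result (values hypothetical): for an object on a ufs
 * filesystem whose fsid has major 136 and minor 8 and whose node id
 * is 42, the generated name is "ufs.136.8.42".
 */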
1716 
1717 struct seg *
1718 break_seg(proc_t *p)
1719 {
1720 	caddr_t addr = p->p_brkbase;
1721 	struct seg *seg;
1722 	struct vnode *vp;
1723 
1724 	if (p->p_brksize != 0)
1725 		addr += p->p_brksize - 1;
1726 	seg = as_segat(p->p_as, addr);
1727 	if (seg != NULL && seg->s_ops == &segvn_ops &&
1728 	    (SEGOP_GETVP(seg, seg->s_base, &vp) != 0 || vp == NULL))
1729 		return (seg);
1730 	return (NULL);
1731 }
1732 
1733 /*
1734  * Implementation of service functions to handle procfs generic chained
1735  * copyout buffers.
1736  */
1737 typedef struct pr_iobuf_list {
1738 	list_node_t	piol_link;	/* buffer linkage */
1739 	size_t		piol_size;	/* total size (header + data) */
1740 	size_t		piol_usedsize;	/* amount to copy out from this buf */
1741 } piol_t;
1742 
1743 #define	MAPSIZE	(64 * 1024)
1744 #define	PIOL_DATABUF(iol)	((void *)(&(iol)[1]))
1745 
1746 void
1747 pr_iol_initlist(list_t *iolhead, size_t itemsize, int n)
1748 {
1749 	piol_t	*iol;
1750 	size_t	initial_size = MAX(1, n) * itemsize;
1751 
1752 	list_create(iolhead, sizeof (piol_t), offsetof(piol_t, piol_link));
1753 
1754 	ASSERT(list_head(iolhead) == NULL);
1755 	ASSERT(itemsize < MAPSIZE - sizeof (*iol));
1756 	ASSERT(initial_size > 0);
1757 
1758 	/*
1759 	 * Someone creating chained copyout buffers may ask for less than
1760 	 * MAPSIZE if the amount of data to be buffered is known to be
1761 	 * smaller than that.
1762 	 * But in order to prevent involuntary self-denial of service,
1763 	 * the requested input size is clamped at MAPSIZE.
1764 	 */
1765 	initial_size = MIN(MAPSIZE, initial_size + sizeof (*iol));
1766 	iol = kmem_alloc(initial_size, KM_SLEEP);
1767 	list_insert_head(iolhead, iol);
1768 	iol->piol_usedsize = 0;
1769 	iol->piol_size = initial_size;
1770 }
1771 
1772 void *
1773 pr_iol_newbuf(list_t *iolhead, size_t itemsize)
1774 {
1775 	piol_t	*iol;
1776 	char	*new;
1777 
1778 	ASSERT(itemsize < MAPSIZE - sizeof (*iol));
1779 	ASSERT(list_head(iolhead) != NULL);
1780 
1781 	iol = (piol_t *)list_tail(iolhead);
1782 
1783 	if (iol->piol_size <
1784 	    iol->piol_usedsize + sizeof (*iol) + itemsize) {
1785 		/*
1786 		 * Out of space in the current buffer. Allocate more.
1787 		 */
1788 		piol_t *newiol;
1789 
1790 		newiol = kmem_alloc(MAPSIZE, KM_SLEEP);
1791 		newiol->piol_size = MAPSIZE;
1792 		newiol->piol_usedsize = 0;
1793 
1794 		list_insert_after(iolhead, iol, newiol);
1795 		iol = list_next(iolhead, iol);
1796 		ASSERT(iol == newiol);
1797 	}
1798 	new = (char *)PIOL_DATABUF(iol) + iol->piol_usedsize;
1799 	iol->piol_usedsize += itemsize;
1800 	bzero(new, itemsize);
1801 	return (new);
1802 }
1803 
1804 void
1805 pr_iol_freelist(list_t *iolhead)
1806 {
1807 	piol_t	*iol;
1808 
1809 	while ((iol = list_head(iolhead)) != NULL) {
1810 		list_remove(iolhead, iol);
1811 		kmem_free(iol, iol->piol_size);
1812 	}
1813 	list_destroy(iolhead);
1814 }
1815 
1816 int
1817 pr_iol_copyout_and_free(list_t *iolhead, caddr_t *tgt, int errin)
1818 {
1819 	int error = errin;
1820 	piol_t	*iol;
1821 
1822 	while ((iol = list_head(iolhead)) != NULL) {
1823 		list_remove(iolhead, iol);
1824 		if (!error) {
1825 			if (copyout(PIOL_DATABUF(iol), *tgt,
1826 			    iol->piol_usedsize))
1827 				error = EFAULT;
1828 			*tgt += iol->piol_usedsize;
1829 		}
1830 		kmem_free(iol, iol->piol_size);
1831 	}
1832 	list_destroy(iolhead);
1833 
1834 	return (error);
1835 }
1836 
1837 int
1838 pr_iol_uiomove_and_free(list_t *iolhead, uio_t *uiop, int errin)
1839 {
1840 	offset_t	off = uiop->uio_offset;
1841 	char		*base;
1842 	size_t		size;
1843 	piol_t		*iol;
1844 	int		error = errin;
1845 
1846 	while ((iol = list_head(iolhead)) != NULL) {
1847 		list_remove(iolhead, iol);
1848 		base = PIOL_DATABUF(iol);
1849 		size = iol->piol_usedsize;
1850 		if (off <= size && error == 0 && uiop->uio_resid > 0)
1851 			error = uiomove(base + off, size - off,
1852 			    UIO_READ, uiop);
1853 		off = MAX(0, off - (offset_t)size);
1854 		kmem_free(iol, iol->piol_size);
1855 	}
1856 	list_destroy(iolhead);
1857 
1858 	return (error);
1859 }
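
/*
 * A minimal usage sketch of the chained copyout buffers above (the
 * producing loop is hypothetical; see prgetmap() below for a real
 * caller):
 *
 *	list_t head;
 *	item_t *ip;
 *
 *	pr_iol_initlist(&head, sizeof (item_t), expected_count);
 *	while (more_items()) {
 *		ip = pr_iol_newbuf(&head, sizeof (item_t));
 *		... fill in *ip; the buffer is returned zeroed ...
 *	}
 *	error = pr_iol_uiomove_and_free(&head, uiop, error);
 *
 * The list must be disposed of by exactly one of pr_iol_freelist(),
 * pr_iol_copyout_and_free() or pr_iol_uiomove_and_free().
 */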
1860 
1861 /*
1862  * Return an array of structures with memory map information.
1863  * We allocate here; the caller must deallocate.
1864  */
1865 int
1866 prgetmap(proc_t *p, int reserved, list_t *iolhead)
1867 {
1868 	struct as *as = p->p_as;
1869 	prmap_t *mp;
1870 	struct seg *seg;
1871 	struct seg *brkseg, *stkseg;
1872 	struct vnode *vp;
1873 	struct vattr vattr;
1874 	uint_t prot;
1875 
1876 	ASSERT(as != &kas && AS_WRITE_HELD(as));
1877 
1878 	/*
1879 	 * Request an initial buffer size that doesn't waste memory
1880 	 * if the address space has only a small number of segments.
1881 	 */
1882 	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));
1883 
1884 	if ((seg = AS_SEGFIRST(as)) == NULL)
1885 		return (0);
1886 
1887 	brkseg = break_seg(p);
1888 	stkseg = as_segat(as, prgetstackbase(p));
1889 
1890 	do {
1891 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
1892 		caddr_t saddr, naddr;
1893 		void *tmp = NULL;
1894 
1895 		if ((seg->s_flags & S_HOLE) != 0) {
1896 			continue;
1897 		}
1898 
1899 		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1900 			prot = pr_getprot(seg, reserved, &tmp,
1901 			    &saddr, &naddr, eaddr);
1902 			if (saddr == naddr)
1903 				continue;
1904 
1905 			mp = pr_iol_newbuf(iolhead, sizeof (*mp));
1906 
1907 			mp->pr_vaddr = (uintptr_t)saddr;
1908 			mp->pr_size = naddr - saddr;
1909 			mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
1910 			mp->pr_mflags = 0;
1911 			if (prot & PROT_READ)
1912 				mp->pr_mflags |= MA_READ;
1913 			if (prot & PROT_WRITE)
1914 				mp->pr_mflags |= MA_WRITE;
1915 			if (prot & PROT_EXEC)
1916 				mp->pr_mflags |= MA_EXEC;
1917 			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
1918 				mp->pr_mflags |= MA_SHARED;
1919 			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
1920 				mp->pr_mflags |= MA_NORESERVE;
1921 			if (seg->s_ops == &segspt_shmops ||
1922 			    (seg->s_ops == &segvn_ops &&
1923 			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
1924 				mp->pr_mflags |= MA_ANON;
1925 			if (seg == brkseg)
1926 				mp->pr_mflags |= MA_BREAK;
1927 			else if (seg == stkseg) {
1928 				mp->pr_mflags |= MA_STACK;
1929 				if (reserved) {
1930 					size_t maxstack =
1931 					    ((size_t)p->p_stk_ctl +
1932 					    PAGEOFFSET) & PAGEMASK;
1933 					mp->pr_vaddr =
1934 					    (uintptr_t)prgetstackbase(p) +
1935 					    p->p_stksize - maxstack;
1936 					mp->pr_size = (uintptr_t)naddr -
1937 					    mp->pr_vaddr;
1938 				}
1939 			}
1940 			if (seg->s_ops == &segspt_shmops)
1941 				mp->pr_mflags |= MA_ISM | MA_SHM;
1942 			mp->pr_pagesize = PAGESIZE;
1943 
1944 			/*
1945 			 * Manufacture a filename for the "object" directory.
1946 			 */
1947 			vattr.va_mask = AT_FSID|AT_NODEID;
1948 			if (seg->s_ops == &segvn_ops &&
1949 			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
1950 			    vp != NULL && vp->v_type == VREG &&
1951 			    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
1952 				if (vp == p->p_exec)
1953 					(void) strcpy(mp->pr_mapname, "a.out");
1954 				else
1955 					pr_object_name(mp->pr_mapname,
1956 					    vp, &vattr);
1957 			}
1958 
1959 			/*
1960 			 * Get the SysV shared memory id, if any.
1961 			 */
1962 			if ((mp->pr_mflags & MA_SHARED) && p->p_segacct &&
1963 			    (mp->pr_shmid = shmgetid(p, seg->s_base)) !=
1964 			    SHMID_NONE) {
1965 				if (mp->pr_shmid == SHMID_FREE)
1966 					mp->pr_shmid = -1;
1967 
1968 				mp->pr_mflags |= MA_SHM;
1969 			} else {
1970 				mp->pr_shmid = -1;
1971 			}
1972 		}
1973 		ASSERT(tmp == NULL);
1974 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
1975 
1976 	return (0);
1977 }
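
/*
 * The records built here back the /proc/<pid>/map file, which userland
 * sees as a plain array of prmap_t structures.  A minimal consumer
 * sketch (error handling omitted):
 *
 *	#include <stdio.h>
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *	#include <procfs.h>
 *
 *	prmap_t m;
 *	int fd = open("/proc/self/map", O_RDONLY);
 *
 *	while (read(fd, &m, sizeof (m)) == sizeof (m))
 *		(void) printf("%lx +%lx %s\n", (ulong_t)m.pr_vaddr,
 *		    (ulong_t)m.pr_size, m.pr_mapname);
 *	(void) close(fd);
 */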
1978 
1979 #ifdef _SYSCALL32_IMPL
1980 int
1981 prgetmap32(proc_t *p, int reserved, list_t *iolhead)
1982 {
1983 	struct as *as = p->p_as;
1984 	prmap32_t *mp;
1985 	struct seg *seg;
1986 	struct seg *brkseg, *stkseg;
1987 	struct vnode *vp;
1988 	struct vattr vattr;
1989 	uint_t prot;
1990 
1991 	ASSERT(as != &kas && AS_WRITE_HELD(as));
1992 
1993 	/*
1994 	 * Request an initial buffer size that doesn't waste memory
1995 	 * if the address space has only a small number of segments.
1996 	 */
1997 	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));
1998 
1999 	if ((seg = AS_SEGFIRST(as)) == NULL)
2000 		return (0);
2001 
2002 	brkseg = break_seg(p);
2003 	stkseg = as_segat(as, prgetstackbase(p));
2004 
2005 	do {
2006 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
2007 		caddr_t saddr, naddr;
2008 		void *tmp = NULL;
2009 
2010 		if ((seg->s_flags & S_HOLE) != 0) {
2011 			continue;
2012 		}
2013 
2014 		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
2015 			prot = pr_getprot(seg, reserved, &tmp,
2016 			    &saddr, &naddr, eaddr);
2017 			if (saddr == naddr)
2018 				continue;
2019 
2020 			mp = pr_iol_newbuf(iolhead, sizeof (*mp));
2021 
2022 			mp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
2023 			mp->pr_size = (size32_t)(naddr - saddr);
2024 			mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
2025 			mp->pr_mflags = 0;
2026 			if (prot & PROT_READ)
2027 				mp->pr_mflags |= MA_READ;
2028 			if (prot & PROT_WRITE)
2029 				mp->pr_mflags |= MA_WRITE;
2030 			if (prot & PROT_EXEC)
2031 				mp->pr_mflags |= MA_EXEC;
2032 			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
2033 				mp->pr_mflags |= MA_SHARED;
2034 			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
2035 				mp->pr_mflags |= MA_NORESERVE;
2036 			if (seg->s_ops == &segspt_shmops ||
2037 			    (seg->s_ops == &segvn_ops &&
2038 			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
2039 				mp->pr_mflags |= MA_ANON;
2040 			if (seg == brkseg)
2041 				mp->pr_mflags |= MA_BREAK;
2042 			else if (seg == stkseg) {
2043 				mp->pr_mflags |= MA_STACK;
2044 				if (reserved) {
2045 					size_t maxstack =
2046 					    ((size_t)p->p_stk_ctl +
2047 					    PAGEOFFSET) & PAGEMASK;
2048 					uintptr_t vaddr =
2049 					    (uintptr_t)prgetstackbase(p) +
2050 					    p->p_stksize - maxstack;
2051 					mp->pr_vaddr = (caddr32_t)vaddr;
2052 					mp->pr_size = (size32_t)
2053 					    ((uintptr_t)naddr - vaddr);
2054 				}
2055 			}
2056 			if (seg->s_ops == &segspt_shmops)
2057 				mp->pr_mflags |= MA_ISM | MA_SHM;
2058 			mp->pr_pagesize = PAGESIZE;
2059 
2060 			/*
2061 			 * Manufacture a filename for the "object" directory.
2062 			 */
2063 			vattr.va_mask = AT_FSID|AT_NODEID;
2064 			if (seg->s_ops == &segvn_ops &&
2065 			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
2066 			    vp != NULL && vp->v_type == VREG &&
2067 			    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
2068 				if (vp == p->p_exec)
2069 					(void) strcpy(mp->pr_mapname, "a.out");
2070 				else
2071 					pr_object_name(mp->pr_mapname,
2072 					    vp, &vattr);
2073 			}
2074 
2075 			/*
2076 			 * Get the SysV shared memory id, if any.
2077 			 */
2078 			if ((mp->pr_mflags & MA_SHARED) && p->p_segacct &&
2079 			    (mp->pr_shmid = shmgetid(p, seg->s_base)) !=
2080 			    SHMID_NONE) {
2081 				if (mp->pr_shmid == SHMID_FREE)
2082 					mp->pr_shmid = -1;
2083 
2084 				mp->pr_mflags |= MA_SHM;
2085 			} else {
2086 				mp->pr_shmid = -1;
2087 			}
2088 		}
2089 		ASSERT(tmp == NULL);
2090 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
2091 
2092 	return (0);
2093 }
2094 #endif	/* _SYSCALL32_IMPL */
2095 
2096 /*
2097  * Return the size of the /proc page data file.
2098  */
2099 size_t
2100 prpdsize(struct as *as)
2101 {
2102 	struct seg *seg;
2103 	size_t size;
2104 
2105 	ASSERT(as != &kas && AS_WRITE_HELD(as));
2106 
2107 	if ((seg = AS_SEGFIRST(as)) == NULL)
2108 		return (0);
2109 
2110 	size = sizeof (prpageheader_t);
2111 	do {
2112 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
2113 		caddr_t saddr, naddr;
2114 		void *tmp = NULL;
2115 		size_t npage;
2116 
2117 		if ((seg->s_flags & S_HOLE) != 0) {
2118 			continue;
2119 		}
2120 
2121 		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
2122 			(void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
2123 			if ((npage = (naddr - saddr) / PAGESIZE) != 0)
2124 				size += sizeof (prasmap_t) + round8(npage);
2125 		}
2126 		ASSERT(tmp == NULL);
2127 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
2128 
2129 	return (size);
2130 }
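
/*
 * One byte of page data is returned per page, padded so that the next
 * prasmap_t stays 8-byte aligned.  For example (figures hypothetical),
 * an address space with mappings of 6 and 100 pages would be sized as
 *
 *	sizeof (prpageheader_t)
 *	    + sizeof (prasmap_t) + round8(6)	(i.e. + 8)
 *	    + sizeof (prasmap_t) + round8(100)	(i.e. + 104)
 */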
2131 
2132 #ifdef _SYSCALL32_IMPL
2133 size_t
2134 prpdsize32(struct as *as)
2135 {
2136 	struct seg *seg;
2137 	size_t size;
2138 
2139 	ASSERT(as != &kas && AS_WRITE_HELD(as));
2140 
2141 	if ((seg = AS_SEGFIRST(as)) == NULL)
2142 		return (0);
2143 
2144 	size = sizeof (prpageheader32_t);
2145 	do {
2146 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
2147 		caddr_t saddr, naddr;
2148 		void *tmp = NULL;
2149 		size_t npage;
2150 
2151 		if ((seg->s_flags & S_HOLE) != 0) {
2152 			continue;
2153 		}
2154 
2155 		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
2156 			(void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
2157 			if ((npage = (naddr - saddr) / PAGESIZE) != 0)
2158 				size += sizeof (prasmap32_t) + round8(npage);
2159 		}
2160 		ASSERT(tmp == NULL);
2161 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
2162 
2163 	return (size);
2164 }
2165 #endif	/* _SYSCALL32_IMPL */
2166 
2167 /*
2168  * Read page data information.
2169  */
2170 int
2171 prpdread(proc_t *p, uint_t hatid, struct uio *uiop)
2172 {
2173 	struct as *as = p->p_as;
2174 	caddr_t buf;
2175 	size_t size;
2176 	prpageheader_t *php;
2177 	prasmap_t *pmp;
2178 	struct seg *seg;
2179 	int error;
2180 
2181 again:
2182 	AS_LOCK_ENTER(as, RW_WRITER);
2183 
2184 	if ((seg = AS_SEGFIRST(as)) == NULL) {
2185 		AS_LOCK_EXIT(as);
2186 		return (0);
2187 	}
2188 	size = prpdsize(as);
2189 	if (uiop->uio_resid < size) {
2190 		AS_LOCK_EXIT(as);
2191 		return (E2BIG);
2192 	}
2193 
2194 	buf = kmem_zalloc(size, KM_SLEEP);
2195 	php = (prpageheader_t *)buf;
2196 	pmp = (prasmap_t *)(buf + sizeof (prpageheader_t));
2197 
2198 	hrt2ts(gethrtime(), &php->pr_tstamp);
2199 	php->pr_nmap = 0;
2200 	php->pr_npage = 0;
2201 	do {
2202 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
2203 		caddr_t saddr, naddr;
2204 		void *tmp = NULL;
2205 
2206 		if ((seg->s_flags & S_HOLE) != 0) {
2207 			continue;
2208 		}
2209 
2210 		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
2211 			struct vnode *vp;
2212 			struct vattr vattr;
2213 			size_t len;
2214 			size_t npage;
2215 			uint_t prot;
2216 			uintptr_t next;
2217 
2218 			prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
2219 			if ((len = (size_t)(naddr - saddr)) == 0)
2220 				continue;
2221 			npage = len / PAGESIZE;
2222 			next = (uintptr_t)(pmp + 1) + round8(npage);
2223 			/*
2224 			 * It's possible that the address space can change
2225 			 * subtly even though we're holding as->a_lock
2226 			 * due to the nondeterminism of page_exists() in
2227 			 * the presence of asynchronously flushed pages or
2228 			 * mapped files whose sizes are changing.
2229 			 * page_exists() may be called indirectly from
2230 			 * pr_getprot() by a SEGOP_INCORE() routine.
2231 			 * If this happens we need to make sure we don't
2232 			 * overrun the buffer whose size we computed based
2233 			 * on the initial iteration through the segments.
2234 			 * Once we've detected an overflow, we need to clean
2235 			 * up the temporary memory allocated in pr_getprot()
2236 			 * and retry. If there's a pending signal, we return
2237 			 * EINTR so that this thread can be dislodged if
2238 			 * a latent bug causes us to spin indefinitely.
2239 			 */
2240 			if (next > (uintptr_t)buf + size) {
2241 				pr_getprot_done(&tmp);
2242 				AS_LOCK_EXIT(as);
2243 
2244 				kmem_free(buf, size);
2245 
2246 				if (ISSIG(curthread, JUSTLOOKING))
2247 					return (EINTR);
2248 
2249 				goto again;
2250 			}
2251 
2252 			php->pr_nmap++;
2253 			php->pr_npage += npage;
2254 			pmp->pr_vaddr = (uintptr_t)saddr;
2255 			pmp->pr_npage = npage;
2256 			pmp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
2257 			pmp->pr_mflags = 0;
2258 			if (prot & PROT_READ)
2259 				pmp->pr_mflags |= MA_READ;
2260 			if (prot & PROT_WRITE)
2261 				pmp->pr_mflags |= MA_WRITE;
2262 			if (prot & PROT_EXEC)
2263 				pmp->pr_mflags |= MA_EXEC;
2264 			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
2265 				pmp->pr_mflags |= MA_SHARED;
2266 			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
2267 				pmp->pr_mflags |= MA_NORESERVE;
2268 			if (seg->s_ops == &segspt_shmops ||
2269 			    (seg->s_ops == &segvn_ops &&
2270 			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
2271 				pmp->pr_mflags |= MA_ANON;
2272 			if (seg->s_ops == &segspt_shmops)
2273 				pmp->pr_mflags |= MA_ISM | MA_SHM;
2274 			pmp->pr_pagesize = PAGESIZE;
2275 			/*
2276 			 * Manufacture a filename for the "object" directory.
2277 			 */
2278 			vattr.va_mask = AT_FSID|AT_NODEID;
2279 			if (seg->s_ops == &segvn_ops &&
2280 			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
2281 			    vp != NULL && vp->v_type == VREG &&
2282 			    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
2283 				if (vp == p->p_exec)
2284 					(void) strcpy(pmp->pr_mapname, "a.out");
2285 				else
2286 					pr_object_name(pmp->pr_mapname,
2287 					    vp, &vattr);
2288 			}
2289 
2290 			/*
2291 			 * Get the SysV shared memory id, if any.
2292 			 */
2293 			if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct &&
2294 			    (pmp->pr_shmid = shmgetid(p, seg->s_base)) !=
2295 			    SHMID_NONE) {
2296 				if (pmp->pr_shmid == SHMID_FREE)
2297 					pmp->pr_shmid = -1;
2298 
2299 				pmp->pr_mflags |= MA_SHM;
2300 			} else {
2301 				pmp->pr_shmid = -1;
2302 			}
2303 
2304 			hat_getstat(as, saddr, len, hatid,
2305 			    (char *)(pmp + 1), HAT_SYNC_ZERORM);
2306 			pmp = (prasmap_t *)next;
2307 		}
2308 		ASSERT(tmp == NULL);
2309 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
2310 
2311 	AS_LOCK_EXIT(as);
2312 
2313 	ASSERT((uintptr_t)pmp <= (uintptr_t)buf + size);
2314 	error = uiomove(buf, (caddr_t)pmp - buf, UIO_READ, uiop);
2315 	kmem_free(buf, size);
2316 
2317 	return (error);
2318 }
2319 
2320 #ifdef _SYSCALL32_IMPL
2321 int
2322 prpdread32(proc_t *p, uint_t hatid, struct uio *uiop)
2323 {
2324 	struct as *as = p->p_as;
2325 	caddr_t buf;
2326 	size_t size;
2327 	prpageheader32_t *php;
2328 	prasmap32_t *pmp;
2329 	struct seg *seg;
2330 	int error;
2331 
2332 again:
2333 	AS_LOCK_ENTER(as, RW_WRITER);
2334 
2335 	if ((seg = AS_SEGFIRST(as)) == NULL) {
2336 		AS_LOCK_EXIT(as);
2337 		return (0);
2338 	}
2339 	size = prpdsize32(as);
2340 	if (uiop->uio_resid < size) {
2341 		AS_LOCK_EXIT(as);
2342 		return (E2BIG);
2343 	}
2344 
2345 	buf = kmem_zalloc(size, KM_SLEEP);
2346 	php = (prpageheader32_t *)buf;
2347 	pmp = (prasmap32_t *)(buf + sizeof (prpageheader32_t));
2348 
2349 	hrt2ts32(gethrtime(), &php->pr_tstamp);
2350 	php->pr_nmap = 0;
2351 	php->pr_npage = 0;
2352 	do {
2353 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
2354 		caddr_t saddr, naddr;
2355 		void *tmp = NULL;
2356 
2357 		if ((seg->s_flags & S_HOLE) != 0) {
2358 			continue;
2359 		}
2360 
2361 		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
2362 			struct vnode *vp;
2363 			struct vattr vattr;
2364 			size_t len;
2365 			size_t npage;
2366 			uint_t prot;
2367 			uintptr_t next;
2368 
2369 			prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
2370 			if ((len = (size_t)(naddr - saddr)) == 0)
2371 				continue;
2372 			npage = len / PAGESIZE;
2373 			next = (uintptr_t)(pmp + 1) + round8(npage);
2374 			/*
2375 			 * It's possible that the address space can change
2376 			 * subtly even though we're holding as->a_lock
2377 			 * due to the nondeterminism of page_exists() in
2378 			 * the presence of asynchronously flushed pages or
2379 			 * mapped files whose sizes are changing.
2380 			 * page_exists() may be called indirectly from
2381 			 * pr_getprot() by a SEGOP_INCORE() routine.
2382 			 * If this happens we need to make sure we don't
2383 			 * overrun the buffer whose size we computed based
2384 			 * on the initial iteration through the segments.
2385 			 * Once we've detected an overflow, we need to clean
2386 			 * up the temporary memory allocated in pr_getprot()
2387 			 * and retry. If there's a pending signal, we return
2388 			 * EINTR so that this thread can be dislodged if
2389 			 * a latent bug causes us to spin indefinitely.
2390 			 */
2391 			if (next > (uintptr_t)buf + size) {
2392 				pr_getprot_done(&tmp);
2393 				AS_LOCK_EXIT(as);
2394 
2395 				kmem_free(buf, size);
2396 
2397 				if (ISSIG(curthread, JUSTLOOKING))
2398 					return (EINTR);
2399 
2400 				goto again;
2401 			}
2402 
2403 			php->pr_nmap++;
2404 			php->pr_npage += npage;
2405 			pmp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
2406 			pmp->pr_npage = (size32_t)npage;
2407 			pmp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
2408 			pmp->pr_mflags = 0;
2409 			if (prot & PROT_READ)
2410 				pmp->pr_mflags |= MA_READ;
2411 			if (prot & PROT_WRITE)
2412 				pmp->pr_mflags |= MA_WRITE;
2413 			if (prot & PROT_EXEC)
2414 				pmp->pr_mflags |= MA_EXEC;
2415 			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
2416 				pmp->pr_mflags |= MA_SHARED;
2417 			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
2418 				pmp->pr_mflags |= MA_NORESERVE;
2419 			if (seg->s_ops == &segspt_shmops ||
2420 			    (seg->s_ops == &segvn_ops &&
2421 			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
2422 				pmp->pr_mflags |= MA_ANON;
2423 			if (seg->s_ops == &segspt_shmops)
2424 				pmp->pr_mflags |= MA_ISM | MA_SHM;
2425 			pmp->pr_pagesize = PAGESIZE;
2426 			/*
2427 			 * Manufacture a filename for the "object" directory.
2428 			 */
2429 			vattr.va_mask = AT_FSID|AT_NODEID;
2430 			if (seg->s_ops == &segvn_ops &&
2431 			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
2432 			    vp != NULL && vp->v_type == VREG &&
2433 			    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
2434 				if (vp == p->p_exec)
2435 					(void) strcpy(pmp->pr_mapname, "a.out");
2436 				else
2437 					pr_object_name(pmp->pr_mapname,
2438 					    vp, &vattr);
2439 			}
2440 
2441 			/*
2442 			 * Get the SysV shared memory id, if any.
2443 			 */
2444 			if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct &&
2445 			    (pmp->pr_shmid = shmgetid(p, seg->s_base)) !=
2446 			    SHMID_NONE) {
2447 				if (pmp->pr_shmid == SHMID_FREE)
2448 					pmp->pr_shmid = -1;
2449 
2450 				pmp->pr_mflags |= MA_SHM;
2451 			} else {
2452 				pmp->pr_shmid = -1;
2453 			}
2454 
2455 			hat_getstat(as, saddr, len, hatid,
2456 			    (char *)(pmp + 1), HAT_SYNC_ZERORM);
2457 			pmp = (prasmap32_t *)next;
2458 		}
2459 		ASSERT(tmp == NULL);
2460 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
2461 
2462 	AS_LOCK_EXIT(as);
2463 
2464 	ASSERT((uintptr_t)pmp <= (uintptr_t)buf + size);
2465 	error = uiomove(buf, (caddr_t)pmp - buf, UIO_READ, uiop);
2466 	kmem_free(buf, size);
2467 
2468 	return (error);
2469 }
2470 #endif	/* _SYSCALL32_IMPL */
2471 
2472 ushort_t
2473 prgetpctcpu(uint64_t pct)
2474 {
2475 	/*
2476 	 * The value returned will be relevant in the zone of the examiner,
2477 	 * which may not be the same as the zone which performed the procfs
2478 	 * mount.
2479 	 */
2480 	int nonline = zone_ncpus_online_get(curproc->p_zone);
2481 
2482 	/*
2483 	 * Prorate over online cpus so we don't exceed 100%
2484 	 */
2485 	if (nonline > 1)
2486 		pct /= nonline;
2487 	pct >>= 16;		/* convert to 16-bit scaled integer */
2488 	if (pct > 0x8000)	/* might happen, due to rounding */
2489 		pct = 0x8000;
2490 	return ((ushort_t)pct);
2491 }
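
/*
 * The input is a binary fraction with 1.0 == 0x8000 << 16; the result
 * is the 16-bit fraction used in pr_pctcpu, where 0x8000 == 100%.
 * For example, in a zone with one online cpu an input of 0x40000000
 * (50%) yields 0x4000, which a consumer converts back with something
 * like
 *
 *	double pct = psp->pr_pctcpu * 100.0 / 0x8000;
 */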
2492 
2493 /*
2494  * Return information used by ps(1).
2495  */
2496 void
2497 prgetpsinfo(proc_t *p, psinfo_t *psp)
2498 {
2499 	kthread_t *t;
2500 	struct cred *cred;
2501 	hrtime_t hrutime, hrstime;
2502 
2503 	ASSERT(MUTEX_HELD(&p->p_lock));
2504 
2505 	if ((t = prchoose(p)) == NULL)	/* returns locked thread */
2506 		bzero(psp, sizeof (*psp));
2507 	else {
2508 		thread_unlock(t);
2509 		bzero(psp, sizeof (*psp) - sizeof (psp->pr_lwp));
2510 	}
2511 
2512 	/*
2513 	 * only export SSYS and SMSACCT; everything else is off-limits to
2514 	 * userland apps.
2515 	 */
2516 	psp->pr_flag = p->p_flag & (SSYS | SMSACCT);
2517 	psp->pr_nlwp = p->p_lwpcnt;
2518 	psp->pr_nzomb = p->p_zombcnt;
2519 	mutex_enter(&p->p_crlock);
2520 	cred = p->p_cred;
2521 	psp->pr_uid = crgetruid(cred);
2522 	psp->pr_euid = crgetuid(cred);
2523 	psp->pr_gid = crgetrgid(cred);
2524 	psp->pr_egid = crgetgid(cred);
2525 	mutex_exit(&p->p_crlock);
2526 	psp->pr_pid = p->p_pid;
2527 	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
2528 	    (p->p_flag & SZONETOP)) {
2529 		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
2530 		/*
2531 		 * Inside local zones, fake zsched's pid as parent pids for
2532 		 * processes which reference processes outside of the zone.
2533 		 */
2534 		psp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
2535 	} else {
2536 		psp->pr_ppid = p->p_ppid;
2537 	}
2538 	psp->pr_pgid = p->p_pgrp;
2539 	psp->pr_sid = p->p_sessp->s_sid;
2540 	psp->pr_taskid = p->p_task->tk_tkid;
2541 	psp->pr_projid = p->p_task->tk_proj->kpj_id;
2542 	psp->pr_poolid = p->p_pool->pool_id;
2543 	psp->pr_zoneid = p->p_zone->zone_id;
2544 	if ((psp->pr_contract = PRCTID(p)) == 0)
2545 		psp->pr_contract = -1;
2546 	psp->pr_addr = (uintptr_t)prgetpsaddr(p);
2547 	switch (p->p_model) {
2548 	case DATAMODEL_ILP32:
2549 		psp->pr_dmodel = PR_MODEL_ILP32;
2550 		break;
2551 	case DATAMODEL_LP64:
2552 		psp->pr_dmodel = PR_MODEL_LP64;
2553 		break;
2554 	}
2555 	hrutime = mstate_aggr_state(p, LMS_USER);
2556 	hrstime = mstate_aggr_state(p, LMS_SYSTEM);
2557 	hrt2ts((hrutime + hrstime), &psp->pr_time);
2558 	TICK_TO_TIMESTRUC(p->p_cutime + p->p_cstime, &psp->pr_ctime);
2559 
2560 	if (t == NULL) {
2561 		int wcode = p->p_wcode;		/* must be atomic read */
2562 
2563 		if (wcode)
2564 			psp->pr_wstat = wstat(wcode, p->p_wdata);
2565 		psp->pr_ttydev = PRNODEV;
2566 		psp->pr_lwp.pr_state = SZOMB;
2567 		psp->pr_lwp.pr_sname = 'Z';
2568 		psp->pr_lwp.pr_bindpro = PBIND_NONE;
2569 		psp->pr_lwp.pr_bindpset = PS_NONE;
2570 	} else {
2571 		user_t *up = PTOU(p);
2572 		struct as *as;
2573 		dev_t d;
2574 		extern dev_t rwsconsdev, rconsdev, uconsdev;
2575 
2576 		d = cttydev(p);
2577 		/*
2578 		 * If the controlling terminal is the real
2579 		 * or workstation console device, map to what the
2580 		 * user thinks is the console device. Handle case when
2581 		 * rwsconsdev or rconsdev is set to NODEV for Starfire.
2582 		 */
2583 		if ((d == rwsconsdev || d == rconsdev) && d != NODEV)
2584 			d = uconsdev;
2585 		psp->pr_ttydev = (d == NODEV) ? PRNODEV : d;
2586 		psp->pr_start = up->u_start;
2587 		bcopy(up->u_comm, psp->pr_fname,
2588 		    MIN(sizeof (up->u_comm), sizeof (psp->pr_fname)-1));
2589 		bcopy(up->u_psargs, psp->pr_psargs,
2590 		    MIN(PRARGSZ-1, PSARGSZ));
2591 		psp->pr_argc = up->u_argc;
2592 		psp->pr_argv = up->u_argv;
2593 		psp->pr_envp = up->u_envp;
2594 
2595 		/* get the chosen lwp's lwpsinfo */
2596 		prgetlwpsinfo(t, &psp->pr_lwp);
2597 
2598 		/* compute %cpu for the process */
2599 		if (p->p_lwpcnt == 1)
2600 			psp->pr_pctcpu = psp->pr_lwp.pr_pctcpu;
2601 		else {
2602 			uint64_t pct = 0;
2603 			hrtime_t cur_time = gethrtime_unscaled();
2604 
2605 			t = p->p_tlist;
2606 			do {
2607 				pct += cpu_update_pct(t, cur_time);
2608 			} while ((t = t->t_forw) != p->p_tlist);
2609 
2610 			psp->pr_pctcpu = prgetpctcpu(pct);
2611 		}
2612 		if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) {
2613 			psp->pr_size = 0;
2614 			psp->pr_rssize = 0;
2615 		} else {
2616 			mutex_exit(&p->p_lock);
2617 			AS_LOCK_ENTER(as, RW_READER);
2618 			psp->pr_size = btopr(as->a_resvsize) *
2619 			    (PAGESIZE / 1024);
2620 			psp->pr_rssize = rm_asrss(as) * (PAGESIZE / 1024);
2621 			psp->pr_pctmem = rm_pctmemory(as);
2622 			AS_LOCK_EXIT(as);
2623 			mutex_enter(&p->p_lock);
2624 		}
2625 	}
2626 }
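
/*
 * A minimal sketch of the matching consumer (error handling omitted);
 * /proc/<pid>/psinfo contains a single psinfo_t:
 *
 *	#include <stdio.h>
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *	#include <procfs.h>
 *
 *	psinfo_t ps;
 *	int fd = open("/proc/self/psinfo", O_RDONLY);
 *
 *	if (read(fd, &ps, sizeof (ps)) == sizeof (ps))
 *		(void) printf("%d %s nlwp=%d\n", (int)ps.pr_pid,
 *		    ps.pr_fname, (int)ps.pr_nlwp);
 *	(void) close(fd);
 */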
2627 
2628 static size_t
2629 prfdinfomisc(list_t *data, uint_t type, const void *val, size_t vlen)
2630 {
2631 	pr_misc_header_t *misc;
2632 	size_t len;
2633 
2634 	len = PRFDINFO_ROUNDUP(sizeof (*misc) + vlen);
2635 
2636 	if (data != NULL) {
2637 		misc = pr_iol_newbuf(data, len);
2638 		misc->pr_misc_type = type;
2639 		misc->pr_misc_size = len;
2640 		misc++;
2641 		bcopy((char *)val, (char *)misc, vlen);
2642 	}
2643 
2644 	return (len);
2645 }
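
/*
 * Each miscellaneous item is therefore a self-describing
 * type/length/value record: pr_misc_size covers the header plus the
 * PRFDINFO_ROUNDUP-padded payload, so the next record always begins
 * pr_misc_size bytes after the current header.  For example (lengths
 * hypothetical), a PR_PATHNAME record carrying the 10 bytes of
 * "/etc/motd\0" occupies PRFDINFO_ROUNDUP(sizeof (pr_misc_header_t)
 * + 10) bytes in the stream.
 */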
2646 
2647 /*
2648  * There's no elegant way to determine if a character device
2649  * supports TLI, so just check a hardcoded list of known TLI
2650  * devices.
2651  */
2652 
2653 static boolean_t
2654 pristli(vnode_t *vp)
2655 {
2656 	static const char *tlidevs[] = {
2657 	    "udp", "udp6", "tcp", "tcp6"
2658 	};
2659 	char *devname;
2660 	uint_t i;
2661 
2662 	ASSERT(vp != NULL);
2663 
2664 	if (vp->v_type != VCHR || vp->v_stream == NULL || vp->v_rdev == 0)
2665 		return (B_FALSE);
2666 
2667 	if ((devname = mod_major_to_name(getmajor(vp->v_rdev))) == NULL)
2668 		return (B_FALSE);
2669 
2670 	for (i = 0; i < ARRAY_SIZE(tlidevs); i++) {
2671 		if (strcmp(devname, tlidevs[i]) == 0)
2672 			return (B_TRUE);
2673 	}
2674 
2675 	return (B_FALSE);
2676 }
2677 
2678 static size_t
2679 prfdinfopath(proc_t *p, vnode_t *vp, list_t *data, cred_t *cred)
2680 {
2681 	char *pathname;
2682 	size_t pathlen;
2683 	size_t sz = 0;
2684 
2685 	/*
2686 	 * The global zone's path to a file in a non-global zone can exceed
2687 	 * MAXPATHLEN.
2688 	 */
2689 	pathlen = MAXPATHLEN * 2 + 1;
2690 	pathname = kmem_alloc(pathlen, KM_SLEEP);
2691 
2692 	if (vnodetopath(NULL, vp, pathname, pathlen, cred) == 0) {
2693 		sz += prfdinfomisc(data, PR_PATHNAME,
2694 		    pathname, strlen(pathname) + 1);
2695 	}
2696 
2697 	kmem_free(pathname, pathlen);
2698 
2699 	return (sz);
2700 }
2701 
2702 static size_t
2703 prfdinfotlisockopt(vnode_t *vp, list_t *data, cred_t *cred)
2704 {
2705 	strcmd_t strcmd;
2706 	int32_t rval;
2707 	size_t sz = 0;
2708 
2709 	strcmd.sc_cmd = TI_GETMYNAME;
2710 	strcmd.sc_timeout = 1;
2711 	strcmd.sc_len = STRCMDBUFSIZE;
2712 
2713 	if (VOP_IOCTL(vp, _I_CMD, (intptr_t)&strcmd, FKIOCTL, cred,
2714 	    &rval, NULL) == 0 && strcmd.sc_len > 0) {
2715 		sz += prfdinfomisc(data, PR_SOCKETNAME, strcmd.sc_buf,
2716 		    strcmd.sc_len);
2717 	}
2718 
2719 	strcmd.sc_cmd = TI_GETPEERNAME;
2720 	strcmd.sc_timeout = 1;
2721 	strcmd.sc_len = STRCMDBUFSIZE;
2722 
2723 	if (VOP_IOCTL(vp, _I_CMD, (intptr_t)&strcmd, FKIOCTL, cred,
2724 	    &rval, NULL) == 0 && strcmd.sc_len > 0) {
2725 		sz += prfdinfomisc(data, PR_PEERSOCKNAME, strcmd.sc_buf,
2726 		    strcmd.sc_len);
2727 	}
2728 
2729 	return (sz);
2730 }
2731 
2732 static size_t
2733 prfdinfosockopt(vnode_t *vp, list_t *data, cred_t *cred)
2734 {
2735 	sonode_t *so;
2736 	socklen_t vlen;
2737 	size_t sz = 0;
2738 	uint_t i;
2739 
2740 	if (vp->v_stream != NULL) {
2741 		so = VTOSO(vp->v_stream->sd_vnode);
2742 
2743 		if (so->so_version == SOV_STREAM)
2744 			so = NULL;
2745 	} else {
2746 		so = VTOSO(vp);
2747 	}
2748 
2749 	if (so == NULL)
2750 		return (0);
2751 
2752 	DTRACE_PROBE1(sonode, sonode_t *, so);
2753 
2754 	/* prmisc - PR_SOCKETNAME */
2755 
2756 	struct sockaddr_storage buf;
2757 	struct sockaddr *name = (struct sockaddr *)&buf;
2758 
2759 	vlen = sizeof (buf);
2760 	if (SOP_GETSOCKNAME(so, name, &vlen, cred) == 0 && vlen > 0)
2761 		sz += prfdinfomisc(data, PR_SOCKETNAME, name, vlen);
2762 
2763 	/* prmisc - PR_PEERSOCKNAME */
2764 
2765 	vlen = sizeof (buf);
2766 	if (SOP_GETPEERNAME(so, name, &vlen, B_FALSE, cred) == 0 && vlen > 0)
2767 		sz += prfdinfomisc(data, PR_PEERSOCKNAME, name, vlen);
2768 
2769 	/* prmisc - PR_SOCKOPTS_BOOL_OPTS */
2770 
2771 	static struct boolopt {
2772 		int		level;
2773 		int		opt;
2774 		int		bopt;
2775 	} boolopts[] = {
2776 		{ SOL_SOCKET, SO_DEBUG,		PR_SO_DEBUG },
2777 		{ SOL_SOCKET, SO_REUSEADDR,	PR_SO_REUSEADDR },
2778 #ifdef SO_REUSEPORT
2779 		/* SmartOS and OmniOS have SO_REUSEPORT */
2780 		{ SOL_SOCKET, SO_REUSEPORT,	PR_SO_REUSEPORT },
2781 #endif
2782 		{ SOL_SOCKET, SO_KEEPALIVE,	PR_SO_KEEPALIVE },
2783 		{ SOL_SOCKET, SO_DONTROUTE,	PR_SO_DONTROUTE },
2784 		{ SOL_SOCKET, SO_BROADCAST,	PR_SO_BROADCAST },
2785 		{ SOL_SOCKET, SO_OOBINLINE,	PR_SO_OOBINLINE },
2786 		{ SOL_SOCKET, SO_DGRAM_ERRIND,	PR_SO_DGRAM_ERRIND },
2787 		{ SOL_SOCKET, SO_ALLZONES,	PR_SO_ALLZONES },
2788 		{ SOL_SOCKET, SO_MAC_EXEMPT,	PR_SO_MAC_EXEMPT },
2789 		{ SOL_SOCKET, SO_MAC_IMPLICIT,	PR_SO_MAC_IMPLICIT },
2790 		{ SOL_SOCKET, SO_EXCLBIND,	PR_SO_EXCLBIND },
2791 		{ SOL_SOCKET, SO_VRRP,		PR_SO_VRRP },
2792 		{ IPPROTO_UDP, UDP_NAT_T_ENDPOINT,
2793 		    PR_UDP_NAT_T_ENDPOINT }
2794 	};
2795 	prsockopts_bool_opts_t opts;
2796 	int val;
2797 
2798 	if (data != NULL) {
2799 		opts.prsock_bool_opts = 0;
2800 
2801 		for (i = 0; i < ARRAY_SIZE(boolopts); i++) {
2802 			vlen = sizeof (val);
2803 			if (SOP_GETSOCKOPT(so, boolopts[i].level,
2804 			    boolopts[i].opt, &val, &vlen, 0, cred) == 0 &&
2805 			    val != 0) {
2806 				opts.prsock_bool_opts |= boolopts[i].bopt;
2807 			}
2808 		}
2809 	}
2810 
2811 	sz += prfdinfomisc(data, PR_SOCKOPTS_BOOL_OPTS, &opts, sizeof (opts));
2812 
2813 	/* prmisc - PR_SOCKOPT_LINGER */
2814 
2815 	struct linger l;
2816 
2817 	vlen = sizeof (l);
2818 	if (SOP_GETSOCKOPT(so, SOL_SOCKET, SO_LINGER, &l, &vlen,
2819 	    0, cred) == 0 && vlen > 0) {
2820 		sz += prfdinfomisc(data, PR_SOCKOPT_LINGER, &l, vlen);
2821 	}
2822 
2823 	/* prmisc - PR_SOCKOPT_* int types */
2824 
2825 	static struct sopt {
2826 		int		level;
2827 		int		opt;
2828 		int		bopt;
2829 	} sopts[] = {
2830 		{ SOL_SOCKET, SO_TYPE,		PR_SOCKOPT_TYPE },
2831 		{ SOL_SOCKET, SO_SNDBUF,	PR_SOCKOPT_SNDBUF },
2832 		{ SOL_SOCKET, SO_RCVBUF,	PR_SOCKOPT_RCVBUF }
2833 	};
2834 
2835 	for (i = 0; i < ARRAY_SIZE(sopts); i++) {
2836 		vlen = sizeof (val);
2837 		if (SOP_GETSOCKOPT(so, sopts[i].level, sopts[i].opt,
2838 		    &val, &vlen, 0, cred) == 0 && vlen > 0) {
2839 			sz += prfdinfomisc(data, sopts[i].bopt, &val, vlen);
2840 		}
2841 	}
2842 
2843 	/* prmisc - PR_SOCKOPT_IP_NEXTHOP */
2844 
2845 	in_addr_t nexthop_val;
2846 
2847 	vlen = sizeof (nexthop_val);
2848 	if (SOP_GETSOCKOPT(so, IPPROTO_IP, IP_NEXTHOP,
2849 	    &nexthop_val, &vlen, 0, cred) == 0 && vlen > 0) {
2850 		sz += prfdinfomisc(data, PR_SOCKOPT_IP_NEXTHOP,
2851 		    &nexthop_val, vlen);
2852 	}
2853 
2854 	/* prmisc - PR_SOCKOPT_IPV6_NEXTHOP */
2855 
2856 	struct sockaddr_in6 nexthop6_val;
2857 
2858 	vlen = sizeof (nexthop6_val);
2859 	if (SOP_GETSOCKOPT(so, IPPROTO_IPV6, IPV6_NEXTHOP,
2860 	    &nexthop6_val, &vlen, 0, cred) == 0 && vlen > 0) {
2861 		sz += prfdinfomisc(data, PR_SOCKOPT_IPV6_NEXTHOP,
2862 		    &nexthop6_val, vlen);
2863 	}
2864 
2865 	/* prmisc - PR_SOCKOPT_TCP_CONGESTION */
2866 
2867 	char cong[CC_ALGO_NAME_MAX];
2868 
2869 	vlen = sizeof (cong);
2870 	if (SOP_GETSOCKOPT(so, IPPROTO_TCP, TCP_CONGESTION,
2871 	    &cong, &vlen, 0, cred) == 0 && vlen > 0) {
2872 		sz += prfdinfomisc(data, PR_SOCKOPT_TCP_CONGESTION, cong, vlen);
2873 	}
2874 
2875 	/* prmisc - PR_SOCKFILTERS_PRIV */
2876 
2877 	struct fil_info fi;
2878 
2879 	vlen = sizeof (fi);
2880 	if (SOP_GETSOCKOPT(so, SOL_FILTER, FIL_LIST,
2881 	    &fi, &vlen, 0, cred) == 0 && vlen != 0) {
2882 		pr_misc_header_t *misc;
2883 		size_t len;
2884 
2885 		/*
2886 		 * We limit the number of returned filters to 32.
2887 		 * This is the maximum number that pfiles will print
2888 		 * anyway.
2889 		 */
2890 		vlen = MIN(32, fi.fi_pos + 1);
2891 		vlen *= sizeof (fi);
2892 
2893 		len = PRFDINFO_ROUNDUP(sizeof (*misc) + vlen);
2894 		sz += len;
2895 
2896 		if (data != NULL) {
2897 			/*
2898 			 * So that the filter list can be built incrementally,
2899 			 * prfdinfomisc() is not used here. Instead we
2900 			 * allocate a buffer directly on the copyout list using
2901 			 * pr_iol_newbuf()
2902 			 */
2903 			misc = pr_iol_newbuf(data, len);
2904 			misc->pr_misc_type = PR_SOCKFILTERS_PRIV;
2905 			misc->pr_misc_size = len;
2906 			misc++;
2907 			len = vlen;
2908 			if (SOP_GETSOCKOPT(so, SOL_FILTER, FIL_LIST,
2909 			    misc, &vlen, 0, cred) == 0) {
2910 				/*
2911 				 * In case the number of filters has reduced
2912 				 * since the first call, explicitly zero out
2913 				 * any unpopulated space.
2914 				 */
2915 				if (vlen < len)
2916 					bzero((char *)misc + vlen, len - vlen);
2917 			} else {
2918 				/* Something went wrong, zero out the result */
2919 				bzero(misc, vlen);
2920 			}
2921 		}
2922 	}
2923 
2924 	return (sz);
2925 }
2926 
2927 typedef struct prfdinfo_nm_path_cbdata {
2928 	proc_t		*nmp_p;
2929 	u_offset_t	nmp_sz;
2930 	list_t		*nmp_data;
2931 } prfdinfo_nm_path_cbdata_t;
2932 
2933 static int
2934 prfdinfo_nm_path(const struct namenode *np, cred_t *cred, void *arg)
2935 {
2936 	prfdinfo_nm_path_cbdata_t *cb = arg;
2937 
2938 	cb->nmp_sz += prfdinfopath(cb->nmp_p, np->nm_vnode, cb->nmp_data, cred);
2939 
2940 	return (0);
2941 }
2942 
2943 u_offset_t
2944 prgetfdinfosize(proc_t *p, vnode_t *vp, cred_t *cred)
2945 {
2946 	u_offset_t sz;
2947 
2948 	/*
2949 	 * All fdinfo files will be at least this big -
2950 	 * sizeof fdinfo struct + zero length trailer
2951 	 */
2952 	sz = offsetof(prfdinfo_t, pr_misc) + sizeof (pr_misc_header_t);
2953 
2954 	/* Pathname */
2955 	switch (vp->v_type) {
2956 	case VDOOR: {
2957 		prfdinfo_nm_path_cbdata_t cb = {
2958 			.nmp_p		= p,
2959 			.nmp_data	= NULL,
2960 			.nmp_sz		= 0
2961 		};
2962 
2963 		(void) nm_walk_mounts(vp, prfdinfo_nm_path, cred, &cb);
2964 		sz += cb.nmp_sz;
2965 		break;
2966 	}
2967 	case VSOCK:
2968 		break;
2969 	default:
2970 		sz += prfdinfopath(p, vp, NULL, cred);
2971 	}
2972 
2973 	/* Socket options */
2974 	if (vp->v_type == VSOCK)
2975 		sz += prfdinfosockopt(vp, NULL, cred);
2976 
2977 	/* TLI/XTI sockets */
2978 	if (pristli(vp))
2979 		sz += prfdinfotlisockopt(vp, NULL, cred);
2980 
2981 	return (sz);
2982 }
2983 
2984 int
2985 prgetfdinfo(proc_t *p, vnode_t *vp, prfdinfo_t *fdinfo, cred_t *cred,
2986     cred_t *file_cred, list_t *data)
2987 {
2988 	vattr_t vattr;
2989 	int error;
2990 
2991 	/*
2992 	 * The buffer has been initialised to zero by pr_iol_newbuf().
2993 	 * Initialise defaults for any values that should not default to zero.
2994 	 */
2995 	fdinfo->pr_uid = (uid_t)-1;
2996 	fdinfo->pr_gid = (gid_t)-1;
2997 	fdinfo->pr_size = -1;
2998 	fdinfo->pr_locktype = F_UNLCK;
2999 	fdinfo->pr_lockpid = -1;
3000 	fdinfo->pr_locksysid = -1;
3001 	fdinfo->pr_peerpid = -1;
3002 
3003 	/* Offset */
3004 
3005 	/*
3006 	 * pr_offset has already been set from the underlying file_t.
3007 	 * Check if it is plausible and reset to -1 if not.
3008 	 */
3009 	if (fdinfo->pr_offset != -1 &&
3010 	    VOP_SEEK(vp, 0, (offset_t *)&fdinfo->pr_offset, NULL) != 0)
3011 		fdinfo->pr_offset = -1;
3012 
3013 	/*
3014 	 * Attributes
3015 	 *
3016 	 * We have two cred_t structures available here.
3017 	 * 'cred' is the caller's credential, and 'file_cred' is the credential
3018 	 * for the file being inspected.
3019 	 *
3020 	 * When looking up the file attributes, file_cred is used in order
3021 	 * that the correct ownership is set for doors and FIFOs. Since the
3022 	 * caller has permission to read the fdinfo file in proc, this does
3023 	 * not expose any additional information.
3024 	 */
3025 	vattr.va_mask = AT_STAT;
3026 	if (VOP_GETATTR(vp, &vattr, 0, file_cred, NULL) == 0) {
3027 		fdinfo->pr_major = getmajor(vattr.va_fsid);
3028 		fdinfo->pr_minor = getminor(vattr.va_fsid);
3029 		fdinfo->pr_rmajor = getmajor(vattr.va_rdev);
3030 		fdinfo->pr_rminor = getminor(vattr.va_rdev);
3031 		fdinfo->pr_ino = (ino64_t)vattr.va_nodeid;
3032 		fdinfo->pr_size = (off64_t)vattr.va_size;
3033 		fdinfo->pr_mode = VTTOIF(vattr.va_type) | vattr.va_mode;
3034 		fdinfo->pr_uid = vattr.va_uid;
3035 		fdinfo->pr_gid = vattr.va_gid;
3036 		if (vp->v_type == VSOCK)
3037 			fdinfo->pr_fileflags |= sock_getfasync(vp);
3038 	}
3039 
3040 	/* locks */
3041 
3042 	flock64_t bf;
3043 
3044 	bzero(&bf, sizeof (bf));
3045 	bf.l_type = F_WRLCK;
3046 
3047 	if (VOP_FRLOCK(vp, F_GETLK, &bf,
3048 	    (uint16_t)(fdinfo->pr_fileflags & 0xffff), 0, NULL,
3049 	    cred, NULL) == 0 && bf.l_type != F_UNLCK) {
3050 		fdinfo->pr_locktype = bf.l_type;
3051 		fdinfo->pr_lockpid = bf.l_pid;
3052 		fdinfo->pr_locksysid = bf.l_sysid;
3053 	}
3054 
3055 	/* peer cred */
3056 
3057 	k_peercred_t kpc;
3058 
3059 	switch (vp->v_type) {
3060 	case VFIFO:
3061 	case VSOCK: {
3062 		int32_t rval;
3063 
3064 		error = VOP_IOCTL(vp, _I_GETPEERCRED, (intptr_t)&kpc,
3065 		    FKIOCTL, cred, &rval, NULL);
3066 		break;
3067 	}
3068 	case VCHR: {
3069 		struct strioctl strioc;
3070 		int32_t rval;
3071 
3072 		if (vp->v_stream == NULL) {
3073 			error = ENOTSUP;
3074 			break;
3075 		}
3076 		strioc.ic_cmd = _I_GETPEERCRED;
3077 		strioc.ic_timout = INFTIM;
3078 		strioc.ic_len = (int)sizeof (k_peercred_t);
3079 		strioc.ic_dp = (char *)&kpc;
3080 
3081 		error = strdoioctl(vp->v_stream, &strioc, FNATIVE | FKIOCTL,
3082 		    STR_NOSIG | K_TO_K, cred, &rval);
3083 		break;
3084 	}
3085 	default:
3086 		error = ENOTSUP;
3087 		break;
3088 	}
3089 
3090 	if (error == 0 && kpc.pc_cr != NULL) {
3091 		proc_t *peerp;
3092 
3093 		fdinfo->pr_peerpid = kpc.pc_cpid;
3094 
3095 		crfree(kpc.pc_cr);
3096 
3097 		mutex_enter(&pidlock);
3098 		if ((peerp = prfind(fdinfo->pr_peerpid)) != NULL) {
3099 			user_t *up;
3100 
3101 			mutex_enter(&peerp->p_lock);
3102 			mutex_exit(&pidlock);
3103 
3104 			up = PTOU(peerp);
3105 			bcopy(up->u_comm, fdinfo->pr_peername,
3106 			    MIN(sizeof (up->u_comm),
3107 			    sizeof (fdinfo->pr_peername) - 1));
3108 
3109 			mutex_exit(&peerp->p_lock);
3110 		} else {
3111 			mutex_exit(&pidlock);
3112 		}
3113 	}
3114 
3115 	/* pathname */
3116 
3117 	switch (vp->v_type) {
3118 	case VDOOR: {
3119 		prfdinfo_nm_path_cbdata_t cb = {
3120 			.nmp_p		= p,
3121 			.nmp_data	= data,
3122 			.nmp_sz		= 0
3123 		};
3124 
3125 		(void) nm_walk_mounts(vp, prfdinfo_nm_path, cred, &cb);
3126 		break;
3127 	}
3128 	case VSOCK:
3129 		/*
3130 		 * Don't attempt to determine the path for a socket as the
3131 		 * vnode has no associated v_path. It will cause a linear scan
3132 		 * of the dnlc table and result in no path being found.
3133 		 */
3134 		break;
3135 	default:
3136 		(void) prfdinfopath(p, vp, data, cred);
3137 	}
3138 
3139 	/* socket options */
3140 	if (vp->v_type == VSOCK)
3141 		(void) prfdinfosockopt(vp, data, cred);
3142 
3143 	/* TLI/XTI stream sockets */
3144 	if (pristli(vp))
3145 		(void) prfdinfotlisockopt(vp, data, cred);
3146 
3147 	/*
3148 	 * Add a terminating header with a zero size.
3149 	 */
3150 	pr_misc_header_t *misc;
3151 
3152 	misc = pr_iol_newbuf(data, sizeof (*misc));
3153 	misc->pr_misc_size = 0;
3154 	misc->pr_misc_type = (uint_t)-1;
3155 
3156 	return (0);
3157 }
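
/*
 * A minimal sketch of walking the records appended above from userland
 * (pointer names hypothetical); the zero-size header terminates the
 * stream:
 *
 *	const pr_misc_header_t *misc =
 *	    (const pr_misc_header_t *)fdinfo->pr_misc;
 *
 *	while (misc->pr_misc_size != 0) {
 *		... consume misc->pr_misc_type and its payload ...
 *		misc = (const pr_misc_header_t *)
 *		    ((const char *)misc + misc->pr_misc_size);
 *	}
 */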
3158 
3159 #ifdef _SYSCALL32_IMPL
3160 void
3161 prgetpsinfo32(proc_t *p, psinfo32_t *psp)
3162 {
3163 	kthread_t *t;
3164 	struct cred *cred;
3165 	hrtime_t hrutime, hrstime;
3166 
3167 	ASSERT(MUTEX_HELD(&p->p_lock));
3168 
3169 	if ((t = prchoose(p)) == NULL)	/* returns locked thread */
3170 		bzero(psp, sizeof (*psp));
3171 	else {
3172 		thread_unlock(t);
3173 		bzero(psp, sizeof (*psp) - sizeof (psp->pr_lwp));
3174 	}
3175 
3176 	/*
3177 	 * only export SSYS and SMSACCT; everything else is off-limits to
3178 	 * userland apps.
3179 	 */
3180 	psp->pr_flag = p->p_flag & (SSYS | SMSACCT);
3181 	psp->pr_nlwp = p->p_lwpcnt;
3182 	psp->pr_nzomb = p->p_zombcnt;
3183 	mutex_enter(&p->p_crlock);
3184 	cred = p->p_cred;
3185 	psp->pr_uid = crgetruid(cred);
3186 	psp->pr_euid = crgetuid(cred);
3187 	psp->pr_gid = crgetrgid(cred);
3188 	psp->pr_egid = crgetgid(cred);
3189 	mutex_exit(&p->p_crlock);
3190 	psp->pr_pid = p->p_pid;
3191 	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
3192 	    (p->p_flag & SZONETOP)) {
3193 		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
3194 		/*
3195 		 * Inside local zones, fake zsched's pid as parent pids for
3196 		 * processes which reference processes outside of the zone.
3197 		 */
3198 		psp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
3199 	} else {
3200 		psp->pr_ppid = p->p_ppid;
3201 	}
3202 	psp->pr_pgid = p->p_pgrp;
3203 	psp->pr_sid = p->p_sessp->s_sid;
3204 	psp->pr_taskid = p->p_task->tk_tkid;
3205 	psp->pr_projid = p->p_task->tk_proj->kpj_id;
3206 	psp->pr_poolid = p->p_pool->pool_id;
3207 	psp->pr_zoneid = p->p_zone->zone_id;
3208 	if ((psp->pr_contract = PRCTID(p)) == 0)
3209 		psp->pr_contract = -1;
3210 	psp->pr_addr = 0;	/* cannot represent 64-bit addr in 32 bits */
3211 	switch (p->p_model) {
3212 	case DATAMODEL_ILP32:
3213 		psp->pr_dmodel = PR_MODEL_ILP32;
3214 		break;
3215 	case DATAMODEL_LP64:
3216 		psp->pr_dmodel = PR_MODEL_LP64;
3217 		break;
3218 	}
3219 	hrutime = mstate_aggr_state(p, LMS_USER);
3220 	hrstime = mstate_aggr_state(p, LMS_SYSTEM);
3221 	hrt2ts32(hrutime + hrstime, &psp->pr_time);
3222 	TICK_TO_TIMESTRUC32(p->p_cutime + p->p_cstime, &psp->pr_ctime);
3223 
3224 	if (t == NULL) {
3225 		extern int wstat(int, int);	/* needs a header file */
3226 		int wcode = p->p_wcode;		/* must be atomic read */
3227 
3228 		if (wcode)
3229 			psp->pr_wstat = wstat(wcode, p->p_wdata);
3230 		psp->pr_ttydev = PRNODEV32;
3231 		psp->pr_lwp.pr_state = SZOMB;
3232 		psp->pr_lwp.pr_sname = 'Z';
3233 	} else {
3234 		user_t *up = PTOU(p);
3235 		struct as *as;
3236 		dev_t d;
3237 		extern dev_t rwsconsdev, rconsdev, uconsdev;
3238 
3239 		d = cttydev(p);
3240 		/*
3241 		 * If the controlling terminal is the real
3242 		 * or workstation console device, map to what the
3243 		 * user thinks is the console device. Handle case when
3244 		 * rwsconsdev or rconsdev is set to NODEV for Starfire.
3245 		 */
3246 		if ((d == rwsconsdev || d == rconsdev) && d != NODEV)
3247 			d = uconsdev;
3248 		(void) cmpldev(&psp->pr_ttydev, d);
3249 		TIMESPEC_TO_TIMESPEC32(&psp->pr_start, &up->u_start);
3250 		bcopy(up->u_comm, psp->pr_fname,
3251 		    MIN(sizeof (up->u_comm), sizeof (psp->pr_fname)-1));
3252 		bcopy(up->u_psargs, psp->pr_psargs,
3253 		    MIN(PRARGSZ-1, PSARGSZ));
3254 		psp->pr_argc = up->u_argc;
3255 		psp->pr_argv = (caddr32_t)up->u_argv;
3256 		psp->pr_envp = (caddr32_t)up->u_envp;
3257 
3258 		/* get the chosen lwp's lwpsinfo */
3259 		prgetlwpsinfo32(t, &psp->pr_lwp);
3260 
3261 		/* compute %cpu for the process */
3262 		if (p->p_lwpcnt == 1)
3263 			psp->pr_pctcpu = psp->pr_lwp.pr_pctcpu;
3264 		else {
3265 			uint64_t pct = 0;
3266 			hrtime_t cur_time;
3267 
3268 			t = p->p_tlist;
3269 			cur_time = gethrtime_unscaled();
3270 			do {
3271 				pct += cpu_update_pct(t, cur_time);
3272 			} while ((t = t->t_forw) != p->p_tlist);
3273 
3274 			psp->pr_pctcpu = prgetpctcpu(pct);
3275 		}
3276 		if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) {
3277 			psp->pr_size = 0;
3278 			psp->pr_rssize = 0;
3279 		} else {
3280 			mutex_exit(&p->p_lock);
3281 			AS_LOCK_ENTER(as, RW_READER);
3282 			psp->pr_size = (size32_t)
3283 			    (btopr(as->a_resvsize) * (PAGESIZE / 1024));
3284 			psp->pr_rssize = (size32_t)
3285 			    (rm_asrss(as) * (PAGESIZE / 1024));
3286 			psp->pr_pctmem = rm_pctmemory(as);
3287 			AS_LOCK_EXIT(as);
3288 			mutex_enter(&p->p_lock);
3289 		}
3290 	}
3291 
3292 	/*
3293 	 * If we are looking at an LP64 process, zero out
3294 	 * the fields that cannot be represented in ILP32.
3295 	 */
3296 	if (p->p_model != DATAMODEL_ILP32) {
3297 		psp->pr_size = 0;
3298 		psp->pr_rssize = 0;
3299 		psp->pr_argv = 0;
3300 		psp->pr_envp = 0;
3301 	}
3302 }
3303 
3304 #endif	/* _SYSCALL32_IMPL */
3305 
3306 void
3307 prgetlwpsinfo(kthread_t *t, lwpsinfo_t *psp)
3308 {
3309 	klwp_t *lwp = ttolwp(t);
3310 	sobj_ops_t *sobj;
3311 	char c, state;
3312 	uint64_t pct;
3313 	int retval, niceval;
3314 	hrtime_t hrutime, hrstime;
3315 
3316 	ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));
3317 
3318 	bzero(psp, sizeof (*psp));
3319 
3320 	psp->pr_flag = 0;	/* lwpsinfo_t.pr_flag is deprecated */
3321 	psp->pr_lwpid = t->t_tid;
3322 	psp->pr_addr = (uintptr_t)t;
3323 	psp->pr_wchan = (uintptr_t)t->t_wchan;
3324 
3325 	/* map the thread state enum into a process state enum */
3326 	state = VSTOPPED(t) ? TS_STOPPED : t->t_state;
3327 	switch (state) {
3328 	case TS_SLEEP:		state = SSLEEP;		c = 'S';	break;
3329 	case TS_RUN:		state = SRUN;		c = 'R';	break;
3330 	case TS_ONPROC:		state = SONPROC;	c = 'O';	break;
3331 	case TS_ZOMB:		state = SZOMB;		c = 'Z';	break;
3332 	case TS_STOPPED:	state = SSTOP;		c = 'T';	break;
3333 	case TS_WAIT:		state = SWAIT;		c = 'W';	break;
3334 	default:		state = 0;		c = '?';	break;
3335 	}
3336 	psp->pr_state = state;
3337 	psp->pr_sname = c;
3338 	if ((sobj = t->t_sobj_ops) != NULL)
3339 		psp->pr_stype = SOBJ_TYPE(sobj);
3340 	retval = CL_DONICE(t, NULL, 0, &niceval);
3341 	if (retval == 0) {
3342 		psp->pr_oldpri = v.v_maxsyspri - t->t_pri;
3343 		psp->pr_nice = niceval + NZERO;
3344 	}
3345 	psp->pr_syscall = t->t_sysnum;
3346 	psp->pr_pri = t->t_pri;
3347 	psp->pr_start.tv_sec = t->t_start;
3348 	psp->pr_start.tv_nsec = 0L;
3349 	hrutime = lwp->lwp_mstate.ms_acct[LMS_USER];
3350 	scalehrtime(&hrutime);
3351 	hrstime = lwp->lwp_mstate.ms_acct[LMS_SYSTEM] +
3352 	    lwp->lwp_mstate.ms_acct[LMS_TRAP];
3353 	scalehrtime(&hrstime);
3354 	hrt2ts(hrutime + hrstime, &psp->pr_time);
3355 	/* compute %cpu for the lwp */
3356 	pct = cpu_update_pct(t, gethrtime_unscaled());
3357 	psp->pr_pctcpu = prgetpctcpu(pct);
3358 	psp->pr_cpu = (psp->pr_pctcpu*100 + 0x6000) >> 15;	/* [0..99] */
3359 	if (psp->pr_cpu > 99)
3360 		psp->pr_cpu = 99;
3361 
3362 	(void) strncpy(psp->pr_clname, sclass[t->t_cid].cl_name,
3363 	    sizeof (psp->pr_clname) - 1);
3364 	bzero(psp->pr_name, sizeof (psp->pr_name));	/* XXX ??? */
3365 	psp->pr_onpro = t->t_cpu->cpu_id;
3366 	psp->pr_bindpro = t->t_bind_cpu;
3367 	psp->pr_bindpset = t->t_bind_pset;
3368 	psp->pr_lgrp = t->t_lpl->lpl_lgrpid;
3369 }
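
/*
 * The pr_cpu computation above rescales the 0x8000-based fraction to a
 * whole percentage with rounding: pr_pctcpu == 0x2000 (25%) gives
 * (0x2000 * 100 + 0x6000) >> 15 == 25, and the final clamp keeps a
 * fully busy lwp at 99 rather than 100.
 */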
3370 
3371 #ifdef _SYSCALL32_IMPL
3372 void
3373 prgetlwpsinfo32(kthread_t *t, lwpsinfo32_t *psp)
3374 {
3375 	klwp_t *lwp = ttolwp(t);
3376 	sobj_ops_t *sobj;
3377 	char c, state;
3378 	uint64_t pct;
3379 	int retval, niceval;
3380 	hrtime_t hrutime, hrstime;
3381 
3382 	ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));
3383 
3384 	bzero(psp, sizeof (*psp));
3385 
3386 	psp->pr_flag = 0;	/* lwpsinfo_t.pr_flag is deprecated */
3387 	psp->pr_lwpid = t->t_tid;
3388 	psp->pr_addr = 0;	/* cannot represent 64-bit addr in 32 bits */
3389 	psp->pr_wchan = 0;	/* cannot represent 64-bit addr in 32 bits */
3390 
3391 	/* map the thread state enum into a process state enum */
3392 	state = VSTOPPED(t) ? TS_STOPPED : t->t_state;
3393 	switch (state) {
3394 	case TS_SLEEP:		state = SSLEEP;		c = 'S';	break;
3395 	case TS_RUN:		state = SRUN;		c = 'R';	break;
3396 	case TS_ONPROC:		state = SONPROC;	c = 'O';	break;
3397 	case TS_ZOMB:		state = SZOMB;		c = 'Z';	break;
3398 	case TS_STOPPED:	state = SSTOP;		c = 'T';	break;
3399 	case TS_WAIT:		state = SWAIT;		c = 'W';	break;
3400 	default:		state = 0;		c = '?';	break;
3401 	}
3402 	psp->pr_state = state;
3403 	psp->pr_sname = c;
3404 	if ((sobj = t->t_sobj_ops) != NULL)
3405 		psp->pr_stype = SOBJ_TYPE(sobj);
3406 	retval = CL_DONICE(t, NULL, 0, &niceval);
3407 	if (retval == 0) {
3408 		psp->pr_oldpri = v.v_maxsyspri - t->t_pri;
3409 		psp->pr_nice = niceval + NZERO;
3410 	} else {
3411 		psp->pr_oldpri = 0;
3412 		psp->pr_nice = 0;
3413 	}
3414 	psp->pr_syscall = t->t_sysnum;
3415 	psp->pr_pri = t->t_pri;
3416 	psp->pr_start.tv_sec = (time32_t)t->t_start;
3417 	psp->pr_start.tv_nsec = 0L;
3418 	hrutime = lwp->lwp_mstate.ms_acct[LMS_USER];
3419 	scalehrtime(&hrutime);
3420 	hrstime = lwp->lwp_mstate.ms_acct[LMS_SYSTEM] +
3421 	    lwp->lwp_mstate.ms_acct[LMS_TRAP];
3422 	scalehrtime(&hrstime);
3423 	hrt2ts32(hrutime + hrstime, &psp->pr_time);
3424 	/* compute %cpu for the lwp */
3425 	pct = cpu_update_pct(t, gethrtime_unscaled());
3426 	psp->pr_pctcpu = prgetpctcpu(pct);
3427 	psp->pr_cpu = (psp->pr_pctcpu*100 + 0x6000) >> 15;	/* [0..99] */
3428 	if (psp->pr_cpu > 99)
3429 		psp->pr_cpu = 99;
3430 
3431 	(void) strncpy(psp->pr_clname, sclass[t->t_cid].cl_name,
3432 	    sizeof (psp->pr_clname) - 1);
3433 	bzero(psp->pr_name, sizeof (psp->pr_name));	/* XXX ??? */
3434 	psp->pr_onpro = t->t_cpu->cpu_id;
3435 	psp->pr_bindpro = t->t_bind_cpu;
3436 	psp->pr_bindpset = t->t_bind_pset;
3437 	psp->pr_lgrp = t->t_lpl->lpl_lgrpid;
3438 }
3439 #endif	/* _SYSCALL32_IMPL */
3440 
3441 #ifdef _SYSCALL32_IMPL
3442 
3443 #define	PR_COPY_FIELD(s, d, field)	 d->field = s->field
3444 
3445 #define	PR_COPY_FIELD_ILP32(s, d, field)				\
3446 	if (s->pr_dmodel == PR_MODEL_ILP32) {			\
3447 		d->field = s->field;				\
3448 	}
3449 
3450 #define	PR_COPY_TIMESPEC(s, d, field)				\
3451 	TIMESPEC_TO_TIMESPEC32(&d->field, &s->field);
3452 
3453 #define	PR_COPY_BUF(s, d, field)				\
3454 	bcopy(s->field, d->field, sizeof (d->field));
3455 
3456 #define	PR_IGNORE_FIELD(s, d, field)
3457 
3458 void
3459 lwpsinfo_kto32(const struct lwpsinfo *src, struct lwpsinfo32 *dest)
3460 {
3461 	bzero(dest, sizeof (*dest));
3462 
3463 	PR_COPY_FIELD(src, dest, pr_flag);
3464 	PR_COPY_FIELD(src, dest, pr_lwpid);
3465 	PR_IGNORE_FIELD(src, dest, pr_addr);
3466 	PR_IGNORE_FIELD(src, dest, pr_wchan);
3467 	PR_COPY_FIELD(src, dest, pr_stype);
3468 	PR_COPY_FIELD(src, dest, pr_state);
3469 	PR_COPY_FIELD(src, dest, pr_sname);
3470 	PR_COPY_FIELD(src, dest, pr_nice);
3471 	PR_COPY_FIELD(src, dest, pr_syscall);
3472 	PR_COPY_FIELD(src, dest, pr_oldpri);
3473 	PR_COPY_FIELD(src, dest, pr_cpu);
3474 	PR_COPY_FIELD(src, dest, pr_pri);
3475 	PR_COPY_FIELD(src, dest, pr_pctcpu);
3476 	PR_COPY_TIMESPEC(src, dest, pr_start);
3477 	PR_COPY_BUF(src, dest, pr_clname);
3478 	PR_COPY_BUF(src, dest, pr_name);
3479 	PR_COPY_FIELD(src, dest, pr_onpro);
3480 	PR_COPY_FIELD(src, dest, pr_bindpro);
3481 	PR_COPY_FIELD(src, dest, pr_bindpset);
3482 	PR_COPY_FIELD(src, dest, pr_lgrp);
3483 }
3484 
3485 void
3486 psinfo_kto32(const struct psinfo *src, struct psinfo32 *dest)
3487 {
3488 	bzero(dest, sizeof (*dest));
3489 
3490 	PR_COPY_FIELD(src, dest, pr_flag);
3491 	PR_COPY_FIELD(src, dest, pr_nlwp);
3492 	PR_COPY_FIELD(src, dest, pr_pid);
3493 	PR_COPY_FIELD(src, dest, pr_ppid);
3494 	PR_COPY_FIELD(src, dest, pr_pgid);
3495 	PR_COPY_FIELD(src, dest, pr_sid);
3496 	PR_COPY_FIELD(src, dest, pr_uid);
3497 	PR_COPY_FIELD(src, dest, pr_euid);
3498 	PR_COPY_FIELD(src, dest, pr_gid);
3499 	PR_COPY_FIELD(src, dest, pr_egid);
3500 	PR_IGNORE_FIELD(src, dest, pr_addr);
3501 	PR_COPY_FIELD_ILP32(src, dest, pr_size);
3502 	PR_COPY_FIELD_ILP32(src, dest, pr_rssize);
3503 	PR_COPY_FIELD(src, dest, pr_ttydev);
3504 	PR_COPY_FIELD(src, dest, pr_pctcpu);
3505 	PR_COPY_FIELD(src, dest, pr_pctmem);
3506 	PR_COPY_TIMESPEC(src, dest, pr_start);
3507 	PR_COPY_TIMESPEC(src, dest, pr_time);
3508 	PR_COPY_TIMESPEC(src, dest, pr_ctime);
3509 	PR_COPY_BUF(src, dest, pr_fname);
3510 	PR_COPY_BUF(src, dest, pr_psargs);
3511 	PR_COPY_FIELD(src, dest, pr_wstat);
3512 	PR_COPY_FIELD(src, dest, pr_argc);
3513 	PR_COPY_FIELD_ILP32(src, dest, pr_argv);
3514 	PR_COPY_FIELD_ILP32(src, dest, pr_envp);
3515 	PR_COPY_FIELD(src, dest, pr_dmodel);
3516 	PR_COPY_FIELD(src, dest, pr_taskid);
3517 	PR_COPY_FIELD(src, dest, pr_projid);
3518 	PR_COPY_FIELD(src, dest, pr_nzomb);
3519 	PR_COPY_FIELD(src, dest, pr_poolid);
3520 	PR_COPY_FIELD(src, dest, pr_contract);
3523 
3524 	lwpsinfo_kto32(&src->pr_lwp, &dest->pr_lwp);
3525 }
3526 
3527 #undef	PR_COPY_FIELD
3528 #undef	PR_COPY_FIELD_ILP32
3529 #undef	PR_COPY_TIMESPEC
3530 #undef	PR_COPY_BUF
3531 #undef	PR_IGNORE_FIELD
3532 
3533 #endif	/* _SYSCALL32_IMPL */
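
/*
 * For reference, PR_COPY_FIELD_ILP32(src, dest, pr_size) above expands
 * to roughly
 *
 *	if (src->pr_dmodel == PR_MODEL_ILP32) {
 *		dest->pr_size = src->pr_size;
 *	}
 *
 * so sizes and pointers are propagated only when the source process is
 * itself 32-bit, matching the explicit zeroing at the end of
 * prgetpsinfo32().
 */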
3534 
3535 /*
3536  * This used to get called when microstate accounting was disabled but
3537  * microstate information was requested.  Since microstate accounting is on
3538  * regardless of the proc flags, this simply makes it appear to procfs that
3539  * microstate accounting is on.  This is relatively meaningless since you
3540  * can't turn it off, but this is here for the sake of appearances.
3541  */
3542 
3543 /*ARGSUSED*/
3544 void
3545 estimate_msacct(kthread_t *t, hrtime_t curtime)
3546 {
3547 	proc_t *p;
3548 
3549 	if (t == NULL)
3550 		return;
3551 
3552 	p = ttoproc(t);
3553 	ASSERT(MUTEX_HELD(&p->p_lock));
3554 
3555 	/*
3556 	 * A system process (p0) could be referenced if the thread is
3557 	 * in the process of exiting.  Don't turn on microstate accounting
3558 	 * in that case.
3559 	 */
3560 	if (p->p_flag & SSYS)
3561 		return;
3562 
3563 	/*
3564 	 * Loop through all the LWPs (kernel threads) in the process.
3565 	 */
3566 	t = p->p_tlist;
3567 	do {
3568 		t->t_proc_flag |= TP_MSACCT;
3569 	} while ((t = t->t_forw) != p->p_tlist);
3570 
3571 	p->p_flag |= SMSACCT;			/* set process-wide MSACCT */
3572 }
3573 
3574 /*
3575  * It's not really possible to disable microstate accounting anymore.
3576  * However, this routine simply turns off the ms accounting flags in a process.
3577  * This way, procfs can still pretend to turn microstate accounting on and
3578  * off for a process, but it actually doesn't do anything.  This is
3579  * a neutered form of preemptive idiot-proofing.
3580  */
3581 void
3582 disable_msacct(proc_t *p)
3583 {
3584 	kthread_t *t;
3585 
3586 	ASSERT(MUTEX_HELD(&p->p_lock));
3587 
3588 	p->p_flag &= ~SMSACCT;		/* clear process-wide MSACCT */
3589 	/*
3590 	 * Loop through all the LWPs (kernel threads) in the process.
3591 	 */
3592 	if ((t = p->p_tlist) != NULL) {
3593 		do {
3594 			/* clear per-thread flag */
3595 			t->t_proc_flag &= ~TP_MSACCT;
3596 		} while ((t = t->t_forw) != p->p_tlist);
3597 	}
3598 }
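
/*
 * Both routines above use the LWP-walk idiom that recurs throughout this
 * file: the kernel threads of a process form a circular list headed by
 * p->p_tlist and linked through t_forw, so a full traversal (a sketch;
 * p->p_lock must be held, as asserted above) looks like:
 *
 *	kthread_t *t;
 *
 *	if ((t = p->p_tlist) != NULL) {
 *		do {
 *			(visit t)
 *		} while ((t = t->t_forw) != p->p_tlist);
 *	}
 */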
3599 
3600 /*
3601  * Return resource usage information.
3602  */
3603 void
3604 prgetusage(kthread_t *t, prhusage_t *pup)
3605 {
3606 	klwp_t *lwp = ttolwp(t);
3607 	hrtime_t *mstimep;
3608 	struct mstate *ms = &lwp->lwp_mstate;
3609 	int state;
3610 	int i;
3611 	hrtime_t curtime;
3612 	hrtime_t waitrq;
3613 	hrtime_t tmp1;
3614 
3615 	curtime = gethrtime_unscaled();
3616 
3617 	pup->pr_lwpid	= t->t_tid;
3618 	pup->pr_count	= 1;
3619 	pup->pr_create	= ms->ms_start;
3620 	pup->pr_term    = ms->ms_term;
3621 	scalehrtime(&pup->pr_create);
3622 	scalehrtime(&pup->pr_term);
3623 	if (ms->ms_term == 0) {
3624 		pup->pr_rtime = curtime - ms->ms_start;
3625 		scalehrtime(&pup->pr_rtime);
3626 	} else {
3627 		pup->pr_rtime = ms->ms_term - ms->ms_start;
3628 		scalehrtime(&pup->pr_rtime);
3629 	}
3630 
3631 
3632 	pup->pr_utime    = ms->ms_acct[LMS_USER];
3633 	pup->pr_stime    = ms->ms_acct[LMS_SYSTEM];
3634 	pup->pr_ttime    = ms->ms_acct[LMS_TRAP];
3635 	pup->pr_tftime   = ms->ms_acct[LMS_TFAULT];
3636 	pup->pr_dftime   = ms->ms_acct[LMS_DFAULT];
3637 	pup->pr_kftime   = ms->ms_acct[LMS_KFAULT];
3638 	pup->pr_ltime    = ms->ms_acct[LMS_USER_LOCK];
3639 	pup->pr_slptime  = ms->ms_acct[LMS_SLEEP];
3640 	pup->pr_wtime    = ms->ms_acct[LMS_WAIT_CPU];
3641 	pup->pr_stoptime = ms->ms_acct[LMS_STOPPED];
3642 
3643 	prscaleusage(pup);
3644 
3645 	/*
3646 	 * Adjust for time waiting in the dispatcher queue.
3647 	 */
3648 	waitrq = t->t_waitrq;	/* hopefully atomic */
3649 	if (waitrq != 0) {
3650 		if (waitrq > curtime) {
3651 			curtime = gethrtime_unscaled();
3652 		}
3653 		tmp1 = curtime - waitrq;
3654 		scalehrtime(&tmp1);
3655 		pup->pr_wtime += tmp1;
3656 		curtime = waitrq;
3657 	}
3658 
3659 	/*
3660 	 * Adjust for time spent in current microstate.
3661 	 */
3662 	if (ms->ms_state_start > curtime) {
3663 		curtime = gethrtime_unscaled();
3664 	}
3665 
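	/*
	 * The retry loop below guards against cross-CPU races on unscaled
	 * hrtime values: ms_state_start (like t_waitrq above) may be
	 * written from another CPU and so can appear to lie in the future
	 * of our curtime snapshot, yielding a negative delta.  In that
	 * case we take a fresh snapshot and retry, giving up after
	 * MAX_ITERS_SPIN (5) attempts.
	 */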
3666 	i = 0;
3667 	do {
3668 		switch (state = t->t_mstate) {
3669 		case LMS_SLEEP:
3670 			/*
3671 			 * Update the timer for the current sleep state.
3672 			 */
3673 			switch (state = ms->ms_prev) {
3674 			case LMS_TFAULT:
3675 			case LMS_DFAULT:
3676 			case LMS_KFAULT:
3677 			case LMS_USER_LOCK:
3678 				break;
3679 			default:
3680 				state = LMS_SLEEP;
3681 				break;
3682 			}
3683 			break;
3684 		case LMS_TFAULT:
3685 		case LMS_DFAULT:
3686 		case LMS_KFAULT:
3687 		case LMS_USER_LOCK:
3688 			state = LMS_SYSTEM;
3689 			break;
3690 		}
3691 		switch (state) {
3692 		case LMS_USER:		mstimep = &pup->pr_utime;	break;
3693 		case LMS_SYSTEM:	mstimep = &pup->pr_stime;	break;
3694 		case LMS_TRAP:		mstimep = &pup->pr_ttime;	break;
3695 		case LMS_TFAULT:	mstimep = &pup->pr_tftime;	break;
3696 		case LMS_DFAULT:	mstimep = &pup->pr_dftime;	break;
3697 		case LMS_KFAULT:	mstimep = &pup->pr_kftime;	break;
3698 		case LMS_USER_LOCK:	mstimep = &pup->pr_ltime;	break;
3699 		case LMS_SLEEP:		mstimep = &pup->pr_slptime;	break;
3700 		case LMS_WAIT_CPU:	mstimep = &pup->pr_wtime;	break;
3701 		case LMS_STOPPED:	mstimep = &pup->pr_stoptime;	break;
3702 		default:		panic("prgetusage: unknown microstate");
3703 		}
3704 		tmp1 = curtime - ms->ms_state_start;
3705 		if (tmp1 < 0) {
3706 			curtime = gethrtime_unscaled();
3707 			i++;
3708 			continue;
3709 		}
3710 		scalehrtime(&tmp1);
3711 	} while (tmp1 < 0 && i < MAX_ITERS_SPIN);
3712 
3713 	*mstimep += tmp1;
3714 
3715 	/* update pup timestamp */
3716 	pup->pr_tstamp = curtime;
3717 	scalehrtime(&pup->pr_tstamp);
3718 
3719 	/*
3720 	 * Resource usage counters.
3721 	 */
3722 	pup->pr_minf  = lwp->lwp_ru.minflt;
3723 	pup->pr_majf  = lwp->lwp_ru.majflt;
3724 	pup->pr_nswap = lwp->lwp_ru.nswap;
3725 	pup->pr_inblk = lwp->lwp_ru.inblock;
3726 	pup->pr_oublk = lwp->lwp_ru.oublock;
3727 	pup->pr_msnd  = lwp->lwp_ru.msgsnd;
3728 	pup->pr_mrcv  = lwp->lwp_ru.msgrcv;
3729 	pup->pr_sigs  = lwp->lwp_ru.nsignals;
3730 	pup->pr_vctx  = lwp->lwp_ru.nvcsw;
3731 	pup->pr_ictx  = lwp->lwp_ru.nivcsw;
3732 	pup->pr_sysc  = lwp->lwp_ru.sysc;
3733 	pup->pr_ioch  = lwp->lwp_ru.ioch;
3734 }
3735 
3736 /*
3737  * Convert ms_acct stats from unscaled high-res time to nanoseconds
3738  */
3739 void
3740 prscaleusage(prhusage_t *usg)
3741 {
3742 	scalehrtime(&usg->pr_utime);
3743 	scalehrtime(&usg->pr_stime);
3744 	scalehrtime(&usg->pr_ttime);
3745 	scalehrtime(&usg->pr_tftime);
3746 	scalehrtime(&usg->pr_dftime);
3747 	scalehrtime(&usg->pr_kftime);
3748 	scalehrtime(&usg->pr_ltime);
3749 	scalehrtime(&usg->pr_slptime);
3750 	scalehrtime(&usg->pr_wtime);
3751 	scalehrtime(&usg->pr_stoptime);
3752 }
3753 
3754 
3755 /*
3756  * Sum resource usage information.
3757  */
3758 void
3759 praddusage(kthread_t *t, prhusage_t *pup)
3760 {
3761 	klwp_t *lwp = ttolwp(t);
3762 	hrtime_t *mstimep;
3763 	struct mstate *ms = &lwp->lwp_mstate;
3764 	int state;
3765 	int i;
3766 	hrtime_t curtime;
3767 	hrtime_t waitrq;
3768 	hrtime_t tmp;
3769 	prhusage_t conv;
3770 
3771 	curtime = gethrtime_unscaled();
3772 
3773 	if (ms->ms_term == 0) {
3774 		tmp = curtime - ms->ms_start;
3775 		scalehrtime(&tmp);
3776 		pup->pr_rtime += tmp;
3777 	} else {
3778 		tmp = ms->ms_term - ms->ms_start;
3779 		scalehrtime(&tmp);
3780 		pup->pr_rtime += tmp;
3781 	}
3782 
3783 	conv.pr_utime = ms->ms_acct[LMS_USER];
3784 	conv.pr_stime = ms->ms_acct[LMS_SYSTEM];
3785 	conv.pr_ttime = ms->ms_acct[LMS_TRAP];
3786 	conv.pr_tftime = ms->ms_acct[LMS_TFAULT];
3787 	conv.pr_dftime = ms->ms_acct[LMS_DFAULT];
3788 	conv.pr_kftime = ms->ms_acct[LMS_KFAULT];
3789 	conv.pr_ltime = ms->ms_acct[LMS_USER_LOCK];
3790 	conv.pr_slptime = ms->ms_acct[LMS_SLEEP];
3791 	conv.pr_wtime = ms->ms_acct[LMS_WAIT_CPU];
3792 	conv.pr_stoptime = ms->ms_acct[LMS_STOPPED];
3793 
3794 	prscaleusage(&conv);
3795 
3796 	pup->pr_utime	+= conv.pr_utime;
3797 	pup->pr_stime	+= conv.pr_stime;
3798 	pup->pr_ttime	+= conv.pr_ttime;
3799 	pup->pr_tftime	+= conv.pr_tftime;
3800 	pup->pr_dftime	+= conv.pr_dftime;
3801 	pup->pr_kftime	+= conv.pr_kftime;
3802 	pup->pr_ltime	+= conv.pr_ltime;
3803 	pup->pr_slptime	+= conv.pr_slptime;
3804 	pup->pr_wtime	+= conv.pr_wtime;
3805 	pup->pr_stoptime += conv.pr_stoptime;
3806 
3807 	/*
3808 	 * Adjust for time waiting in the dispatcher queue.
3809 	 */
3810 	waitrq = t->t_waitrq;	/* hopefully atomic */
3811 	if (waitrq != 0) {
3812 		if (waitrq > curtime) {
3813 			curtime = gethrtime_unscaled();
3814 		}
3815 		tmp = curtime - waitrq;
3816 		scalehrtime(&tmp);
3817 		pup->pr_wtime += tmp;
3818 		curtime = waitrq;
3819 	}
3820 
3821 	/*
3822 	 * Adjust for time spent in current microstate.
3823 	 */
3824 	if (ms->ms_state_start > curtime) {
3825 		curtime = gethrtime_unscaled();
3826 	}
3827 
3828 	i = 0;
3829 	do {
3830 		switch (state = t->t_mstate) {
3831 		case LMS_SLEEP:
3832 			/*
3833 			 * Update the timer for the current sleep state.
3834 			 */
3835 			switch (state = ms->ms_prev) {
3836 			case LMS_TFAULT:
3837 			case LMS_DFAULT:
3838 			case LMS_KFAULT:
3839 			case LMS_USER_LOCK:
3840 				break;
3841 			default:
3842 				state = LMS_SLEEP;
3843 				break;
3844 			}
3845 			break;
3846 		case LMS_TFAULT:
3847 		case LMS_DFAULT:
3848 		case LMS_KFAULT:
3849 		case LMS_USER_LOCK:
3850 			state = LMS_SYSTEM;
3851 			break;
3852 		}
3853 		switch (state) {
3854 		case LMS_USER:		mstimep = &pup->pr_utime;	break;
3855 		case LMS_SYSTEM:	mstimep = &pup->pr_stime;	break;
3856 		case LMS_TRAP:		mstimep = &pup->pr_ttime;	break;
3857 		case LMS_TFAULT:	mstimep = &pup->pr_tftime;	break;
3858 		case LMS_DFAULT:	mstimep = &pup->pr_dftime;	break;
3859 		case LMS_KFAULT:	mstimep = &pup->pr_kftime;	break;
3860 		case LMS_USER_LOCK:	mstimep = &pup->pr_ltime;	break;
3861 		case LMS_SLEEP:		mstimep = &pup->pr_slptime;	break;
3862 		case LMS_WAIT_CPU:	mstimep = &pup->pr_wtime;	break;
3863 		case LMS_STOPPED:	mstimep = &pup->pr_stoptime;	break;
3864 		default:		panic("praddusage: unknown microstate");
3865 		}
3866 		tmp = curtime - ms->ms_state_start;
3867 		if (tmp < 0) {
3868 			curtime = gethrtime_unscaled();
3869 			i++;
3870 			continue;
3871 		}
3872 		scalehrtime(&tmp);
3873 	} while (tmp < 0 && i < MAX_ITERS_SPIN);
3874 
3875 	*mstimep += tmp;
3876 
3877 	/* update pup timestamp */
3878 	pup->pr_tstamp = curtime;
3879 	scalehrtime(&pup->pr_tstamp);
3880 
3881 	/*
3882 	 * Resource usage counters.
3883 	 */
3884 	pup->pr_minf  += lwp->lwp_ru.minflt;
3885 	pup->pr_majf  += lwp->lwp_ru.majflt;
3886 	pup->pr_nswap += lwp->lwp_ru.nswap;
3887 	pup->pr_inblk += lwp->lwp_ru.inblock;
3888 	pup->pr_oublk += lwp->lwp_ru.oublock;
3889 	pup->pr_msnd  += lwp->lwp_ru.msgsnd;
3890 	pup->pr_mrcv  += lwp->lwp_ru.msgrcv;
3891 	pup->pr_sigs  += lwp->lwp_ru.nsignals;
3892 	pup->pr_vctx  += lwp->lwp_ru.nvcsw;
3893 	pup->pr_ictx  += lwp->lwp_ru.nivcsw;
3894 	pup->pr_sysc  += lwp->lwp_ru.sysc;
3895 	pup->pr_ioch  += lwp->lwp_ru.ioch;
3896 }
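
/*
 * Taken together, prgetusage() and praddusage() let a caller aggregate
 * usage across the LWPs of a process: initialize a prhusage_t from one
 * LWP with prgetusage(), then fold in the rest with praddusage().  A
 * minimal sketch (assuming p->p_lock is held so the LWP list is stable;
 * callers reporting the aggregate also set pr_count to the number of
 * contributing LWPs):
 *
 *	prhusage_t hu;
 *	kthread_t *t = p->p_tlist;
 *
 *	bzero(&hu, sizeof (hu));
 *	prgetusage(t, &hu);
 *	while ((t = t->t_forw) != p->p_tlist)
 *		praddusage(t, &hu);
 *	hu.pr_count = p->p_lwpcnt;
 */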
3897 
3898 /*
3899  * Convert a prhusage_t to a prusage_t.
3900  * This means convert each hrtime_t to a timestruc_t
3901  * and copy the count fields uint64_t => ulong_t.
3902  */
3903 void
3904 prcvtusage(prhusage_t *pup, prusage_t *upup)
3905 {
3906 	uint64_t *ullp;
3907 	ulong_t *ulp;
3908 	int i;
3909 
3910 	upup->pr_lwpid = pup->pr_lwpid;
3911 	upup->pr_count = pup->pr_count;
3912 
3913 	hrt2ts(pup->pr_tstamp,	&upup->pr_tstamp);
3914 	hrt2ts(pup->pr_create,	&upup->pr_create);
3915 	hrt2ts(pup->pr_term,	&upup->pr_term);
3916 	hrt2ts(pup->pr_rtime,	&upup->pr_rtime);
3917 	hrt2ts(pup->pr_utime,	&upup->pr_utime);
3918 	hrt2ts(pup->pr_stime,	&upup->pr_stime);
3919 	hrt2ts(pup->pr_ttime,	&upup->pr_ttime);
3920 	hrt2ts(pup->pr_tftime,	&upup->pr_tftime);
3921 	hrt2ts(pup->pr_dftime,	&upup->pr_dftime);
3922 	hrt2ts(pup->pr_kftime,	&upup->pr_kftime);
3923 	hrt2ts(pup->pr_ltime,	&upup->pr_ltime);
3924 	hrt2ts(pup->pr_slptime,	&upup->pr_slptime);
3925 	hrt2ts(pup->pr_wtime,	&upup->pr_wtime);
3926 	hrt2ts(pup->pr_stoptime, &upup->pr_stoptime);
3927 	bzero(upup->filltime, sizeof (upup->filltime));
3928 
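	/*
	 * The loop below copies 22 consecutive uint64_t fields starting at
	 * pr_minf (the event counters pr_minf through pr_ioch plus the
	 * trailing filler), relying on their contiguous layout in
	 * prhusage_t.  prcvtusage32() below makes the same assumption.
	 */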
3929 	ullp = &pup->pr_minf;
3930 	ulp = &upup->pr_minf;
3931 	for (i = 0; i < 22; i++)
3932 		*ulp++ = (ulong_t)*ullp++;
3933 }
3934 
3935 #ifdef _SYSCALL32_IMPL
3936 void
3937 prcvtusage32(prhusage_t *pup, prusage32_t *upup)
3938 {
3939 	uint64_t *ullp;
3940 	uint32_t *ulp;
3941 	int i;
3942 
3943 	upup->pr_lwpid = pup->pr_lwpid;
3944 	upup->pr_count = pup->pr_count;
3945 
3946 	hrt2ts32(pup->pr_tstamp,	&upup->pr_tstamp);
3947 	hrt2ts32(pup->pr_create,	&upup->pr_create);
3948 	hrt2ts32(pup->pr_term,		&upup->pr_term);
3949 	hrt2ts32(pup->pr_rtime,		&upup->pr_rtime);
3950 	hrt2ts32(pup->pr_utime,		&upup->pr_utime);
3951 	hrt2ts32(pup->pr_stime,		&upup->pr_stime);
3952 	hrt2ts32(pup->pr_ttime,		&upup->pr_ttime);
3953 	hrt2ts32(pup->pr_tftime,	&upup->pr_tftime);
3954 	hrt2ts32(pup->pr_dftime,	&upup->pr_dftime);
3955 	hrt2ts32(pup->pr_kftime,	&upup->pr_kftime);
3956 	hrt2ts32(pup->pr_ltime,		&upup->pr_ltime);
3957 	hrt2ts32(pup->pr_slptime,	&upup->pr_slptime);
3958 	hrt2ts32(pup->pr_wtime,		&upup->pr_wtime);
3959 	hrt2ts32(pup->pr_stoptime,	&upup->pr_stoptime);
3960 	bzero(upup->filltime, sizeof (upup->filltime));
3961 
3962 	ullp = &pup->pr_minf;
3963 	ulp = &upup->pr_minf;
3964 	for (i = 0; i < 22; i++)
3965 		*ulp++ = (uint32_t)*ullp++;
3966 }
3967 #endif	/* _SYSCALL32_IMPL */
3968 
3969 /*
3970  * Determine whether a set is empty.
3971  */
3972 int
3973 setisempty(uint32_t *sp, uint_t n)
3974 {
3975 	while (n--)
3976 		if (*sp++)
3977 			return (0);
3978 	return (1);
3979 }
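
/*
 * For example, a fixed-size bit set can be tested for emptiness by
 * viewing it as an array of 32-bit words (a sketch; the k_sigset_t here
 * is illustrative, not a specific caller):
 *
 *	k_sigset_t set;
 *
 *	if (setisempty((uint32_t *)&set, sizeof (set) / sizeof (uint32_t)))
 *		return;		(no bits set)
 */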
3980 
3981 /*
3982  * Utility routine for establishing a watched area in the process.
3983  * Keep the list of watched areas sorted by virtual address.
3984  */
3985 int
3986 set_watched_area(proc_t *p, struct watched_area *pwa)
3987 {
3988 	caddr_t vaddr = pwa->wa_vaddr;
3989 	caddr_t eaddr = pwa->wa_eaddr;
3990 	ulong_t flags = pwa->wa_flags;
3991 	struct watched_area *target;
3992 	avl_index_t where;
3993 	int error = 0;
3994 
3995 	/* we must not be holding p->p_lock, but the process must be locked */
3996 	ASSERT(MUTEX_NOT_HELD(&p->p_lock));
3997 	ASSERT(p->p_proc_flag & P_PR_LOCK);
3998 
3999 	/*
4000 	 * If this is our first watchpoint, enable watchpoints for the process.
4001 	 */
4002 	if (!pr_watch_active(p)) {
4003 		kthread_t *t;
4004 
4005 		mutex_enter(&p->p_lock);
4006 		if ((t = p->p_tlist) != NULL) {
4007 			do {
4008 				watch_enable(t);
4009 			} while ((t = t->t_forw) != p->p_tlist);
4010 		}
4011 		mutex_exit(&p->p_lock);
4012 	}
4013 
4014 	target = pr_find_watched_area(p, pwa, &where);
4015 	if (target != NULL) {
4016 		/*
4017 		 * We discovered an existing, overlapping watched area.
4018 		 * Allow it only if it is an exact match.
4019 		 */
4020 		if (target->wa_vaddr != vaddr ||
4021 		    target->wa_eaddr != eaddr)
4022 			error = EINVAL;
4023 		else if (target->wa_flags != flags) {
4024 			error = set_watched_page(p, vaddr, eaddr,
4025 			    flags, target->wa_flags);
4026 			target->wa_flags = flags;
4027 		}
4028 		kmem_free(pwa, sizeof (struct watched_area));
4029 	} else {
4030 		avl_insert(&p->p_warea, pwa, where);
4031 		error = set_watched_page(p, vaddr, eaddr, flags, 0);
4032 	}
4033 
4034 	return (error);
4035 }
4036 
4037 /*
4038  * Utility routine for clearing a watched area in the process.
4039  * Must be an exact match of the virtual address.
4040  * size and flags don't matter.
4041  */
4042 int
4043 clear_watched_area(proc_t *p, struct watched_area *pwa)
4044 {
4045 	struct watched_area *found;
4046 
4047 	/* we must not be holding p->p_lock, but the process must be locked */
4048 	ASSERT(MUTEX_NOT_HELD(&p->p_lock));
4049 	ASSERT(p->p_proc_flag & P_PR_LOCK);
4050 
4051 
4052 	if (!pr_watch_active(p)) {
4053 		kmem_free(pwa, sizeof (struct watched_area));
4054 		return (0);
4055 	}
4056 
4057 	/*
4058 	 * Look for a matching address in the watched areas.  If a match is
4059 	 * found, clear the old watched area and adjust the watched page(s).  It
4060 	 * is not an error if there is no match.
4061 	 */
4062 	if ((found = pr_find_watched_area(p, pwa, NULL)) != NULL &&
4063 	    found->wa_vaddr == pwa->wa_vaddr) {
4064 		clear_watched_page(p, found->wa_vaddr, found->wa_eaddr,
4065 		    found->wa_flags);
4066 		avl_remove(&p->p_warea, found);
4067 		kmem_free(found, sizeof (struct watched_area));
4068 	}
4069 
4070 	kmem_free(pwa, sizeof (struct watched_area));
4071 
4072 	/*
4073 	 * If we removed the last watched area from the process, disable
4074 	 * watchpoints.
4075 	 */
4076 	if (!pr_watch_active(p)) {
4077 		kthread_t *t;
4078 
4079 		mutex_enter(&p->p_lock);
4080 		if ((t = p->p_tlist) != NULL) {
4081 			do {
4082 				watch_disable(t);
4083 			} while ((t = t->t_forw) != p->p_tlist);
4084 		}
4085 		mutex_exit(&p->p_lock);
4086 	}
4087 
4088 	return (0);
4089 }
4090 
4091 /*
4092  * Frees all the watched_area structures
4093  */
4094 void
4095 pr_free_watchpoints(proc_t *p)
4096 {
4097 	struct watched_area *delp;
4098 	void *cookie;
4099 
4100 	cookie = NULL;
4101 	while ((delp = avl_destroy_nodes(&p->p_warea, &cookie)) != NULL)
4102 		kmem_free(delp, sizeof (struct watched_area));
4103 
4104 	avl_destroy(&p->p_warea);
4105 }
4106 
4107 /*
4108  * This one is called by the traced process to unwatch all the
4109  * pages while deallocating the list of watched_page structs.
4110  */
4111 void
4112 pr_free_watched_pages(proc_t *p)
4113 {
4114 	struct as *as = p->p_as;
4115 	struct watched_page *pwp;
4116 	uint_t prot;
4117 	int    retrycnt, err;
4118 	void *cookie;
4119 
4120 	if (as == NULL || avl_numnodes(&as->a_wpage) == 0)
4121 		return;
4122 
4123 	ASSERT(MUTEX_NOT_HELD(&curproc->p_lock));
4124 	AS_LOCK_ENTER(as, RW_WRITER);
4125 
4126 	pwp = avl_first(&as->a_wpage);
4127 
4128 	cookie = NULL;
4129 	while ((pwp = avl_destroy_nodes(&as->a_wpage, &cookie)) != NULL) {
4130 		retrycnt = 0;
4131 		if ((prot = pwp->wp_oprot) != 0) {
4132 			caddr_t addr = pwp->wp_vaddr;
4133 			struct seg *seg;
4134 		retry:
4135 
4136 			if ((pwp->wp_prot != prot ||
4137 			    (pwp->wp_flags & WP_NOWATCH)) &&
4138 			    (seg = as_segat(as, addr)) != NULL) {
4139 				err = SEGOP_SETPROT(seg, addr, PAGESIZE, prot);
4140 				if (err == IE_RETRY) {
4141 					ASSERT(retrycnt == 0);
4142 					retrycnt++;
4143 					goto retry;
4144 				}
4145 			}
4146 		}
4147 		kmem_free(pwp, sizeof (struct watched_page));
4148 	}
4149 
4150 	avl_destroy(&as->a_wpage);
4151 	p->p_wprot = NULL;
4152 
4153 	AS_LOCK_EXIT(as);
4154 }
4155 
4156 /*
4157  * Insert a watched area into the list of watched pages.
4158  * If oflags is zero then we are adding a new watched area.
4159  * Otherwise we are changing the flags of an existing watched area.
4160  */
4161 static int
4162 set_watched_page(proc_t *p, caddr_t vaddr, caddr_t eaddr,
4163     ulong_t flags, ulong_t oflags)
4164 {
4165 	struct as *as = p->p_as;
4166 	avl_tree_t *pwp_tree;
4167 	struct watched_page *pwp, *newpwp;
4168 	struct watched_page tpw;
4169 	avl_index_t where;
4170 	struct seg *seg;
4171 	uint_t prot;
4172 	caddr_t addr;
4173 
4174 	/*
4175 	 * We need to pre-allocate a list of structures before we grab the
4176 	 * address space lock to avoid calling kmem_alloc(KM_SLEEP) with locks
4177 	 * held.
4178 	 */
4179 	newpwp = NULL;
4180 	for (addr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
4181 	    addr < eaddr; addr += PAGESIZE) {
4182 		pwp = kmem_zalloc(sizeof (struct watched_page), KM_SLEEP);
4183 		pwp->wp_list = newpwp;
4184 		newpwp = pwp;
4185 	}
4186 
4187 	AS_LOCK_ENTER(as, RW_WRITER);
4188 
4189 	/*
4190 	 * Search for an existing watched page to contain the watched area.
4191 	 * If none is found, grab a new one from the available list
4192 	 * and insert it in the active list, keeping the list sorted
4193 	 * by user-level virtual address.
4194 	 */
4195 	if (p->p_flag & SVFWAIT)
4196 		pwp_tree = &p->p_wpage;
4197 	else
4198 		pwp_tree = &as->a_wpage;
4199 
4200 again:
4201 	if (avl_numnodes(pwp_tree) > prnwatch) {
4202 		AS_LOCK_EXIT(as);
4203 		while (newpwp != NULL) {
4204 			pwp = newpwp->wp_list;
4205 			kmem_free(newpwp, sizeof (struct watched_page));
4206 			newpwp = pwp;
4207 		}
4208 		return (E2BIG);
4209 	}
4210 
4211 	tpw.wp_vaddr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
4212 	if ((pwp = avl_find(pwp_tree, &tpw, &where)) == NULL) {
4213 		pwp = newpwp;
4214 		newpwp = newpwp->wp_list;
4215 		pwp->wp_list = NULL;
4216 		pwp->wp_vaddr = (caddr_t)((uintptr_t)vaddr &
4217 		    (uintptr_t)PAGEMASK);
4218 		avl_insert(pwp_tree, pwp, where);
4219 	}
4220 
4221 	ASSERT(vaddr >= pwp->wp_vaddr && vaddr < pwp->wp_vaddr + PAGESIZE);
4222 
4223 	if (oflags & WA_READ)
4224 		pwp->wp_read--;
4225 	if (oflags & WA_WRITE)
4226 		pwp->wp_write--;
4227 	if (oflags & WA_EXEC)
4228 		pwp->wp_exec--;
4229 
4230 	ASSERT(pwp->wp_read >= 0);
4231 	ASSERT(pwp->wp_write >= 0);
4232 	ASSERT(pwp->wp_exec >= 0);
4233 
4234 	if (flags & WA_READ)
4235 		pwp->wp_read++;
4236 	if (flags & WA_WRITE)
4237 		pwp->wp_write++;
4238 	if (flags & WA_EXEC)
4239 		pwp->wp_exec++;
4240 
4241 	if (!(p->p_flag & SVFWAIT)) {
4242 		vaddr = pwp->wp_vaddr;
4243 		if (pwp->wp_oprot == 0 &&
4244 		    (seg = as_segat(as, vaddr)) != NULL) {
4245 			SEGOP_GETPROT(seg, vaddr, 0, &prot);
4246 			pwp->wp_oprot = (uchar_t)prot;
4247 			pwp->wp_prot = (uchar_t)prot;
4248 		}
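		/*
		 * Compute the watched protections from the original ones:
		 * a read watchpoint must fault on any access, so it strips
		 * read, write and exec permission; a write watchpoint
		 * strips only PROT_WRITE; an exec watchpoint likewise
		 * strips everything.  For example, a PROT_READ|PROT_WRITE
		 * page under a WA_WRITE watchpoint is mapped read-only so
		 * that stores fault and can be reported as watchpoint
		 * traps.
		 */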
4249 		if (pwp->wp_oprot != 0) {
4250 			prot = pwp->wp_oprot;
4251 			if (pwp->wp_read)
4252 				prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
4253 			if (pwp->wp_write)
4254 				prot &= ~PROT_WRITE;
4255 			if (pwp->wp_exec)
4256 				prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
4257 			if (!(pwp->wp_flags & WP_NOWATCH) &&
4258 			    pwp->wp_prot != prot &&
4259 			    (pwp->wp_flags & WP_SETPROT) == 0) {
4260 				pwp->wp_flags |= WP_SETPROT;
4261 				pwp->wp_list = p->p_wprot;
4262 				p->p_wprot = pwp;
4263 			}
4264 			pwp->wp_prot = (uchar_t)prot;
4265 		}
4266 	}
4267 
4268 	/*
4269 	 * If the watched area extends into the next page then do
4270 	 * it over again with the virtual address of the next page.
4271 	 */
4272 	if ((vaddr = pwp->wp_vaddr + PAGESIZE) < eaddr)
4273 		goto again;
4274 
4275 	AS_LOCK_EXIT(as);
4276 
4277 	/*
4278 	 * Free any pages we may have over-allocated
4279 	 */
4280 	while (newpwp != NULL) {
4281 		pwp = newpwp->wp_list;
4282 		kmem_free(newpwp, sizeof (struct watched_page));
4283 		newpwp = pwp;
4284 	}
4285 
4286 	return (0);
4287 }
4288 
4289 /*
4290  * Remove a watched area from the list of watched pages.
4291  * A watched area may extend over more than one page.
4292  */
4293 static void
4294 clear_watched_page(proc_t *p, caddr_t vaddr, caddr_t eaddr, ulong_t flags)
4295 {
4296 	struct as *as = p->p_as;
4297 	struct watched_page *pwp;
4298 	struct watched_page tpw;
4299 	avl_tree_t *tree;
4300 	avl_index_t where;
4301 
4302 	AS_LOCK_ENTER(as, RW_WRITER);
4303 
4304 	if (p->p_flag & SVFWAIT)
4305 		tree = &p->p_wpage;
4306 	else
4307 		tree = &as->a_wpage;
4308 
4309 	tpw.wp_vaddr = vaddr =
4310 	    (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
4311 	pwp = avl_find(tree, &tpw, &where);
4312 	if (pwp == NULL)
4313 		pwp = avl_nearest(tree, where, AVL_AFTER);
4314 
4315 	while (pwp != NULL && pwp->wp_vaddr < eaddr) {
4316 		ASSERT(vaddr <= pwp->wp_vaddr);
4317 
4318 		if (flags & WA_READ)
4319 			pwp->wp_read--;
4320 		if (flags & WA_WRITE)
4321 			pwp->wp_write--;
4322 		if (flags & WA_EXEC)
4323 			pwp->wp_exec--;
4324 
4325 		if (pwp->wp_read + pwp->wp_write + pwp->wp_exec != 0) {
4326 			/*
4327 			 * Reset the hat layer's protections on this page.
4328 			 */
4329 			if (pwp->wp_oprot != 0) {
4330 				uint_t prot = pwp->wp_oprot;
4331 
4332 				if (pwp->wp_read)
4333 					prot &=
4334 					    ~(PROT_READ|PROT_WRITE|PROT_EXEC);
4335 				if (pwp->wp_write)
4336 					prot &= ~PROT_WRITE;
4337 				if (pwp->wp_exec)
4338 					prot &=
4339 					    ~(PROT_READ|PROT_WRITE|PROT_EXEC);
4340 				if (!(pwp->wp_flags & WP_NOWATCH) &&
4341 				    pwp->wp_prot != prot &&
4342 				    (pwp->wp_flags & WP_SETPROT) == 0) {
4343 					pwp->wp_flags |= WP_SETPROT;
4344 					pwp->wp_list = p->p_wprot;
4345 					p->p_wprot = pwp;
4346 				}
4347 				pwp->wp_prot = (uchar_t)prot;
4348 			}
4349 		} else {
4350 			/*
4351 			 * No watched areas remain in this page.
4352 			 * Reset everything to normal.
4353 			 */
4354 			if (pwp->wp_oprot != 0) {
4355 				pwp->wp_prot = pwp->wp_oprot;
4356 				if ((pwp->wp_flags & WP_SETPROT) == 0) {
4357 					pwp->wp_flags |= WP_SETPROT;
4358 					pwp->wp_list = p->p_wprot;
4359 					p->p_wprot = pwp;
4360 				}
4361 			}
4362 		}
4363 
4364 		pwp = AVL_NEXT(tree, pwp);
4365 	}
4366 
4367 	AS_LOCK_EXIT(as);
4368 }
4369 
4370 /*
4371  * Return the original protections for the specified page.
4372  */
4373 static void
4374 getwatchprot(struct as *as, caddr_t addr, uint_t *prot)
4375 {
4376 	struct watched_page *pwp;
4377 	struct watched_page tpw;
4378 
4379 	ASSERT(AS_LOCK_HELD(as));
4380 
4381 	tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
4382 	if ((pwp = avl_find(&as->a_wpage, &tpw, NULL)) != NULL)
4383 		*prot = pwp->wp_oprot;
4384 }
4385 
4386 static prpagev_t *
4387 pr_pagev_create(struct seg *seg, int check_noreserve)
4388 {
4389 	prpagev_t *pagev = kmem_alloc(sizeof (prpagev_t), KM_SLEEP);
4390 	size_t total_pages = seg_pages(seg);
4391 
4392 	/*
4393 	 * Limit the size of our vectors to pagev_lim pages at a time.  We need
4394 	 * 4 or 5 bytes of storage per page, so this means we limit ourselves
4395 	 * to about a megabyte of kernel heap by default.
4396 	 */
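	/*
	 * (The "4 or 5 bytes" are sizeof (uint_t) per pg_protv entry plus,
	 * when check_noreserve is set, one pg_incore byte per page; a
	 * megabyte of heap thus corresponds to pagev_lim, defined earlier
	 * in this file, being on the order of a quarter-million pages.)
	 */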
4397 	pagev->pg_npages = MIN(total_pages, pagev_lim);
4398 	pagev->pg_pnbase = 0;
4399 
4400 	pagev->pg_protv =
4401 	    kmem_alloc(pagev->pg_npages * sizeof (uint_t), KM_SLEEP);
4402 
4403 	if (check_noreserve)
4404 		pagev->pg_incore =
4405 		    kmem_alloc(pagev->pg_npages * sizeof (char), KM_SLEEP);
4406 	else
4407 		pagev->pg_incore = NULL;
4408 
4409 	return (pagev);
4410 }
4411 
4412 static void
4413 pr_pagev_destroy(prpagev_t *pagev)
4414 {
4415 	if (pagev->pg_incore != NULL)
4416 		kmem_free(pagev->pg_incore, pagev->pg_npages * sizeof (char));
4417 
4418 	kmem_free(pagev->pg_protv, pagev->pg_npages * sizeof (uint_t));
4419 	kmem_free(pagev, sizeof (prpagev_t));
4420 }
4421 
4422 static caddr_t
4423 pr_pagev_fill(prpagev_t *pagev, struct seg *seg, caddr_t addr, caddr_t eaddr)
4424 {
4425 	ulong_t lastpg = seg_page(seg, eaddr - 1);
4426 	ulong_t pn, pnlim;
4427 	caddr_t saddr;
4428 	size_t len;
4429 
4430 	ASSERT(addr >= seg->s_base && addr <= eaddr);
4431 
4432 	if (addr == eaddr)
4433 		return (eaddr);
4434 
4435 refill:
4436 	ASSERT(addr < eaddr);
4437 	pagev->pg_pnbase = seg_page(seg, addr);
4438 	pnlim = pagev->pg_pnbase + pagev->pg_npages;
4439 	saddr = addr;
4440 
4441 	if (lastpg < pnlim)
4442 		len = (size_t)(eaddr - addr);
4443 	else
4444 		len = pagev->pg_npages * PAGESIZE;
4445 
4446 	if (pagev->pg_incore != NULL) {
4447 		/*
4448 		 * INCORE cleverly has different semantics than GETPROT:
4449 		 * it returns info on pages up to but NOT including addr + len.
4450 		 */
4451 		SEGOP_INCORE(seg, addr, len, pagev->pg_incore);
4452 		pn = pagev->pg_pnbase;
4453 
4454 		do {
4455 			/*
4456 			 * Guilty knowledge here:  We know that segvn_incore
4457 			 * returns more than just the low-order bit that
4458 			 * indicates the page is actually in memory.  If any
4459 			 * bits are set, then the page has backing store.
4460 			 */
4461 			if (pagev->pg_incore[pn++ - pagev->pg_pnbase])
4462 				goto out;
4463 
4464 		} while ((addr += PAGESIZE) < eaddr && pn < pnlim);
4465 
4466 		/*
4467 		 * If we examined all the pages in the vector but we're not
4468 		 * at the end of the segment, take another lap.
4469 		 */
4470 		if (addr < eaddr)
4471 			goto refill;
4472 	}
4473 
4474 	/*
4475 	 * Need to take len - 1 because addr + len is the address of the
4476 	 * first byte of the page just past the end of what we want.
4477 	 */
4478 out:
4479 	SEGOP_GETPROT(seg, saddr, len - 1, pagev->pg_protv);
4480 	return (addr);
4481 }
4482 
4483 static caddr_t
4484 pr_pagev_nextprot(prpagev_t *pagev, struct seg *seg,
4485     caddr_t *saddrp, caddr_t eaddr, uint_t *protp)
4486 {
4487 	/*
4488 	 * Our starting address is either the specified address, or the base
4489 	 * address from the start of the pagev.  If the latter is greater,
4490 	 * this means a previous call to pr_pagev_fill has already scanned
4491 	 * further than the end of the previous mapping.
4492 	 */
4493 	caddr_t base = seg->s_base + pagev->pg_pnbase * PAGESIZE;
4494 	caddr_t addr = MAX(*saddrp, base);
4495 	ulong_t pn = seg_page(seg, addr);
4496 	uint_t prot, nprot;
4497 
4498 	/*
4499 	 * If we're dealing with noreserve pages, then advance addr to
4500 	 * the address of the next page which has backing store.
4501 	 */
4502 	if (pagev->pg_incore != NULL) {
4503 		while (pagev->pg_incore[pn - pagev->pg_pnbase] == 0) {
4504 			if ((addr += PAGESIZE) == eaddr) {
4505 				*saddrp = addr;
4506 				prot = 0;
4507 				goto out;
4508 			}
4509 			if (++pn == pagev->pg_pnbase + pagev->pg_npages) {
4510 				addr = pr_pagev_fill(pagev, seg, addr, eaddr);
4511 				if (addr == eaddr) {
4512 					*saddrp = addr;
4513 					prot = 0;
4514 					goto out;
4515 				}
4516 				pn = seg_page(seg, addr);
4517 			}
4518 		}
4519 	}
4520 
4521 	/*
4522 	 * Get the protections on the page corresponding to addr.
4523 	 */
4524 	pn = seg_page(seg, addr);
4525 	ASSERT(pn >= pagev->pg_pnbase);
4526 	ASSERT(pn < (pagev->pg_pnbase + pagev->pg_npages));
4527 
4528 	prot = pagev->pg_protv[pn - pagev->pg_pnbase];
4529 	getwatchprot(seg->s_as, addr, &prot);
4530 	*saddrp = addr;
4531 
4532 	/*
4533 	 * Now loop until we find a backed page with different protections
4534 	 * or we reach the end of this segment.
4535 	 */
4536 	while ((addr += PAGESIZE) < eaddr) {
4537 		/*
4538 		 * If pn has advanced to the page number following what we
4539 		 * have information on, refill the page vector and reset
4540 		 * addr and pn.  If pr_pagev_fill does not return the
4541 		 * address of the next page, we have a discontiguity and
4542 		 * thus have reached the end of the current mapping.
4543 		 */
4544 		if (++pn == pagev->pg_pnbase + pagev->pg_npages) {
4545 			caddr_t naddr = pr_pagev_fill(pagev, seg, addr, eaddr);
4546 			if (naddr != addr)
4547 				goto out;
4548 			pn = seg_page(seg, addr);
4549 		}
4550 
4551 		/*
4552 		 * The previous page's protections are in prot, and it has
4553 		 * backing.  If this page is MAP_NORESERVE and has no backing,
4554 		 * then end this mapping and return the previous protections.
4555 		 */
4556 		if (pagev->pg_incore != NULL &&
4557 		    pagev->pg_incore[pn - pagev->pg_pnbase] == 0)
4558 			break;
4559 
4560 		/*
4561 		 * Otherwise end the mapping if this page's protections (nprot)
4562 		 * are different than those in the previous page (prot).
4563 		 */
4564 		nprot = pagev->pg_protv[pn - pagev->pg_pnbase];
4565 		getwatchprot(seg->s_as, addr, &nprot);
4566 
4567 		if (nprot != prot)
4568 			break;
4569 	}
4570 
4571 out:
4572 	*protp = prot;
4573 	return (addr);
4574 }
4575 
4576 size_t
4577 pr_getsegsize(struct seg *seg, int reserved)
4578 {
4579 	size_t size = seg->s_size;
4580 
4581 	/*
4582 	 * If we're interested in the reserved space, return the size of the
4583 	 * segment itself.  Everything else in this function is a special case
4584 	 * to determine the actual underlying size of various segment types.
4585 	 */
4586 	if (reserved)
4587 		return (size);
4588 
4589 	/*
4590 	 * If this is a segvn mapping of a regular file, return the smaller
4591 	 * of the segment size and the remaining size of the file beyond
4592 	 * the file offset corresponding to seg->s_base.
4593 	 */
4594 	if (seg->s_ops == &segvn_ops) {
4595 		vattr_t vattr;
4596 		vnode_t *vp;
4597 
4598 		vattr.va_mask = AT_SIZE;
4599 
4600 		if (SEGOP_GETVP(seg, seg->s_base, &vp) == 0 &&
4601 		    vp != NULL && vp->v_type == VREG &&
4602 		    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
4603 
4604 			u_offset_t fsize = vattr.va_size;
4605 			u_offset_t offset = SEGOP_GETOFFSET(seg, seg->s_base);
4606 
4607 			if (fsize < offset)
4608 				fsize = 0;
4609 			else
4610 				fsize -= offset;
4611 
4612 			fsize = roundup(fsize, (u_offset_t)PAGESIZE);
4613 
4614 			if (fsize < (u_offset_t)size)
4615 				size = (size_t)fsize;
4616 		}
4617 
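		/*
		 * For example (illustrative numbers): a 64K segment mapping
		 * a 20K regular file at file offset 12K has only 8K of file
		 * remaining, so 8K (already a multiple of PAGESIZE here) is
		 * reported rather than the full 64K reservation.
		 */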
4618 		return (size);
4619 	}
4620 
4621 	/*
4622 	 * If this is an ISM shared segment, don't include pages that are
4623 	 * beyond the real size of the spt segment that backs it.
4624 	 */
4625 	if (seg->s_ops == &segspt_shmops)
4626 		return (MIN(spt_realsize(seg), size));
4627 
4628 	/*
4629 	 * If this segment is a mapping from /dev/null, then this is a
4630 	 * reservation of virtual address space and has no actual size.
4631 	 * Such segments are backed by segdev and have type set to neither
4632 	 * MAP_SHARED nor MAP_PRIVATE.
4633 	 */
4634 	if (seg->s_ops == &segdev_ops &&
4635 	    ((SEGOP_GETTYPE(seg, seg->s_base) &
4636 	    (MAP_SHARED | MAP_PRIVATE)) == 0))
4637 		return (0);
4638 
4639 	/*
4640 	 * If this segment doesn't match one of the special types we handle,
4641 	 * just return the size of the segment itself.
4642 	 */
4643 	return (size);
4644 }
4645 
4646 uint_t
4647 pr_getprot(struct seg *seg, int reserved, void **tmp,
4648     caddr_t *saddrp, caddr_t *naddrp, caddr_t eaddr)
4649 {
4650 	struct as *as = seg->s_as;
4651 
4652 	caddr_t saddr = *saddrp;
4653 	caddr_t naddr;
4654 
4655 	int check_noreserve;
4656 	uint_t prot;
4657 
4658 	union {
4659 		struct segvn_data *svd;
4660 		struct segdev_data *sdp;
4661 		void *data;
4662 	} s;
4663 
4664 	s.data = seg->s_data;
4665 
4666 	ASSERT(AS_WRITE_HELD(as));
4667 	ASSERT(saddr >= seg->s_base && saddr < eaddr);
4668 	ASSERT(eaddr <= seg->s_base + seg->s_size);
4669 
4670 	/*
4671 	 * Don't include MAP_NORESERVE pages in the address range
4672 	 * unless their mappings have actually materialized.
4673 	 * We cheat by knowing that segvn is the only segment
4674 	 * driver that supports MAP_NORESERVE.
4675 	 */
4676 	check_noreserve =
4677 	    (!reserved && seg->s_ops == &segvn_ops && s.svd != NULL &&
4678 	    (s.svd->vp == NULL || s.svd->vp->v_type != VREG) &&
4679 	    (s.svd->flags & MAP_NORESERVE));
4680 
4681 	/*
4682 	 * Examine every page only as a last resort.  We use guilty knowledge
4683 	 * of segvn and segdev to avoid this: if there are no per-page
4684 	 * protections present in the segment and we don't care about
4685 	 * MAP_NORESERVE, then s_data->prot is the prot for the whole segment.
4686 	 */
4687 	if (!check_noreserve && saddr == seg->s_base &&
4688 	    seg->s_ops == &segvn_ops && s.svd != NULL && s.svd->pageprot == 0) {
4689 		prot = s.svd->prot;
4690 		getwatchprot(as, saddr, &prot);
4691 		naddr = eaddr;
4692 
4693 	} else if (saddr == seg->s_base && seg->s_ops == &segdev_ops &&
4694 	    s.sdp != NULL && s.sdp->pageprot == 0) {
4695 		prot = s.sdp->prot;
4696 		getwatchprot(as, saddr, &prot);
4697 		naddr = eaddr;
4698 
4699 	} else {
4700 		prpagev_t *pagev;
4701 
4702 		/*
4703 		 * If addr is sitting at the start of the segment, then
4704 		 * create a page vector to store protection and incore
4705 		 * information for pages in the segment, and fill it.
4706 		 * Otherwise, we expect *tmp to address the prpagev_t
4707 		 * allocated by a previous call to this function.
4708 		 */
4709 		if (saddr == seg->s_base) {
4710 			pagev = pr_pagev_create(seg, check_noreserve);
4711 			saddr = pr_pagev_fill(pagev, seg, saddr, eaddr);
4712 
4713 			ASSERT(*tmp == NULL);
4714 			*tmp = pagev;
4715 
4716 			ASSERT(saddr <= eaddr);
4717 			*saddrp = saddr;
4718 
4719 			if (saddr == eaddr) {
4720 				naddr = saddr;
4721 				prot = 0;
4722 				goto out;
4723 			}
4724 
4725 		} else {
4726 			ASSERT(*tmp != NULL);
4727 			pagev = (prpagev_t *)*tmp;
4728 		}
4729 
4730 		naddr = pr_pagev_nextprot(pagev, seg, saddrp, eaddr, &prot);
4731 		ASSERT(naddr <= eaddr);
4732 	}
4733 
4734 out:
4735 	if (naddr == eaddr)
4736 		pr_getprot_done(tmp);
4737 	*naddrp = naddr;
4738 	return (prot);
4739 }
4740 
4741 void
4742 pr_getprot_done(void **tmp)
4743 {
4744 	if (*tmp != NULL) {
4745 		pr_pagev_destroy((prpagev_t *)*tmp);
4746 		*tmp = NULL;
4747 	}
4748 }
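
/*
 * The expected calling pattern for pr_getprot() (prgetxmap() below is a
 * real caller): walk a segment in protection-homogeneous chunks, letting
 * pr_getprot() manage the prpagev_t cookie through *tmp.  The cookie is
 * freed automatically once the walk reaches eaddr; a caller that stops
 * early must call pr_getprot_done() itself.  A sketch:
 *
 *	void *tmp = NULL;
 *	caddr_t saddr, naddr;
 *	uint_t prot;
 *
 *	for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
 *		prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
 *		(one chunk: [saddr, naddr) with protections prot)
 *	}
 *	ASSERT(tmp == NULL);
 */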
4749 
4750 /*
4751  * Return true iff the vnode is a /proc file from the object directory.
4752  */
4753 int
4754 pr_isobject(vnode_t *vp)
4755 {
4756 	return (vn_matchops(vp, prvnodeops) && VTOP(vp)->pr_type == PR_OBJECT);
4757 }
4758 
4759 /*
4760  * Return true iff the vnode is a /proc file opened by the process itself.
4761  */
4762 int
4763 pr_isself(vnode_t *vp)
4764 {
4765 	/*
4766 	 * XXX: To retain binary compatibility with the old
4767 	 * ioctl()-based version of /proc, we exempt self-opens
4768 	 * of /proc/<pid> from being marked close-on-exec.
4769 	 */
4770 	return (vn_matchops(vp, prvnodeops) &&
4771 	    (VTOP(vp)->pr_flags & PR_ISSELF) &&
4772 	    VTOP(vp)->pr_type != PR_PIDDIR);
4773 }
4774 
4775 static ssize_t
4776 pr_getpagesize(struct seg *seg, caddr_t saddr, caddr_t *naddrp, caddr_t eaddr)
4777 {
4778 	ssize_t pagesize, hatsize;
4779 
4780 	ASSERT(AS_WRITE_HELD(seg->s_as));
4781 	ASSERT(IS_P2ALIGNED(saddr, PAGESIZE));
4782 	ASSERT(IS_P2ALIGNED(eaddr, PAGESIZE));
4783 	ASSERT(saddr < eaddr);
4784 
4785 	pagesize = hatsize = hat_getpagesize(seg->s_as->a_hat, saddr);
4786 	ASSERT(pagesize == -1 || IS_P2ALIGNED(pagesize, pagesize));
4787 	ASSERT(pagesize != 0);
4788 
4789 	if (pagesize == -1)
4790 		pagesize = PAGESIZE;
4791 
4792 	saddr += P2NPHASE((uintptr_t)saddr, pagesize);
4793 
4794 	while (saddr < eaddr) {
4795 		if (hatsize != hat_getpagesize(seg->s_as->a_hat, saddr))
4796 			break;
4797 		ASSERT(IS_P2ALIGNED(saddr, pagesize));
4798 		saddr += pagesize;
4799 	}
4800 
4801 	*naddrp = ((saddr < eaddr) ? saddr : eaddr);
4802 	return (hatsize);
4803 }
4804 
4805 /*
4806  * Return an array of structures with extended memory map information.
4807  * We allocate here; the caller must deallocate.
4808  */
4809 int
4810 prgetxmap(proc_t *p, list_t *iolhead)
4811 {
4812 	struct as *as = p->p_as;
4813 	prxmap_t *mp;
4814 	struct seg *seg;
4815 	struct seg *brkseg, *stkseg;
4816 	struct vnode *vp;
4817 	struct vattr vattr;
4818 	uint_t prot;
4819 
4820 	ASSERT(as != &kas && AS_WRITE_HELD(as));
4821 
4822 	/*
4823 	 * Request an initial buffer size that doesn't waste memory
4824 	 * if the address space has only a small number of segments.
4825 	 */
4826 	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));
4827 
4828 	if ((seg = AS_SEGFIRST(as)) == NULL)
4829 		return (0);
4830 
4831 	brkseg = break_seg(p);
4832 	stkseg = as_segat(as, prgetstackbase(p));
4833 
4834 	do {
4835 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
4836 		caddr_t saddr, naddr, baddr;
4837 		void *tmp = NULL;
4838 		ssize_t psz;
4839 		char *parr;
4840 		uint64_t npages;
4841 		uint64_t pagenum;
4842 
4843 		if ((seg->s_flags & S_HOLE) != 0) {
4844 			continue;
4845 		}
4846 		/*
4847 		 * Segment loop part one: iterate from the base of the segment
4848 		 * to its end, pausing at each address boundary (baddr) between
4849 		 * ranges that have different virtual memory protections.
4850 		 */
4851 		for (saddr = seg->s_base; saddr < eaddr; saddr = baddr) {
4852 			prot = pr_getprot(seg, 0, &tmp, &saddr, &baddr, eaddr);
4853 			ASSERT(baddr >= saddr && baddr <= eaddr);
4854 
4855 			/*
4856 			 * Segment loop part two: iterate from the current
4857 			 * position to the end of the protection boundary,
4858 			 * pausing at each address boundary (naddr) between
4859 			 * ranges that have different underlying page sizes.
4860 			 */
4861 			for (; saddr < baddr; saddr = naddr) {
4862 				psz = pr_getpagesize(seg, saddr, &naddr, baddr);
4863 				ASSERT(naddr >= saddr && naddr <= baddr);
4864 
4865 				mp = pr_iol_newbuf(iolhead, sizeof (*mp));
4866 
4867 				mp->pr_vaddr = (uintptr_t)saddr;
4868 				mp->pr_size = naddr - saddr;
4869 				mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
4870 				mp->pr_mflags = 0;
4871 				if (prot & PROT_READ)
4872 					mp->pr_mflags |= MA_READ;
4873 				if (prot & PROT_WRITE)
4874 					mp->pr_mflags |= MA_WRITE;
4875 				if (prot & PROT_EXEC)
4876 					mp->pr_mflags |= MA_EXEC;
4877 				if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
4878 					mp->pr_mflags |= MA_SHARED;
4879 				if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
4880 					mp->pr_mflags |= MA_NORESERVE;
4881 				if (seg->s_ops == &segspt_shmops ||
4882 				    (seg->s_ops == &segvn_ops &&
4883 				    (SEGOP_GETVP(seg, saddr, &vp) != 0 ||
4884 				    vp == NULL)))
4885 					mp->pr_mflags |= MA_ANON;
4886 				if (seg == brkseg)
4887 					mp->pr_mflags |= MA_BREAK;
4888 				else if (seg == stkseg)
4889 					mp->pr_mflags |= MA_STACK;
4890 				if (seg->s_ops == &segspt_shmops)
4891 					mp->pr_mflags |= MA_ISM | MA_SHM;
4892 
4893 				mp->pr_pagesize = PAGESIZE;
4894 				if (psz == -1) {
4895 					mp->pr_hatpagesize = 0;
4896 				} else {
4897 					mp->pr_hatpagesize = psz;
4898 				}
4899 
4900 				/*
4901 				 * Manufacture a filename for the "object" dir.
4902 				 */
4903 				mp->pr_dev = PRNODEV;
4904 				vattr.va_mask = AT_FSID|AT_NODEID;
4905 				if (seg->s_ops == &segvn_ops &&
4906 				    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
4907 				    vp != NULL && vp->v_type == VREG &&
4908 				    VOP_GETATTR(vp, &vattr, 0, CRED(),
4909 				    NULL) == 0) {
4910 					mp->pr_dev = vattr.va_fsid;
4911 					mp->pr_ino = vattr.va_nodeid;
4912 					if (vp == p->p_exec)
4913 						(void) strcpy(mp->pr_mapname,
4914 						    "a.out");
4915 					else
4916 						pr_object_name(mp->pr_mapname,
4917 						    vp, &vattr);
4918 				}
4919 
4920 				/*
4921 				 * Get the SysV shared memory id, if any.
4922 				 */
4923 				if ((mp->pr_mflags & MA_SHARED) &&
4924 				    p->p_segacct && (mp->pr_shmid = shmgetid(p,
4925 				    seg->s_base)) != SHMID_NONE) {
4926 					if (mp->pr_shmid == SHMID_FREE)
4927 						mp->pr_shmid = -1;
4928 
4929 					mp->pr_mflags |= MA_SHM;
4930 				} else {
4931 					mp->pr_shmid = -1;
4932 				}
4933 
4934 				npages = ((uintptr_t)(naddr - saddr)) >>
4935 				    PAGESHIFT;
4936 				parr = kmem_zalloc(npages, KM_SLEEP);
4937 
4938 				SEGOP_INCORE(seg, saddr, naddr - saddr, parr);
4939 
4940 				for (pagenum = 0; pagenum < npages; pagenum++) {
4941 					if (parr[pagenum] & SEG_PAGE_INCORE)
4942 						mp->pr_rss++;
4943 					if (parr[pagenum] & SEG_PAGE_ANON)
4944 						mp->pr_anon++;
4945 					if (parr[pagenum] & SEG_PAGE_LOCKED)
4946 						mp->pr_locked++;
4947 				}
4948 				kmem_free(parr, npages);
4949 			}
4950 		}
4951 		ASSERT(tmp == NULL);
4952 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
4953 
4954 	return (0);
4955 }
4956 
4957 /*
4958  * Return the process's credentials.  We don't need a 32-bit equivalent of
4959  * this function because prcred_t and prcred32_t are actually the same.
4960  */
4961 void
4962 prgetcred(proc_t *p, prcred_t *pcrp)
4963 {
4964 	mutex_enter(&p->p_crlock);
4965 	cred2prcred(p->p_cred, pcrp);
4966 	mutex_exit(&p->p_crlock);
4967 }
4968 
4969 void
4970 prgetsecflags(proc_t *p, prsecflags_t *psfp)
4971 {
4972 	ASSERT(psfp != NULL);
4973 
4974 	bzero(psfp, sizeof (*psfp));
4975 	psfp->pr_version = PRSECFLAGS_VERSION_CURRENT;
4976 	psfp->pr_lower = p->p_secflags.psf_lower;
4977 	psfp->pr_upper = p->p_secflags.psf_upper;
4978 	psfp->pr_effective = p->p_secflags.psf_effective;
4979 	psfp->pr_inherit = p->p_secflags.psf_inherit;
4980 }
4981 
4982 /*
4983  * Compute actual size of the prpriv_t structure.
4984  */
4985 
4986 size_t
4987 prgetprivsize(void)
4988 {
4989 	return (priv_prgetprivsize(NULL));
4990 }
4991 
4992 /*
4993  * Return the process's privileges.  We don't need a 32-bit equivalent of
4994  * this function because prpriv_t and prpriv32_t are actually the same.
4995  */
4996 void
4997 prgetpriv(proc_t *p, prpriv_t *pprp)
4998 {
4999 	mutex_enter(&p->p_crlock);
5000 	cred2prpriv(p->p_cred, pprp);
5001 	mutex_exit(&p->p_crlock);
5002 }
5003 
5004 #ifdef _SYSCALL32_IMPL
5005 /*
5006  * Return an array of structures with HAT memory map information.
5007  * We allocate here; the caller must deallocate.
5008  */
5009 int
5010 prgetxmap32(proc_t *p, list_t *iolhead)
5011 {
5012 	struct as *as = p->p_as;
5013 	prxmap32_t *mp;
5014 	struct seg *seg;
5015 	struct seg *brkseg, *stkseg;
5016 	struct vnode *vp;
5017 	struct vattr vattr;
5018 	uint_t prot;
5019 
5020 	ASSERT(as != &kas && AS_WRITE_HELD(as));
5021 
5022 	/*
5023 	 * Request an initial buffer size that doesn't waste memory
5024 	 * if the address space has only a small number of segments.
5025 	 */
5026 	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));
5027 
5028 	if ((seg = AS_SEGFIRST(as)) == NULL)
5029 		return (0);
5030 
5031 	brkseg = break_seg(p);
5032 	stkseg = as_segat(as, prgetstackbase(p));
5033 
5034 	do {
5035 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
5036 		caddr_t saddr, naddr, baddr;
5037 		void *tmp = NULL;
5038 		ssize_t psz;
5039 		char *parr;
5040 		uint64_t npages;
5041 		uint64_t pagenum;
5042 
5043 		if ((seg->s_flags & S_HOLE) != 0) {
5044 			continue;
5045 		}
5046 
5047 		/*
5048 		 * Segment loop part one: iterate from the base of the segment
5049 		 * to its end, pausing at each address boundary (baddr) between
5050 		 * ranges that have different virtual memory protections.
5051 		 */
5052 		for (saddr = seg->s_base; saddr < eaddr; saddr = baddr) {
5053 			prot = pr_getprot(seg, 0, &tmp, &saddr, &baddr, eaddr);
5054 			ASSERT(baddr >= saddr && baddr <= eaddr);
5055 
5056 			/*
5057 			 * Segment loop part two: iterate from the current
5058 			 * position to the end of the protection boundary,
5059 			 * pausing at each address boundary (naddr) between
5060 			 * ranges that have different underlying page sizes.
5061 			 */
5062 			for (; saddr < baddr; saddr = naddr) {
5063 				psz = pr_getpagesize(seg, saddr, &naddr, baddr);
5064 				ASSERT(naddr >= saddr && naddr <= baddr);
5065 
5066 				mp = pr_iol_newbuf(iolhead, sizeof (*mp));
5067 
5068 				mp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
5069 				mp->pr_size = (size32_t)(naddr - saddr);
5070 				mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
5071 				mp->pr_mflags = 0;
5072 				if (prot & PROT_READ)
5073 					mp->pr_mflags |= MA_READ;
5074 				if (prot & PROT_WRITE)
5075 					mp->pr_mflags |= MA_WRITE;
5076 				if (prot & PROT_EXEC)
5077 					mp->pr_mflags |= MA_EXEC;
5078 				if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
5079 					mp->pr_mflags |= MA_SHARED;
5080 				if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
5081 					mp->pr_mflags |= MA_NORESERVE;
5082 				if (seg->s_ops == &segspt_shmops ||
5083 				    (seg->s_ops == &segvn_ops &&
5084 				    (SEGOP_GETVP(seg, saddr, &vp) != 0 ||
5085 				    vp == NULL)))
5086 					mp->pr_mflags |= MA_ANON;
5087 				if (seg == brkseg)
5088 					mp->pr_mflags |= MA_BREAK;
5089 				else if (seg == stkseg)
5090 					mp->pr_mflags |= MA_STACK;
5091 				if (seg->s_ops == &segspt_shmops)
5092 					mp->pr_mflags |= MA_ISM | MA_SHM;
5093 
5094 				mp->pr_pagesize = PAGESIZE;
5095 				if (psz == -1) {
5096 					mp->pr_hatpagesize = 0;
5097 				} else {
5098 					mp->pr_hatpagesize = psz;
5099 				}
5100 
5101 				/*
5102 				 * Manufacture a filename for the "object" dir.
5103 				 */
5104 				mp->pr_dev = PRNODEV32;
5105 				vattr.va_mask = AT_FSID|AT_NODEID;
5106 				if (seg->s_ops == &segvn_ops &&
5107 				    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
5108 				    vp != NULL && vp->v_type == VREG &&
5109 				    VOP_GETATTR(vp, &vattr, 0, CRED(),
5110 				    NULL) == 0) {
5111 					(void) cmpldev(&mp->pr_dev,
5112 					    vattr.va_fsid);
5113 					mp->pr_ino = vattr.va_nodeid;
5114 					if (vp == p->p_exec)
5115 						(void) strcpy(mp->pr_mapname,
5116 						    "a.out");
5117 					else
5118 						pr_object_name(mp->pr_mapname,
5119 						    vp, &vattr);
5120 				}
5121 
5122 				/*
5123 				 * Get the SysV shared memory id, if any.
5124 				 */
5125 				if ((mp->pr_mflags & MA_SHARED) &&
5126 				    p->p_segacct && (mp->pr_shmid = shmgetid(p,
5127 				    seg->s_base)) != SHMID_NONE) {
5128 					if (mp->pr_shmid == SHMID_FREE)
5129 						mp->pr_shmid = -1;
5130 
5131 					mp->pr_mflags |= MA_SHM;
5132 				} else {
5133 					mp->pr_shmid = -1;
5134 				}
5135 
5136 				npages = ((uintptr_t)(naddr - saddr)) >>
5137 				    PAGESHIFT;
5138 				parr = kmem_zalloc(npages, KM_SLEEP);
5139 
5140 				SEGOP_INCORE(seg, saddr, naddr - saddr, parr);
5141 
5142 				for (pagenum = 0; pagenum < npages; pagenum++) {
5143 					if (parr[pagenum] & SEG_PAGE_INCORE)
5144 						mp->pr_rss++;
5145 					if (parr[pagenum] & SEG_PAGE_ANON)
5146 						mp->pr_anon++;
5147 					if (parr[pagenum] & SEG_PAGE_LOCKED)
5148 						mp->pr_locked++;
5149 				}
5150 				kmem_free(parr, npages);
5151 			}
5152 		}
5153 		ASSERT(tmp == NULL);
5154 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
5155 
5156 	return (0);
5157 }
5158 #endif	/* _SYSCALL32_IMPL */
5159