xref: /titanic_50/usr/src/uts/common/fs/proc/prsubr.c (revision 2c164fafa089aa352e513b095e1ecd0abd29c61f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright (c) 2013, Joyent, Inc. All rights reserved.
25  */
26 
27 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
28 /*	  All Rights Reserved  	*/
29 
30 #include <sys/types.h>
31 #include <sys/t_lock.h>
32 #include <sys/param.h>
33 #include <sys/cmn_err.h>
34 #include <sys/cred.h>
35 #include <sys/priv.h>
36 #include <sys/debug.h>
37 #include <sys/errno.h>
38 #include <sys/inline.h>
39 #include <sys/kmem.h>
40 #include <sys/mman.h>
41 #include <sys/proc.h>
42 #include <sys/brand.h>
43 #include <sys/sobject.h>
44 #include <sys/sysmacros.h>
45 #include <sys/systm.h>
46 #include <sys/uio.h>
47 #include <sys/var.h>
48 #include <sys/vfs.h>
49 #include <sys/vnode.h>
50 #include <sys/session.h>
51 #include <sys/pcb.h>
52 #include <sys/signal.h>
53 #include <sys/user.h>
54 #include <sys/disp.h>
55 #include <sys/class.h>
56 #include <sys/ts.h>
57 #include <sys/bitmap.h>
58 #include <sys/poll.h>
59 #include <sys/shm_impl.h>
60 #include <sys/fault.h>
61 #include <sys/syscall.h>
62 #include <sys/procfs.h>
63 #include <sys/processor.h>
64 #include <sys/cpuvar.h>
65 #include <sys/copyops.h>
66 #include <sys/time.h>
67 #include <sys/msacct.h>
68 #include <vm/as.h>
69 #include <vm/rm.h>
70 #include <vm/seg.h>
71 #include <vm/seg_vn.h>
72 #include <vm/seg_dev.h>
73 #include <vm/seg_spt.h>
74 #include <vm/page.h>
75 #include <sys/vmparam.h>
76 #include <sys/swap.h>
77 #include <fs/proc/prdata.h>
78 #include <sys/task.h>
79 #include <sys/project.h>
80 #include <sys/contract_impl.h>
81 #include <sys/contract/process.h>
82 #include <sys/contract/process_impl.h>
83 #include <sys/schedctl.h>
84 #include <sys/pool.h>
85 #include <sys/zone.h>
86 #include <sys/atomic.h>
87 #include <sys/sdt.h>
88 
89 #define	MAX_ITERS_SPIN	5
90 
91 typedef struct prpagev {
92 	uint_t *pg_protv;	/* vector of page permissions */
93 	char *pg_incore;	/* vector of incore flags */
94 	size_t pg_npages;	/* number of pages in protv and incore */
95 	ulong_t pg_pnbase;	/* pn within segment of first protv element */
96 } prpagev_t;
97 
98 size_t pagev_lim = 256 * 1024;	/* limit on number of pages in prpagev_t */
99 
100 extern struct seg_ops segdev_ops;	/* needs a header file */
101 extern struct seg_ops segspt_shmops;	/* needs a header file */
102 
103 static	int	set_watched_page(proc_t *, caddr_t, caddr_t, ulong_t, ulong_t);
104 static	void	clear_watched_page(proc_t *, caddr_t, caddr_t, ulong_t);
105 
106 /*
107  * Choose an lwp from the complete set of lwps for the process.
108  * This is called for any operation applied to the process
109  * file descriptor that requires an lwp to operate upon.
110  *
111  * Returns a pointer to the thread for the selected LWP,
112  * and with the dispatcher lock held for the thread.
113  *
114  * The algorithm for choosing an lwp is critical for /proc semantics;
115  * don't touch this code unless you know all of the implications.
116  */
117 kthread_t *
prchoose(proc_t * p)118 prchoose(proc_t *p)
119 {
120 	kthread_t *t;
121 	kthread_t *t_onproc = NULL;	/* running on processor */
122 	kthread_t *t_run = NULL;	/* runnable, on disp queue */
123 	kthread_t *t_sleep = NULL;	/* sleeping */
124 	kthread_t *t_hold = NULL;	/* sleeping, performing hold */
125 	kthread_t *t_susp = NULL;	/* suspended stop */
126 	kthread_t *t_jstop = NULL;	/* jobcontrol stop, w/o directed stop */
127 	kthread_t *t_jdstop = NULL;	/* jobcontrol stop with directed stop */
128 	kthread_t *t_req = NULL;	/* requested stop */
129 	kthread_t *t_istop = NULL;	/* event-of-interest stop */
130 	kthread_t *t_dtrace = NULL;	/* DTrace stop */
131 
132 	ASSERT(MUTEX_HELD(&p->p_lock));
133 
134 	/*
135 	 * If the agent lwp exists, it takes precedence over all others.
136 	 */
137 	if ((t = p->p_agenttp) != NULL) {
138 		thread_lock(t);
139 		return (t);
140 	}
141 
142 	if ((t = p->p_tlist) == NULL)	/* start at the head of the list */
143 		return (t);
144 	do {		/* for eacn lwp in the process */
145 		if (VSTOPPED(t)) {	/* virtually stopped */
146 			if (t_req == NULL)
147 				t_req = t;
148 			continue;
149 		}
150 
151 		thread_lock(t);		/* make sure thread is in good state */
152 		switch (t->t_state) {
153 		default:
154 			panic("prchoose: bad thread state %d, thread 0x%p",
155 			    t->t_state, (void *)t);
156 			/*NOTREACHED*/
157 		case TS_SLEEP:
158 			/* this is filthy */
159 			if (t->t_wchan == (caddr_t)&p->p_holdlwps &&
160 			    t->t_wchan0 == NULL) {
161 				if (t_hold == NULL)
162 					t_hold = t;
163 			} else {
164 				if (t_sleep == NULL)
165 					t_sleep = t;
166 			}
167 			break;
168 		case TS_RUN:
169 		case TS_WAIT:
170 			if (t_run == NULL)
171 				t_run = t;
172 			break;
173 		case TS_ONPROC:
174 			if (t_onproc == NULL)
175 				t_onproc = t;
176 			break;
177 		case TS_ZOMB:		/* last possible choice */
178 			break;
179 		case TS_STOPPED:
180 			switch (t->t_whystop) {
181 			case PR_SUSPENDED:
182 				if (t_susp == NULL)
183 					t_susp = t;
184 				break;
185 			case PR_JOBCONTROL:
186 				if (t->t_proc_flag & TP_PRSTOP) {
187 					if (t_jdstop == NULL)
188 						t_jdstop = t;
189 				} else {
190 					if (t_jstop == NULL)
191 						t_jstop = t;
192 				}
193 				break;
194 			case PR_REQUESTED:
195 				if (t->t_dtrace_stop && t_dtrace == NULL)
196 					t_dtrace = t;
197 				else if (t_req == NULL)
198 					t_req = t;
199 				break;
200 			case PR_SYSENTRY:
201 			case PR_SYSEXIT:
202 			case PR_SIGNALLED:
203 			case PR_FAULTED:
204 				/*
205 				 * Make an lwp calling exit() be the
206 				 * last lwp seen in the process.
207 				 */
208 				if (t_istop == NULL ||
209 				    (t_istop->t_whystop == PR_SYSENTRY &&
210 				    t_istop->t_whatstop == SYS_exit))
211 					t_istop = t;
212 				break;
213 			case PR_CHECKPOINT:	/* can't happen? */
214 				break;
215 			default:
216 				panic("prchoose: bad t_whystop %d, thread 0x%p",
217 				    t->t_whystop, (void *)t);
218 				/*NOTREACHED*/
219 			}
220 			break;
221 		}
222 		thread_unlock(t);
223 	} while ((t = t->t_forw) != p->p_tlist);
224 
225 	if (t_onproc)
226 		t = t_onproc;
227 	else if (t_run)
228 		t = t_run;
229 	else if (t_sleep)
230 		t = t_sleep;
231 	else if (t_jstop)
232 		t = t_jstop;
233 	else if (t_jdstop)
234 		t = t_jdstop;
235 	else if (t_istop)
236 		t = t_istop;
237 	else if (t_dtrace)
238 		t = t_dtrace;
239 	else if (t_req)
240 		t = t_req;
241 	else if (t_hold)
242 		t = t_hold;
243 	else if (t_susp)
244 		t = t_susp;
245 	else			/* TS_ZOMB */
246 		t = p->p_tlist;
247 
248 	if (t != NULL)
249 		thread_lock(t);
250 	return (t);
251 }
252 
253 /*
254  * Wakeup anyone sleeping on the /proc vnode for the process/lwp to stop.
255  * Also call pollwakeup() if any lwps are waiting in poll() for POLLPRI
256  * on the /proc file descriptor.  Called from stop() when a traced
257  * process stops on an event of interest.  Also called from exit()
258  * and prinvalidate() to indicate POLLHUP and POLLERR respectively.
259  */
260 void
prnotify(struct vnode * vp)261 prnotify(struct vnode *vp)
262 {
263 	prcommon_t *pcp = VTOP(vp)->pr_common;
264 
265 	mutex_enter(&pcp->prc_mutex);
266 	cv_broadcast(&pcp->prc_wait);
267 	mutex_exit(&pcp->prc_mutex);
268 	if (pcp->prc_flags & PRC_POLL) {
269 		/*
270 		 * We call pollwakeup() with POLLHUP to ensure that
271 		 * the pollers are awakened even if they are polling
272 		 * for nothing (i.e., waiting for the process to exit).
273 		 * This enables the use of the PRC_POLL flag for optimization
274 		 * (we can turn off PRC_POLL only if we know no pollers remain).
275 		 */
276 		pcp->prc_flags &= ~PRC_POLL;
277 		pollwakeup(&pcp->prc_pollhead, POLLHUP);
278 	}
279 }
280 
281 /* called immediately below, in prfree() */
282 static void
prfreenotify(vnode_t * vp)283 prfreenotify(vnode_t *vp)
284 {
285 	prnode_t *pnp;
286 	prcommon_t *pcp;
287 
288 	while (vp != NULL) {
289 		pnp = VTOP(vp);
290 		pcp = pnp->pr_common;
291 		ASSERT(pcp->prc_thread == NULL);
292 		pcp->prc_proc = NULL;
293 		/*
294 		 * We can't call prnotify() here because we are holding
295 		 * pidlock.  We assert that there is no need to.
296 		 */
297 		mutex_enter(&pcp->prc_mutex);
298 		cv_broadcast(&pcp->prc_wait);
299 		mutex_exit(&pcp->prc_mutex);
300 		ASSERT(!(pcp->prc_flags & PRC_POLL));
301 
302 		vp = pnp->pr_next;
303 		pnp->pr_next = NULL;
304 	}
305 }
306 
307 /*
308  * Called from a hook in freeproc() when a traced process is removed
309  * from the process table.  The proc-table pointers of all associated
310  * /proc vnodes are cleared to indicate that the process has gone away.
311  */
312 void
prfree(proc_t * p)313 prfree(proc_t *p)
314 {
315 	uint_t slot = p->p_slot;
316 
317 	ASSERT(MUTEX_HELD(&pidlock));
318 
319 	/*
320 	 * Block the process against /proc so it can be freed.
321 	 * It cannot be freed while locked by some controlling process.
322 	 * Lock ordering:
323 	 *	pidlock -> pr_pidlock -> p->p_lock -> pcp->prc_mutex
324 	 */
325 	mutex_enter(&pr_pidlock);	/* protects pcp->prc_proc */
326 	mutex_enter(&p->p_lock);
327 	while (p->p_proc_flag & P_PR_LOCK) {
328 		mutex_exit(&pr_pidlock);
329 		cv_wait(&pr_pid_cv[slot], &p->p_lock);
330 		mutex_exit(&p->p_lock);
331 		mutex_enter(&pr_pidlock);
332 		mutex_enter(&p->p_lock);
333 	}
334 
335 	ASSERT(p->p_tlist == NULL);
336 
337 	prfreenotify(p->p_plist);
338 	p->p_plist = NULL;
339 
340 	prfreenotify(p->p_trace);
341 	p->p_trace = NULL;
342 
343 	/*
344 	 * We broadcast to wake up everyone waiting for this process.
345 	 * No one can reach this process from this point on.
346 	 */
347 	cv_broadcast(&pr_pid_cv[slot]);
348 
349 	mutex_exit(&p->p_lock);
350 	mutex_exit(&pr_pidlock);
351 }
352 
353 /*
354  * Called from a hook in exit() when a traced process is becoming a zombie.
355  */
356 void
prexit(proc_t * p)357 prexit(proc_t *p)
358 {
359 	ASSERT(MUTEX_HELD(&p->p_lock));
360 
361 	if (pr_watch_active(p)) {
362 		pr_free_watchpoints(p);
363 		watch_disable(curthread);
364 	}
365 	/* pr_free_watched_pages() is called in exit(), after dropping p_lock */
366 	if (p->p_trace) {
367 		VTOP(p->p_trace)->pr_common->prc_flags |= PRC_DESTROY;
368 		prnotify(p->p_trace);
369 	}
370 	cv_broadcast(&pr_pid_cv[p->p_slot]);	/* pauselwps() */
371 }
372 
373 /*
374  * Called when a thread calls lwp_exit().
375  */
376 void
prlwpexit(kthread_t * t)377 prlwpexit(kthread_t *t)
378 {
379 	vnode_t *vp;
380 	prnode_t *pnp;
381 	prcommon_t *pcp;
382 	proc_t *p = ttoproc(t);
383 	lwpent_t *lep = p->p_lwpdir[t->t_dslot].ld_entry;
384 
385 	ASSERT(t == curthread);
386 	ASSERT(MUTEX_HELD(&p->p_lock));
387 
388 	/*
389 	 * The process must be blocked against /proc to do this safely.
390 	 * The lwp must not disappear while the process is marked P_PR_LOCK.
391 	 * It is the caller's responsibility to have called prbarrier(p).
392 	 */
393 	ASSERT(!(p->p_proc_flag & P_PR_LOCK));
394 
395 	for (vp = p->p_plist; vp != NULL; vp = pnp->pr_next) {
396 		pnp = VTOP(vp);
397 		pcp = pnp->pr_common;
398 		if (pcp->prc_thread == t) {
399 			pcp->prc_thread = NULL;
400 			pcp->prc_flags |= PRC_DESTROY;
401 		}
402 	}
403 
404 	for (vp = lep->le_trace; vp != NULL; vp = pnp->pr_next) {
405 		pnp = VTOP(vp);
406 		pcp = pnp->pr_common;
407 		pcp->prc_thread = NULL;
408 		pcp->prc_flags |= PRC_DESTROY;
409 		prnotify(vp);
410 	}
411 
412 	if (p->p_trace)
413 		prnotify(p->p_trace);
414 }
415 
416 /*
417  * Called when a zombie thread is joined or when a
418  * detached lwp exits.  Called from lwp_hash_out().
419  */
420 void
prlwpfree(proc_t * p,lwpent_t * lep)421 prlwpfree(proc_t *p, lwpent_t *lep)
422 {
423 	vnode_t *vp;
424 	prnode_t *pnp;
425 	prcommon_t *pcp;
426 
427 	ASSERT(MUTEX_HELD(&p->p_lock));
428 
429 	/*
430 	 * The process must be blocked against /proc to do this safely.
431 	 * The lwp must not disappear while the process is marked P_PR_LOCK.
432 	 * It is the caller's responsibility to have called prbarrier(p).
433 	 */
434 	ASSERT(!(p->p_proc_flag & P_PR_LOCK));
435 
436 	vp = lep->le_trace;
437 	lep->le_trace = NULL;
438 	while (vp) {
439 		prnotify(vp);
440 		pnp = VTOP(vp);
441 		pcp = pnp->pr_common;
442 		ASSERT(pcp->prc_thread == NULL &&
443 		    (pcp->prc_flags & PRC_DESTROY));
444 		pcp->prc_tslot = -1;
445 		vp = pnp->pr_next;
446 		pnp->pr_next = NULL;
447 	}
448 
449 	if (p->p_trace)
450 		prnotify(p->p_trace);
451 }
452 
453 /*
454  * Called from a hook in exec() when a thread starts exec().
455  */
456 void
prexecstart(void)457 prexecstart(void)
458 {
459 	proc_t *p = ttoproc(curthread);
460 	klwp_t *lwp = ttolwp(curthread);
461 
462 	/*
463 	 * The P_PR_EXEC flag blocks /proc operations for
464 	 * the duration of the exec().
465 	 * We can't start exec() while the process is
466 	 * locked by /proc, so we call prbarrier().
467 	 * lwp_nostop keeps the process from being stopped
468 	 * via job control for the duration of the exec().
469 	 */
470 
471 	ASSERT(MUTEX_HELD(&p->p_lock));
472 	prbarrier(p);
473 	lwp->lwp_nostop++;
474 	p->p_proc_flag |= P_PR_EXEC;
475 }
476 
477 /*
478  * Called from a hook in exec() when a thread finishes exec().
479  * The thread may or may not have succeeded.  Some other thread
480  * may have beat it to the punch.
481  */
482 void
prexecend(void)483 prexecend(void)
484 {
485 	proc_t *p = ttoproc(curthread);
486 	klwp_t *lwp = ttolwp(curthread);
487 	vnode_t *vp;
488 	prnode_t *pnp;
489 	prcommon_t *pcp;
490 	model_t model = p->p_model;
491 	id_t tid = curthread->t_tid;
492 	int tslot = curthread->t_dslot;
493 
494 	ASSERT(MUTEX_HELD(&p->p_lock));
495 
496 	lwp->lwp_nostop--;
497 	if (p->p_flag & SEXITLWPS) {
498 		/*
499 		 * We are on our way to exiting because some
500 		 * other thread beat us in the race to exec().
501 		 * Don't clear the P_PR_EXEC flag in this case.
502 		 */
503 		return;
504 	}
505 
506 	/*
507 	 * Wake up anyone waiting in /proc for the process to complete exec().
508 	 */
509 	p->p_proc_flag &= ~P_PR_EXEC;
510 	if ((vp = p->p_trace) != NULL) {
511 		pcp = VTOP(vp)->pr_common;
512 		mutex_enter(&pcp->prc_mutex);
513 		cv_broadcast(&pcp->prc_wait);
514 		mutex_exit(&pcp->prc_mutex);
515 		for (; vp != NULL; vp = pnp->pr_next) {
516 			pnp = VTOP(vp);
517 			pnp->pr_common->prc_datamodel = model;
518 		}
519 	}
520 	if ((vp = p->p_lwpdir[tslot].ld_entry->le_trace) != NULL) {
521 		/*
522 		 * We dealt with the process common above.
523 		 */
524 		ASSERT(p->p_trace != NULL);
525 		pcp = VTOP(vp)->pr_common;
526 		mutex_enter(&pcp->prc_mutex);
527 		cv_broadcast(&pcp->prc_wait);
528 		mutex_exit(&pcp->prc_mutex);
529 		for (; vp != NULL; vp = pnp->pr_next) {
530 			pnp = VTOP(vp);
531 			pcp = pnp->pr_common;
532 			pcp->prc_datamodel = model;
533 			pcp->prc_tid = tid;
534 			pcp->prc_tslot = tslot;
535 		}
536 	}
537 }
538 
539 /*
540  * Called from a hook in relvm() just before freeing the address space.
541  * We free all the watched areas now.
542  */
543 void
prrelvm(void)544 prrelvm(void)
545 {
546 	proc_t *p = ttoproc(curthread);
547 
548 	mutex_enter(&p->p_lock);
549 	prbarrier(p);	/* block all other /proc operations */
550 	if (pr_watch_active(p)) {
551 		pr_free_watchpoints(p);
552 		watch_disable(curthread);
553 	}
554 	mutex_exit(&p->p_lock);
555 	pr_free_watched_pages(p);
556 }
557 
558 /*
559  * Called from hooks in exec-related code when a traced process
560  * attempts to exec(2) a setuid/setgid program or an unreadable
561  * file.  Rather than fail the exec we invalidate the associated
562  * /proc vnodes so that subsequent attempts to use them will fail.
563  *
564  * All /proc vnodes, except directory vnodes, are retained on a linked
565  * list (rooted at p_plist in the process structure) until last close.
566  *
567  * A controlling process must re-open the /proc files in order to
568  * regain control.
569  */
570 void
prinvalidate(struct user * up)571 prinvalidate(struct user *up)
572 {
573 	kthread_t *t = curthread;
574 	proc_t *p = ttoproc(t);
575 	vnode_t *vp;
576 	prnode_t *pnp;
577 	int writers = 0;
578 
579 	mutex_enter(&p->p_lock);
580 	prbarrier(p);	/* block all other /proc operations */
581 
582 	/*
583 	 * At this moment, there can be only one lwp in the process.
584 	 */
585 	ASSERT(p->p_lwpcnt == 1 && p->p_zombcnt == 0);
586 
587 	/*
588 	 * Invalidate any currently active /proc vnodes.
589 	 */
590 	for (vp = p->p_plist; vp != NULL; vp = pnp->pr_next) {
591 		pnp = VTOP(vp);
592 		switch (pnp->pr_type) {
593 		case PR_PSINFO:		/* these files can read by anyone */
594 		case PR_LPSINFO:
595 		case PR_LWPSINFO:
596 		case PR_LWPDIR:
597 		case PR_LWPIDDIR:
598 		case PR_USAGE:
599 		case PR_LUSAGE:
600 		case PR_LWPUSAGE:
601 			break;
602 		default:
603 			pnp->pr_flags |= PR_INVAL;
604 			break;
605 		}
606 	}
607 	/*
608 	 * Wake up anyone waiting for the process or lwp.
609 	 * p->p_trace is guaranteed to be non-NULL if there
610 	 * are any open /proc files for this process.
611 	 */
612 	if ((vp = p->p_trace) != NULL) {
613 		prcommon_t *pcp = VTOP(vp)->pr_pcommon;
614 
615 		prnotify(vp);
616 		/*
617 		 * Are there any writers?
618 		 */
619 		if ((writers = pcp->prc_writers) != 0) {
620 			/*
621 			 * Clear the exclusive open flag (old /proc interface).
622 			 * Set prc_selfopens equal to prc_writers so that
623 			 * the next O_EXCL|O_WRITE open will succeed
624 			 * even with existing (though invalid) writers.
625 			 * prclose() must decrement prc_selfopens when
626 			 * the invalid files are closed.
627 			 */
628 			pcp->prc_flags &= ~PRC_EXCL;
629 			ASSERT(pcp->prc_selfopens <= writers);
630 			pcp->prc_selfopens = writers;
631 		}
632 	}
633 	vp = p->p_lwpdir[t->t_dslot].ld_entry->le_trace;
634 	while (vp != NULL) {
635 		/*
636 		 * We should not invalidate the lwpiddir vnodes,
637 		 * but the necessities of maintaining the old
638 		 * ioctl()-based version of /proc require it.
639 		 */
640 		pnp = VTOP(vp);
641 		pnp->pr_flags |= PR_INVAL;
642 		prnotify(vp);
643 		vp = pnp->pr_next;
644 	}
645 
646 	/*
647 	 * If any tracing flags are in effect and any vnodes are open for
648 	 * writing then set the requested-stop and run-on-last-close flags.
649 	 * Otherwise, clear all tracing flags.
650 	 */
651 	t->t_proc_flag &= ~TP_PAUSE;
652 	if ((p->p_proc_flag & P_PR_TRACE) && writers) {
653 		t->t_proc_flag |= TP_PRSTOP;
654 		aston(t);		/* so ISSIG will see the flag */
655 		p->p_proc_flag |= P_PR_RUNLCL;
656 	} else {
657 		premptyset(&up->u_entrymask);		/* syscalls */
658 		premptyset(&up->u_exitmask);
659 		up->u_systrap = 0;
660 		premptyset(&p->p_sigmask);		/* signals */
661 		premptyset(&p->p_fltmask);		/* faults */
662 		t->t_proc_flag &= ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
663 		p->p_proc_flag &= ~(P_PR_RUNLCL|P_PR_KILLCL|P_PR_TRACE);
664 		prnostep(ttolwp(t));
665 	}
666 
667 	mutex_exit(&p->p_lock);
668 }
669 
670 /*
671  * Acquire the controlled process's p_lock and mark it P_PR_LOCK.
672  * Return with pr_pidlock held in all cases.
673  * Return with p_lock held if the the process still exists.
674  * Return value is the process pointer if the process still exists, else NULL.
675  * If we lock the process, give ourself kernel priority to avoid deadlocks;
676  * this is undone in prunlock().
677  */
678 proc_t *
pr_p_lock(prnode_t * pnp)679 pr_p_lock(prnode_t *pnp)
680 {
681 	proc_t *p;
682 	prcommon_t *pcp;
683 
684 	mutex_enter(&pr_pidlock);
685 	if ((pcp = pnp->pr_pcommon) == NULL || (p = pcp->prc_proc) == NULL)
686 		return (NULL);
687 	mutex_enter(&p->p_lock);
688 	while (p->p_proc_flag & P_PR_LOCK) {
689 		/*
690 		 * This cv/mutex pair is persistent even if
691 		 * the process disappears while we sleep.
692 		 */
693 		kcondvar_t *cv = &pr_pid_cv[p->p_slot];
694 		kmutex_t *mp = &p->p_lock;
695 
696 		mutex_exit(&pr_pidlock);
697 		cv_wait(cv, mp);
698 		mutex_exit(mp);
699 		mutex_enter(&pr_pidlock);
700 		if (pcp->prc_proc == NULL)
701 			return (NULL);
702 		ASSERT(p == pcp->prc_proc);
703 		mutex_enter(&p->p_lock);
704 	}
705 	p->p_proc_flag |= P_PR_LOCK;
706 	return (p);
707 }
708 
709 /*
710  * Lock the target process by setting P_PR_LOCK and grabbing p->p_lock.
711  * This prevents any lwp of the process from disappearing and
712  * blocks most operations that a process can perform on itself.
713  * Returns 0 on success, a non-zero error number on failure.
714  *
715  * 'zdisp' is ZYES or ZNO to indicate whether prlock() should succeed when
716  * the subject process is a zombie (ZYES) or fail for zombies (ZNO).
717  *
718  * error returns:
719  *	ENOENT: process or lwp has disappeared or process is exiting
720  *		(or has become a zombie and zdisp == ZNO).
721  *	EAGAIN: procfs vnode has become invalid.
722  *	EINTR:  signal arrived while waiting for exec to complete.
723  */
724 int
prlock(prnode_t * pnp,int zdisp)725 prlock(prnode_t *pnp, int zdisp)
726 {
727 	prcommon_t *pcp;
728 	proc_t *p;
729 
730 again:
731 	pcp = pnp->pr_common;
732 	p = pr_p_lock(pnp);
733 	mutex_exit(&pr_pidlock);
734 
735 	/*
736 	 * Return ENOENT immediately if there is no process.
737 	 */
738 	if (p == NULL)
739 		return (ENOENT);
740 
741 	ASSERT(p == pcp->prc_proc && p->p_stat != 0 && p->p_stat != SIDL);
742 
743 	/*
744 	 * Return ENOENT if process entered zombie state or is exiting
745 	 * and the 'zdisp' flag is set to ZNO indicating not to lock zombies.
746 	 */
747 	if (zdisp == ZNO &&
748 	    ((pcp->prc_flags & PRC_DESTROY) || (p->p_flag & SEXITING))) {
749 		prunlock(pnp);
750 		return (ENOENT);
751 	}
752 
753 	/*
754 	 * If lwp-specific, check to see if lwp has disappeared.
755 	 */
756 	if (pcp->prc_flags & PRC_LWP) {
757 		if ((zdisp == ZNO && (pcp->prc_flags & PRC_DESTROY)) ||
758 		    pcp->prc_tslot == -1) {
759 			prunlock(pnp);
760 			return (ENOENT);
761 		}
762 	}
763 
764 	/*
765 	 * Return EAGAIN if we have encountered a security violation.
766 	 * (The process exec'd a set-id or unreadable executable file.)
767 	 */
768 	if (pnp->pr_flags & PR_INVAL) {
769 		prunlock(pnp);
770 		return (EAGAIN);
771 	}
772 
773 	/*
774 	 * If process is undergoing an exec(), wait for
775 	 * completion and then start all over again.
776 	 */
777 	if (p->p_proc_flag & P_PR_EXEC) {
778 		pcp = pnp->pr_pcommon;	/* Put on the correct sleep queue */
779 		mutex_enter(&pcp->prc_mutex);
780 		prunlock(pnp);
781 		if (!cv_wait_sig(&pcp->prc_wait, &pcp->prc_mutex)) {
782 			mutex_exit(&pcp->prc_mutex);
783 			return (EINTR);
784 		}
785 		mutex_exit(&pcp->prc_mutex);
786 		goto again;
787 	}
788 
789 	/*
790 	 * We return holding p->p_lock.
791 	 */
792 	return (0);
793 }
794 
795 /*
796  * Undo prlock() and pr_p_lock().
797  * p->p_lock is still held; pr_pidlock is no longer held.
798  *
799  * prunmark() drops the P_PR_LOCK flag and wakes up another thread,
800  * if any, waiting for the flag to be dropped; it retains p->p_lock.
801  *
802  * prunlock() calls prunmark() and then drops p->p_lock.
803  */
804 void
prunmark(proc_t * p)805 prunmark(proc_t *p)
806 {
807 	ASSERT(p->p_proc_flag & P_PR_LOCK);
808 	ASSERT(MUTEX_HELD(&p->p_lock));
809 
810 	cv_signal(&pr_pid_cv[p->p_slot]);
811 	p->p_proc_flag &= ~P_PR_LOCK;
812 }
813 
814 void
prunlock(prnode_t * pnp)815 prunlock(prnode_t *pnp)
816 {
817 	prcommon_t *pcp = pnp->pr_common;
818 	proc_t *p = pcp->prc_proc;
819 
820 	/*
821 	 * If we (or someone) gave it a SIGKILL, and it is not
822 	 * already a zombie, set it running unconditionally.
823 	 */
824 	if ((p->p_flag & SKILLED) &&
825 	    !(p->p_flag & SEXITING) &&
826 	    !(pcp->prc_flags & PRC_DESTROY) &&
827 	    !((pcp->prc_flags & PRC_LWP) && pcp->prc_tslot == -1))
828 		(void) pr_setrun(pnp, 0);
829 	prunmark(p);
830 	mutex_exit(&p->p_lock);
831 }
832 
833 /*
834  * Called while holding p->p_lock to delay until the process is unlocked.
835  * We enter holding p->p_lock; p->p_lock is dropped and reacquired.
836  * The process cannot become locked again until p->p_lock is dropped.
837  */
838 void
prbarrier(proc_t * p)839 prbarrier(proc_t *p)
840 {
841 	ASSERT(MUTEX_HELD(&p->p_lock));
842 
843 	if (p->p_proc_flag & P_PR_LOCK) {
844 		/* The process is locked; delay until not locked */
845 		uint_t slot = p->p_slot;
846 
847 		while (p->p_proc_flag & P_PR_LOCK)
848 			cv_wait(&pr_pid_cv[slot], &p->p_lock);
849 		cv_signal(&pr_pid_cv[slot]);
850 	}
851 }
852 
853 /*
854  * Return process/lwp status.
855  * The u-block is mapped in by this routine and unmapped at the end.
856  */
857 void
prgetstatus(proc_t * p,pstatus_t * sp,zone_t * zp)858 prgetstatus(proc_t *p, pstatus_t *sp, zone_t *zp)
859 {
860 	kthread_t *t;
861 
862 	ASSERT(MUTEX_HELD(&p->p_lock));
863 
864 	t = prchoose(p);	/* returns locked thread */
865 	ASSERT(t != NULL);
866 	thread_unlock(t);
867 
868 	/* just bzero the process part, prgetlwpstatus() does the rest */
869 	bzero(sp, sizeof (pstatus_t) - sizeof (lwpstatus_t));
870 	sp->pr_nlwp = p->p_lwpcnt;
871 	sp->pr_nzomb = p->p_zombcnt;
872 	prassignset(&sp->pr_sigpend, &p->p_sig);
873 	sp->pr_brkbase = (uintptr_t)p->p_brkbase;
874 	sp->pr_brksize = p->p_brksize;
875 	sp->pr_stkbase = (uintptr_t)prgetstackbase(p);
876 	sp->pr_stksize = p->p_stksize;
877 	sp->pr_pid = p->p_pid;
878 	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
879 	    (p->p_flag & SZONETOP)) {
880 		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
881 		/*
882 		 * Inside local zones, fake zsched's pid as parent pids for
883 		 * processes which reference processes outside of the zone.
884 		 */
885 		sp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
886 	} else {
887 		sp->pr_ppid = p->p_ppid;
888 	}
889 	sp->pr_pgid  = p->p_pgrp;
890 	sp->pr_sid   = p->p_sessp->s_sid;
891 	sp->pr_taskid = p->p_task->tk_tkid;
892 	sp->pr_projid = p->p_task->tk_proj->kpj_id;
893 	sp->pr_zoneid = p->p_zone->zone_id;
894 	hrt2ts(mstate_aggr_state(p, LMS_USER), &sp->pr_utime);
895 	hrt2ts(mstate_aggr_state(p, LMS_SYSTEM), &sp->pr_stime);
896 	TICK_TO_TIMESTRUC(p->p_cutime, &sp->pr_cutime);
897 	TICK_TO_TIMESTRUC(p->p_cstime, &sp->pr_cstime);
898 	prassignset(&sp->pr_sigtrace, &p->p_sigmask);
899 	prassignset(&sp->pr_flttrace, &p->p_fltmask);
900 	prassignset(&sp->pr_sysentry, &PTOU(p)->u_entrymask);
901 	prassignset(&sp->pr_sysexit, &PTOU(p)->u_exitmask);
902 	switch (p->p_model) {
903 	case DATAMODEL_ILP32:
904 		sp->pr_dmodel = PR_MODEL_ILP32;
905 		break;
906 	case DATAMODEL_LP64:
907 		sp->pr_dmodel = PR_MODEL_LP64;
908 		break;
909 	}
910 	if (p->p_agenttp)
911 		sp->pr_agentid = p->p_agenttp->t_tid;
912 
913 	/* get the chosen lwp's status */
914 	prgetlwpstatus(t, &sp->pr_lwp, zp);
915 
916 	/* replicate the flags */
917 	sp->pr_flags = sp->pr_lwp.pr_flags;
918 }
919 
920 #ifdef _SYSCALL32_IMPL
921 void
prgetlwpstatus32(kthread_t * t,lwpstatus32_t * sp,zone_t * zp)922 prgetlwpstatus32(kthread_t *t, lwpstatus32_t *sp, zone_t *zp)
923 {
924 	proc_t *p = ttoproc(t);
925 	klwp_t *lwp = ttolwp(t);
926 	struct mstate *ms = &lwp->lwp_mstate;
927 	hrtime_t usr, sys;
928 	int flags;
929 	ulong_t instr;
930 
931 	ASSERT(MUTEX_HELD(&p->p_lock));
932 
933 	bzero(sp, sizeof (*sp));
934 	flags = 0L;
935 	if (t->t_state == TS_STOPPED) {
936 		flags |= PR_STOPPED;
937 		if ((t->t_schedflag & TS_PSTART) == 0)
938 			flags |= PR_ISTOP;
939 	} else if (VSTOPPED(t)) {
940 		flags |= PR_STOPPED|PR_ISTOP;
941 	}
942 	if (!(flags & PR_ISTOP) && (t->t_proc_flag & TP_PRSTOP))
943 		flags |= PR_DSTOP;
944 	if (lwp->lwp_asleep)
945 		flags |= PR_ASLEEP;
946 	if (t == p->p_agenttp)
947 		flags |= PR_AGENT;
948 	if (!(t->t_proc_flag & TP_TWAIT))
949 		flags |= PR_DETACH;
950 	if (t->t_proc_flag & TP_DAEMON)
951 		flags |= PR_DAEMON;
952 	if (p->p_proc_flag & P_PR_FORK)
953 		flags |= PR_FORK;
954 	if (p->p_proc_flag & P_PR_RUNLCL)
955 		flags |= PR_RLC;
956 	if (p->p_proc_flag & P_PR_KILLCL)
957 		flags |= PR_KLC;
958 	if (p->p_proc_flag & P_PR_ASYNC)
959 		flags |= PR_ASYNC;
960 	if (p->p_proc_flag & P_PR_BPTADJ)
961 		flags |= PR_BPTADJ;
962 	if (p->p_proc_flag & P_PR_PTRACE)
963 		flags |= PR_PTRACE;
964 	if (p->p_flag & SMSACCT)
965 		flags |= PR_MSACCT;
966 	if (p->p_flag & SMSFORK)
967 		flags |= PR_MSFORK;
968 	if (p->p_flag & SVFWAIT)
969 		flags |= PR_VFORKP;
970 	sp->pr_flags = flags;
971 	if (VSTOPPED(t)) {
972 		sp->pr_why   = PR_REQUESTED;
973 		sp->pr_what  = 0;
974 	} else {
975 		sp->pr_why   = t->t_whystop;
976 		sp->pr_what  = t->t_whatstop;
977 	}
978 	sp->pr_lwpid = t->t_tid;
979 	sp->pr_cursig  = lwp->lwp_cursig;
980 	prassignset(&sp->pr_lwppend, &t->t_sig);
981 	schedctl_finish_sigblock(t);
982 	prassignset(&sp->pr_lwphold, &t->t_hold);
983 	if (t->t_whystop == PR_FAULTED) {
984 		siginfo_kto32(&lwp->lwp_siginfo, &sp->pr_info);
985 		if (t->t_whatstop == FLTPAGE)
986 			sp->pr_info.si_addr =
987 			    (caddr32_t)(uintptr_t)lwp->lwp_siginfo.si_addr;
988 	} else if (lwp->lwp_curinfo)
989 		siginfo_kto32(&lwp->lwp_curinfo->sq_info, &sp->pr_info);
990 	if (SI_FROMUSER(&lwp->lwp_siginfo) && zp->zone_id != GLOBAL_ZONEID &&
991 	    sp->pr_info.si_zoneid != zp->zone_id) {
992 		sp->pr_info.si_pid = zp->zone_zsched->p_pid;
993 		sp->pr_info.si_uid = 0;
994 		sp->pr_info.si_ctid = -1;
995 		sp->pr_info.si_zoneid = zp->zone_id;
996 	}
997 	sp->pr_altstack.ss_sp =
998 	    (caddr32_t)(uintptr_t)lwp->lwp_sigaltstack.ss_sp;
999 	sp->pr_altstack.ss_size = (size32_t)lwp->lwp_sigaltstack.ss_size;
1000 	sp->pr_altstack.ss_flags = (int32_t)lwp->lwp_sigaltstack.ss_flags;
1001 	prgetaction32(p, PTOU(p), lwp->lwp_cursig, &sp->pr_action);
1002 	sp->pr_oldcontext = (caddr32_t)lwp->lwp_oldcontext;
1003 	sp->pr_ustack = (caddr32_t)lwp->lwp_ustack;
1004 	(void) strncpy(sp->pr_clname, sclass[t->t_cid].cl_name,
1005 	    sizeof (sp->pr_clname) - 1);
1006 	if (flags & PR_STOPPED)
1007 		hrt2ts32(t->t_stoptime, &sp->pr_tstamp);
1008 	usr = ms->ms_acct[LMS_USER];
1009 	sys = ms->ms_acct[LMS_SYSTEM] + ms->ms_acct[LMS_TRAP];
1010 	scalehrtime(&usr);
1011 	scalehrtime(&sys);
1012 	hrt2ts32(usr, &sp->pr_utime);
1013 	hrt2ts32(sys, &sp->pr_stime);
1014 
1015 	/*
1016 	 * Fetch the current instruction, if not a system process.
1017 	 * We don't attempt this unless the lwp is stopped.
1018 	 */
1019 	if ((p->p_flag & SSYS) || p->p_as == &kas)
1020 		sp->pr_flags |= (PR_ISSYS|PR_PCINVAL);
1021 	else if (!(flags & PR_STOPPED))
1022 		sp->pr_flags |= PR_PCINVAL;
1023 	else if (!prfetchinstr(lwp, &instr))
1024 		sp->pr_flags |= PR_PCINVAL;
1025 	else
1026 		sp->pr_instr = (uint32_t)instr;
1027 
1028 	/*
1029 	 * Drop p_lock while touching the lwp's stack.
1030 	 */
1031 	mutex_exit(&p->p_lock);
1032 	if (prisstep(lwp))
1033 		sp->pr_flags |= PR_STEP;
1034 	if ((flags & (PR_STOPPED|PR_ASLEEP)) && t->t_sysnum) {
1035 		int i;
1036 
1037 		sp->pr_syscall = get_syscall32_args(lwp,
1038 		    (int *)sp->pr_sysarg, &i);
1039 		sp->pr_nsysarg = (ushort_t)i;
1040 	}
1041 	if ((flags & PR_STOPPED) || t == curthread)
1042 		prgetprregs32(lwp, sp->pr_reg);
1043 	if ((t->t_state == TS_STOPPED && t->t_whystop == PR_SYSEXIT) ||
1044 	    (flags & PR_VFORKP)) {
1045 		long r1, r2;
1046 		user_t *up;
1047 		auxv_t *auxp;
1048 		int i;
1049 
1050 		sp->pr_errno = prgetrvals(lwp, &r1, &r2);
1051 		if (sp->pr_errno == 0) {
1052 			sp->pr_rval1 = (int32_t)r1;
1053 			sp->pr_rval2 = (int32_t)r2;
1054 			sp->pr_errpriv = PRIV_NONE;
1055 		} else
1056 			sp->pr_errpriv = lwp->lwp_badpriv;
1057 
1058 		if (t->t_sysnum == SYS_execve) {
1059 			up = PTOU(p);
1060 			sp->pr_sysarg[0] = 0;
1061 			sp->pr_sysarg[1] = (caddr32_t)up->u_argv;
1062 			sp->pr_sysarg[2] = (caddr32_t)up->u_envp;
1063 			for (i = 0, auxp = up->u_auxv;
1064 			    i < sizeof (up->u_auxv) / sizeof (up->u_auxv[0]);
1065 			    i++, auxp++) {
1066 				if (auxp->a_type == AT_SUN_EXECNAME) {
1067 					sp->pr_sysarg[0] =
1068 					    (caddr32_t)
1069 					    (uintptr_t)auxp->a_un.a_ptr;
1070 					break;
1071 				}
1072 			}
1073 		}
1074 	}
1075 	if (prhasfp())
1076 		prgetprfpregs32(lwp, &sp->pr_fpreg);
1077 	mutex_enter(&p->p_lock);
1078 }
1079 
1080 void
prgetstatus32(proc_t * p,pstatus32_t * sp,zone_t * zp)1081 prgetstatus32(proc_t *p, pstatus32_t *sp, zone_t *zp)
1082 {
1083 	kthread_t *t;
1084 
1085 	ASSERT(MUTEX_HELD(&p->p_lock));
1086 
1087 	t = prchoose(p);	/* returns locked thread */
1088 	ASSERT(t != NULL);
1089 	thread_unlock(t);
1090 
1091 	/* just bzero the process part, prgetlwpstatus32() does the rest */
1092 	bzero(sp, sizeof (pstatus32_t) - sizeof (lwpstatus32_t));
1093 	sp->pr_nlwp = p->p_lwpcnt;
1094 	sp->pr_nzomb = p->p_zombcnt;
1095 	prassignset(&sp->pr_sigpend, &p->p_sig);
1096 	sp->pr_brkbase = (uint32_t)(uintptr_t)p->p_brkbase;
1097 	sp->pr_brksize = (uint32_t)p->p_brksize;
1098 	sp->pr_stkbase = (uint32_t)(uintptr_t)prgetstackbase(p);
1099 	sp->pr_stksize = (uint32_t)p->p_stksize;
1100 	sp->pr_pid   = p->p_pid;
1101 	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
1102 	    (p->p_flag & SZONETOP)) {
1103 		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
1104 		/*
1105 		 * Inside local zones, fake zsched's pid as parent pids for
1106 		 * processes which reference processes outside of the zone.
1107 		 */
1108 		sp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
1109 	} else {
1110 		sp->pr_ppid = p->p_ppid;
1111 	}
1112 	sp->pr_pgid  = p->p_pgrp;
1113 	sp->pr_sid   = p->p_sessp->s_sid;
1114 	sp->pr_taskid = p->p_task->tk_tkid;
1115 	sp->pr_projid = p->p_task->tk_proj->kpj_id;
1116 	sp->pr_zoneid = p->p_zone->zone_id;
1117 	hrt2ts32(mstate_aggr_state(p, LMS_USER), &sp->pr_utime);
1118 	hrt2ts32(mstate_aggr_state(p, LMS_SYSTEM), &sp->pr_stime);
1119 	TICK_TO_TIMESTRUC32(p->p_cutime, &sp->pr_cutime);
1120 	TICK_TO_TIMESTRUC32(p->p_cstime, &sp->pr_cstime);
1121 	prassignset(&sp->pr_sigtrace, &p->p_sigmask);
1122 	prassignset(&sp->pr_flttrace, &p->p_fltmask);
1123 	prassignset(&sp->pr_sysentry, &PTOU(p)->u_entrymask);
1124 	prassignset(&sp->pr_sysexit, &PTOU(p)->u_exitmask);
1125 	switch (p->p_model) {
1126 	case DATAMODEL_ILP32:
1127 		sp->pr_dmodel = PR_MODEL_ILP32;
1128 		break;
1129 	case DATAMODEL_LP64:
1130 		sp->pr_dmodel = PR_MODEL_LP64;
1131 		break;
1132 	}
1133 	if (p->p_agenttp)
1134 		sp->pr_agentid = p->p_agenttp->t_tid;
1135 
1136 	/* get the chosen lwp's status */
1137 	prgetlwpstatus32(t, &sp->pr_lwp, zp);
1138 
1139 	/* replicate the flags */
1140 	sp->pr_flags = sp->pr_lwp.pr_flags;
1141 }
1142 #endif	/* _SYSCALL32_IMPL */
1143 
1144 /*
1145  * Return lwp status.
1146  */
1147 void
prgetlwpstatus(kthread_t * t,lwpstatus_t * sp,zone_t * zp)1148 prgetlwpstatus(kthread_t *t, lwpstatus_t *sp, zone_t *zp)
1149 {
1150 	proc_t *p = ttoproc(t);
1151 	klwp_t *lwp = ttolwp(t);
1152 	struct mstate *ms = &lwp->lwp_mstate;
1153 	hrtime_t usr, sys;
1154 	int flags;
1155 	ulong_t instr;
1156 
1157 	ASSERT(MUTEX_HELD(&p->p_lock));
1158 
1159 	bzero(sp, sizeof (*sp));
1160 	flags = 0L;
1161 	if (t->t_state == TS_STOPPED) {
1162 		flags |= PR_STOPPED;
1163 		if ((t->t_schedflag & TS_PSTART) == 0)
1164 			flags |= PR_ISTOP;
1165 	} else if (VSTOPPED(t)) {
1166 		flags |= PR_STOPPED|PR_ISTOP;
1167 	}
1168 	if (!(flags & PR_ISTOP) && (t->t_proc_flag & TP_PRSTOP))
1169 		flags |= PR_DSTOP;
1170 	if (lwp->lwp_asleep)
1171 		flags |= PR_ASLEEP;
1172 	if (t == p->p_agenttp)
1173 		flags |= PR_AGENT;
1174 	if (!(t->t_proc_flag & TP_TWAIT))
1175 		flags |= PR_DETACH;
1176 	if (t->t_proc_flag & TP_DAEMON)
1177 		flags |= PR_DAEMON;
1178 	if (p->p_proc_flag & P_PR_FORK)
1179 		flags |= PR_FORK;
1180 	if (p->p_proc_flag & P_PR_RUNLCL)
1181 		flags |= PR_RLC;
1182 	if (p->p_proc_flag & P_PR_KILLCL)
1183 		flags |= PR_KLC;
1184 	if (p->p_proc_flag & P_PR_ASYNC)
1185 		flags |= PR_ASYNC;
1186 	if (p->p_proc_flag & P_PR_BPTADJ)
1187 		flags |= PR_BPTADJ;
1188 	if (p->p_proc_flag & P_PR_PTRACE)
1189 		flags |= PR_PTRACE;
1190 	if (p->p_flag & SMSACCT)
1191 		flags |= PR_MSACCT;
1192 	if (p->p_flag & SMSFORK)
1193 		flags |= PR_MSFORK;
1194 	if (p->p_flag & SVFWAIT)
1195 		flags |= PR_VFORKP;
1196 	if (p->p_pgidp->pid_pgorphaned)
1197 		flags |= PR_ORPHAN;
1198 	if (p->p_pidflag & CLDNOSIGCHLD)
1199 		flags |= PR_NOSIGCHLD;
1200 	if (p->p_pidflag & CLDWAITPID)
1201 		flags |= PR_WAITPID;
1202 	sp->pr_flags = flags;
1203 	if (VSTOPPED(t)) {
1204 		sp->pr_why   = PR_REQUESTED;
1205 		sp->pr_what  = 0;
1206 	} else {
1207 		sp->pr_why   = t->t_whystop;
1208 		sp->pr_what  = t->t_whatstop;
1209 	}
1210 	sp->pr_lwpid = t->t_tid;
1211 	sp->pr_cursig  = lwp->lwp_cursig;
1212 	prassignset(&sp->pr_lwppend, &t->t_sig);
1213 	schedctl_finish_sigblock(t);
1214 	prassignset(&sp->pr_lwphold, &t->t_hold);
1215 	if (t->t_whystop == PR_FAULTED)
1216 		bcopy(&lwp->lwp_siginfo,
1217 		    &sp->pr_info, sizeof (k_siginfo_t));
1218 	else if (lwp->lwp_curinfo)
1219 		bcopy(&lwp->lwp_curinfo->sq_info,
1220 		    &sp->pr_info, sizeof (k_siginfo_t));
1221 	if (SI_FROMUSER(&lwp->lwp_siginfo) && zp->zone_id != GLOBAL_ZONEID &&
1222 	    sp->pr_info.si_zoneid != zp->zone_id) {
1223 		sp->pr_info.si_pid = zp->zone_zsched->p_pid;
1224 		sp->pr_info.si_uid = 0;
1225 		sp->pr_info.si_ctid = -1;
1226 		sp->pr_info.si_zoneid = zp->zone_id;
1227 	}
1228 	sp->pr_altstack = lwp->lwp_sigaltstack;
1229 	prgetaction(p, PTOU(p), lwp->lwp_cursig, &sp->pr_action);
1230 	sp->pr_oldcontext = (uintptr_t)lwp->lwp_oldcontext;
1231 	sp->pr_ustack = lwp->lwp_ustack;
1232 	(void) strncpy(sp->pr_clname, sclass[t->t_cid].cl_name,
1233 	    sizeof (sp->pr_clname) - 1);
1234 	if (flags & PR_STOPPED)
1235 		hrt2ts(t->t_stoptime, &sp->pr_tstamp);
1236 	usr = ms->ms_acct[LMS_USER];
1237 	sys = ms->ms_acct[LMS_SYSTEM] + ms->ms_acct[LMS_TRAP];
1238 	scalehrtime(&usr);
1239 	scalehrtime(&sys);
1240 	hrt2ts(usr, &sp->pr_utime);
1241 	hrt2ts(sys, &sp->pr_stime);
1242 
1243 	/*
1244 	 * Fetch the current instruction, if not a system process.
1245 	 * We don't attempt this unless the lwp is stopped.
1246 	 */
1247 	if ((p->p_flag & SSYS) || p->p_as == &kas)
1248 		sp->pr_flags |= (PR_ISSYS|PR_PCINVAL);
1249 	else if (!(flags & PR_STOPPED))
1250 		sp->pr_flags |= PR_PCINVAL;
1251 	else if (!prfetchinstr(lwp, &instr))
1252 		sp->pr_flags |= PR_PCINVAL;
1253 	else
1254 		sp->pr_instr = instr;
1255 
1256 	/*
1257 	 * Drop p_lock while touching the lwp's stack.
1258 	 */
1259 	mutex_exit(&p->p_lock);
1260 	if (prisstep(lwp))
1261 		sp->pr_flags |= PR_STEP;
1262 	if ((flags & (PR_STOPPED|PR_ASLEEP)) && t->t_sysnum) {
1263 		int i;
1264 
1265 		sp->pr_syscall = get_syscall_args(lwp,
1266 		    (long *)sp->pr_sysarg, &i);
1267 		sp->pr_nsysarg = (ushort_t)i;
1268 	}
1269 	if ((flags & PR_STOPPED) || t == curthread)
1270 		prgetprregs(lwp, sp->pr_reg);
1271 	if ((t->t_state == TS_STOPPED && t->t_whystop == PR_SYSEXIT) ||
1272 	    (flags & PR_VFORKP)) {
1273 		user_t *up;
1274 		auxv_t *auxp;
1275 		int i;
1276 
1277 		sp->pr_errno = prgetrvals(lwp, &sp->pr_rval1, &sp->pr_rval2);
1278 		if (sp->pr_errno == 0)
1279 			sp->pr_errpriv = PRIV_NONE;
1280 		else
1281 			sp->pr_errpriv = lwp->lwp_badpriv;
1282 
1283 		if (t->t_sysnum == SYS_execve) {
1284 			up = PTOU(p);
1285 			sp->pr_sysarg[0] = 0;
1286 			sp->pr_sysarg[1] = (uintptr_t)up->u_argv;
1287 			sp->pr_sysarg[2] = (uintptr_t)up->u_envp;
1288 			for (i = 0, auxp = up->u_auxv;
1289 			    i < sizeof (up->u_auxv) / sizeof (up->u_auxv[0]);
1290 			    i++, auxp++) {
1291 				if (auxp->a_type == AT_SUN_EXECNAME) {
1292 					sp->pr_sysarg[0] =
1293 					    (uintptr_t)auxp->a_un.a_ptr;
1294 					break;
1295 				}
1296 			}
1297 		}
1298 	}
1299 	if (prhasfp())
1300 		prgetprfpregs(lwp, &sp->pr_fpreg);
1301 	mutex_enter(&p->p_lock);
1302 }
1303 
1304 /*
1305  * Get the sigaction structure for the specified signal.  The u-block
1306  * must already have been mapped in by the caller.
1307  */
1308 void
prgetaction(proc_t * p,user_t * up,uint_t sig,struct sigaction * sp)1309 prgetaction(proc_t *p, user_t *up, uint_t sig, struct sigaction *sp)
1310 {
1311 	int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;
1312 
1313 	bzero(sp, sizeof (*sp));
1314 
1315 	if (sig != 0 && (unsigned)sig < nsig) {
1316 		sp->sa_handler = up->u_signal[sig-1];
1317 		prassignset(&sp->sa_mask, &up->u_sigmask[sig-1]);
1318 		if (sigismember(&up->u_sigonstack, sig))
1319 			sp->sa_flags |= SA_ONSTACK;
1320 		if (sigismember(&up->u_sigresethand, sig))
1321 			sp->sa_flags |= SA_RESETHAND;
1322 		if (sigismember(&up->u_sigrestart, sig))
1323 			sp->sa_flags |= SA_RESTART;
1324 		if (sigismember(&p->p_siginfo, sig))
1325 			sp->sa_flags |= SA_SIGINFO;
1326 		if (sigismember(&up->u_signodefer, sig))
1327 			sp->sa_flags |= SA_NODEFER;
1328 		if (sig == SIGCLD) {
1329 			if (p->p_flag & SNOWAIT)
1330 				sp->sa_flags |= SA_NOCLDWAIT;
1331 			if ((p->p_flag & SJCTL) == 0)
1332 				sp->sa_flags |= SA_NOCLDSTOP;
1333 		}
1334 	}
1335 }
1336 
1337 #ifdef _SYSCALL32_IMPL
1338 void
prgetaction32(proc_t * p,user_t * up,uint_t sig,struct sigaction32 * sp)1339 prgetaction32(proc_t *p, user_t *up, uint_t sig, struct sigaction32 *sp)
1340 {
1341 	int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;
1342 
1343 	bzero(sp, sizeof (*sp));
1344 
1345 	if (sig != 0 && (unsigned)sig < nsig) {
1346 		sp->sa_handler = (caddr32_t)(uintptr_t)up->u_signal[sig-1];
1347 		prassignset(&sp->sa_mask, &up->u_sigmask[sig-1]);
1348 		if (sigismember(&up->u_sigonstack, sig))
1349 			sp->sa_flags |= SA_ONSTACK;
1350 		if (sigismember(&up->u_sigresethand, sig))
1351 			sp->sa_flags |= SA_RESETHAND;
1352 		if (sigismember(&up->u_sigrestart, sig))
1353 			sp->sa_flags |= SA_RESTART;
1354 		if (sigismember(&p->p_siginfo, sig))
1355 			sp->sa_flags |= SA_SIGINFO;
1356 		if (sigismember(&up->u_signodefer, sig))
1357 			sp->sa_flags |= SA_NODEFER;
1358 		if (sig == SIGCLD) {
1359 			if (p->p_flag & SNOWAIT)
1360 				sp->sa_flags |= SA_NOCLDWAIT;
1361 			if ((p->p_flag & SJCTL) == 0)
1362 				sp->sa_flags |= SA_NOCLDSTOP;
1363 		}
1364 	}
1365 }
1366 #endif	/* _SYSCALL32_IMPL */
1367 
1368 /*
1369  * Count the number of segments in this process's address space.
1370  */
1371 int
prnsegs(struct as * as,int reserved)1372 prnsegs(struct as *as, int reserved)
1373 {
1374 	int n = 0;
1375 	struct seg *seg;
1376 
1377 	ASSERT(as != &kas && AS_WRITE_HELD(as));
1378 
1379 	for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
1380 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
1381 		caddr_t saddr, naddr;
1382 		void *tmp = NULL;
1383 
1384 		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1385 			(void) pr_getprot(seg, reserved, &tmp,
1386 			    &saddr, &naddr, eaddr);
1387 			if (saddr != naddr)
1388 				n++;
1389 		}
1390 
1391 		ASSERT(tmp == NULL);
1392 	}
1393 
1394 	return (n);
1395 }
1396 
/*
 * Convert uint32_t to decimal string w/o leading zeros.
 * Add trailing null characters if 'len' is greater than string length.
 * Return the string length.
 *
 * No terminator is written unless 'len' asks for padding, and the
 * digits are always written in full even if 'len' is smaller.
 */
int
pr_u32tos(uint32_t n, char *s, int len)
{
	char digits[11];	/* 32-bit unsigned integer fits in 10 digits */
	int ndigits = 0;
	int i;

	/* generate the digits least-significant first */
	do {
		digits[ndigits++] = (char)('0' + n % 10);
		n /= 10;
	} while (n != 0);

	/* emit them most-significant first */
	for (i = 0; i < ndigits; i++)
		s[i] = digits[ndigits - 1 - i];

	/* optional pad */
	for (i = ndigits; i < len; i++)
		s[i] = '\0';

	return (ndigits);
}
1425 
/*
 * Convert uint64_t to decimal string w/o leading zeros.
 * Return the string length.
 *
 * No terminating null is written; the caller appends whatever follows.
 */
static int
pr_u64tos(uint64_t n, char *s)
{
	char digits[21];	/* 64-bit unsigned integer fits in 20 digits */
	int ndigits = 0;
	int i;

	/* generate the digits least-significant first */
	do {
		digits[ndigits++] = (char)('0' + n % 10);
		n /= 10;
	} while (n != 0);

	/* emit them most-significant first */
	for (i = 0; i < ndigits; i++)
		s[i] = digits[ndigits - 1 - i];

	return (ndigits);
}
1450 
1451 void
pr_object_name(char * name,vnode_t * vp,struct vattr * vattr)1452 pr_object_name(char *name, vnode_t *vp, struct vattr *vattr)
1453 {
1454 	char *s = name;
1455 	struct vfs *vfsp;
1456 	struct vfssw *vfsswp;
1457 
1458 	if ((vfsp = vp->v_vfsp) != NULL &&
1459 	    ((vfsswp = vfssw + vfsp->vfs_fstype), vfsswp->vsw_name) &&
1460 	    *vfsswp->vsw_name) {
1461 		(void) strcpy(s, vfsswp->vsw_name);
1462 		s += strlen(s);
1463 		*s++ = '.';
1464 	}
1465 	s += pr_u32tos(getmajor(vattr->va_fsid), s, 0);
1466 	*s++ = '.';
1467 	s += pr_u32tos(getminor(vattr->va_fsid), s, 0);
1468 	*s++ = '.';
1469 	s += pr_u64tos(vattr->va_nodeid, s);
1470 	*s++ = '\0';
1471 }
1472 
1473 struct seg *
break_seg(proc_t * p)1474 break_seg(proc_t *p)
1475 {
1476 	caddr_t addr = p->p_brkbase;
1477 	struct seg *seg;
1478 	struct vnode *vp;
1479 
1480 	if (p->p_brksize != 0)
1481 		addr += p->p_brksize - 1;
1482 	seg = as_segat(p->p_as, addr);
1483 	if (seg != NULL && seg->s_ops == &segvn_ops &&
1484 	    (SEGOP_GETVP(seg, seg->s_base, &vp) != 0 || vp == NULL))
1485 		return (seg);
1486 	return (NULL);
1487 }
1488 
1489 /*
1490  * Implementation of service functions to handle procfs generic chained
1491  * copyout buffers.
1492  */
1493 typedef struct pr_iobuf_list {
1494 	list_node_t	piol_link;	/* buffer linkage */
1495 	size_t		piol_size;	/* total size (header + data) */
1496 	size_t		piol_usedsize;	/* amount to copy out from this buf */
1497 } piol_t;
1498 
1499 #define	MAPSIZE	(64 * 1024)
1500 #define	PIOL_DATABUF(iol)	((void *)(&(iol)[1]))
1501 
1502 void
pr_iol_initlist(list_t * iolhead,size_t itemsize,int n)1503 pr_iol_initlist(list_t *iolhead, size_t itemsize, int n)
1504 {
1505 	piol_t	*iol;
1506 	size_t	initial_size = MIN(1, n) * itemsize;
1507 
1508 	list_create(iolhead, sizeof (piol_t), offsetof(piol_t, piol_link));
1509 
1510 	ASSERT(list_head(iolhead) == NULL);
1511 	ASSERT(itemsize < MAPSIZE - sizeof (*iol));
1512 	ASSERT(initial_size > 0);
1513 
1514 	/*
1515 	 * Someone creating chained copyout buffers may ask for less than
1516 	 * MAPSIZE if the amount of data to be buffered is known to be
1517 	 * smaller than that.
1518 	 * But in order to prevent involuntary self-denial of service,
1519 	 * the requested input size is clamped at MAPSIZE.
1520 	 */
1521 	initial_size = MIN(MAPSIZE, initial_size + sizeof (*iol));
1522 	iol = kmem_alloc(initial_size, KM_SLEEP);
1523 	list_insert_head(iolhead, iol);
1524 	iol->piol_usedsize = 0;
1525 	iol->piol_size = initial_size;
1526 }
1527 
1528 void *
pr_iol_newbuf(list_t * iolhead,size_t itemsize)1529 pr_iol_newbuf(list_t *iolhead, size_t itemsize)
1530 {
1531 	piol_t	*iol;
1532 	char	*new;
1533 
1534 	ASSERT(itemsize < MAPSIZE - sizeof (*iol));
1535 	ASSERT(list_head(iolhead) != NULL);
1536 
1537 	iol = (piol_t *)list_tail(iolhead);
1538 
1539 	if (iol->piol_size <
1540 	    iol->piol_usedsize + sizeof (*iol) + itemsize) {
1541 		/*
1542 		 * Out of space in the current buffer. Allocate more.
1543 		 */
1544 		piol_t *newiol;
1545 
1546 		newiol = kmem_alloc(MAPSIZE, KM_SLEEP);
1547 		newiol->piol_size = MAPSIZE;
1548 		newiol->piol_usedsize = 0;
1549 
1550 		list_insert_after(iolhead, iol, newiol);
1551 		iol = list_next(iolhead, iol);
1552 		ASSERT(iol == newiol);
1553 	}
1554 	new = (char *)PIOL_DATABUF(iol) + iol->piol_usedsize;
1555 	iol->piol_usedsize += itemsize;
1556 	bzero(new, itemsize);
1557 	return (new);
1558 }
1559 
1560 int
pr_iol_copyout_and_free(list_t * iolhead,caddr_t * tgt,int errin)1561 pr_iol_copyout_and_free(list_t *iolhead, caddr_t *tgt, int errin)
1562 {
1563 	int error = errin;
1564 	piol_t	*iol;
1565 
1566 	while ((iol = list_head(iolhead)) != NULL) {
1567 		list_remove(iolhead, iol);
1568 		if (!error) {
1569 			if (copyout(PIOL_DATABUF(iol), *tgt,
1570 			    iol->piol_usedsize))
1571 				error = EFAULT;
1572 			*tgt += iol->piol_usedsize;
1573 		}
1574 		kmem_free(iol, iol->piol_size);
1575 	}
1576 	list_destroy(iolhead);
1577 
1578 	return (error);
1579 }
1580 
1581 int
pr_iol_uiomove_and_free(list_t * iolhead,uio_t * uiop,int errin)1582 pr_iol_uiomove_and_free(list_t *iolhead, uio_t *uiop, int errin)
1583 {
1584 	offset_t	off = uiop->uio_offset;
1585 	char		*base;
1586 	size_t		size;
1587 	piol_t		*iol;
1588 	int		error = errin;
1589 
1590 	while ((iol = list_head(iolhead)) != NULL) {
1591 		list_remove(iolhead, iol);
1592 		base = PIOL_DATABUF(iol);
1593 		size = iol->piol_usedsize;
1594 		if (off <= size && error == 0 && uiop->uio_resid > 0)
1595 			error = uiomove(base + off, size - off,
1596 			    UIO_READ, uiop);
1597 		off = MAX(0, off - (offset_t)size);
1598 		kmem_free(iol, iol->piol_size);
1599 	}
1600 	list_destroy(iolhead);
1601 
1602 	return (error);
1603 }
1604 
1605 /*
1606  * Return an array of structures with memory map information.
1607  * We allocate here; the caller must deallocate.
1608  */
1609 int
prgetmap(proc_t * p,int reserved,list_t * iolhead)1610 prgetmap(proc_t *p, int reserved, list_t *iolhead)
1611 {
1612 	struct as *as = p->p_as;
1613 	prmap_t *mp;
1614 	struct seg *seg;
1615 	struct seg *brkseg, *stkseg;
1616 	struct vnode *vp;
1617 	struct vattr vattr;
1618 	uint_t prot;
1619 
1620 	ASSERT(as != &kas && AS_WRITE_HELD(as));
1621 
1622 	/*
1623 	 * Request an initial buffer size that doesn't waste memory
1624 	 * if the address space has only a small number of segments.
1625 	 */
1626 	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));
1627 
1628 	if ((seg = AS_SEGFIRST(as)) == NULL)
1629 		return (0);
1630 
1631 	brkseg = break_seg(p);
1632 	stkseg = as_segat(as, prgetstackbase(p));
1633 
1634 	do {
1635 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
1636 		caddr_t saddr, naddr;
1637 		void *tmp = NULL;
1638 
1639 		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1640 			prot = pr_getprot(seg, reserved, &tmp,
1641 			    &saddr, &naddr, eaddr);
1642 			if (saddr == naddr)
1643 				continue;
1644 
1645 			mp = pr_iol_newbuf(iolhead, sizeof (*mp));
1646 
1647 			mp->pr_vaddr = (uintptr_t)saddr;
1648 			mp->pr_size = naddr - saddr;
1649 			mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
1650 			mp->pr_mflags = 0;
1651 			if (prot & PROT_READ)
1652 				mp->pr_mflags |= MA_READ;
1653 			if (prot & PROT_WRITE)
1654 				mp->pr_mflags |= MA_WRITE;
1655 			if (prot & PROT_EXEC)
1656 				mp->pr_mflags |= MA_EXEC;
1657 			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
1658 				mp->pr_mflags |= MA_SHARED;
1659 			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
1660 				mp->pr_mflags |= MA_NORESERVE;
1661 			if (seg->s_ops == &segspt_shmops ||
1662 			    (seg->s_ops == &segvn_ops &&
1663 			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
1664 				mp->pr_mflags |= MA_ANON;
1665 			if (seg == brkseg)
1666 				mp->pr_mflags |= MA_BREAK;
1667 			else if (seg == stkseg) {
1668 				mp->pr_mflags |= MA_STACK;
1669 				if (reserved) {
1670 					size_t maxstack =
1671 					    ((size_t)p->p_stk_ctl +
1672 					    PAGEOFFSET) & PAGEMASK;
1673 					mp->pr_vaddr =
1674 					    (uintptr_t)prgetstackbase(p) +
1675 					    p->p_stksize - maxstack;
1676 					mp->pr_size = (uintptr_t)naddr -
1677 					    mp->pr_vaddr;
1678 				}
1679 			}
1680 			if (seg->s_ops == &segspt_shmops)
1681 				mp->pr_mflags |= MA_ISM | MA_SHM;
1682 			mp->pr_pagesize = PAGESIZE;
1683 
1684 			/*
1685 			 * Manufacture a filename for the "object" directory.
1686 			 */
1687 			vattr.va_mask = AT_FSID|AT_NODEID;
1688 			if (seg->s_ops == &segvn_ops &&
1689 			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
1690 			    vp != NULL && vp->v_type == VREG &&
1691 			    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
1692 				if (vp == p->p_exec)
1693 					(void) strcpy(mp->pr_mapname, "a.out");
1694 				else
1695 					pr_object_name(mp->pr_mapname,
1696 					    vp, &vattr);
1697 			}
1698 
1699 			/*
1700 			 * Get the SysV shared memory id, if any.
1701 			 */
1702 			if ((mp->pr_mflags & MA_SHARED) && p->p_segacct &&
1703 			    (mp->pr_shmid = shmgetid(p, seg->s_base)) !=
1704 			    SHMID_NONE) {
1705 				if (mp->pr_shmid == SHMID_FREE)
1706 					mp->pr_shmid = -1;
1707 
1708 				mp->pr_mflags |= MA_SHM;
1709 			} else {
1710 				mp->pr_shmid = -1;
1711 			}
1712 		}
1713 		ASSERT(tmp == NULL);
1714 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
1715 
1716 	return (0);
1717 }
1718 
1719 #ifdef _SYSCALL32_IMPL
1720 int
prgetmap32(proc_t * p,int reserved,list_t * iolhead)1721 prgetmap32(proc_t *p, int reserved, list_t *iolhead)
1722 {
1723 	struct as *as = p->p_as;
1724 	prmap32_t *mp;
1725 	struct seg *seg;
1726 	struct seg *brkseg, *stkseg;
1727 	struct vnode *vp;
1728 	struct vattr vattr;
1729 	uint_t prot;
1730 
1731 	ASSERT(as != &kas && AS_WRITE_HELD(as));
1732 
1733 	/*
1734 	 * Request an initial buffer size that doesn't waste memory
1735 	 * if the address space has only a small number of segments.
1736 	 */
1737 	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));
1738 
1739 	if ((seg = AS_SEGFIRST(as)) == NULL)
1740 		return (0);
1741 
1742 	brkseg = break_seg(p);
1743 	stkseg = as_segat(as, prgetstackbase(p));
1744 
1745 	do {
1746 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
1747 		caddr_t saddr, naddr;
1748 		void *tmp = NULL;
1749 
1750 		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1751 			prot = pr_getprot(seg, reserved, &tmp,
1752 			    &saddr, &naddr, eaddr);
1753 			if (saddr == naddr)
1754 				continue;
1755 
1756 			mp = pr_iol_newbuf(iolhead, sizeof (*mp));
1757 
1758 			mp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
1759 			mp->pr_size = (size32_t)(naddr - saddr);
1760 			mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
1761 			mp->pr_mflags = 0;
1762 			if (prot & PROT_READ)
1763 				mp->pr_mflags |= MA_READ;
1764 			if (prot & PROT_WRITE)
1765 				mp->pr_mflags |= MA_WRITE;
1766 			if (prot & PROT_EXEC)
1767 				mp->pr_mflags |= MA_EXEC;
1768 			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
1769 				mp->pr_mflags |= MA_SHARED;
1770 			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
1771 				mp->pr_mflags |= MA_NORESERVE;
1772 			if (seg->s_ops == &segspt_shmops ||
1773 			    (seg->s_ops == &segvn_ops &&
1774 			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
1775 				mp->pr_mflags |= MA_ANON;
1776 			if (seg == brkseg)
1777 				mp->pr_mflags |= MA_BREAK;
1778 			else if (seg == stkseg) {
1779 				mp->pr_mflags |= MA_STACK;
1780 				if (reserved) {
1781 					size_t maxstack =
1782 					    ((size_t)p->p_stk_ctl +
1783 					    PAGEOFFSET) & PAGEMASK;
1784 					uintptr_t vaddr =
1785 					    (uintptr_t)prgetstackbase(p) +
1786 					    p->p_stksize - maxstack;
1787 					mp->pr_vaddr = (caddr32_t)vaddr;
1788 					mp->pr_size = (size32_t)
1789 					    ((uintptr_t)naddr - vaddr);
1790 				}
1791 			}
1792 			if (seg->s_ops == &segspt_shmops)
1793 				mp->pr_mflags |= MA_ISM | MA_SHM;
1794 			mp->pr_pagesize = PAGESIZE;
1795 
1796 			/*
1797 			 * Manufacture a filename for the "object" directory.
1798 			 */
1799 			vattr.va_mask = AT_FSID|AT_NODEID;
1800 			if (seg->s_ops == &segvn_ops &&
1801 			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
1802 			    vp != NULL && vp->v_type == VREG &&
1803 			    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
1804 				if (vp == p->p_exec)
1805 					(void) strcpy(mp->pr_mapname, "a.out");
1806 				else
1807 					pr_object_name(mp->pr_mapname,
1808 					    vp, &vattr);
1809 			}
1810 
1811 			/*
1812 			 * Get the SysV shared memory id, if any.
1813 			 */
1814 			if ((mp->pr_mflags & MA_SHARED) && p->p_segacct &&
1815 			    (mp->pr_shmid = shmgetid(p, seg->s_base)) !=
1816 			    SHMID_NONE) {
1817 				if (mp->pr_shmid == SHMID_FREE)
1818 					mp->pr_shmid = -1;
1819 
1820 				mp->pr_mflags |= MA_SHM;
1821 			} else {
1822 				mp->pr_shmid = -1;
1823 			}
1824 		}
1825 		ASSERT(tmp == NULL);
1826 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
1827 
1828 	return (0);
1829 }
1830 #endif	/* _SYSCALL32_IMPL */
1831 
1832 /*
1833  * Return the size of the /proc page data file.
1834  */
1835 size_t
prpdsize(struct as * as)1836 prpdsize(struct as *as)
1837 {
1838 	struct seg *seg;
1839 	size_t size;
1840 
1841 	ASSERT(as != &kas && AS_WRITE_HELD(as));
1842 
1843 	if ((seg = AS_SEGFIRST(as)) == NULL)
1844 		return (0);
1845 
1846 	size = sizeof (prpageheader_t);
1847 	do {
1848 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
1849 		caddr_t saddr, naddr;
1850 		void *tmp = NULL;
1851 		size_t npage;
1852 
1853 		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1854 			(void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
1855 			if ((npage = (naddr - saddr) / PAGESIZE) != 0)
1856 				size += sizeof (prasmap_t) + round8(npage);
1857 		}
1858 		ASSERT(tmp == NULL);
1859 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
1860 
1861 	return (size);
1862 }
1863 
1864 #ifdef _SYSCALL32_IMPL
1865 size_t
prpdsize32(struct as * as)1866 prpdsize32(struct as *as)
1867 {
1868 	struct seg *seg;
1869 	size_t size;
1870 
1871 	ASSERT(as != &kas && AS_WRITE_HELD(as));
1872 
1873 	if ((seg = AS_SEGFIRST(as)) == NULL)
1874 		return (0);
1875 
1876 	size = sizeof (prpageheader32_t);
1877 	do {
1878 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
1879 		caddr_t saddr, naddr;
1880 		void *tmp = NULL;
1881 		size_t npage;
1882 
1883 		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1884 			(void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
1885 			if ((npage = (naddr - saddr) / PAGESIZE) != 0)
1886 				size += sizeof (prasmap32_t) + round8(npage);
1887 		}
1888 		ASSERT(tmp == NULL);
1889 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
1890 
1891 	return (size);
1892 }
1893 #endif	/* _SYSCALL32_IMPL */
1894 
1895 /*
1896  * Read page data information.
1897  */
1898 int
prpdread(proc_t * p,uint_t hatid,struct uio * uiop)1899 prpdread(proc_t *p, uint_t hatid, struct uio *uiop)
1900 {
1901 	struct as *as = p->p_as;
1902 	caddr_t buf;
1903 	size_t size;
1904 	prpageheader_t *php;
1905 	prasmap_t *pmp;
1906 	struct seg *seg;
1907 	int error;
1908 
1909 again:
1910 	AS_LOCK_ENTER(as, RW_WRITER);
1911 
1912 	if ((seg = AS_SEGFIRST(as)) == NULL) {
1913 		AS_LOCK_EXIT(as);
1914 		return (0);
1915 	}
1916 	size = prpdsize(as);
1917 	if (uiop->uio_resid < size) {
1918 		AS_LOCK_EXIT(as);
1919 		return (E2BIG);
1920 	}
1921 
1922 	buf = kmem_zalloc(size, KM_SLEEP);
1923 	php = (prpageheader_t *)buf;
1924 	pmp = (prasmap_t *)(buf + sizeof (prpageheader_t));
1925 
1926 	hrt2ts(gethrtime(), &php->pr_tstamp);
1927 	php->pr_nmap = 0;
1928 	php->pr_npage = 0;
1929 	do {
1930 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
1931 		caddr_t saddr, naddr;
1932 		void *tmp = NULL;
1933 
1934 		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1935 			struct vnode *vp;
1936 			struct vattr vattr;
1937 			size_t len;
1938 			size_t npage;
1939 			uint_t prot;
1940 			uintptr_t next;
1941 
1942 			prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
1943 			if ((len = (size_t)(naddr - saddr)) == 0)
1944 				continue;
1945 			npage = len / PAGESIZE;
1946 			next = (uintptr_t)(pmp + 1) + round8(npage);
1947 			/*
1948 			 * It's possible that the address space can change
1949 			 * subtlely even though we're holding as->a_lock
1950 			 * due to the nondeterminism of page_exists() in
1951 			 * the presence of asychronously flushed pages or
1952 			 * mapped files whose sizes are changing.
1953 			 * page_exists() may be called indirectly from
1954 			 * pr_getprot() by a SEGOP_INCORE() routine.
1955 			 * If this happens we need to make sure we don't
1956 			 * overrun the buffer whose size we computed based
1957 			 * on the initial iteration through the segments.
1958 			 * Once we've detected an overflow, we need to clean
1959 			 * up the temporary memory allocated in pr_getprot()
1960 			 * and retry. If there's a pending signal, we return
1961 			 * EINTR so that this thread can be dislodged if
1962 			 * a latent bug causes us to spin indefinitely.
1963 			 */
1964 			if (next > (uintptr_t)buf + size) {
1965 				pr_getprot_done(&tmp);
1966 				AS_LOCK_EXIT(as);
1967 
1968 				kmem_free(buf, size);
1969 
1970 				if (ISSIG(curthread, JUSTLOOKING))
1971 					return (EINTR);
1972 
1973 				goto again;
1974 			}
1975 
1976 			php->pr_nmap++;
1977 			php->pr_npage += npage;
1978 			pmp->pr_vaddr = (uintptr_t)saddr;
1979 			pmp->pr_npage = npage;
1980 			pmp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
1981 			pmp->pr_mflags = 0;
1982 			if (prot & PROT_READ)
1983 				pmp->pr_mflags |= MA_READ;
1984 			if (prot & PROT_WRITE)
1985 				pmp->pr_mflags |= MA_WRITE;
1986 			if (prot & PROT_EXEC)
1987 				pmp->pr_mflags |= MA_EXEC;
1988 			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
1989 				pmp->pr_mflags |= MA_SHARED;
1990 			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
1991 				pmp->pr_mflags |= MA_NORESERVE;
1992 			if (seg->s_ops == &segspt_shmops ||
1993 			    (seg->s_ops == &segvn_ops &&
1994 			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
1995 				pmp->pr_mflags |= MA_ANON;
1996 			if (seg->s_ops == &segspt_shmops)
1997 				pmp->pr_mflags |= MA_ISM | MA_SHM;
1998 			pmp->pr_pagesize = PAGESIZE;
1999 			/*
2000 			 * Manufacture a filename for the "object" directory.
2001 			 */
2002 			vattr.va_mask = AT_FSID|AT_NODEID;
2003 			if (seg->s_ops == &segvn_ops &&
2004 			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
2005 			    vp != NULL && vp->v_type == VREG &&
2006 			    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
2007 				if (vp == p->p_exec)
2008 					(void) strcpy(pmp->pr_mapname, "a.out");
2009 				else
2010 					pr_object_name(pmp->pr_mapname,
2011 					    vp, &vattr);
2012 			}
2013 
2014 			/*
2015 			 * Get the SysV shared memory id, if any.
2016 			 */
2017 			if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct &&
2018 			    (pmp->pr_shmid = shmgetid(p, seg->s_base)) !=
2019 			    SHMID_NONE) {
2020 				if (pmp->pr_shmid == SHMID_FREE)
2021 					pmp->pr_shmid = -1;
2022 
2023 				pmp->pr_mflags |= MA_SHM;
2024 			} else {
2025 				pmp->pr_shmid = -1;
2026 			}
2027 
2028 			hat_getstat(as, saddr, len, hatid,
2029 			    (char *)(pmp + 1), HAT_SYNC_ZERORM);
2030 			pmp = (prasmap_t *)next;
2031 		}
2032 		ASSERT(tmp == NULL);
2033 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
2034 
2035 	AS_LOCK_EXIT(as);
2036 
2037 	ASSERT((uintptr_t)pmp <= (uintptr_t)buf + size);
2038 	error = uiomove(buf, (caddr_t)pmp - buf, UIO_READ, uiop);
2039 	kmem_free(buf, size);
2040 
2041 	return (error);
2042 }
2043 
2044 #ifdef _SYSCALL32_IMPL
2045 int
prpdread32(proc_t * p,uint_t hatid,struct uio * uiop)2046 prpdread32(proc_t *p, uint_t hatid, struct uio *uiop)
2047 {
2048 	struct as *as = p->p_as;
2049 	caddr_t buf;
2050 	size_t size;
2051 	prpageheader32_t *php;
2052 	prasmap32_t *pmp;
2053 	struct seg *seg;
2054 	int error;
2055 
2056 again:
2057 	AS_LOCK_ENTER(as, RW_WRITER);
2058 
2059 	if ((seg = AS_SEGFIRST(as)) == NULL) {
2060 		AS_LOCK_EXIT(as);
2061 		return (0);
2062 	}
2063 	size = prpdsize32(as);
2064 	if (uiop->uio_resid < size) {
2065 		AS_LOCK_EXIT(as);
2066 		return (E2BIG);
2067 	}
2068 
2069 	buf = kmem_zalloc(size, KM_SLEEP);
2070 	php = (prpageheader32_t *)buf;
2071 	pmp = (prasmap32_t *)(buf + sizeof (prpageheader32_t));
2072 
2073 	hrt2ts32(gethrtime(), &php->pr_tstamp);
2074 	php->pr_nmap = 0;
2075 	php->pr_npage = 0;
2076 	do {
2077 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
2078 		caddr_t saddr, naddr;
2079 		void *tmp = NULL;
2080 
2081 		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
2082 			struct vnode *vp;
2083 			struct vattr vattr;
2084 			size_t len;
2085 			size_t npage;
2086 			uint_t prot;
2087 			uintptr_t next;
2088 
2089 			prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
2090 			if ((len = (size_t)(naddr - saddr)) == 0)
2091 				continue;
2092 			npage = len / PAGESIZE;
2093 			next = (uintptr_t)(pmp + 1) + round8(npage);
2094 			/*
2095 			 * It's possible that the address space can change
2096 			 * subtlely even though we're holding as->a_lock
2097 			 * due to the nondeterminism of page_exists() in
2098 			 * the presence of asychronously flushed pages or
2099 			 * mapped files whose sizes are changing.
2100 			 * page_exists() may be called indirectly from
2101 			 * pr_getprot() by a SEGOP_INCORE() routine.
2102 			 * If this happens we need to make sure we don't
2103 			 * overrun the buffer whose size we computed based
2104 			 * on the initial iteration through the segments.
2105 			 * Once we've detected an overflow, we need to clean
2106 			 * up the temporary memory allocated in pr_getprot()
2107 			 * and retry. If there's a pending signal, we return
2108 			 * EINTR so that this thread can be dislodged if
2109 			 * a latent bug causes us to spin indefinitely.
2110 			 */
2111 			if (next > (uintptr_t)buf + size) {
2112 				pr_getprot_done(&tmp);
2113 				AS_LOCK_EXIT(as);
2114 
2115 				kmem_free(buf, size);
2116 
2117 				if (ISSIG(curthread, JUSTLOOKING))
2118 					return (EINTR);
2119 
2120 				goto again;
2121 			}
2122 
2123 			php->pr_nmap++;
2124 			php->pr_npage += npage;
2125 			pmp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
2126 			pmp->pr_npage = (size32_t)npage;
2127 			pmp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
2128 			pmp->pr_mflags = 0;
2129 			if (prot & PROT_READ)
2130 				pmp->pr_mflags |= MA_READ;
2131 			if (prot & PROT_WRITE)
2132 				pmp->pr_mflags |= MA_WRITE;
2133 			if (prot & PROT_EXEC)
2134 				pmp->pr_mflags |= MA_EXEC;
2135 			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
2136 				pmp->pr_mflags |= MA_SHARED;
2137 			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
2138 				pmp->pr_mflags |= MA_NORESERVE;
2139 			if (seg->s_ops == &segspt_shmops ||
2140 			    (seg->s_ops == &segvn_ops &&
2141 			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
2142 				pmp->pr_mflags |= MA_ANON;
2143 			if (seg->s_ops == &segspt_shmops)
2144 				pmp->pr_mflags |= MA_ISM | MA_SHM;
2145 			pmp->pr_pagesize = PAGESIZE;
2146 			/*
2147 			 * Manufacture a filename for the "object" directory.
2148 			 */
2149 			vattr.va_mask = AT_FSID|AT_NODEID;
2150 			if (seg->s_ops == &segvn_ops &&
2151 			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
2152 			    vp != NULL && vp->v_type == VREG &&
2153 			    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
2154 				if (vp == p->p_exec)
2155 					(void) strcpy(pmp->pr_mapname, "a.out");
2156 				else
2157 					pr_object_name(pmp->pr_mapname,
2158 					    vp, &vattr);
2159 			}
2160 
2161 			/*
2162 			 * Get the SysV shared memory id, if any.
2163 			 */
2164 			if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct &&
2165 			    (pmp->pr_shmid = shmgetid(p, seg->s_base)) !=
2166 			    SHMID_NONE) {
2167 				if (pmp->pr_shmid == SHMID_FREE)
2168 					pmp->pr_shmid = -1;
2169 
2170 				pmp->pr_mflags |= MA_SHM;
2171 			} else {
2172 				pmp->pr_shmid = -1;
2173 			}
2174 
2175 			hat_getstat(as, saddr, len, hatid,
2176 			    (char *)(pmp + 1), HAT_SYNC_ZERORM);
2177 			pmp = (prasmap32_t *)next;
2178 		}
2179 		ASSERT(tmp == NULL);
2180 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
2181 
2182 	AS_LOCK_EXIT(as);
2183 
2184 	ASSERT((uintptr_t)pmp <= (uintptr_t)buf + size);
2185 	error = uiomove(buf, (caddr_t)pmp - buf, UIO_READ, uiop);
2186 	kmem_free(buf, size);
2187 
2188 	return (error);
2189 }
2190 #endif	/* _SYSCALL32_IMPL */
2191 
2192 ushort_t
prgetpctcpu(uint64_t pct)2193 prgetpctcpu(uint64_t pct)
2194 {
2195 	/*
2196 	 * The value returned will be relevant in the zone of the examiner,
2197 	 * which may not be the same as the zone which performed the procfs
2198 	 * mount.
2199 	 */
2200 	int nonline = zone_ncpus_online_get(curproc->p_zone);
2201 
2202 	/*
2203 	 * Prorate over online cpus so we don't exceed 100%
2204 	 */
2205 	if (nonline > 1)
2206 		pct /= nonline;
2207 	pct >>= 16;		/* convert to 16-bit scaled integer */
2208 	if (pct > 0x8000)	/* might happen, due to rounding */
2209 		pct = 0x8000;
2210 	return ((ushort_t)pct);
2211 }
2212 
2213 /*
2214  * Return information used by ps(1).
2215  */
2216 void
prgetpsinfo(proc_t * p,psinfo_t * psp)2217 prgetpsinfo(proc_t *p, psinfo_t *psp)
2218 {
2219 	kthread_t *t;
2220 	struct cred *cred;
2221 	hrtime_t hrutime, hrstime;
2222 
2223 	ASSERT(MUTEX_HELD(&p->p_lock));
2224 
2225 	if ((t = prchoose(p)) == NULL)	/* returns locked thread */
2226 		bzero(psp, sizeof (*psp));
2227 	else {
2228 		thread_unlock(t);
2229 		bzero(psp, sizeof (*psp) - sizeof (psp->pr_lwp));
2230 	}
2231 
2232 	/*
2233 	 * only export SSYS and SMSACCT; everything else is off-limits to
2234 	 * userland apps.
2235 	 */
2236 	psp->pr_flag = p->p_flag & (SSYS | SMSACCT);
2237 	psp->pr_nlwp = p->p_lwpcnt;
2238 	psp->pr_nzomb = p->p_zombcnt;
2239 	mutex_enter(&p->p_crlock);
2240 	cred = p->p_cred;
2241 	psp->pr_uid = crgetruid(cred);
2242 	psp->pr_euid = crgetuid(cred);
2243 	psp->pr_gid = crgetrgid(cred);
2244 	psp->pr_egid = crgetgid(cred);
2245 	mutex_exit(&p->p_crlock);
2246 	psp->pr_pid = p->p_pid;
2247 	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
2248 	    (p->p_flag & SZONETOP)) {
2249 		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
2250 		/*
2251 		 * Inside local zones, fake zsched's pid as parent pids for
2252 		 * processes which reference processes outside of the zone.
2253 		 */
2254 		psp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
2255 	} else {
2256 		psp->pr_ppid = p->p_ppid;
2257 	}
2258 	psp->pr_pgid = p->p_pgrp;
2259 	psp->pr_sid = p->p_sessp->s_sid;
2260 	psp->pr_taskid = p->p_task->tk_tkid;
2261 	psp->pr_projid = p->p_task->tk_proj->kpj_id;
2262 	psp->pr_poolid = p->p_pool->pool_id;
2263 	psp->pr_zoneid = p->p_zone->zone_id;
2264 	if ((psp->pr_contract = PRCTID(p)) == 0)
2265 		psp->pr_contract = -1;
2266 	psp->pr_addr = (uintptr_t)prgetpsaddr(p);
2267 	switch (p->p_model) {
2268 	case DATAMODEL_ILP32:
2269 		psp->pr_dmodel = PR_MODEL_ILP32;
2270 		break;
2271 	case DATAMODEL_LP64:
2272 		psp->pr_dmodel = PR_MODEL_LP64;
2273 		break;
2274 	}
2275 	hrutime = mstate_aggr_state(p, LMS_USER);
2276 	hrstime = mstate_aggr_state(p, LMS_SYSTEM);
2277 	hrt2ts((hrutime + hrstime), &psp->pr_time);
2278 	TICK_TO_TIMESTRUC(p->p_cutime + p->p_cstime, &psp->pr_ctime);
2279 
2280 	if (t == NULL) {
2281 		int wcode = p->p_wcode;		/* must be atomic read */
2282 
2283 		if (wcode)
2284 			psp->pr_wstat = wstat(wcode, p->p_wdata);
2285 		psp->pr_ttydev = PRNODEV;
2286 		psp->pr_lwp.pr_state = SZOMB;
2287 		psp->pr_lwp.pr_sname = 'Z';
2288 		psp->pr_lwp.pr_bindpro = PBIND_NONE;
2289 		psp->pr_lwp.pr_bindpset = PS_NONE;
2290 	} else {
2291 		user_t *up = PTOU(p);
2292 		struct as *as;
2293 		dev_t d;
2294 		extern dev_t rwsconsdev, rconsdev, uconsdev;
2295 
2296 		d = cttydev(p);
2297 		/*
2298 		 * If the controlling terminal is the real
2299 		 * or workstation console device, map to what the
2300 		 * user thinks is the console device. Handle case when
2301 		 * rwsconsdev or rconsdev is set to NODEV for Starfire.
2302 		 */
2303 		if ((d == rwsconsdev || d == rconsdev) && d != NODEV)
2304 			d = uconsdev;
2305 		psp->pr_ttydev = (d == NODEV) ? PRNODEV : d;
2306 		psp->pr_start = up->u_start;
2307 		bcopy(up->u_comm, psp->pr_fname,
2308 		    MIN(sizeof (up->u_comm), sizeof (psp->pr_fname)-1));
2309 		bcopy(up->u_psargs, psp->pr_psargs,
2310 		    MIN(PRARGSZ-1, PSARGSZ));
2311 		psp->pr_argc = up->u_argc;
2312 		psp->pr_argv = up->u_argv;
2313 		psp->pr_envp = up->u_envp;
2314 
2315 		/* get the chosen lwp's lwpsinfo */
2316 		prgetlwpsinfo(t, &psp->pr_lwp);
2317 
2318 		/* compute %cpu for the process */
2319 		if (p->p_lwpcnt == 1)
2320 			psp->pr_pctcpu = psp->pr_lwp.pr_pctcpu;
2321 		else {
2322 			uint64_t pct = 0;
2323 			hrtime_t cur_time = gethrtime_unscaled();
2324 
2325 			t = p->p_tlist;
2326 			do {
2327 				pct += cpu_update_pct(t, cur_time);
2328 			} while ((t = t->t_forw) != p->p_tlist);
2329 
2330 			psp->pr_pctcpu = prgetpctcpu(pct);
2331 		}
2332 		if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) {
2333 			psp->pr_size = 0;
2334 			psp->pr_rssize = 0;
2335 		} else {
2336 			mutex_exit(&p->p_lock);
2337 			AS_LOCK_ENTER(as, RW_READER);
2338 			psp->pr_size = btopr(as->a_resvsize) *
2339 			    (PAGESIZE / 1024);
2340 			psp->pr_rssize = rm_asrss(as) * (PAGESIZE / 1024);
2341 			psp->pr_pctmem = rm_pctmemory(as);
2342 			AS_LOCK_EXIT(as);
2343 			mutex_enter(&p->p_lock);
2344 		}
2345 	}
2346 }
2347 
2348 #ifdef _SYSCALL32_IMPL
2349 void
prgetpsinfo32(proc_t * p,psinfo32_t * psp)2350 prgetpsinfo32(proc_t *p, psinfo32_t *psp)
2351 {
2352 	kthread_t *t;
2353 	struct cred *cred;
2354 	hrtime_t hrutime, hrstime;
2355 
2356 	ASSERT(MUTEX_HELD(&p->p_lock));
2357 
2358 	if ((t = prchoose(p)) == NULL)	/* returns locked thread */
2359 		bzero(psp, sizeof (*psp));
2360 	else {
2361 		thread_unlock(t);
2362 		bzero(psp, sizeof (*psp) - sizeof (psp->pr_lwp));
2363 	}
2364 
2365 	/*
2366 	 * only export SSYS and SMSACCT; everything else is off-limits to
2367 	 * userland apps.
2368 	 */
2369 	psp->pr_flag = p->p_flag & (SSYS | SMSACCT);
2370 	psp->pr_nlwp = p->p_lwpcnt;
2371 	psp->pr_nzomb = p->p_zombcnt;
2372 	mutex_enter(&p->p_crlock);
2373 	cred = p->p_cred;
2374 	psp->pr_uid = crgetruid(cred);
2375 	psp->pr_euid = crgetuid(cred);
2376 	psp->pr_gid = crgetrgid(cred);
2377 	psp->pr_egid = crgetgid(cred);
2378 	mutex_exit(&p->p_crlock);
2379 	psp->pr_pid = p->p_pid;
2380 	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
2381 	    (p->p_flag & SZONETOP)) {
2382 		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
2383 		/*
2384 		 * Inside local zones, fake zsched's pid as parent pids for
2385 		 * processes which reference processes outside of the zone.
2386 		 */
2387 		psp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
2388 	} else {
2389 		psp->pr_ppid = p->p_ppid;
2390 	}
2391 	psp->pr_pgid = p->p_pgrp;
2392 	psp->pr_sid = p->p_sessp->s_sid;
2393 	psp->pr_taskid = p->p_task->tk_tkid;
2394 	psp->pr_projid = p->p_task->tk_proj->kpj_id;
2395 	psp->pr_poolid = p->p_pool->pool_id;
2396 	psp->pr_zoneid = p->p_zone->zone_id;
2397 	if ((psp->pr_contract = PRCTID(p)) == 0)
2398 		psp->pr_contract = -1;
2399 	psp->pr_addr = 0;	/* cannot represent 64-bit addr in 32 bits */
2400 	switch (p->p_model) {
2401 	case DATAMODEL_ILP32:
2402 		psp->pr_dmodel = PR_MODEL_ILP32;
2403 		break;
2404 	case DATAMODEL_LP64:
2405 		psp->pr_dmodel = PR_MODEL_LP64;
2406 		break;
2407 	}
2408 	hrutime = mstate_aggr_state(p, LMS_USER);
2409 	hrstime = mstate_aggr_state(p, LMS_SYSTEM);
2410 	hrt2ts32(hrutime + hrstime, &psp->pr_time);
2411 	TICK_TO_TIMESTRUC32(p->p_cutime + p->p_cstime, &psp->pr_ctime);
2412 
2413 	if (t == NULL) {
2414 		extern int wstat(int, int);	/* needs a header file */
2415 		int wcode = p->p_wcode;		/* must be atomic read */
2416 
2417 		if (wcode)
2418 			psp->pr_wstat = wstat(wcode, p->p_wdata);
2419 		psp->pr_ttydev = PRNODEV32;
2420 		psp->pr_lwp.pr_state = SZOMB;
2421 		psp->pr_lwp.pr_sname = 'Z';
2422 	} else {
2423 		user_t *up = PTOU(p);
2424 		struct as *as;
2425 		dev_t d;
2426 		extern dev_t rwsconsdev, rconsdev, uconsdev;
2427 
2428 		d = cttydev(p);
2429 		/*
2430 		 * If the controlling terminal is the real
2431 		 * or workstation console device, map to what the
2432 		 * user thinks is the console device. Handle case when
2433 		 * rwsconsdev or rconsdev is set to NODEV for Starfire.
2434 		 */
2435 		if ((d == rwsconsdev || d == rconsdev) && d != NODEV)
2436 			d = uconsdev;
2437 		(void) cmpldev(&psp->pr_ttydev, d);
2438 		TIMESPEC_TO_TIMESPEC32(&psp->pr_start, &up->u_start);
2439 		bcopy(up->u_comm, psp->pr_fname,
2440 		    MIN(sizeof (up->u_comm), sizeof (psp->pr_fname)-1));
2441 		bcopy(up->u_psargs, psp->pr_psargs,
2442 		    MIN(PRARGSZ-1, PSARGSZ));
2443 		psp->pr_argc = up->u_argc;
2444 		psp->pr_argv = (caddr32_t)up->u_argv;
2445 		psp->pr_envp = (caddr32_t)up->u_envp;
2446 
2447 		/* get the chosen lwp's lwpsinfo */
2448 		prgetlwpsinfo32(t, &psp->pr_lwp);
2449 
2450 		/* compute %cpu for the process */
2451 		if (p->p_lwpcnt == 1)
2452 			psp->pr_pctcpu = psp->pr_lwp.pr_pctcpu;
2453 		else {
2454 			uint64_t pct = 0;
2455 			hrtime_t cur_time;
2456 
2457 			t = p->p_tlist;
2458 			cur_time = gethrtime_unscaled();
2459 			do {
2460 				pct += cpu_update_pct(t, cur_time);
2461 			} while ((t = t->t_forw) != p->p_tlist);
2462 
2463 			psp->pr_pctcpu = prgetpctcpu(pct);
2464 		}
2465 		if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) {
2466 			psp->pr_size = 0;
2467 			psp->pr_rssize = 0;
2468 		} else {
2469 			mutex_exit(&p->p_lock);
2470 			AS_LOCK_ENTER(as, RW_READER);
2471 			psp->pr_size = (size32_t)
2472 			    (btopr(as->a_resvsize) * (PAGESIZE / 1024));
2473 			psp->pr_rssize = (size32_t)
2474 			    (rm_asrss(as) * (PAGESIZE / 1024));
2475 			psp->pr_pctmem = rm_pctmemory(as);
2476 			AS_LOCK_EXIT(as);
2477 			mutex_enter(&p->p_lock);
2478 		}
2479 	}
2480 
2481 	/*
2482 	 * If we are looking at an LP64 process, zero out
2483 	 * the fields that cannot be represented in ILP32.
2484 	 */
2485 	if (p->p_model != DATAMODEL_ILP32) {
2486 		psp->pr_size = 0;
2487 		psp->pr_rssize = 0;
2488 		psp->pr_argv = 0;
2489 		psp->pr_envp = 0;
2490 	}
2491 }
2492 
2493 #endif	/* _SYSCALL32_IMPL */
2494 
2495 void
prgetlwpsinfo(kthread_t * t,lwpsinfo_t * psp)2496 prgetlwpsinfo(kthread_t *t, lwpsinfo_t *psp)
2497 {
2498 	klwp_t *lwp = ttolwp(t);
2499 	sobj_ops_t *sobj;
2500 	char c, state;
2501 	uint64_t pct;
2502 	int retval, niceval;
2503 	hrtime_t hrutime, hrstime;
2504 
2505 	ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));
2506 
2507 	bzero(psp, sizeof (*psp));
2508 
2509 	psp->pr_flag = 0;	/* lwpsinfo_t.pr_flag is deprecated */
2510 	psp->pr_lwpid = t->t_tid;
2511 	psp->pr_addr = (uintptr_t)t;
2512 	psp->pr_wchan = (uintptr_t)t->t_wchan;
2513 
2514 	/* map the thread state enum into a process state enum */
2515 	state = VSTOPPED(t) ? TS_STOPPED : t->t_state;
2516 	switch (state) {
2517 	case TS_SLEEP:		state = SSLEEP;		c = 'S';	break;
2518 	case TS_RUN:		state = SRUN;		c = 'R';	break;
2519 	case TS_ONPROC:		state = SONPROC;	c = 'O';	break;
2520 	case TS_ZOMB:		state = SZOMB;		c = 'Z';	break;
2521 	case TS_STOPPED:	state = SSTOP;		c = 'T';	break;
2522 	case TS_WAIT:		state = SWAIT;		c = 'W';	break;
2523 	default:		state = 0;		c = '?';	break;
2524 	}
2525 	psp->pr_state = state;
2526 	psp->pr_sname = c;
2527 	if ((sobj = t->t_sobj_ops) != NULL)
2528 		psp->pr_stype = SOBJ_TYPE(sobj);
2529 	retval = CL_DONICE(t, NULL, 0, &niceval);
2530 	if (retval == 0) {
2531 		psp->pr_oldpri = v.v_maxsyspri - t->t_pri;
2532 		psp->pr_nice = niceval + NZERO;
2533 	}
2534 	psp->pr_syscall = t->t_sysnum;
2535 	psp->pr_pri = t->t_pri;
2536 	psp->pr_start.tv_sec = t->t_start;
2537 	psp->pr_start.tv_nsec = 0L;
2538 	hrutime = lwp->lwp_mstate.ms_acct[LMS_USER];
2539 	scalehrtime(&hrutime);
2540 	hrstime = lwp->lwp_mstate.ms_acct[LMS_SYSTEM] +
2541 	    lwp->lwp_mstate.ms_acct[LMS_TRAP];
2542 	scalehrtime(&hrstime);
2543 	hrt2ts(hrutime + hrstime, &psp->pr_time);
2544 	/* compute %cpu for the lwp */
2545 	pct = cpu_update_pct(t, gethrtime_unscaled());
2546 	psp->pr_pctcpu = prgetpctcpu(pct);
2547 	psp->pr_cpu = (psp->pr_pctcpu*100 + 0x6000) >> 15;	/* [0..99] */
2548 	if (psp->pr_cpu > 99)
2549 		psp->pr_cpu = 99;
2550 
2551 	(void) strncpy(psp->pr_clname, sclass[t->t_cid].cl_name,
2552 	    sizeof (psp->pr_clname) - 1);
2553 	bzero(psp->pr_name, sizeof (psp->pr_name));	/* XXX ??? */
2554 	psp->pr_onpro = t->t_cpu->cpu_id;
2555 	psp->pr_bindpro = t->t_bind_cpu;
2556 	psp->pr_bindpset = t->t_bind_pset;
2557 	psp->pr_lgrp = t->t_lpl->lpl_lgrpid;
2558 }
2559 
2560 #ifdef _SYSCALL32_IMPL
2561 void
prgetlwpsinfo32(kthread_t * t,lwpsinfo32_t * psp)2562 prgetlwpsinfo32(kthread_t *t, lwpsinfo32_t *psp)
2563 {
2564 	proc_t *p = ttoproc(t);
2565 	klwp_t *lwp = ttolwp(t);
2566 	sobj_ops_t *sobj;
2567 	char c, state;
2568 	uint64_t pct;
2569 	int retval, niceval;
2570 	hrtime_t hrutime, hrstime;
2571 
2572 	ASSERT(MUTEX_HELD(&p->p_lock));
2573 
2574 	bzero(psp, sizeof (*psp));
2575 
2576 	psp->pr_flag = 0;	/* lwpsinfo_t.pr_flag is deprecated */
2577 	psp->pr_lwpid = t->t_tid;
2578 	psp->pr_addr = 0;	/* cannot represent 64-bit addr in 32 bits */
2579 	psp->pr_wchan = 0;	/* cannot represent 64-bit addr in 32 bits */
2580 
2581 	/* map the thread state enum into a process state enum */
2582 	state = VSTOPPED(t) ? TS_STOPPED : t->t_state;
2583 	switch (state) {
2584 	case TS_SLEEP:		state = SSLEEP;		c = 'S';	break;
2585 	case TS_RUN:		state = SRUN;		c = 'R';	break;
2586 	case TS_ONPROC:		state = SONPROC;	c = 'O';	break;
2587 	case TS_ZOMB:		state = SZOMB;		c = 'Z';	break;
2588 	case TS_STOPPED:	state = SSTOP;		c = 'T';	break;
2589 	case TS_WAIT:		state = SWAIT;		c = 'W';	break;
2590 	default:		state = 0;		c = '?';	break;
2591 	}
2592 	psp->pr_state = state;
2593 	psp->pr_sname = c;
2594 	if ((sobj = t->t_sobj_ops) != NULL)
2595 		psp->pr_stype = SOBJ_TYPE(sobj);
2596 	retval = CL_DONICE(t, NULL, 0, &niceval);
2597 	if (retval == 0) {
2598 		psp->pr_oldpri = v.v_maxsyspri - t->t_pri;
2599 		psp->pr_nice = niceval + NZERO;
2600 	} else {
2601 		psp->pr_oldpri = 0;
2602 		psp->pr_nice = 0;
2603 	}
2604 	psp->pr_syscall = t->t_sysnum;
2605 	psp->pr_pri = t->t_pri;
2606 	psp->pr_start.tv_sec = (time32_t)t->t_start;
2607 	psp->pr_start.tv_nsec = 0L;
2608 	hrutime = lwp->lwp_mstate.ms_acct[LMS_USER];
2609 	scalehrtime(&hrutime);
2610 	hrstime = lwp->lwp_mstate.ms_acct[LMS_SYSTEM] +
2611 	    lwp->lwp_mstate.ms_acct[LMS_TRAP];
2612 	scalehrtime(&hrstime);
2613 	hrt2ts32(hrutime + hrstime, &psp->pr_time);
2614 	/* compute %cpu for the lwp */
2615 	pct = cpu_update_pct(t, gethrtime_unscaled());
2616 	psp->pr_pctcpu = prgetpctcpu(pct);
2617 	psp->pr_cpu = (psp->pr_pctcpu*100 + 0x6000) >> 15;	/* [0..99] */
2618 	if (psp->pr_cpu > 99)
2619 		psp->pr_cpu = 99;
2620 
2621 	(void) strncpy(psp->pr_clname, sclass[t->t_cid].cl_name,
2622 	    sizeof (psp->pr_clname) - 1);
2623 	bzero(psp->pr_name, sizeof (psp->pr_name));	/* XXX ??? */
2624 	psp->pr_onpro = t->t_cpu->cpu_id;
2625 	psp->pr_bindpro = t->t_bind_cpu;
2626 	psp->pr_bindpset = t->t_bind_pset;
2627 	psp->pr_lgrp = t->t_lpl->lpl_lgrpid;
2628 }
2629 #endif	/* _SYSCALL32_IMPL */
2630 
2631 #ifdef _SYSCALL32_IMPL
2632 
/*
 * Field-copy helpers for converting a native structure (s) into its
 * 32-bit form (d).  Each one expands to a single statement that needs a
 * terminating semicolon at the point of use, so it is safe as the body of
 * an unbraced if/else, and every argument is parenthesized.
 *
 *	PR_COPY_FIELD		plain assignment
 *	PR_COPY_FIELD_ILP32	assignment only when s describes an ILP32
 *				process (s->pr_dmodel == PR_MODEL_ILP32)
 *	PR_COPY_TIMESPEC	TIMESPEC_TO_TIMESPEC32() conversion
 *	PR_COPY_BUF		bcopy() of sizeof (d->field) bytes
 *	PR_IGNORE_FIELD		expands to nothing; documents a field that is
 *				deliberately not copied
 */
#define	PR_COPY_FIELD(s, d, field)	((d)->field = (s)->field)

#define	PR_COPY_FIELD_ILP32(s, d, field)				\
	do {								\
		if ((s)->pr_dmodel == PR_MODEL_ILP32)			\
			(d)->field = (s)->field;			\
	} while (0)

#define	PR_COPY_TIMESPEC(s, d, field)					\
	do {								\
		TIMESPEC_TO_TIMESPEC32(&(d)->field, &(s)->field);	\
	} while (0)

#define	PR_COPY_BUF(s, d, field)					\
	bcopy((s)->field, (d)->field, sizeof ((d)->field))

#define	PR_IGNORE_FIELD(s, d, field)
2647 
2648 void
lwpsinfo_kto32(const struct lwpsinfo * src,struct lwpsinfo32 * dest)2649 lwpsinfo_kto32(const struct lwpsinfo *src, struct lwpsinfo32 *dest)
2650 {
2651 	bzero(dest, sizeof (*dest));
2652 
2653 	PR_COPY_FIELD(src, dest, pr_flag);
2654 	PR_COPY_FIELD(src, dest, pr_lwpid);
2655 	PR_IGNORE_FIELD(src, dest, pr_addr);
2656 	PR_IGNORE_FIELD(src, dest, pr_wchan);
2657 	PR_COPY_FIELD(src, dest, pr_stype);
2658 	PR_COPY_FIELD(src, dest, pr_state);
2659 	PR_COPY_FIELD(src, dest, pr_sname);
2660 	PR_COPY_FIELD(src, dest, pr_nice);
2661 	PR_COPY_FIELD(src, dest, pr_syscall);
2662 	PR_COPY_FIELD(src, dest, pr_oldpri);
2663 	PR_COPY_FIELD(src, dest, pr_cpu);
2664 	PR_COPY_FIELD(src, dest, pr_pri);
2665 	PR_COPY_FIELD(src, dest, pr_pctcpu);
2666 	PR_COPY_TIMESPEC(src, dest, pr_start);
2667 	PR_COPY_BUF(src, dest, pr_clname);
2668 	PR_COPY_BUF(src, dest, pr_name);
2669 	PR_COPY_FIELD(src, dest, pr_onpro);
2670 	PR_COPY_FIELD(src, dest, pr_bindpro);
2671 	PR_COPY_FIELD(src, dest, pr_bindpset);
2672 	PR_COPY_FIELD(src, dest, pr_lgrp);
2673 }
2674 
2675 void
psinfo_kto32(const struct psinfo * src,struct psinfo32 * dest)2676 psinfo_kto32(const struct psinfo *src, struct psinfo32 *dest)
2677 {
2678 	bzero(dest, sizeof (*dest));
2679 
2680 	PR_COPY_FIELD(src, dest, pr_flag);
2681 	PR_COPY_FIELD(src, dest, pr_nlwp);
2682 	PR_COPY_FIELD(src, dest, pr_pid);
2683 	PR_COPY_FIELD(src, dest, pr_ppid);
2684 	PR_COPY_FIELD(src, dest, pr_pgid);
2685 	PR_COPY_FIELD(src, dest, pr_sid);
2686 	PR_COPY_FIELD(src, dest, pr_uid);
2687 	PR_COPY_FIELD(src, dest, pr_euid);
2688 	PR_COPY_FIELD(src, dest, pr_gid);
2689 	PR_COPY_FIELD(src, dest, pr_egid);
2690 	PR_IGNORE_FIELD(src, dest, pr_addr);
2691 	PR_COPY_FIELD_ILP32(src, dest, pr_size);
2692 	PR_COPY_FIELD_ILP32(src, dest, pr_rssize);
2693 	PR_COPY_FIELD(src, dest, pr_ttydev);
2694 	PR_COPY_FIELD(src, dest, pr_pctcpu);
2695 	PR_COPY_FIELD(src, dest, pr_pctmem);
2696 	PR_COPY_TIMESPEC(src, dest, pr_start);
2697 	PR_COPY_TIMESPEC(src, dest, pr_time);
2698 	PR_COPY_TIMESPEC(src, dest, pr_ctime);
2699 	PR_COPY_BUF(src, dest, pr_fname);
2700 	PR_COPY_BUF(src, dest, pr_psargs);
2701 	PR_COPY_FIELD(src, dest, pr_wstat);
2702 	PR_COPY_FIELD(src, dest, pr_argc);
2703 	PR_COPY_FIELD_ILP32(src, dest, pr_argv);
2704 	PR_COPY_FIELD_ILP32(src, dest, pr_envp);
2705 	PR_COPY_FIELD(src, dest, pr_dmodel);
2706 	PR_COPY_FIELD(src, dest, pr_taskid);
2707 	PR_COPY_FIELD(src, dest, pr_projid);
2708 	PR_COPY_FIELD(src, dest, pr_nzomb);
2709 	PR_COPY_FIELD(src, dest, pr_poolid);
2710 	PR_COPY_FIELD(src, dest, pr_contract);
2711 	PR_COPY_FIELD(src, dest, pr_poolid);
2712 	PR_COPY_FIELD(src, dest, pr_poolid);
2713 
2714 	lwpsinfo_kto32(&src->pr_lwp, &dest->pr_lwp);
2715 }
2716 
2717 #undef	PR_COPY_FIELD
2718 #undef	PR_COPY_FIELD_ILP32
2719 #undef	PR_COPY_TIMESPEC
2720 #undef	PR_COPY_BUF
2721 #undef	PR_IGNORE_FIELD
2722 
2723 #endif	/* _SYSCALL32_IMPL */
2724 
2725 /*
2726  * This used to get called when microstate accounting was disabled but
2727  * microstate information was requested.  Since Microstate accounting is on
2728  * regardless of the proc flags, this simply makes it appear to procfs that
2729  * microstate accounting is on.  This is relatively meaningless since you
2730  * can't turn it off, but this is here for the sake of appearances.
2731  */
2732 
2733 /*ARGSUSED*/
2734 void
estimate_msacct(kthread_t * t,hrtime_t curtime)2735 estimate_msacct(kthread_t *t, hrtime_t curtime)
2736 {
2737 	proc_t *p;
2738 
2739 	if (t == NULL)
2740 		return;
2741 
2742 	p = ttoproc(t);
2743 	ASSERT(MUTEX_HELD(&p->p_lock));
2744 
2745 	/*
2746 	 * A system process (p0) could be referenced if the thread is
2747 	 * in the process of exiting.  Don't turn on microstate accounting
2748 	 * in that case.
2749 	 */
2750 	if (p->p_flag & SSYS)
2751 		return;
2752 
2753 	/*
2754 	 * Loop through all the LWPs (kernel threads) in the process.
2755 	 */
2756 	t = p->p_tlist;
2757 	do {
2758 		t->t_proc_flag |= TP_MSACCT;
2759 	} while ((t = t->t_forw) != p->p_tlist);
2760 
2761 	p->p_flag |= SMSACCT;			/* set process-wide MSACCT */
2762 }
2763 
2764 /*
2765  * It's not really possible to disable microstate accounting anymore.
2766  * However, this routine simply turns off the ms accounting flags in a process
2767  * This way procfs can still pretend to turn microstate accounting on and
2768  * off for a process, but it actually doesn't do anything.  This is
2769  * a neutered form of preemptive idiot-proofing.
2770  */
2771 void
disable_msacct(proc_t * p)2772 disable_msacct(proc_t *p)
2773 {
2774 	kthread_t *t;
2775 
2776 	ASSERT(MUTEX_HELD(&p->p_lock));
2777 
2778 	p->p_flag &= ~SMSACCT;		/* clear process-wide MSACCT */
2779 	/*
2780 	 * Loop through all the LWPs (kernel threads) in the process.
2781 	 */
2782 	if ((t = p->p_tlist) != NULL) {
2783 		do {
2784 			/* clear per-thread flag */
2785 			t->t_proc_flag &= ~TP_MSACCT;
2786 		} while ((t = t->t_forw) != p->p_tlist);
2787 	}
2788 }
2789 
2790 /*
2791  * Return resource usage information.
2792  */
2793 void
prgetusage(kthread_t * t,prhusage_t * pup)2794 prgetusage(kthread_t *t, prhusage_t *pup)
2795 {
2796 	klwp_t *lwp = ttolwp(t);
2797 	hrtime_t *mstimep;
2798 	struct mstate *ms = &lwp->lwp_mstate;
2799 	int state;
2800 	int i;
2801 	hrtime_t curtime;
2802 	hrtime_t waitrq;
2803 	hrtime_t tmp1;
2804 
2805 	curtime = gethrtime_unscaled();
2806 
2807 	pup->pr_lwpid	= t->t_tid;
2808 	pup->pr_count	= 1;
2809 	pup->pr_create	= ms->ms_start;
2810 	pup->pr_term    = ms->ms_term;
2811 	scalehrtime(&pup->pr_create);
2812 	scalehrtime(&pup->pr_term);
2813 	if (ms->ms_term == 0) {
2814 		pup->pr_rtime = curtime - ms->ms_start;
2815 		scalehrtime(&pup->pr_rtime);
2816 	} else {
2817 		pup->pr_rtime = ms->ms_term - ms->ms_start;
2818 		scalehrtime(&pup->pr_rtime);
2819 	}
2820 
2821 
2822 	pup->pr_utime    = ms->ms_acct[LMS_USER];
2823 	pup->pr_stime    = ms->ms_acct[LMS_SYSTEM];
2824 	pup->pr_ttime    = ms->ms_acct[LMS_TRAP];
2825 	pup->pr_tftime   = ms->ms_acct[LMS_TFAULT];
2826 	pup->pr_dftime   = ms->ms_acct[LMS_DFAULT];
2827 	pup->pr_kftime   = ms->ms_acct[LMS_KFAULT];
2828 	pup->pr_ltime    = ms->ms_acct[LMS_USER_LOCK];
2829 	pup->pr_slptime  = ms->ms_acct[LMS_SLEEP];
2830 	pup->pr_wtime    = ms->ms_acct[LMS_WAIT_CPU];
2831 	pup->pr_stoptime = ms->ms_acct[LMS_STOPPED];
2832 
2833 	prscaleusage(pup);
2834 
2835 	/*
2836 	 * Adjust for time waiting in the dispatcher queue.
2837 	 */
2838 	waitrq = t->t_waitrq;	/* hopefully atomic */
2839 	if (waitrq != 0) {
2840 		if (waitrq > curtime) {
2841 			curtime = gethrtime_unscaled();
2842 		}
2843 		tmp1 = curtime - waitrq;
2844 		scalehrtime(&tmp1);
2845 		pup->pr_wtime += tmp1;
2846 		curtime = waitrq;
2847 	}
2848 
2849 	/*
2850 	 * Adjust for time spent in current microstate.
2851 	 */
2852 	if (ms->ms_state_start > curtime) {
2853 		curtime = gethrtime_unscaled();
2854 	}
2855 
2856 	i = 0;
2857 	do {
2858 		switch (state = t->t_mstate) {
2859 		case LMS_SLEEP:
2860 			/*
2861 			 * Update the timer for the current sleep state.
2862 			 */
2863 			switch (state = ms->ms_prev) {
2864 			case LMS_TFAULT:
2865 			case LMS_DFAULT:
2866 			case LMS_KFAULT:
2867 			case LMS_USER_LOCK:
2868 				break;
2869 			default:
2870 				state = LMS_SLEEP;
2871 				break;
2872 			}
2873 			break;
2874 		case LMS_TFAULT:
2875 		case LMS_DFAULT:
2876 		case LMS_KFAULT:
2877 		case LMS_USER_LOCK:
2878 			state = LMS_SYSTEM;
2879 			break;
2880 		}
2881 		switch (state) {
2882 		case LMS_USER:		mstimep = &pup->pr_utime;	break;
2883 		case LMS_SYSTEM:	mstimep = &pup->pr_stime;	break;
2884 		case LMS_TRAP:		mstimep = &pup->pr_ttime;	break;
2885 		case LMS_TFAULT:	mstimep = &pup->pr_tftime;	break;
2886 		case LMS_DFAULT:	mstimep = &pup->pr_dftime;	break;
2887 		case LMS_KFAULT:	mstimep = &pup->pr_kftime;	break;
2888 		case LMS_USER_LOCK:	mstimep = &pup->pr_ltime;	break;
2889 		case LMS_SLEEP:		mstimep = &pup->pr_slptime;	break;
2890 		case LMS_WAIT_CPU:	mstimep = &pup->pr_wtime;	break;
2891 		case LMS_STOPPED:	mstimep = &pup->pr_stoptime;	break;
2892 		default:		panic("prgetusage: unknown microstate");
2893 		}
2894 		tmp1 = curtime - ms->ms_state_start;
2895 		if (tmp1 < 0) {
2896 			curtime = gethrtime_unscaled();
2897 			i++;
2898 			continue;
2899 		}
2900 		scalehrtime(&tmp1);
2901 	} while (tmp1 < 0 && i < MAX_ITERS_SPIN);
2902 
2903 	*mstimep += tmp1;
2904 
2905 	/* update pup timestamp */
2906 	pup->pr_tstamp = curtime;
2907 	scalehrtime(&pup->pr_tstamp);
2908 
2909 	/*
2910 	 * Resource usage counters.
2911 	 */
2912 	pup->pr_minf  = lwp->lwp_ru.minflt;
2913 	pup->pr_majf  = lwp->lwp_ru.majflt;
2914 	pup->pr_nswap = lwp->lwp_ru.nswap;
2915 	pup->pr_inblk = lwp->lwp_ru.inblock;
2916 	pup->pr_oublk = lwp->lwp_ru.oublock;
2917 	pup->pr_msnd  = lwp->lwp_ru.msgsnd;
2918 	pup->pr_mrcv  = lwp->lwp_ru.msgrcv;
2919 	pup->pr_sigs  = lwp->lwp_ru.nsignals;
2920 	pup->pr_vctx  = lwp->lwp_ru.nvcsw;
2921 	pup->pr_ictx  = lwp->lwp_ru.nivcsw;
2922 	pup->pr_sysc  = lwp->lwp_ru.sysc;
2923 	pup->pr_ioch  = lwp->lwp_ru.ioch;
2924 }
2925 
2926 /*
2927  * Convert ms_acct stats from unscaled high-res time to nanoseconds
2928  */
2929 void
prscaleusage(prhusage_t * usg)2930 prscaleusage(prhusage_t *usg)
2931 {
2932 	scalehrtime(&usg->pr_utime);
2933 	scalehrtime(&usg->pr_stime);
2934 	scalehrtime(&usg->pr_ttime);
2935 	scalehrtime(&usg->pr_tftime);
2936 	scalehrtime(&usg->pr_dftime);
2937 	scalehrtime(&usg->pr_kftime);
2938 	scalehrtime(&usg->pr_ltime);
2939 	scalehrtime(&usg->pr_slptime);
2940 	scalehrtime(&usg->pr_wtime);
2941 	scalehrtime(&usg->pr_stoptime);
2942 }
2943 
2944 
2945 /*
2946  * Sum resource usage information.
2947  */
2948 void
praddusage(kthread_t * t,prhusage_t * pup)2949 praddusage(kthread_t *t, prhusage_t *pup)
2950 {
2951 	klwp_t *lwp = ttolwp(t);
2952 	hrtime_t *mstimep;
2953 	struct mstate *ms = &lwp->lwp_mstate;
2954 	int state;
2955 	int i;
2956 	hrtime_t curtime;
2957 	hrtime_t waitrq;
2958 	hrtime_t tmp;
2959 	prhusage_t conv;
2960 
2961 	curtime = gethrtime_unscaled();
2962 
2963 	if (ms->ms_term == 0) {
2964 		tmp = curtime - ms->ms_start;
2965 		scalehrtime(&tmp);
2966 		pup->pr_rtime += tmp;
2967 	} else {
2968 		tmp = ms->ms_term - ms->ms_start;
2969 		scalehrtime(&tmp);
2970 		pup->pr_rtime += tmp;
2971 	}
2972 
2973 	conv.pr_utime = ms->ms_acct[LMS_USER];
2974 	conv.pr_stime = ms->ms_acct[LMS_SYSTEM];
2975 	conv.pr_ttime = ms->ms_acct[LMS_TRAP];
2976 	conv.pr_tftime = ms->ms_acct[LMS_TFAULT];
2977 	conv.pr_dftime = ms->ms_acct[LMS_DFAULT];
2978 	conv.pr_kftime = ms->ms_acct[LMS_KFAULT];
2979 	conv.pr_ltime = ms->ms_acct[LMS_USER_LOCK];
2980 	conv.pr_slptime = ms->ms_acct[LMS_SLEEP];
2981 	conv.pr_wtime = ms->ms_acct[LMS_WAIT_CPU];
2982 	conv.pr_stoptime = ms->ms_acct[LMS_STOPPED];
2983 
2984 	prscaleusage(&conv);
2985 
2986 	pup->pr_utime	+= conv.pr_utime;
2987 	pup->pr_stime	+= conv.pr_stime;
2988 	pup->pr_ttime	+= conv.pr_ttime;
2989 	pup->pr_tftime	+= conv.pr_tftime;
2990 	pup->pr_dftime	+= conv.pr_dftime;
2991 	pup->pr_kftime	+= conv.pr_kftime;
2992 	pup->pr_ltime	+= conv.pr_ltime;
2993 	pup->pr_slptime	+= conv.pr_slptime;
2994 	pup->pr_wtime	+= conv.pr_wtime;
2995 	pup->pr_stoptime += conv.pr_stoptime;
2996 
2997 	/*
2998 	 * Adjust for time waiting in the dispatcher queue.
2999 	 */
3000 	waitrq = t->t_waitrq;	/* hopefully atomic */
3001 	if (waitrq != 0) {
3002 		if (waitrq > curtime) {
3003 			curtime = gethrtime_unscaled();
3004 		}
3005 		tmp = curtime - waitrq;
3006 		scalehrtime(&tmp);
3007 		pup->pr_wtime += tmp;
3008 		curtime = waitrq;
3009 	}
3010 
3011 	/*
3012 	 * Adjust for time spent in current microstate.
3013 	 */
3014 	if (ms->ms_state_start > curtime) {
3015 		curtime = gethrtime_unscaled();
3016 	}
3017 
3018 	i = 0;
3019 	do {
3020 		switch (state = t->t_mstate) {
3021 		case LMS_SLEEP:
3022 			/*
3023 			 * Update the timer for the current sleep state.
3024 			 */
3025 			switch (state = ms->ms_prev) {
3026 			case LMS_TFAULT:
3027 			case LMS_DFAULT:
3028 			case LMS_KFAULT:
3029 			case LMS_USER_LOCK:
3030 				break;
3031 			default:
3032 				state = LMS_SLEEP;
3033 				break;
3034 			}
3035 			break;
3036 		case LMS_TFAULT:
3037 		case LMS_DFAULT:
3038 		case LMS_KFAULT:
3039 		case LMS_USER_LOCK:
3040 			state = LMS_SYSTEM;
3041 			break;
3042 		}
3043 		switch (state) {
3044 		case LMS_USER:		mstimep = &pup->pr_utime;	break;
3045 		case LMS_SYSTEM:	mstimep = &pup->pr_stime;	break;
3046 		case LMS_TRAP:		mstimep = &pup->pr_ttime;	break;
3047 		case LMS_TFAULT:	mstimep = &pup->pr_tftime;	break;
3048 		case LMS_DFAULT:	mstimep = &pup->pr_dftime;	break;
3049 		case LMS_KFAULT:	mstimep = &pup->pr_kftime;	break;
3050 		case LMS_USER_LOCK:	mstimep = &pup->pr_ltime;	break;
3051 		case LMS_SLEEP:		mstimep = &pup->pr_slptime;	break;
3052 		case LMS_WAIT_CPU:	mstimep = &pup->pr_wtime;	break;
3053 		case LMS_STOPPED:	mstimep = &pup->pr_stoptime;	break;
3054 		default:		panic("praddusage: unknown microstate");
3055 		}
3056 		tmp = curtime - ms->ms_state_start;
3057 		if (tmp < 0) {
3058 			curtime = gethrtime_unscaled();
3059 			i++;
3060 			continue;
3061 		}
3062 		scalehrtime(&tmp);
3063 	} while (tmp < 0 && i < MAX_ITERS_SPIN);
3064 
3065 	*mstimep += tmp;
3066 
3067 	/* update pup timestamp */
3068 	pup->pr_tstamp = curtime;
3069 	scalehrtime(&pup->pr_tstamp);
3070 
3071 	/*
3072 	 * Resource usage counters.
3073 	 */
3074 	pup->pr_minf  += lwp->lwp_ru.minflt;
3075 	pup->pr_majf  += lwp->lwp_ru.majflt;
3076 	pup->pr_nswap += lwp->lwp_ru.nswap;
3077 	pup->pr_inblk += lwp->lwp_ru.inblock;
3078 	pup->pr_oublk += lwp->lwp_ru.oublock;
3079 	pup->pr_msnd  += lwp->lwp_ru.msgsnd;
3080 	pup->pr_mrcv  += lwp->lwp_ru.msgrcv;
3081 	pup->pr_sigs  += lwp->lwp_ru.nsignals;
3082 	pup->pr_vctx  += lwp->lwp_ru.nvcsw;
3083 	pup->pr_ictx  += lwp->lwp_ru.nivcsw;
3084 	pup->pr_sysc  += lwp->lwp_ru.sysc;
3085 	pup->pr_ioch  += lwp->lwp_ru.ioch;
3086 }
3087 
3088 /*
3089  * Convert a prhusage_t to a prusage_t.
3090  * This means convert each hrtime_t to a timestruc_t
3091  * and copy the count fields uint64_t => ulong_t.
3092  */
3093 void
prcvtusage(prhusage_t * pup,prusage_t * upup)3094 prcvtusage(prhusage_t *pup, prusage_t *upup)
3095 {
3096 	uint64_t *ullp;
3097 	ulong_t *ulp;
3098 	int i;
3099 
3100 	upup->pr_lwpid = pup->pr_lwpid;
3101 	upup->pr_count = pup->pr_count;
3102 
3103 	hrt2ts(pup->pr_tstamp,	&upup->pr_tstamp);
3104 	hrt2ts(pup->pr_create,	&upup->pr_create);
3105 	hrt2ts(pup->pr_term,	&upup->pr_term);
3106 	hrt2ts(pup->pr_rtime,	&upup->pr_rtime);
3107 	hrt2ts(pup->pr_utime,	&upup->pr_utime);
3108 	hrt2ts(pup->pr_stime,	&upup->pr_stime);
3109 	hrt2ts(pup->pr_ttime,	&upup->pr_ttime);
3110 	hrt2ts(pup->pr_tftime,	&upup->pr_tftime);
3111 	hrt2ts(pup->pr_dftime,	&upup->pr_dftime);
3112 	hrt2ts(pup->pr_kftime,	&upup->pr_kftime);
3113 	hrt2ts(pup->pr_ltime,	&upup->pr_ltime);
3114 	hrt2ts(pup->pr_slptime,	&upup->pr_slptime);
3115 	hrt2ts(pup->pr_wtime,	&upup->pr_wtime);
3116 	hrt2ts(pup->pr_stoptime, &upup->pr_stoptime);
3117 	bzero(upup->filltime, sizeof (upup->filltime));
3118 
3119 	ullp = &pup->pr_minf;
3120 	ulp = &upup->pr_minf;
3121 	for (i = 0; i < 22; i++)
3122 		*ulp++ = (ulong_t)*ullp++;
3123 }
3124 
#ifdef _SYSCALL32_IMPL
/*
 * Translate a prhusage_t into a prusage32_t.
 *
 * Each hrtime_t time value in the source is converted with hrt2ts32();
 * the uint64_t counters are narrowed to uint32_t.
 */
void
prcvtusage32(prhusage_t *pup, prusage32_t *upup)
{
	uint64_t *src = &pup->pr_minf;
	uint32_t *dst = &upup->pr_minf;
	int idx;

	/* Identification fields are copied as they are. */
	upup->pr_lwpid = pup->pr_lwpid;
	upup->pr_count = pup->pr_count;

	/* Timestamps. */
	hrt2ts32(pup->pr_tstamp, &upup->pr_tstamp);
	hrt2ts32(pup->pr_create, &upup->pr_create);
	hrt2ts32(pup->pr_term, &upup->pr_term);
	hrt2ts32(pup->pr_rtime, &upup->pr_rtime);

	/* Accumulated time per microstate. */
	hrt2ts32(pup->pr_utime, &upup->pr_utime);
	hrt2ts32(pup->pr_stime, &upup->pr_stime);
	hrt2ts32(pup->pr_ttime, &upup->pr_ttime);
	hrt2ts32(pup->pr_tftime, &upup->pr_tftime);
	hrt2ts32(pup->pr_dftime, &upup->pr_dftime);
	hrt2ts32(pup->pr_kftime, &upup->pr_kftime);
	hrt2ts32(pup->pr_ltime, &upup->pr_ltime);
	hrt2ts32(pup->pr_slptime, &upup->pr_slptime);
	hrt2ts32(pup->pr_wtime, &upup->pr_wtime);
	hrt2ts32(pup->pr_stoptime, &upup->pr_stoptime);
	bzero(upup->filltime, sizeof (upup->filltime));

	/*
	 * Copy 22 consecutive words starting at pr_minf.  This treats the
	 * members that follow pr_minf in both structures as one contiguous
	 * array of words laid out in the same order.
	 */
	for (idx = 0; idx < 22; idx++)
		dst[idx] = (uint32_t)src[idx];
}
#endif	/* _SYSCALL32_IMPL */
3158 
/*
 * Report whether a set, stored as n 32-bit words starting at sp, has no
 * bits set.  Returns 1 when every word is zero (including n == 0) and
 * 0 as soon as a non-zero word is seen.
 */
int
setisempty(uint32_t *sp, uint_t n)
{
	uint_t word;

	for (word = 0; word < n; word++) {
		if (sp[word] != 0)
			return (0);
	}
	return (1);
}
3170 
3171 /*
3172  * Utility routine for establishing a watched area in the process.
3173  * Keep the list of watched areas sorted by virtual address.
3174  */
3175 int
set_watched_area(proc_t * p,struct watched_area * pwa)3176 set_watched_area(proc_t *p, struct watched_area *pwa)
3177 {
3178 	caddr_t vaddr = pwa->wa_vaddr;
3179 	caddr_t eaddr = pwa->wa_eaddr;
3180 	ulong_t flags = pwa->wa_flags;
3181 	struct watched_area *target;
3182 	avl_index_t where;
3183 	int error = 0;
3184 
3185 	/* we must not be holding p->p_lock, but the process must be locked */
3186 	ASSERT(MUTEX_NOT_HELD(&p->p_lock));
3187 	ASSERT(p->p_proc_flag & P_PR_LOCK);
3188 
3189 	/*
3190 	 * If this is our first watchpoint, enable watchpoints for the process.
3191 	 */
3192 	if (!pr_watch_active(p)) {
3193 		kthread_t *t;
3194 
3195 		mutex_enter(&p->p_lock);
3196 		if ((t = p->p_tlist) != NULL) {
3197 			do {
3198 				watch_enable(t);
3199 			} while ((t = t->t_forw) != p->p_tlist);
3200 		}
3201 		mutex_exit(&p->p_lock);
3202 	}
3203 
3204 	target = pr_find_watched_area(p, pwa, &where);
3205 	if (target != NULL) {
3206 		/*
3207 		 * We discovered an existing, overlapping watched area.
3208 		 * Allow it only if it is an exact match.
3209 		 */
3210 		if (target->wa_vaddr != vaddr ||
3211 		    target->wa_eaddr != eaddr)
3212 			error = EINVAL;
3213 		else if (target->wa_flags != flags) {
3214 			error = set_watched_page(p, vaddr, eaddr,
3215 			    flags, target->wa_flags);
3216 			target->wa_flags = flags;
3217 		}
3218 		kmem_free(pwa, sizeof (struct watched_area));
3219 	} else {
3220 		avl_insert(&p->p_warea, pwa, where);
3221 		error = set_watched_page(p, vaddr, eaddr, flags, 0);
3222 	}
3223 
3224 	return (error);
3225 }
3226 
3227 /*
3228  * Utility routine for clearing a watched area in the process.
3229  * Must be an exact match of the virtual address.
3230  * size and flags don't matter.
3231  */
3232 int
clear_watched_area(proc_t * p,struct watched_area * pwa)3233 clear_watched_area(proc_t *p, struct watched_area *pwa)
3234 {
3235 	struct watched_area *found;
3236 
3237 	/* we must not be holding p->p_lock, but the process must be locked */
3238 	ASSERT(MUTEX_NOT_HELD(&p->p_lock));
3239 	ASSERT(p->p_proc_flag & P_PR_LOCK);
3240 
3241 
3242 	if (!pr_watch_active(p)) {
3243 		kmem_free(pwa, sizeof (struct watched_area));
3244 		return (0);
3245 	}
3246 
3247 	/*
3248 	 * Look for a matching address in the watched areas.  If a match is
3249 	 * found, clear the old watched area and adjust the watched page(s).  It
3250 	 * is not an error if there is no match.
3251 	 */
3252 	if ((found = pr_find_watched_area(p, pwa, NULL)) != NULL &&
3253 	    found->wa_vaddr == pwa->wa_vaddr) {
3254 		clear_watched_page(p, found->wa_vaddr, found->wa_eaddr,
3255 		    found->wa_flags);
3256 		avl_remove(&p->p_warea, found);
3257 		kmem_free(found, sizeof (struct watched_area));
3258 	}
3259 
3260 	kmem_free(pwa, sizeof (struct watched_area));
3261 
3262 	/*
3263 	 * If we removed the last watched area from the process, disable
3264 	 * watchpoints.
3265 	 */
3266 	if (!pr_watch_active(p)) {
3267 		kthread_t *t;
3268 
3269 		mutex_enter(&p->p_lock);
3270 		if ((t = p->p_tlist) != NULL) {
3271 			do {
3272 				watch_disable(t);
3273 			} while ((t = t->t_forw) != p->p_tlist);
3274 		}
3275 		mutex_exit(&p->p_lock);
3276 	}
3277 
3278 	return (0);
3279 }
3280 
3281 /*
3282  * Frees all the watched_area structures
3283  */
3284 void
pr_free_watchpoints(proc_t * p)3285 pr_free_watchpoints(proc_t *p)
3286 {
3287 	struct watched_area *delp;
3288 	void *cookie;
3289 
3290 	cookie = NULL;
3291 	while ((delp = avl_destroy_nodes(&p->p_warea, &cookie)) != NULL)
3292 		kmem_free(delp, sizeof (struct watched_area));
3293 
3294 	avl_destroy(&p->p_warea);
3295 }
3296 
3297 /*
3298  * This one is called by the traced process to unwatch all the
3299  * pages while deallocating the list of watched_page structs.
3300  */
3301 void
pr_free_watched_pages(proc_t * p)3302 pr_free_watched_pages(proc_t *p)
3303 {
3304 	struct as *as = p->p_as;
3305 	struct watched_page *pwp;
3306 	uint_t prot;
3307 	int    retrycnt, err;
3308 	void *cookie;
3309 
3310 	if (as == NULL || avl_numnodes(&as->a_wpage) == 0)
3311 		return;
3312 
3313 	ASSERT(MUTEX_NOT_HELD(&curproc->p_lock));
3314 	AS_LOCK_ENTER(as, RW_WRITER);
3315 
3316 	pwp = avl_first(&as->a_wpage);
3317 
3318 	cookie = NULL;
3319 	while ((pwp = avl_destroy_nodes(&as->a_wpage, &cookie)) != NULL) {
3320 		retrycnt = 0;
3321 		if ((prot = pwp->wp_oprot) != 0) {
3322 			caddr_t addr = pwp->wp_vaddr;
3323 			struct seg *seg;
3324 		retry:
3325 
3326 			if ((pwp->wp_prot != prot ||
3327 			    (pwp->wp_flags & WP_NOWATCH)) &&
3328 			    (seg = as_segat(as, addr)) != NULL) {
3329 				err = SEGOP_SETPROT(seg, addr, PAGESIZE, prot);
3330 				if (err == IE_RETRY) {
3331 					ASSERT(retrycnt == 0);
3332 					retrycnt++;
3333 					goto retry;
3334 				}
3335 			}
3336 		}
3337 		kmem_free(pwp, sizeof (struct watched_page));
3338 	}
3339 
3340 	avl_destroy(&as->a_wpage);
3341 	p->p_wprot = NULL;
3342 
3343 	AS_LOCK_EXIT(as);
3344 }
3345 
3346 /*
3347  * Insert a watched area into the list of watched pages.
3348  * If oflags is zero then we are adding a new watched area.
3349  * Otherwise we are changing the flags of an existing watched area.
3350  */
3351 static int
set_watched_page(proc_t * p,caddr_t vaddr,caddr_t eaddr,ulong_t flags,ulong_t oflags)3352 set_watched_page(proc_t *p, caddr_t vaddr, caddr_t eaddr,
3353 	ulong_t flags, ulong_t oflags)
3354 {
3355 	struct as *as = p->p_as;
3356 	avl_tree_t *pwp_tree;
3357 	struct watched_page *pwp, *newpwp;
3358 	struct watched_page tpw;
3359 	avl_index_t where;
3360 	struct seg *seg;
3361 	uint_t prot;
3362 	caddr_t addr;
3363 
3364 	/*
3365 	 * We need to pre-allocate a list of structures before we grab the
3366 	 * address space lock to avoid calling kmem_alloc(KM_SLEEP) with locks
3367 	 * held.
3368 	 */
3369 	newpwp = NULL;
3370 	for (addr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
3371 	    addr < eaddr; addr += PAGESIZE) {
3372 		pwp = kmem_zalloc(sizeof (struct watched_page), KM_SLEEP);
3373 		pwp->wp_list = newpwp;
3374 		newpwp = pwp;
3375 	}
3376 
3377 	AS_LOCK_ENTER(as, RW_WRITER);
3378 
3379 	/*
3380 	 * Search for an existing watched page to contain the watched area.
3381 	 * If none is found, grab a new one from the available list
3382 	 * and insert it in the active list, keeping the list sorted
3383 	 * by user-level virtual address.
3384 	 */
3385 	if (p->p_flag & SVFWAIT)
3386 		pwp_tree = &p->p_wpage;
3387 	else
3388 		pwp_tree = &as->a_wpage;
3389 
3390 again:
3391 	if (avl_numnodes(pwp_tree) > prnwatch) {
3392 		AS_LOCK_EXIT(as);
3393 		while (newpwp != NULL) {
3394 			pwp = newpwp->wp_list;
3395 			kmem_free(newpwp, sizeof (struct watched_page));
3396 			newpwp = pwp;
3397 		}
3398 		return (E2BIG);
3399 	}
3400 
3401 	tpw.wp_vaddr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
3402 	if ((pwp = avl_find(pwp_tree, &tpw, &where)) == NULL) {
3403 		pwp = newpwp;
3404 		newpwp = newpwp->wp_list;
3405 		pwp->wp_list = NULL;
3406 		pwp->wp_vaddr = (caddr_t)((uintptr_t)vaddr &
3407 		    (uintptr_t)PAGEMASK);
3408 		avl_insert(pwp_tree, pwp, where);
3409 	}
3410 
3411 	ASSERT(vaddr >= pwp->wp_vaddr && vaddr < pwp->wp_vaddr + PAGESIZE);
3412 
3413 	if (oflags & WA_READ)
3414 		pwp->wp_read--;
3415 	if (oflags & WA_WRITE)
3416 		pwp->wp_write--;
3417 	if (oflags & WA_EXEC)
3418 		pwp->wp_exec--;
3419 
3420 	ASSERT(pwp->wp_read >= 0);
3421 	ASSERT(pwp->wp_write >= 0);
3422 	ASSERT(pwp->wp_exec >= 0);
3423 
3424 	if (flags & WA_READ)
3425 		pwp->wp_read++;
3426 	if (flags & WA_WRITE)
3427 		pwp->wp_write++;
3428 	if (flags & WA_EXEC)
3429 		pwp->wp_exec++;
3430 
3431 	if (!(p->p_flag & SVFWAIT)) {
3432 		vaddr = pwp->wp_vaddr;
3433 		if (pwp->wp_oprot == 0 &&
3434 		    (seg = as_segat(as, vaddr)) != NULL) {
3435 			SEGOP_GETPROT(seg, vaddr, 0, &prot);
3436 			pwp->wp_oprot = (uchar_t)prot;
3437 			pwp->wp_prot = (uchar_t)prot;
3438 		}
3439 		if (pwp->wp_oprot != 0) {
3440 			prot = pwp->wp_oprot;
3441 			if (pwp->wp_read)
3442 				prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3443 			if (pwp->wp_write)
3444 				prot &= ~PROT_WRITE;
3445 			if (pwp->wp_exec)
3446 				prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3447 			if (!(pwp->wp_flags & WP_NOWATCH) &&
3448 			    pwp->wp_prot != prot &&
3449 			    (pwp->wp_flags & WP_SETPROT) == 0) {
3450 				pwp->wp_flags |= WP_SETPROT;
3451 				pwp->wp_list = p->p_wprot;
3452 				p->p_wprot = pwp;
3453 			}
3454 			pwp->wp_prot = (uchar_t)prot;
3455 		}
3456 	}
3457 
3458 	/*
3459 	 * If the watched area extends into the next page then do
3460 	 * it over again with the virtual address of the next page.
3461 	 */
3462 	if ((vaddr = pwp->wp_vaddr + PAGESIZE) < eaddr)
3463 		goto again;
3464 
3465 	AS_LOCK_EXIT(as);
3466 
3467 	/*
3468 	 * Free any pages we may have over-allocated
3469 	 */
3470 	while (newpwp != NULL) {
3471 		pwp = newpwp->wp_list;
3472 		kmem_free(newpwp, sizeof (struct watched_page));
3473 		newpwp = pwp;
3474 	}
3475 
3476 	return (0);
3477 }
3478 
3479 /*
3480  * Remove a watched area from the list of watched pages.
3481  * A watched area may extend over more than one page.
3482  */
3483 static void
clear_watched_page(proc_t * p,caddr_t vaddr,caddr_t eaddr,ulong_t flags)3484 clear_watched_page(proc_t *p, caddr_t vaddr, caddr_t eaddr, ulong_t flags)
3485 {
3486 	struct as *as = p->p_as;
3487 	struct watched_page *pwp;
3488 	struct watched_page tpw;
3489 	avl_tree_t *tree;
3490 	avl_index_t where;
3491 
3492 	AS_LOCK_ENTER(as, RW_WRITER);
3493 
3494 	if (p->p_flag & SVFWAIT)
3495 		tree = &p->p_wpage;
3496 	else
3497 		tree = &as->a_wpage;
3498 
3499 	tpw.wp_vaddr = vaddr =
3500 	    (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
3501 	pwp = avl_find(tree, &tpw, &where);
3502 	if (pwp == NULL)
3503 		pwp = avl_nearest(tree, where, AVL_AFTER);
3504 
3505 	while (pwp != NULL && pwp->wp_vaddr < eaddr) {
3506 		ASSERT(vaddr <=  pwp->wp_vaddr);
3507 
3508 		if (flags & WA_READ)
3509 			pwp->wp_read--;
3510 		if (flags & WA_WRITE)
3511 			pwp->wp_write--;
3512 		if (flags & WA_EXEC)
3513 			pwp->wp_exec--;
3514 
3515 		if (pwp->wp_read + pwp->wp_write + pwp->wp_exec != 0) {
3516 			/*
3517 			 * Reset the hat layer's protections on this page.
3518 			 */
3519 			if (pwp->wp_oprot != 0) {
3520 				uint_t prot = pwp->wp_oprot;
3521 
3522 				if (pwp->wp_read)
3523 					prot &=
3524 					    ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3525 				if (pwp->wp_write)
3526 					prot &= ~PROT_WRITE;
3527 				if (pwp->wp_exec)
3528 					prot &=
3529 					    ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3530 				if (!(pwp->wp_flags & WP_NOWATCH) &&
3531 				    pwp->wp_prot != prot &&
3532 				    (pwp->wp_flags & WP_SETPROT) == 0) {
3533 					pwp->wp_flags |= WP_SETPROT;
3534 					pwp->wp_list = p->p_wprot;
3535 					p->p_wprot = pwp;
3536 				}
3537 				pwp->wp_prot = (uchar_t)prot;
3538 			}
3539 		} else {
3540 			/*
3541 			 * No watched areas remain in this page.
3542 			 * Reset everything to normal.
3543 			 */
3544 			if (pwp->wp_oprot != 0) {
3545 				pwp->wp_prot = pwp->wp_oprot;
3546 				if ((pwp->wp_flags & WP_SETPROT) == 0) {
3547 					pwp->wp_flags |= WP_SETPROT;
3548 					pwp->wp_list = p->p_wprot;
3549 					p->p_wprot = pwp;
3550 				}
3551 			}
3552 		}
3553 
3554 		pwp = AVL_NEXT(tree, pwp);
3555 	}
3556 
3557 	AS_LOCK_EXIT(as);
3558 }
3559 
3560 /*
3561  * Return the original protections for the specified page.
3562  */
3563 static void
getwatchprot(struct as * as,caddr_t addr,uint_t * prot)3564 getwatchprot(struct as *as, caddr_t addr, uint_t *prot)
3565 {
3566 	struct watched_page *pwp;
3567 	struct watched_page tpw;
3568 
3569 	ASSERT(AS_LOCK_HELD(as));
3570 
3571 	tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
3572 	if ((pwp = avl_find(&as->a_wpage, &tpw, NULL)) != NULL)
3573 		*prot = pwp->wp_oprot;
3574 }
3575 
3576 static prpagev_t *
pr_pagev_create(struct seg * seg,int check_noreserve)3577 pr_pagev_create(struct seg *seg, int check_noreserve)
3578 {
3579 	prpagev_t *pagev = kmem_alloc(sizeof (prpagev_t), KM_SLEEP);
3580 	size_t total_pages = seg_pages(seg);
3581 
3582 	/*
3583 	 * Limit the size of our vectors to pagev_lim pages at a time.  We need
3584 	 * 4 or 5 bytes of storage per page, so this means we limit ourself
3585 	 * to about a megabyte of kernel heap by default.
3586 	 */
3587 	pagev->pg_npages = MIN(total_pages, pagev_lim);
3588 	pagev->pg_pnbase = 0;
3589 
3590 	pagev->pg_protv =
3591 	    kmem_alloc(pagev->pg_npages * sizeof (uint_t), KM_SLEEP);
3592 
3593 	if (check_noreserve)
3594 		pagev->pg_incore =
3595 		    kmem_alloc(pagev->pg_npages * sizeof (char), KM_SLEEP);
3596 	else
3597 		pagev->pg_incore = NULL;
3598 
3599 	return (pagev);
3600 }
3601 
3602 static void
pr_pagev_destroy(prpagev_t * pagev)3603 pr_pagev_destroy(prpagev_t *pagev)
3604 {
3605 	if (pagev->pg_incore != NULL)
3606 		kmem_free(pagev->pg_incore, pagev->pg_npages * sizeof (char));
3607 
3608 	kmem_free(pagev->pg_protv, pagev->pg_npages * sizeof (uint_t));
3609 	kmem_free(pagev, sizeof (prpagev_t));
3610 }
3611 
3612 static caddr_t
pr_pagev_fill(prpagev_t * pagev,struct seg * seg,caddr_t addr,caddr_t eaddr)3613 pr_pagev_fill(prpagev_t *pagev, struct seg *seg, caddr_t addr, caddr_t eaddr)
3614 {
3615 	ulong_t lastpg = seg_page(seg, eaddr - 1);
3616 	ulong_t pn, pnlim;
3617 	caddr_t saddr;
3618 	size_t len;
3619 
3620 	ASSERT(addr >= seg->s_base && addr <= eaddr);
3621 
3622 	if (addr == eaddr)
3623 		return (eaddr);
3624 
3625 refill:
3626 	ASSERT(addr < eaddr);
3627 	pagev->pg_pnbase = seg_page(seg, addr);
3628 	pnlim = pagev->pg_pnbase + pagev->pg_npages;
3629 	saddr = addr;
3630 
3631 	if (lastpg < pnlim)
3632 		len = (size_t)(eaddr - addr);
3633 	else
3634 		len = pagev->pg_npages * PAGESIZE;
3635 
3636 	if (pagev->pg_incore != NULL) {
3637 		/*
3638 		 * INCORE cleverly has different semantics than GETPROT:
3639 		 * it returns info on pages up to but NOT including addr + len.
3640 		 */
3641 		SEGOP_INCORE(seg, addr, len, pagev->pg_incore);
3642 		pn = pagev->pg_pnbase;
3643 
3644 		do {
3645 			/*
3646 			 * Guilty knowledge here:  We know that segvn_incore
3647 			 * returns more than just the low-order bit that
3648 			 * indicates the page is actually in memory.  If any
3649 			 * bits are set, then the page has backing store.
3650 			 */
3651 			if (pagev->pg_incore[pn++ - pagev->pg_pnbase])
3652 				goto out;
3653 
3654 		} while ((addr += PAGESIZE) < eaddr && pn < pnlim);
3655 
3656 		/*
3657 		 * If we examined all the pages in the vector but we're not
3658 		 * at the end of the segment, take another lap.
3659 		 */
3660 		if (addr < eaddr)
3661 			goto refill;
3662 	}
3663 
3664 	/*
3665 	 * Need to take len - 1 because addr + len is the address of the
3666 	 * first byte of the page just past the end of what we want.
3667 	 */
3668 out:
3669 	SEGOP_GETPROT(seg, saddr, len - 1, pagev->pg_protv);
3670 	return (addr);
3671 }
3672 
3673 static caddr_t
pr_pagev_nextprot(prpagev_t * pagev,struct seg * seg,caddr_t * saddrp,caddr_t eaddr,uint_t * protp)3674 pr_pagev_nextprot(prpagev_t *pagev, struct seg *seg,
3675     caddr_t *saddrp, caddr_t eaddr, uint_t *protp)
3676 {
3677 	/*
3678 	 * Our starting address is either the specified address, or the base
3679 	 * address from the start of the pagev.  If the latter is greater,
3680 	 * this means a previous call to pr_pagev_fill has already scanned
3681 	 * further than the end of the previous mapping.
3682 	 */
3683 	caddr_t base = seg->s_base + pagev->pg_pnbase * PAGESIZE;
3684 	caddr_t addr = MAX(*saddrp, base);
3685 	ulong_t pn = seg_page(seg, addr);
3686 	uint_t prot, nprot;
3687 
3688 	/*
3689 	 * If we're dealing with noreserve pages, then advance addr to
3690 	 * the address of the next page which has backing store.
3691 	 */
3692 	if (pagev->pg_incore != NULL) {
3693 		while (pagev->pg_incore[pn - pagev->pg_pnbase] == 0) {
3694 			if ((addr += PAGESIZE) == eaddr) {
3695 				*saddrp = addr;
3696 				prot = 0;
3697 				goto out;
3698 			}
3699 			if (++pn == pagev->pg_pnbase + pagev->pg_npages) {
3700 				addr = pr_pagev_fill(pagev, seg, addr, eaddr);
3701 				if (addr == eaddr) {
3702 					*saddrp = addr;
3703 					prot = 0;
3704 					goto out;
3705 				}
3706 				pn = seg_page(seg, addr);
3707 			}
3708 		}
3709 	}
3710 
3711 	/*
3712 	 * Get the protections on the page corresponding to addr.
3713 	 */
3714 	pn = seg_page(seg, addr);
3715 	ASSERT(pn >= pagev->pg_pnbase);
3716 	ASSERT(pn < (pagev->pg_pnbase + pagev->pg_npages));
3717 
3718 	prot = pagev->pg_protv[pn - pagev->pg_pnbase];
3719 	getwatchprot(seg->s_as, addr, &prot);
3720 	*saddrp = addr;
3721 
3722 	/*
3723 	 * Now loop until we find a backed page with different protections
3724 	 * or we reach the end of this segment.
3725 	 */
3726 	while ((addr += PAGESIZE) < eaddr) {
3727 		/*
3728 		 * If pn has advanced to the page number following what we
3729 		 * have information on, refill the page vector and reset
3730 		 * addr and pn.  If pr_pagev_fill does not return the
3731 		 * address of the next page, we have a discontiguity and
3732 		 * thus have reached the end of the current mapping.
3733 		 */
3734 		if (++pn == pagev->pg_pnbase + pagev->pg_npages) {
3735 			caddr_t naddr = pr_pagev_fill(pagev, seg, addr, eaddr);
3736 			if (naddr != addr)
3737 				goto out;
3738 			pn = seg_page(seg, addr);
3739 		}
3740 
3741 		/*
3742 		 * The previous page's protections are in prot, and it has
3743 		 * backing.  If this page is MAP_NORESERVE and has no backing,
3744 		 * then end this mapping and return the previous protections.
3745 		 */
3746 		if (pagev->pg_incore != NULL &&
3747 		    pagev->pg_incore[pn - pagev->pg_pnbase] == 0)
3748 			break;
3749 
3750 		/*
3751 		 * Otherwise end the mapping if this page's protections (nprot)
3752 		 * are different than those in the previous page (prot).
3753 		 */
3754 		nprot = pagev->pg_protv[pn - pagev->pg_pnbase];
3755 		getwatchprot(seg->s_as, addr, &nprot);
3756 
3757 		if (nprot != prot)
3758 			break;
3759 	}
3760 
3761 out:
3762 	*protp = prot;
3763 	return (addr);
3764 }
3765 
3766 size_t
pr_getsegsize(struct seg * seg,int reserved)3767 pr_getsegsize(struct seg *seg, int reserved)
3768 {
3769 	size_t size = seg->s_size;
3770 
3771 	/*
3772 	 * If we're interested in the reserved space, return the size of the
3773 	 * segment itself.  Everything else in this function is a special case
3774 	 * to determine the actual underlying size of various segment types.
3775 	 */
3776 	if (reserved)
3777 		return (size);
3778 
3779 	/*
3780 	 * If this is a segvn mapping of a regular file, return the smaller
3781 	 * of the segment size and the remaining size of the file beyond
3782 	 * the file offset corresponding to seg->s_base.
3783 	 */
3784 	if (seg->s_ops == &segvn_ops) {
3785 		vattr_t vattr;
3786 		vnode_t *vp;
3787 
3788 		vattr.va_mask = AT_SIZE;
3789 
3790 		if (SEGOP_GETVP(seg, seg->s_base, &vp) == 0 &&
3791 		    vp != NULL && vp->v_type == VREG &&
3792 		    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
3793 
3794 			u_offset_t fsize = vattr.va_size;
3795 			u_offset_t offset = SEGOP_GETOFFSET(seg, seg->s_base);
3796 
3797 			if (fsize < offset)
3798 				fsize = 0;
3799 			else
3800 				fsize -= offset;
3801 
3802 			fsize = roundup(fsize, (u_offset_t)PAGESIZE);
3803 
3804 			if (fsize < (u_offset_t)size)
3805 				size = (size_t)fsize;
3806 		}
3807 
3808 		return (size);
3809 	}
3810 
3811 	/*
3812 	 * If this is an ISM shared segment, don't include pages that are
3813 	 * beyond the real size of the spt segment that backs it.
3814 	 */
3815 	if (seg->s_ops == &segspt_shmops)
3816 		return (MIN(spt_realsize(seg), size));
3817 
3818 	/*
3819 	 * If this is segment is a mapping from /dev/null, then this is a
3820 	 * reservation of virtual address space and has no actual size.
3821 	 * Such segments are backed by segdev and have type set to neither
3822 	 * MAP_SHARED nor MAP_PRIVATE.
3823 	 */
3824 	if (seg->s_ops == &segdev_ops &&
3825 	    ((SEGOP_GETTYPE(seg, seg->s_base) &
3826 	    (MAP_SHARED | MAP_PRIVATE)) == 0))
3827 		return (0);
3828 
3829 	/*
3830 	 * If this segment doesn't match one of the special types we handle,
3831 	 * just return the size of the segment itself.
3832 	 */
3833 	return (size);
3834 }
3835 
3836 uint_t
pr_getprot(struct seg * seg,int reserved,void ** tmp,caddr_t * saddrp,caddr_t * naddrp,caddr_t eaddr)3837 pr_getprot(struct seg *seg, int reserved, void **tmp,
3838 	caddr_t *saddrp, caddr_t *naddrp, caddr_t eaddr)
3839 {
3840 	struct as *as = seg->s_as;
3841 
3842 	caddr_t saddr = *saddrp;
3843 	caddr_t naddr;
3844 
3845 	int check_noreserve;
3846 	uint_t prot;
3847 
3848 	union {
3849 		struct segvn_data *svd;
3850 		struct segdev_data *sdp;
3851 		void *data;
3852 	} s;
3853 
3854 	s.data = seg->s_data;
3855 
3856 	ASSERT(AS_WRITE_HELD(as));
3857 	ASSERT(saddr >= seg->s_base && saddr < eaddr);
3858 	ASSERT(eaddr <= seg->s_base + seg->s_size);
3859 
3860 	/*
3861 	 * Don't include MAP_NORESERVE pages in the address range
3862 	 * unless their mappings have actually materialized.
3863 	 * We cheat by knowing that segvn is the only segment
3864 	 * driver that supports MAP_NORESERVE.
3865 	 */
3866 	check_noreserve =
3867 	    (!reserved && seg->s_ops == &segvn_ops && s.svd != NULL &&
3868 	    (s.svd->vp == NULL || s.svd->vp->v_type != VREG) &&
3869 	    (s.svd->flags & MAP_NORESERVE));
3870 
3871 	/*
3872 	 * Examine every page only as a last resort.  We use guilty knowledge
3873 	 * of segvn and segdev to avoid this: if there are no per-page
3874 	 * protections present in the segment and we don't care about
3875 	 * MAP_NORESERVE, then s_data->prot is the prot for the whole segment.
3876 	 */
3877 	if (!check_noreserve && saddr == seg->s_base &&
3878 	    seg->s_ops == &segvn_ops && s.svd != NULL && s.svd->pageprot == 0) {
3879 		prot = s.svd->prot;
3880 		getwatchprot(as, saddr, &prot);
3881 		naddr = eaddr;
3882 
3883 	} else if (saddr == seg->s_base && seg->s_ops == &segdev_ops &&
3884 	    s.sdp != NULL && s.sdp->pageprot == 0) {
3885 		prot = s.sdp->prot;
3886 		getwatchprot(as, saddr, &prot);
3887 		naddr = eaddr;
3888 
3889 	} else {
3890 		prpagev_t *pagev;
3891 
3892 		/*
3893 		 * If addr is sitting at the start of the segment, then
3894 		 * create a page vector to store protection and incore
3895 		 * information for pages in the segment, and fill it.
3896 		 * Otherwise, we expect *tmp to address the prpagev_t
3897 		 * allocated by a previous call to this function.
3898 		 */
3899 		if (saddr == seg->s_base) {
3900 			pagev = pr_pagev_create(seg, check_noreserve);
3901 			saddr = pr_pagev_fill(pagev, seg, saddr, eaddr);
3902 
3903 			ASSERT(*tmp == NULL);
3904 			*tmp = pagev;
3905 
3906 			ASSERT(saddr <= eaddr);
3907 			*saddrp = saddr;
3908 
3909 			if (saddr == eaddr) {
3910 				naddr = saddr;
3911 				prot = 0;
3912 				goto out;
3913 			}
3914 
3915 		} else {
3916 			ASSERT(*tmp != NULL);
3917 			pagev = (prpagev_t *)*tmp;
3918 		}
3919 
3920 		naddr = pr_pagev_nextprot(pagev, seg, saddrp, eaddr, &prot);
3921 		ASSERT(naddr <= eaddr);
3922 	}
3923 
3924 out:
3925 	if (naddr == eaddr)
3926 		pr_getprot_done(tmp);
3927 	*naddrp = naddr;
3928 	return (prot);
3929 }
3930 
3931 void
pr_getprot_done(void ** tmp)3932 pr_getprot_done(void **tmp)
3933 {
3934 	if (*tmp != NULL) {
3935 		pr_pagev_destroy((prpagev_t *)*tmp);
3936 		*tmp = NULL;
3937 	}
3938 }
3939 
3940 /*
3941  * Return true iff the vnode is a /proc file from the object directory.
3942  */
3943 int
pr_isobject(vnode_t * vp)3944 pr_isobject(vnode_t *vp)
3945 {
3946 	return (vn_matchops(vp, prvnodeops) && VTOP(vp)->pr_type == PR_OBJECT);
3947 }
3948 
3949 /*
3950  * Return true iff the vnode is a /proc file opened by the process itself.
3951  */
3952 int
pr_isself(vnode_t * vp)3953 pr_isself(vnode_t *vp)
3954 {
3955 	/*
3956 	 * XXX: To retain binary compatibility with the old
3957 	 * ioctl()-based version of /proc, we exempt self-opens
3958 	 * of /proc/<pid> from being marked close-on-exec.
3959 	 */
3960 	return (vn_matchops(vp, prvnodeops) &&
3961 	    (VTOP(vp)->pr_flags & PR_ISSELF) &&
3962 	    VTOP(vp)->pr_type != PR_PIDDIR);
3963 }
3964 
3965 static ssize_t
pr_getpagesize(struct seg * seg,caddr_t saddr,caddr_t * naddrp,caddr_t eaddr)3966 pr_getpagesize(struct seg *seg, caddr_t saddr, caddr_t *naddrp, caddr_t eaddr)
3967 {
3968 	ssize_t pagesize, hatsize;
3969 
3970 	ASSERT(AS_WRITE_HELD(seg->s_as));
3971 	ASSERT(IS_P2ALIGNED(saddr, PAGESIZE));
3972 	ASSERT(IS_P2ALIGNED(eaddr, PAGESIZE));
3973 	ASSERT(saddr < eaddr);
3974 
3975 	pagesize = hatsize = hat_getpagesize(seg->s_as->a_hat, saddr);
3976 	ASSERT(pagesize == -1 || IS_P2ALIGNED(pagesize, pagesize));
3977 	ASSERT(pagesize != 0);
3978 
3979 	if (pagesize == -1)
3980 		pagesize = PAGESIZE;
3981 
3982 	saddr += P2NPHASE((uintptr_t)saddr, pagesize);
3983 
3984 	while (saddr < eaddr) {
3985 		if (hatsize != hat_getpagesize(seg->s_as->a_hat, saddr))
3986 			break;
3987 		ASSERT(IS_P2ALIGNED(saddr, pagesize));
3988 		saddr += pagesize;
3989 	}
3990 
3991 	*naddrp = ((saddr < eaddr) ? saddr : eaddr);
3992 	return (hatsize);
3993 }
3994 
3995 /*
3996  * Return an array of structures with extended memory map information.
3997  * We allocate here; the caller must deallocate.
3998  */
3999 int
prgetxmap(proc_t * p,list_t * iolhead)4000 prgetxmap(proc_t *p, list_t *iolhead)
4001 {
4002 	struct as *as = p->p_as;
4003 	prxmap_t *mp;
4004 	struct seg *seg;
4005 	struct seg *brkseg, *stkseg;
4006 	struct vnode *vp;
4007 	struct vattr vattr;
4008 	uint_t prot;
4009 
4010 	ASSERT(as != &kas && AS_WRITE_HELD(as));
4011 
4012 	/*
4013 	 * Request an initial buffer size that doesn't waste memory
4014 	 * if the address space has only a small number of segments.
4015 	 */
4016 	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));
4017 
4018 	if ((seg = AS_SEGFIRST(as)) == NULL)
4019 		return (0);
4020 
4021 	brkseg = break_seg(p);
4022 	stkseg = as_segat(as, prgetstackbase(p));
4023 
4024 	do {
4025 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
4026 		caddr_t saddr, naddr, baddr;
4027 		void *tmp = NULL;
4028 		ssize_t psz;
4029 		char *parr;
4030 		uint64_t npages;
4031 		uint64_t pagenum;
4032 
4033 		/*
4034 		 * Segment loop part one: iterate from the base of the segment
4035 		 * to its end, pausing at each address boundary (baddr) between
4036 		 * ranges that have different virtual memory protections.
4037 		 */
4038 		for (saddr = seg->s_base; saddr < eaddr; saddr = baddr) {
4039 			prot = pr_getprot(seg, 0, &tmp, &saddr, &baddr, eaddr);
4040 			ASSERT(baddr >= saddr && baddr <= eaddr);
4041 
4042 			/*
4043 			 * Segment loop part two: iterate from the current
4044 			 * position to the end of the protection boundary,
4045 			 * pausing at each address boundary (naddr) between
4046 			 * ranges that have different underlying page sizes.
4047 			 */
4048 			for (; saddr < baddr; saddr = naddr) {
4049 				psz = pr_getpagesize(seg, saddr, &naddr, baddr);
4050 				ASSERT(naddr >= saddr && naddr <= baddr);
4051 
4052 				mp = pr_iol_newbuf(iolhead, sizeof (*mp));
4053 
4054 				mp->pr_vaddr = (uintptr_t)saddr;
4055 				mp->pr_size = naddr - saddr;
4056 				mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
4057 				mp->pr_mflags = 0;
4058 				if (prot & PROT_READ)
4059 					mp->pr_mflags |= MA_READ;
4060 				if (prot & PROT_WRITE)
4061 					mp->pr_mflags |= MA_WRITE;
4062 				if (prot & PROT_EXEC)
4063 					mp->pr_mflags |= MA_EXEC;
4064 				if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
4065 					mp->pr_mflags |= MA_SHARED;
4066 				if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
4067 					mp->pr_mflags |= MA_NORESERVE;
4068 				if (seg->s_ops == &segspt_shmops ||
4069 				    (seg->s_ops == &segvn_ops &&
4070 				    (SEGOP_GETVP(seg, saddr, &vp) != 0 ||
4071 				    vp == NULL)))
4072 					mp->pr_mflags |= MA_ANON;
4073 				if (seg == brkseg)
4074 					mp->pr_mflags |= MA_BREAK;
4075 				else if (seg == stkseg)
4076 					mp->pr_mflags |= MA_STACK;
4077 				if (seg->s_ops == &segspt_shmops)
4078 					mp->pr_mflags |= MA_ISM | MA_SHM;
4079 
4080 				mp->pr_pagesize = PAGESIZE;
4081 				if (psz == -1) {
4082 					mp->pr_hatpagesize = 0;
4083 				} else {
4084 					mp->pr_hatpagesize = psz;
4085 				}
4086 
4087 				/*
4088 				 * Manufacture a filename for the "object" dir.
4089 				 */
4090 				mp->pr_dev = PRNODEV;
4091 				vattr.va_mask = AT_FSID|AT_NODEID;
4092 				if (seg->s_ops == &segvn_ops &&
4093 				    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
4094 				    vp != NULL && vp->v_type == VREG &&
4095 				    VOP_GETATTR(vp, &vattr, 0, CRED(),
4096 				    NULL) == 0) {
4097 					mp->pr_dev = vattr.va_fsid;
4098 					mp->pr_ino = vattr.va_nodeid;
4099 					if (vp == p->p_exec)
4100 						(void) strcpy(mp->pr_mapname,
4101 						    "a.out");
4102 					else
4103 						pr_object_name(mp->pr_mapname,
4104 						    vp, &vattr);
4105 				}
4106 
4107 				/*
4108 				 * Get the SysV shared memory id, if any.
4109 				 */
4110 				if ((mp->pr_mflags & MA_SHARED) &&
4111 				    p->p_segacct && (mp->pr_shmid = shmgetid(p,
4112 				    seg->s_base)) != SHMID_NONE) {
4113 					if (mp->pr_shmid == SHMID_FREE)
4114 						mp->pr_shmid = -1;
4115 
4116 					mp->pr_mflags |= MA_SHM;
4117 				} else {
4118 					mp->pr_shmid = -1;
4119 				}
4120 
4121 				npages = ((uintptr_t)(naddr - saddr)) >>
4122 				    PAGESHIFT;
4123 				parr = kmem_zalloc(npages, KM_SLEEP);
4124 
4125 				SEGOP_INCORE(seg, saddr, naddr - saddr, parr);
4126 
4127 				for (pagenum = 0; pagenum < npages; pagenum++) {
4128 					if (parr[pagenum] & SEG_PAGE_INCORE)
4129 						mp->pr_rss++;
4130 					if (parr[pagenum] & SEG_PAGE_ANON)
4131 						mp->pr_anon++;
4132 					if (parr[pagenum] & SEG_PAGE_LOCKED)
4133 						mp->pr_locked++;
4134 				}
4135 				kmem_free(parr, npages);
4136 			}
4137 		}
4138 		ASSERT(tmp == NULL);
4139 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
4140 
4141 	return (0);
4142 }
4143 
4144 /*
4145  * Return the process's credentials.  We don't need a 32-bit equivalent of
4146  * this function because prcred_t and prcred32_t are actually the same.
4147  */
4148 void
prgetcred(proc_t * p,prcred_t * pcrp)4149 prgetcred(proc_t *p, prcred_t *pcrp)
4150 {
4151 	mutex_enter(&p->p_crlock);
4152 	cred2prcred(p->p_cred, pcrp);
4153 	mutex_exit(&p->p_crlock);
4154 }
4155 
/*
 * Report the actual size of the prpriv_t structure, as computed by
 * priv_prgetprivsize() when it is handed a NULL argument.
 */
size_t
prgetprivsize(void)
{
	size_t privsize = priv_prgetprivsize(NULL);

	return (privsize);
}
4165 
4166 /*
4167  * Return the process's privileges.  We don't need a 32-bit equivalent of
4168  * this function because prpriv_t and prpriv32_t are actually the same.
4169  */
4170 void
prgetpriv(proc_t * p,prpriv_t * pprp)4171 prgetpriv(proc_t *p, prpriv_t *pprp)
4172 {
4173 	mutex_enter(&p->p_crlock);
4174 	cred2prpriv(p->p_cred, pprp);
4175 	mutex_exit(&p->p_crlock);
4176 }
4177 
4178 #ifdef _SYSCALL32_IMPL
4179 /*
4180  * Return an array of structures with HAT memory map information.
4181  * We allocate here; the caller must deallocate.
4182  */
4183 int
prgetxmap32(proc_t * p,list_t * iolhead)4184 prgetxmap32(proc_t *p, list_t *iolhead)
4185 {
4186 	struct as *as = p->p_as;
4187 	prxmap32_t *mp;
4188 	struct seg *seg;
4189 	struct seg *brkseg, *stkseg;
4190 	struct vnode *vp;
4191 	struct vattr vattr;
4192 	uint_t prot;
4193 
4194 	ASSERT(as != &kas && AS_WRITE_HELD(as));
4195 
4196 	/*
4197 	 * Request an initial buffer size that doesn't waste memory
4198 	 * if the address space has only a small number of segments.
4199 	 */
4200 	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));
4201 
4202 	if ((seg = AS_SEGFIRST(as)) == NULL)
4203 		return (0);
4204 
4205 	brkseg = break_seg(p);
4206 	stkseg = as_segat(as, prgetstackbase(p));
4207 
4208 	do {
4209 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
4210 		caddr_t saddr, naddr, baddr;
4211 		void *tmp = NULL;
4212 		ssize_t psz;
4213 		char *parr;
4214 		uint64_t npages;
4215 		uint64_t pagenum;
4216 
4217 		/*
4218 		 * Segment loop part one: iterate from the base of the segment
4219 		 * to its end, pausing at each address boundary (baddr) between
4220 		 * ranges that have different virtual memory protections.
4221 		 */
4222 		for (saddr = seg->s_base; saddr < eaddr; saddr = baddr) {
4223 			prot = pr_getprot(seg, 0, &tmp, &saddr, &baddr, eaddr);
4224 			ASSERT(baddr >= saddr && baddr <= eaddr);
4225 
4226 			/*
4227 			 * Segment loop part two: iterate from the current
4228 			 * position to the end of the protection boundary,
4229 			 * pausing at each address boundary (naddr) between
4230 			 * ranges that have different underlying page sizes.
4231 			 */
4232 			for (; saddr < baddr; saddr = naddr) {
4233 				psz = pr_getpagesize(seg, saddr, &naddr, baddr);
4234 				ASSERT(naddr >= saddr && naddr <= baddr);
4235 
4236 				mp = pr_iol_newbuf(iolhead, sizeof (*mp));
4237 
4238 				mp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
4239 				mp->pr_size = (size32_t)(naddr - saddr);
4240 				mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
4241 				mp->pr_mflags = 0;
4242 				if (prot & PROT_READ)
4243 					mp->pr_mflags |= MA_READ;
4244 				if (prot & PROT_WRITE)
4245 					mp->pr_mflags |= MA_WRITE;
4246 				if (prot & PROT_EXEC)
4247 					mp->pr_mflags |= MA_EXEC;
4248 				if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
4249 					mp->pr_mflags |= MA_SHARED;
4250 				if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
4251 					mp->pr_mflags |= MA_NORESERVE;
4252 				if (seg->s_ops == &segspt_shmops ||
4253 				    (seg->s_ops == &segvn_ops &&
4254 				    (SEGOP_GETVP(seg, saddr, &vp) != 0 ||
4255 				    vp == NULL)))
4256 					mp->pr_mflags |= MA_ANON;
4257 				if (seg == brkseg)
4258 					mp->pr_mflags |= MA_BREAK;
4259 				else if (seg == stkseg)
4260 					mp->pr_mflags |= MA_STACK;
4261 				if (seg->s_ops == &segspt_shmops)
4262 					mp->pr_mflags |= MA_ISM | MA_SHM;
4263 
4264 				mp->pr_pagesize = PAGESIZE;
4265 				if (psz == -1) {
4266 					mp->pr_hatpagesize = 0;
4267 				} else {
4268 					mp->pr_hatpagesize = psz;
4269 				}
4270 
4271 				/*
4272 				 * Manufacture a filename for the "object" dir.
4273 				 */
4274 				mp->pr_dev = PRNODEV32;
4275 				vattr.va_mask = AT_FSID|AT_NODEID;
4276 				if (seg->s_ops == &segvn_ops &&
4277 				    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
4278 				    vp != NULL && vp->v_type == VREG &&
4279 				    VOP_GETATTR(vp, &vattr, 0, CRED(),
4280 				    NULL) == 0) {
4281 					(void) cmpldev(&mp->pr_dev,
4282 					    vattr.va_fsid);
4283 					mp->pr_ino = vattr.va_nodeid;
4284 					if (vp == p->p_exec)
4285 						(void) strcpy(mp->pr_mapname,
4286 						    "a.out");
4287 					else
4288 						pr_object_name(mp->pr_mapname,
4289 						    vp, &vattr);
4290 				}
4291 
4292 				/*
4293 				 * Get the SysV shared memory id, if any.
4294 				 */
4295 				if ((mp->pr_mflags & MA_SHARED) &&
4296 				    p->p_segacct && (mp->pr_shmid = shmgetid(p,
4297 				    seg->s_base)) != SHMID_NONE) {
4298 					if (mp->pr_shmid == SHMID_FREE)
4299 						mp->pr_shmid = -1;
4300 
4301 					mp->pr_mflags |= MA_SHM;
4302 				} else {
4303 					mp->pr_shmid = -1;
4304 				}
4305 
4306 				npages = ((uintptr_t)(naddr - saddr)) >>
4307 				    PAGESHIFT;
4308 				parr = kmem_zalloc(npages, KM_SLEEP);
4309 
4310 				SEGOP_INCORE(seg, saddr, naddr - saddr, parr);
4311 
4312 				for (pagenum = 0; pagenum < npages; pagenum++) {
4313 					if (parr[pagenum] & SEG_PAGE_INCORE)
4314 						mp->pr_rss++;
4315 					if (parr[pagenum] & SEG_PAGE_ANON)
4316 						mp->pr_anon++;
4317 					if (parr[pagenum] & SEG_PAGE_LOCKED)
4318 						mp->pr_locked++;
4319 				}
4320 				kmem_free(parr, npages);
4321 			}
4322 		}
4323 		ASSERT(tmp == NULL);
4324 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
4325 
4326 	return (0);
4327 }
4328 #endif	/* _SYSCALL32_IMPL */
4329