/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2013, Joyent, Inc. All rights reserved.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/cmn_err.h>
#include <sys/cred.h>
#include <sys/priv.h>
#include <sys/debug.h>
#include <sys/errno.h>
#include <sys/inline.h>
#include <sys/kmem.h>
#include <sys/mman.h>
#include <sys/proc.h>
#include <sys/brand.h>
#include <sys/sobject.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/uio.h>
#include <sys/var.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/session.h>
#include <sys/pcb.h>
#include <sys/signal.h>
#include <sys/user.h>
#include <sys/disp.h>
#include <sys/class.h>
#include <sys/ts.h>
#include <sys/bitmap.h>
#include <sys/poll.h>
#include <sys/shm_impl.h>
#include <sys/fault.h>
#include <sys/syscall.h>
#include <sys/procfs.h>
#include <sys/processor.h>
#include <sys/cpuvar.h>
#include <sys/copyops.h>
#include <sys/time.h>
#include <sys/msacct.h>
#include <vm/as.h>
#include <vm/rm.h>
#include <vm/seg.h>
#include <vm/seg_vn.h>
#include <vm/seg_dev.h>
#include <vm/seg_spt.h>
#include <vm/page.h>
#include <sys/vmparam.h>
#include <sys/swap.h>
#include <fs/proc/prdata.h>
#include <sys/task.h>
#include <sys/project.h>
#include <sys/contract_impl.h>
#include <sys/contract/process.h>
#include <sys/contract/process_impl.h>
#include <sys/schedctl.h>
#include <sys/pool.h>
#include <sys/zone.h>
#include <sys/atomic.h>
#include <sys/sdt.h>

#define	MAX_ITERS_SPIN	5

typedef struct prpagev {
	uint_t *pg_protv;	/* vector of page permissions */
	char *pg_incore;	/* vector of incore flags */
	size_t pg_npages;	/* number of pages in protv and incore */
	ulong_t pg_pnbase;	/* pn within segment of first protv element */
} prpagev_t;

size_t pagev_lim = 256 * 1024;	/* limit on number of pages in prpagev_t */

extern struct seg_ops segdev_ops;	/* needs a header file */
extern struct seg_ops segspt_shmops;	/* needs a header file */

static int set_watched_page(proc_t *, caddr_t, caddr_t, ulong_t, ulong_t);
static void clear_watched_page(proc_t *, caddr_t, caddr_t, ulong_t);

/*
 * Choose an lwp from the complete set of lwps for the process.
 * This is called for any operation applied to the process
 * file descriptor that requires an lwp to operate upon.
 *
 * Returns a pointer to the thread for the selected LWP,
 * and with the dispatcher lock held for the thread.
 *
 * The algorithm for choosing an lwp is critical for /proc semantics;
 * don't touch this code unless you know all of the implications.
 */
kthread_t *
prchoose(proc_t *p)
{
	kthread_t *t;
	kthread_t *t_onproc = NULL;	/* running on processor */
	kthread_t *t_run = NULL;	/* runnable, on disp queue */
	kthread_t *t_sleep = NULL;	/* sleeping */
	kthread_t *t_hold = NULL;	/* sleeping, performing hold */
	kthread_t *t_susp = NULL;	/* suspended stop */
	kthread_t *t_jstop = NULL;	/* jobcontrol stop, w/o directed stop */
	kthread_t *t_jdstop = NULL;	/* jobcontrol stop with directed stop */
	kthread_t *t_req = NULL;	/* requested stop */
	kthread_t *t_istop = NULL;	/* event-of-interest stop */
	kthread_t *t_dtrace = NULL;	/* DTrace stop */

	ASSERT(MUTEX_HELD(&p->p_lock));

	/*
	 * If the agent lwp exists, it takes precedence over all others.
	 */
	if ((t = p->p_agenttp) != NULL) {
		thread_lock(t);
		return (t);
	}

	if ((t = p->p_tlist) == NULL)	/* start at the head of the list */
		return (t);
	do {		/* for each lwp in the process */
		if (VSTOPPED(t)) {	/* virtually stopped */
			if (t_req == NULL)
				t_req = t;
			continue;
		}

		thread_lock(t);		/* make sure thread is in good state */
		switch (t->t_state) {
		default:
			panic("prchoose: bad thread state %d, thread 0x%p",
			    t->t_state, (void *)t);
			/*NOTREACHED*/
		case TS_SLEEP:
			/* this is filthy */
			if (t->t_wchan == (caddr_t)&p->p_holdlwps &&
			    t->t_wchan0 == NULL) {
				if (t_hold == NULL)
					t_hold = t;
			} else {
				if (t_sleep == NULL)
					t_sleep = t;
			}
			break;
		case TS_RUN:
		case TS_WAIT:
			if (t_run == NULL)
				t_run = t;
			break;
		case TS_ONPROC:
			if (t_onproc == NULL)
				t_onproc = t;
			break;
		case TS_ZOMB:		/* last possible choice */
			break;
		case TS_STOPPED:
			switch (t->t_whystop) {
			case PR_SUSPENDED:
				if (t_susp == NULL)
					t_susp = t;
				break;
			case PR_JOBCONTROL:
				if (t->t_proc_flag & TP_PRSTOP) {
					if (t_jdstop == NULL)
						t_jdstop = t;
				} else {
					if (t_jstop == NULL)
						t_jstop = t;
				}
				break;
			case PR_REQUESTED:
				if (t->t_dtrace_stop && t_dtrace == NULL)
					t_dtrace = t;
				else if (t_req == NULL)
					t_req = t;
				break;
			case PR_SYSENTRY:
			case PR_SYSEXIT:
			case PR_SIGNALLED:
			case PR_FAULTED:
				/*
				 * Make an lwp calling exit() be the
				 * last lwp seen in the process.
				 */
				if (t_istop == NULL ||
				    (t_istop->t_whystop == PR_SYSENTRY &&
				    t_istop->t_whatstop == SYS_exit))
					t_istop = t;
				break;
			case PR_CHECKPOINT:	/* can't happen? */
				break;
			default:
				panic("prchoose: bad t_whystop %d, thread 0x%p",
				    t->t_whystop, (void *)t);
				/*NOTREACHED*/
			}
			break;
		}
		thread_unlock(t);
	} while ((t = t->t_forw) != p->p_tlist);

	if (t_onproc)
		t = t_onproc;
	else if (t_run)
		t = t_run;
	else if (t_sleep)
		t = t_sleep;
	else if (t_jstop)
		t = t_jstop;
	else if (t_jdstop)
		t = t_jdstop;
	else if (t_istop)
		t = t_istop;
	else if (t_dtrace)
		t = t_dtrace;
	else if (t_req)
		t = t_req;
	else if (t_hold)
		t = t_hold;
	else if (t_susp)
		t = t_susp;
	else			/* TS_ZOMB */
		t = p->p_tlist;

	if (t != NULL)
		thread_lock(t);
	return (t);
}
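
/*
 * A sketch of the typical caller pattern (prgetstatus() below is a real
 * example): the chosen thread is returned with its dispatcher lock held,
 * so the caller must drop that lock before doing anything that might
 * block, while continuing to hold p->p_lock.
 *
 *	kthread_t *t;
 *
 *	ASSERT(MUTEX_HELD(&p->p_lock));
 *	t = prchoose(p);	(returns locked thread)
 *	ASSERT(t != NULL);
 *	thread_unlock(t);
 */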

/*
 * Wakeup anyone sleeping on the /proc vnode for the process/lwp to stop.
 * Also call pollwakeup() if any lwps are waiting in poll() for POLLPRI
 * on the /proc file descriptor. Called from stop() when a traced
 * process stops on an event of interest. Also called from exit()
 * and prinvalidate() to indicate POLLHUP and POLLERR respectively.
 */
void
prnotify(struct vnode *vp)
{
	prcommon_t *pcp = VTOP(vp)->pr_common;

	mutex_enter(&pcp->prc_mutex);
	cv_broadcast(&pcp->prc_wait);
	mutex_exit(&pcp->prc_mutex);
	if (pcp->prc_flags & PRC_POLL) {
		/*
		 * We call pollwakeup() with POLLHUP to ensure that
		 * the pollers are awakened even if they are polling
		 * for nothing (i.e., waiting for the process to exit).
		 * This enables the use of the PRC_POLL flag for optimization
		 * (we can turn off PRC_POLL only if we know no pollers remain).
		 */
		pcp->prc_flags &= ~PRC_POLL;
		pollwakeup(&pcp->prc_pollhead, POLLHUP);
	}
}

/* called immediately below, in prfree() */
static void
prfreenotify(vnode_t *vp)
{
	prnode_t *pnp;
	prcommon_t *pcp;

	while (vp != NULL) {
		pnp = VTOP(vp);
		pcp = pnp->pr_common;
		ASSERT(pcp->prc_thread == NULL);
		pcp->prc_proc = NULL;
		/*
		 * We can't call prnotify() here because we are holding
		 * pidlock. We assert that there is no need to.
		 */
		mutex_enter(&pcp->prc_mutex);
		cv_broadcast(&pcp->prc_wait);
		mutex_exit(&pcp->prc_mutex);
		ASSERT(!(pcp->prc_flags & PRC_POLL));

		vp = pnp->pr_next;
		pnp->pr_next = NULL;
	}
}

/*
 * Called from a hook in freeproc() when a traced process is removed
 * from the process table. The proc-table pointers of all associated
 * /proc vnodes are cleared to indicate that the process has gone away.
 */
void
prfree(proc_t *p)
{
	uint_t slot = p->p_slot;

	ASSERT(MUTEX_HELD(&pidlock));

	/*
	 * Block the process against /proc so it can be freed.
	 * It cannot be freed while locked by some controlling process.
	 * Lock ordering:
	 *	pidlock -> pr_pidlock -> p->p_lock -> pcp->prc_mutex
	 */
	mutex_enter(&pr_pidlock);	/* protects pcp->prc_proc */
	mutex_enter(&p->p_lock);
	while (p->p_proc_flag & P_PR_LOCK) {
		mutex_exit(&pr_pidlock);
		cv_wait(&pr_pid_cv[slot], &p->p_lock);
		mutex_exit(&p->p_lock);
		mutex_enter(&pr_pidlock);
		mutex_enter(&p->p_lock);
	}

	ASSERT(p->p_tlist == NULL);

	prfreenotify(p->p_plist);
	p->p_plist = NULL;

	prfreenotify(p->p_trace);
	p->p_trace = NULL;

	/*
	 * We broadcast to wake up everyone waiting for this process.
	 * No one can reach this process from this point on.
	 */
	cv_broadcast(&pr_pid_cv[slot]);

	mutex_exit(&p->p_lock);
	mutex_exit(&pr_pidlock);
}

/*
 * Called from a hook in exit() when a traced process is becoming a zombie.
 */
void
prexit(proc_t *p)
{
	ASSERT(MUTEX_HELD(&p->p_lock));

	if (pr_watch_active(p)) {
		pr_free_watchpoints(p);
		watch_disable(curthread);
	}
	/* pr_free_watched_pages() is called in exit(), after dropping p_lock */
	if (p->p_trace) {
		VTOP(p->p_trace)->pr_common->prc_flags |= PRC_DESTROY;
		prnotify(p->p_trace);
	}
	cv_broadcast(&pr_pid_cv[p->p_slot]);	/* pauselwps() */
}

/*
 * Called when a thread calls lwp_exit().
 */
void
prlwpexit(kthread_t *t)
{
	vnode_t *vp;
	prnode_t *pnp;
	prcommon_t *pcp;
	proc_t *p = ttoproc(t);
	lwpent_t *lep = p->p_lwpdir[t->t_dslot].ld_entry;

	ASSERT(t == curthread);
	ASSERT(MUTEX_HELD(&p->p_lock));

	/*
	 * The process must be blocked against /proc to do this safely.
	 * The lwp must not disappear while the process is marked P_PR_LOCK.
	 * It is the caller's responsibility to have called prbarrier(p).
	 */
	ASSERT(!(p->p_proc_flag & P_PR_LOCK));

	for (vp = p->p_plist; vp != NULL; vp = pnp->pr_next) {
		pnp = VTOP(vp);
		pcp = pnp->pr_common;
		if (pcp->prc_thread == t) {
			pcp->prc_thread = NULL;
			pcp->prc_flags |= PRC_DESTROY;
		}
	}

	for (vp = lep->le_trace; vp != NULL; vp = pnp->pr_next) {
		pnp = VTOP(vp);
		pcp = pnp->pr_common;
		pcp->prc_thread = NULL;
		pcp->prc_flags |= PRC_DESTROY;
		prnotify(vp);
	}

	if (p->p_trace)
		prnotify(p->p_trace);
}

/*
 * Called when a zombie thread is joined or when a
 * detached lwp exits. Called from lwp_hash_out().
 */
void
prlwpfree(proc_t *p, lwpent_t *lep)
{
	vnode_t *vp;
	prnode_t *pnp;
	prcommon_t *pcp;

	ASSERT(MUTEX_HELD(&p->p_lock));

	/*
	 * The process must be blocked against /proc to do this safely.
	 * The lwp must not disappear while the process is marked P_PR_LOCK.
	 * It is the caller's responsibility to have called prbarrier(p).
	 */
	ASSERT(!(p->p_proc_flag & P_PR_LOCK));

	vp = lep->le_trace;
	lep->le_trace = NULL;
	while (vp) {
		prnotify(vp);
		pnp = VTOP(vp);
		pcp = pnp->pr_common;
		ASSERT(pcp->prc_thread == NULL &&
		    (pcp->prc_flags & PRC_DESTROY));
		pcp->prc_tslot = -1;
		vp = pnp->pr_next;
		pnp->pr_next = NULL;
	}

	if (p->p_trace)
		prnotify(p->p_trace);
}

/*
 * Called from a hook in exec() when a thread starts exec().
 */
void
prexecstart(void)
{
	proc_t *p = ttoproc(curthread);
	klwp_t *lwp = ttolwp(curthread);

	/*
	 * The P_PR_EXEC flag blocks /proc operations for
	 * the duration of the exec().
	 * We can't start exec() while the process is
	 * locked by /proc, so we call prbarrier().
	 * lwp_nostop keeps the process from being stopped
	 * via job control for the duration of the exec().
	 */

	ASSERT(MUTEX_HELD(&p->p_lock));
	prbarrier(p);
	lwp->lwp_nostop++;
	p->p_proc_flag |= P_PR_EXEC;
}

/*
 * Called from a hook in exec() when a thread finishes exec().
 * The thread may or may not have succeeded. Some other thread
 * may have beaten it to the punch.
 */
void
prexecend(void)
{
	proc_t *p = ttoproc(curthread);
	klwp_t *lwp = ttolwp(curthread);
	vnode_t *vp;
	prnode_t *pnp;
	prcommon_t *pcp;
	model_t model = p->p_model;
	id_t tid = curthread->t_tid;
	int tslot = curthread->t_dslot;

	ASSERT(MUTEX_HELD(&p->p_lock));

	lwp->lwp_nostop--;
	if (p->p_flag & SEXITLWPS) {
		/*
		 * We are on our way to exiting because some
		 * other thread beat us in the race to exec().
		 * Don't clear the P_PR_EXEC flag in this case.
		 */
		return;
	}

	/*
	 * Wake up anyone waiting in /proc for the process to complete exec().
	 */
	p->p_proc_flag &= ~P_PR_EXEC;
	if ((vp = p->p_trace) != NULL) {
		pcp = VTOP(vp)->pr_common;
		mutex_enter(&pcp->prc_mutex);
		cv_broadcast(&pcp->prc_wait);
		mutex_exit(&pcp->prc_mutex);
		for (; vp != NULL; vp = pnp->pr_next) {
			pnp = VTOP(vp);
			pnp->pr_common->prc_datamodel = model;
		}
	}
	if ((vp = p->p_lwpdir[tslot].ld_entry->le_trace) != NULL) {
		/*
		 * We dealt with the process common above.
		 */
		ASSERT(p->p_trace != NULL);
		pcp = VTOP(vp)->pr_common;
		mutex_enter(&pcp->prc_mutex);
		cv_broadcast(&pcp->prc_wait);
		mutex_exit(&pcp->prc_mutex);
		for (; vp != NULL; vp = pnp->pr_next) {
			pnp = VTOP(vp);
			pcp = pnp->pr_common;
			pcp->prc_datamodel = model;
			pcp->prc_tid = tid;
			pcp->prc_tslot = tslot;
		}
	}
}

/*
 * Called from a hook in relvm() just before freeing the address space.
 * We free all the watched areas now.
 */
void
prrelvm(void)
{
	proc_t *p = ttoproc(curthread);

	mutex_enter(&p->p_lock);
	prbarrier(p);	/* block all other /proc operations */
	if (pr_watch_active(p)) {
		pr_free_watchpoints(p);
		watch_disable(curthread);
	}
	mutex_exit(&p->p_lock);
	pr_free_watched_pages(p);
}

/*
 * Called from hooks in exec-related code when a traced process
 * attempts to exec(2) a setuid/setgid program or an unreadable
 * file. Rather than fail the exec we invalidate the associated
 * /proc vnodes so that subsequent attempts to use them will fail.
 *
 * All /proc vnodes, except directory vnodes, are retained on a linked
 * list (rooted at p_plist in the process structure) until last close.
 *
 * A controlling process must re-open the /proc files in order to
 * regain control.
 */
void
prinvalidate(struct user *up)
{
	kthread_t *t = curthread;
	proc_t *p = ttoproc(t);
	vnode_t *vp;
	prnode_t *pnp;
	int writers = 0;

	mutex_enter(&p->p_lock);
	prbarrier(p);	/* block all other /proc operations */

	/*
	 * At this moment, there can be only one lwp in the process.
	 */
	ASSERT(p->p_lwpcnt == 1 && p->p_zombcnt == 0);

	/*
	 * Invalidate any currently active /proc vnodes.
	 */
	for (vp = p->p_plist; vp != NULL; vp = pnp->pr_next) {
		pnp = VTOP(vp);
		switch (pnp->pr_type) {
		case PR_PSINFO:		/* these files can be read by anyone */
		case PR_LPSINFO:
		case PR_LWPSINFO:
		case PR_LWPDIR:
		case PR_LWPIDDIR:
		case PR_USAGE:
		case PR_LUSAGE:
		case PR_LWPUSAGE:
			break;
		default:
			pnp->pr_flags |= PR_INVAL;
			break;
		}
	}
	/*
	 * Wake up anyone waiting for the process or lwp.
	 * p->p_trace is guaranteed to be non-NULL if there
	 * are any open /proc files for this process.
	 */
	if ((vp = p->p_trace) != NULL) {
		prcommon_t *pcp = VTOP(vp)->pr_pcommon;

		prnotify(vp);
		/*
		 * Are there any writers?
		 */
		if ((writers = pcp->prc_writers) != 0) {
			/*
			 * Clear the exclusive open flag (old /proc interface).
			 * Set prc_selfopens equal to prc_writers so that
			 * the next O_EXCL|O_WRITE open will succeed
			 * even with existing (though invalid) writers.
			 * prclose() must decrement prc_selfopens when
			 * the invalid files are closed.
			 */
			pcp->prc_flags &= ~PRC_EXCL;
			ASSERT(pcp->prc_selfopens <= writers);
			pcp->prc_selfopens = writers;
		}
	}
	vp = p->p_lwpdir[t->t_dslot].ld_entry->le_trace;
	while (vp != NULL) {
		/*
		 * We should not invalidate the lwpiddir vnodes,
		 * but the necessities of maintaining the old
		 * ioctl()-based version of /proc require it.
		 */
		pnp = VTOP(vp);
		pnp->pr_flags |= PR_INVAL;
		prnotify(vp);
		vp = pnp->pr_next;
	}

	/*
	 * If any tracing flags are in effect and any vnodes are open for
	 * writing then set the requested-stop and run-on-last-close flags.
	 * Otherwise, clear all tracing flags.
	 */
	t->t_proc_flag &= ~TP_PAUSE;
	if ((p->p_proc_flag & P_PR_TRACE) && writers) {
		t->t_proc_flag |= TP_PRSTOP;
		aston(t);	/* so ISSIG will see the flag */
		p->p_proc_flag |= P_PR_RUNLCL;
	} else {
		premptyset(&up->u_entrymask);		/* syscalls */
		premptyset(&up->u_exitmask);
		up->u_systrap = 0;
		premptyset(&p->p_sigmask);		/* signals */
		premptyset(&p->p_fltmask);		/* faults */
		t->t_proc_flag &= ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
		p->p_proc_flag &= ~(P_PR_RUNLCL|P_PR_KILLCL|P_PR_TRACE);
		prnostep(ttolwp(t));
	}

	mutex_exit(&p->p_lock);
}

/*
 * Acquire the controlled process's p_lock and mark it P_PR_LOCK.
 * Return with pr_pidlock held in all cases.
 * Return with p_lock held if the process still exists.
 * Return value is the process pointer if the process still exists, else NULL.
 * If we lock the process, give ourself kernel priority to avoid deadlocks;
 * this is undone in prunlock().
 */
proc_t *
pr_p_lock(prnode_t *pnp)
{
	proc_t *p;
	prcommon_t *pcp;

	mutex_enter(&pr_pidlock);
	if ((pcp = pnp->pr_pcommon) == NULL || (p = pcp->prc_proc) == NULL)
		return (NULL);
	mutex_enter(&p->p_lock);
	while (p->p_proc_flag & P_PR_LOCK) {
		/*
		 * This cv/mutex pair is persistent even if
		 * the process disappears while we sleep.
		 */
		kcondvar_t *cv = &pr_pid_cv[p->p_slot];
		kmutex_t *mp = &p->p_lock;

		mutex_exit(&pr_pidlock);
		cv_wait(cv, mp);
		mutex_exit(mp);
		mutex_enter(&pr_pidlock);
		if (pcp->prc_proc == NULL)
			return (NULL);
		ASSERT(p == pcp->prc_proc);
		mutex_enter(&p->p_lock);
	}
	p->p_proc_flag |= P_PR_LOCK;
	return (p);
}

/*
 * Lock the target process by setting P_PR_LOCK and grabbing p->p_lock.
 * This prevents any lwp of the process from disappearing and
 * blocks most operations that a process can perform on itself.
 * Returns 0 on success, a non-zero error number on failure.
 *
 * 'zdisp' is ZYES or ZNO to indicate whether prlock() should succeed when
 * the subject process is a zombie (ZYES) or fail for zombies (ZNO).
 *
 * error returns:
 *	ENOENT: process or lwp has disappeared or process is exiting
 *		(or has become a zombie and zdisp == ZNO).
 *	EAGAIN: procfs vnode has become invalid.
 *	EINTR: signal arrived while waiting for exec to complete.
 */
int
prlock(prnode_t *pnp, int zdisp)
{
	prcommon_t *pcp;
	proc_t *p;

again:
	pcp = pnp->pr_common;
	p = pr_p_lock(pnp);
	mutex_exit(&pr_pidlock);

	/*
	 * Return ENOENT immediately if there is no process.
	 */
	if (p == NULL)
		return (ENOENT);

	ASSERT(p == pcp->prc_proc && p->p_stat != 0 && p->p_stat != SIDL);

	/*
	 * Return ENOENT if process entered zombie state or is exiting
	 * and the 'zdisp' flag is set to ZNO indicating not to lock zombies.
	 */
	if (zdisp == ZNO &&
	    ((pcp->prc_flags & PRC_DESTROY) || (p->p_flag & SEXITING))) {
		prunlock(pnp);
		return (ENOENT);
	}

	/*
	 * If lwp-specific, check to see if lwp has disappeared.
	 */
	if (pcp->prc_flags & PRC_LWP) {
		if ((zdisp == ZNO && (pcp->prc_flags & PRC_DESTROY)) ||
		    pcp->prc_tslot == -1) {
			prunlock(pnp);
			return (ENOENT);
		}
	}

	/*
	 * Return EAGAIN if we have encountered a security violation.
	 * (The process exec'd a set-id or unreadable executable file.)
	 */
	if (pnp->pr_flags & PR_INVAL) {
		prunlock(pnp);
		return (EAGAIN);
	}

	/*
	 * If process is undergoing an exec(), wait for
	 * completion and then start all over again.
	 */
	if (p->p_proc_flag & P_PR_EXEC) {
		pcp = pnp->pr_pcommon;	/* Put on the correct sleep queue */
		mutex_enter(&pcp->prc_mutex);
		prunlock(pnp);
		if (!cv_wait_sig(&pcp->prc_wait, &pcp->prc_mutex)) {
			mutex_exit(&pcp->prc_mutex);
			return (EINTR);
		}
		mutex_exit(&pcp->prc_mutex);
		goto again;
	}

	/*
	 * We return holding p->p_lock.
	 */
	return (0);
}
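
/*
 * A minimal sketch of the canonical prlock()/prunlock() sequence (the
 * real callers are the /proc vnode operations, e.g. in prvnops.c):
 *
 *	if ((error = prlock(pnp, ZNO)) != 0)
 *		return (error);
 *	p = pnp->pr_common->prc_proc;
 *	(operate on the process; no lwp can disappear meanwhile)
 *	prunlock(pnp);
 */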

/*
 * Undo prlock() and pr_p_lock().
 * p->p_lock is still held; pr_pidlock is no longer held.
 *
 * prunmark() drops the P_PR_LOCK flag and wakes up another thread,
 * if any, waiting for the flag to be dropped; it retains p->p_lock.
 *
 * prunlock() calls prunmark() and then drops p->p_lock.
 */
void
prunmark(proc_t *p)
{
	ASSERT(p->p_proc_flag & P_PR_LOCK);
	ASSERT(MUTEX_HELD(&p->p_lock));

	cv_signal(&pr_pid_cv[p->p_slot]);
	p->p_proc_flag &= ~P_PR_LOCK;
}

void
prunlock(prnode_t *pnp)
{
	prcommon_t *pcp = pnp->pr_common;
	proc_t *p = pcp->prc_proc;

	/*
	 * If we (or someone) gave it a SIGKILL, and it is not
	 * already a zombie, set it running unconditionally.
	 */
	if ((p->p_flag & SKILLED) &&
	    !(p->p_flag & SEXITING) &&
	    !(pcp->prc_flags & PRC_DESTROY) &&
	    !((pcp->prc_flags & PRC_LWP) && pcp->prc_tslot == -1))
		(void) pr_setrun(pnp, 0);
	prunmark(p);
	mutex_exit(&p->p_lock);
}

/*
 * Called while holding p->p_lock to delay until the process is unlocked.
 * We enter holding p->p_lock; p->p_lock is dropped and reacquired.
 * The process cannot become locked again until p->p_lock is dropped.
 */
void
prbarrier(proc_t *p)
{
	ASSERT(MUTEX_HELD(&p->p_lock));

	if (p->p_proc_flag & P_PR_LOCK) {
		/* The process is locked; delay until not locked */
		uint_t slot = p->p_slot;

		while (p->p_proc_flag & P_PR_LOCK)
			cv_wait(&pr_pid_cv[slot], &p->p_lock);
		cv_signal(&pr_pid_cv[slot]);
	}
}
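
/*
 * Sketch of the prbarrier() idiom used elsewhere in this file (see
 * prexecstart() and prrelvm() above): a process acting on itself must
 * first wait out any controlling process that holds it P_PR_LOCKed.
 *
 *	mutex_enter(&p->p_lock);
 *	prbarrier(p);		(block all other /proc operations)
 *	(modify process state)
 *	mutex_exit(&p->p_lock);
 */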

/*
 * Return process/lwp status.
 * The u-block is mapped in by this routine and unmapped at the end.
 */
void
prgetstatus(proc_t *p, pstatus_t *sp, zone_t *zp)
{
	kthread_t *t;

	ASSERT(MUTEX_HELD(&p->p_lock));

	t = prchoose(p);	/* returns locked thread */
	ASSERT(t != NULL);
	thread_unlock(t);

	/* just bzero the process part, prgetlwpstatus() does the rest */
	bzero(sp, sizeof (pstatus_t) - sizeof (lwpstatus_t));
	sp->pr_nlwp = p->p_lwpcnt;
	sp->pr_nzomb = p->p_zombcnt;
	prassignset(&sp->pr_sigpend, &p->p_sig);
	sp->pr_brkbase = (uintptr_t)p->p_brkbase;
	sp->pr_brksize = p->p_brksize;
	sp->pr_stkbase = (uintptr_t)prgetstackbase(p);
	sp->pr_stksize = p->p_stksize;
	sp->pr_pid = p->p_pid;
	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
	    (p->p_flag & SZONETOP)) {
		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
		/*
		 * Inside local zones, fake zsched's pid as parent pids for
		 * processes which reference processes outside of the zone.
		 */
		sp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
	} else {
		sp->pr_ppid = p->p_ppid;
	}
	sp->pr_pgid = p->p_pgrp;
	sp->pr_sid = p->p_sessp->s_sid;
	sp->pr_taskid = p->p_task->tk_tkid;
	sp->pr_projid = p->p_task->tk_proj->kpj_id;
	sp->pr_zoneid = p->p_zone->zone_id;
	hrt2ts(mstate_aggr_state(p, LMS_USER), &sp->pr_utime);
	hrt2ts(mstate_aggr_state(p, LMS_SYSTEM), &sp->pr_stime);
	TICK_TO_TIMESTRUC(p->p_cutime, &sp->pr_cutime);
	TICK_TO_TIMESTRUC(p->p_cstime, &sp->pr_cstime);
	prassignset(&sp->pr_sigtrace, &p->p_sigmask);
	prassignset(&sp->pr_flttrace, &p->p_fltmask);
	prassignset(&sp->pr_sysentry, &PTOU(p)->u_entrymask);
	prassignset(&sp->pr_sysexit, &PTOU(p)->u_exitmask);
	switch (p->p_model) {
	case DATAMODEL_ILP32:
		sp->pr_dmodel = PR_MODEL_ILP32;
		break;
	case DATAMODEL_LP64:
		sp->pr_dmodel = PR_MODEL_LP64;
		break;
	}
	if (p->p_agenttp)
		sp->pr_agentid = p->p_agenttp->t_tid;

	/* get the chosen lwp's status */
	prgetlwpstatus(t, &sp->pr_lwp, zp);

	/* replicate the flags */
	sp->pr_flags = sp->pr_lwp.pr_flags;
}

#ifdef _SYSCALL32_IMPL
void
prgetlwpstatus32(kthread_t *t, lwpstatus32_t *sp, zone_t *zp)
{
	proc_t *p = ttoproc(t);
	klwp_t *lwp = ttolwp(t);
	struct mstate *ms = &lwp->lwp_mstate;
	hrtime_t usr, sys;
	int flags;
	ulong_t instr;

	ASSERT(MUTEX_HELD(&p->p_lock));

	bzero(sp, sizeof (*sp));
	flags = 0L;
	if (t->t_state == TS_STOPPED) {
		flags |= PR_STOPPED;
		if ((t->t_schedflag & TS_PSTART) == 0)
			flags |= PR_ISTOP;
	} else if (VSTOPPED(t)) {
		flags |= PR_STOPPED|PR_ISTOP;
	}
	if (!(flags & PR_ISTOP) && (t->t_proc_flag & TP_PRSTOP))
		flags |= PR_DSTOP;
	if (lwp->lwp_asleep)
		flags |= PR_ASLEEP;
	if (t == p->p_agenttp)
		flags |= PR_AGENT;
	if (!(t->t_proc_flag & TP_TWAIT))
		flags |= PR_DETACH;
	if (t->t_proc_flag & TP_DAEMON)
		flags |= PR_DAEMON;
	if (p->p_proc_flag & P_PR_FORK)
		flags |= PR_FORK;
	if (p->p_proc_flag & P_PR_RUNLCL)
		flags |= PR_RLC;
	if (p->p_proc_flag & P_PR_KILLCL)
		flags |= PR_KLC;
	if (p->p_proc_flag & P_PR_ASYNC)
		flags |= PR_ASYNC;
	if (p->p_proc_flag & P_PR_BPTADJ)
		flags |= PR_BPTADJ;
	if (p->p_proc_flag & P_PR_PTRACE)
		flags |= PR_PTRACE;
	if (p->p_flag & SMSACCT)
		flags |= PR_MSACCT;
	if (p->p_flag & SMSFORK)
		flags |= PR_MSFORK;
	if (p->p_flag & SVFWAIT)
		flags |= PR_VFORKP;
	sp->pr_flags = flags;
	if (VSTOPPED(t)) {
		sp->pr_why = PR_REQUESTED;
		sp->pr_what = 0;
	} else {
		sp->pr_why = t->t_whystop;
		sp->pr_what = t->t_whatstop;
	}
	sp->pr_lwpid = t->t_tid;
	sp->pr_cursig = lwp->lwp_cursig;
	prassignset(&sp->pr_lwppend, &t->t_sig);
	schedctl_finish_sigblock(t);
	prassignset(&sp->pr_lwphold, &t->t_hold);
	if (t->t_whystop == PR_FAULTED) {
		siginfo_kto32(&lwp->lwp_siginfo, &sp->pr_info);
		if (t->t_whatstop == FLTPAGE)
			sp->pr_info.si_addr =
			    (caddr32_t)(uintptr_t)lwp->lwp_siginfo.si_addr;
	} else if (lwp->lwp_curinfo)
		siginfo_kto32(&lwp->lwp_curinfo->sq_info, &sp->pr_info);
	if (SI_FROMUSER(&lwp->lwp_siginfo) && zp->zone_id != GLOBAL_ZONEID &&
	    sp->pr_info.si_zoneid != zp->zone_id) {
		sp->pr_info.si_pid = zp->zone_zsched->p_pid;
		sp->pr_info.si_uid = 0;
		sp->pr_info.si_ctid = -1;
		sp->pr_info.si_zoneid = zp->zone_id;
	}
	sp->pr_altstack.ss_sp =
	    (caddr32_t)(uintptr_t)lwp->lwp_sigaltstack.ss_sp;
	sp->pr_altstack.ss_size = (size32_t)lwp->lwp_sigaltstack.ss_size;
	sp->pr_altstack.ss_flags = (int32_t)lwp->lwp_sigaltstack.ss_flags;
	prgetaction32(p, PTOU(p), lwp->lwp_cursig, &sp->pr_action);
	sp->pr_oldcontext = (caddr32_t)lwp->lwp_oldcontext;
	sp->pr_ustack = (caddr32_t)lwp->lwp_ustack;
	(void) strncpy(sp->pr_clname, sclass[t->t_cid].cl_name,
	    sizeof (sp->pr_clname) - 1);
	if (flags & PR_STOPPED)
		hrt2ts32(t->t_stoptime, &sp->pr_tstamp);
	usr = ms->ms_acct[LMS_USER];
	sys = ms->ms_acct[LMS_SYSTEM] + ms->ms_acct[LMS_TRAP];
	scalehrtime(&usr);
	scalehrtime(&sys);
	hrt2ts32(usr, &sp->pr_utime);
	hrt2ts32(sys, &sp->pr_stime);

	/*
	 * Fetch the current instruction, if not a system process.
	 * We don't attempt this unless the lwp is stopped.
	 */
	if ((p->p_flag & SSYS) || p->p_as == &kas)
		sp->pr_flags |= (PR_ISSYS|PR_PCINVAL);
	else if (!(flags & PR_STOPPED))
		sp->pr_flags |= PR_PCINVAL;
	else if (!prfetchinstr(lwp, &instr))
		sp->pr_flags |= PR_PCINVAL;
	else
		sp->pr_instr = (uint32_t)instr;

	/*
	 * Drop p_lock while touching the lwp's stack.
	 */
	mutex_exit(&p->p_lock);
	if (prisstep(lwp))
		sp->pr_flags |= PR_STEP;
	if ((flags & (PR_STOPPED|PR_ASLEEP)) && t->t_sysnum) {
		int i;

		sp->pr_syscall = get_syscall32_args(lwp,
		    (int *)sp->pr_sysarg, &i);
		sp->pr_nsysarg = (ushort_t)i;
	}
	if ((flags & PR_STOPPED) || t == curthread)
		prgetprregs32(lwp, sp->pr_reg);
	if ((t->t_state == TS_STOPPED && t->t_whystop == PR_SYSEXIT) ||
	    (flags & PR_VFORKP)) {
		long r1, r2;
		user_t *up;
		auxv_t *auxp;
		int i;

		sp->pr_errno = prgetrvals(lwp, &r1, &r2);
		if (sp->pr_errno == 0) {
			sp->pr_rval1 = (int32_t)r1;
			sp->pr_rval2 = (int32_t)r2;
			sp->pr_errpriv = PRIV_NONE;
		} else
			sp->pr_errpriv = lwp->lwp_badpriv;

		if (t->t_sysnum == SYS_execve) {
			up = PTOU(p);
			sp->pr_sysarg[0] = 0;
			sp->pr_sysarg[1] = (caddr32_t)up->u_argv;
			sp->pr_sysarg[2] = (caddr32_t)up->u_envp;
			for (i = 0, auxp = up->u_auxv;
			    i < sizeof (up->u_auxv) / sizeof (up->u_auxv[0]);
			    i++, auxp++) {
				if (auxp->a_type == AT_SUN_EXECNAME) {
					sp->pr_sysarg[0] =
					    (caddr32_t)
					    (uintptr_t)auxp->a_un.a_ptr;
					break;
				}
			}
		}
	}
	if (prhasfp())
		prgetprfpregs32(lwp, &sp->pr_fpreg);
	mutex_enter(&p->p_lock);
}

void
prgetstatus32(proc_t *p, pstatus32_t *sp, zone_t *zp)
{
	kthread_t *t;

	ASSERT(MUTEX_HELD(&p->p_lock));

	t = prchoose(p);	/* returns locked thread */
	ASSERT(t != NULL);
	thread_unlock(t);

	/* just bzero the process part, prgetlwpstatus32() does the rest */
	bzero(sp, sizeof (pstatus32_t) - sizeof (lwpstatus32_t));
	sp->pr_nlwp = p->p_lwpcnt;
	sp->pr_nzomb = p->p_zombcnt;
	prassignset(&sp->pr_sigpend, &p->p_sig);
	sp->pr_brkbase = (uint32_t)(uintptr_t)p->p_brkbase;
	sp->pr_brksize = (uint32_t)p->p_brksize;
	sp->pr_stkbase = (uint32_t)(uintptr_t)prgetstackbase(p);
	sp->pr_stksize = (uint32_t)p->p_stksize;
	sp->pr_pid = p->p_pid;
	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
	    (p->p_flag & SZONETOP)) {
		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
		/*
		 * Inside local zones, fake zsched's pid as parent pids for
		 * processes which reference processes outside of the zone.
		 */
		sp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
	} else {
		sp->pr_ppid = p->p_ppid;
	}
	sp->pr_pgid = p->p_pgrp;
	sp->pr_sid = p->p_sessp->s_sid;
	sp->pr_taskid = p->p_task->tk_tkid;
	sp->pr_projid = p->p_task->tk_proj->kpj_id;
	sp->pr_zoneid = p->p_zone->zone_id;
	hrt2ts32(mstate_aggr_state(p, LMS_USER), &sp->pr_utime);
	hrt2ts32(mstate_aggr_state(p, LMS_SYSTEM), &sp->pr_stime);
	TICK_TO_TIMESTRUC32(p->p_cutime, &sp->pr_cutime);
	TICK_TO_TIMESTRUC32(p->p_cstime, &sp->pr_cstime);
	prassignset(&sp->pr_sigtrace, &p->p_sigmask);
	prassignset(&sp->pr_flttrace, &p->p_fltmask);
	prassignset(&sp->pr_sysentry, &PTOU(p)->u_entrymask);
	prassignset(&sp->pr_sysexit, &PTOU(p)->u_exitmask);
	switch (p->p_model) {
	case DATAMODEL_ILP32:
		sp->pr_dmodel = PR_MODEL_ILP32;
		break;
	case DATAMODEL_LP64:
		sp->pr_dmodel = PR_MODEL_LP64;
		break;
	}
	if (p->p_agenttp)
		sp->pr_agentid = p->p_agenttp->t_tid;

	/* get the chosen lwp's status */
	prgetlwpstatus32(t, &sp->pr_lwp, zp);

	/* replicate the flags */
	sp->pr_flags = sp->pr_lwp.pr_flags;
}
#endif /* _SYSCALL32_IMPL */

/*
 * Return lwp status.
 */
void
prgetlwpstatus(kthread_t *t, lwpstatus_t *sp, zone_t *zp)
{
	proc_t *p = ttoproc(t);
	klwp_t *lwp = ttolwp(t);
	struct mstate *ms = &lwp->lwp_mstate;
	hrtime_t usr, sys;
	int flags;
	ulong_t instr;

	ASSERT(MUTEX_HELD(&p->p_lock));

	bzero(sp, sizeof (*sp));
	flags = 0L;
	if (t->t_state == TS_STOPPED) {
		flags |= PR_STOPPED;
		if ((t->t_schedflag & TS_PSTART) == 0)
			flags |= PR_ISTOP;
	} else if (VSTOPPED(t)) {
		flags |= PR_STOPPED|PR_ISTOP;
	}
	if (!(flags & PR_ISTOP) && (t->t_proc_flag & TP_PRSTOP))
		flags |= PR_DSTOP;
	if (lwp->lwp_asleep)
		flags |= PR_ASLEEP;
	if (t == p->p_agenttp)
		flags |= PR_AGENT;
	if (!(t->t_proc_flag & TP_TWAIT))
		flags |= PR_DETACH;
	if (t->t_proc_flag & TP_DAEMON)
		flags |= PR_DAEMON;
	if (p->p_proc_flag & P_PR_FORK)
		flags |= PR_FORK;
	if (p->p_proc_flag & P_PR_RUNLCL)
		flags |= PR_RLC;
	if (p->p_proc_flag & P_PR_KILLCL)
		flags |= PR_KLC;
	if (p->p_proc_flag & P_PR_ASYNC)
		flags |= PR_ASYNC;
	if (p->p_proc_flag & P_PR_BPTADJ)
		flags |= PR_BPTADJ;
	if (p->p_proc_flag & P_PR_PTRACE)
		flags |= PR_PTRACE;
	if (p->p_flag & SMSACCT)
		flags |= PR_MSACCT;
	if (p->p_flag & SMSFORK)
		flags |= PR_MSFORK;
	if (p->p_flag & SVFWAIT)
		flags |= PR_VFORKP;
	if (p->p_pgidp->pid_pgorphaned)
		flags |= PR_ORPHAN;
	if (p->p_pidflag & CLDNOSIGCHLD)
		flags |= PR_NOSIGCHLD;
	if (p->p_pidflag & CLDWAITPID)
		flags |= PR_WAITPID;
	sp->pr_flags = flags;
	if (VSTOPPED(t)) {
		sp->pr_why = PR_REQUESTED;
		sp->pr_what = 0;
	} else {
		sp->pr_why = t->t_whystop;
		sp->pr_what = t->t_whatstop;
	}
	sp->pr_lwpid = t->t_tid;
	sp->pr_cursig = lwp->lwp_cursig;
	prassignset(&sp->pr_lwppend, &t->t_sig);
	schedctl_finish_sigblock(t);
	prassignset(&sp->pr_lwphold, &t->t_hold);
	if (t->t_whystop == PR_FAULTED)
		bcopy(&lwp->lwp_siginfo,
		    &sp->pr_info, sizeof (k_siginfo_t));
	else if (lwp->lwp_curinfo)
		bcopy(&lwp->lwp_curinfo->sq_info,
		    &sp->pr_info, sizeof (k_siginfo_t));
	if (SI_FROMUSER(&lwp->lwp_siginfo) && zp->zone_id != GLOBAL_ZONEID &&
	    sp->pr_info.si_zoneid != zp->zone_id) {
		sp->pr_info.si_pid = zp->zone_zsched->p_pid;
		sp->pr_info.si_uid = 0;
		sp->pr_info.si_ctid = -1;
		sp->pr_info.si_zoneid = zp->zone_id;
	}
	sp->pr_altstack = lwp->lwp_sigaltstack;
	prgetaction(p, PTOU(p), lwp->lwp_cursig, &sp->pr_action);
	sp->pr_oldcontext = (uintptr_t)lwp->lwp_oldcontext;
	sp->pr_ustack = lwp->lwp_ustack;
	(void) strncpy(sp->pr_clname, sclass[t->t_cid].cl_name,
	    sizeof (sp->pr_clname) - 1);
	if (flags & PR_STOPPED)
		hrt2ts(t->t_stoptime, &sp->pr_tstamp);
	usr = ms->ms_acct[LMS_USER];
	sys = ms->ms_acct[LMS_SYSTEM] + ms->ms_acct[LMS_TRAP];
	scalehrtime(&usr);
	scalehrtime(&sys);
	hrt2ts(usr, &sp->pr_utime);
	hrt2ts(sys, &sp->pr_stime);

	/*
	 * Fetch the current instruction, if not a system process.
	 * We don't attempt this unless the lwp is stopped.
	 */
	if ((p->p_flag & SSYS) || p->p_as == &kas)
		sp->pr_flags |= (PR_ISSYS|PR_PCINVAL);
	else if (!(flags & PR_STOPPED))
		sp->pr_flags |= PR_PCINVAL;
	else if (!prfetchinstr(lwp, &instr))
		sp->pr_flags |= PR_PCINVAL;
	else
		sp->pr_instr = instr;

	/*
	 * Drop p_lock while touching the lwp's stack.
	 */
	mutex_exit(&p->p_lock);
	if (prisstep(lwp))
		sp->pr_flags |= PR_STEP;
	if ((flags & (PR_STOPPED|PR_ASLEEP)) && t->t_sysnum) {
		int i;

		sp->pr_syscall = get_syscall_args(lwp,
		    (long *)sp->pr_sysarg, &i);
		sp->pr_nsysarg = (ushort_t)i;
	}
	if ((flags & PR_STOPPED) || t == curthread)
		prgetprregs(lwp, sp->pr_reg);
	if ((t->t_state == TS_STOPPED && t->t_whystop == PR_SYSEXIT) ||
	    (flags & PR_VFORKP)) {
		user_t *up;
		auxv_t *auxp;
		int i;

		sp->pr_errno = prgetrvals(lwp, &sp->pr_rval1, &sp->pr_rval2);
		if (sp->pr_errno == 0)
			sp->pr_errpriv = PRIV_NONE;
		else
			sp->pr_errpriv = lwp->lwp_badpriv;

		if (t->t_sysnum == SYS_execve) {
			up = PTOU(p);
			sp->pr_sysarg[0] = 0;
			sp->pr_sysarg[1] = (uintptr_t)up->u_argv;
			sp->pr_sysarg[2] = (uintptr_t)up->u_envp;
			for (i = 0, auxp = up->u_auxv;
			    i < sizeof (up->u_auxv) / sizeof (up->u_auxv[0]);
			    i++, auxp++) {
				if (auxp->a_type == AT_SUN_EXECNAME) {
					sp->pr_sysarg[0] =
					    (uintptr_t)auxp->a_un.a_ptr;
					break;
				}
			}
		}
	}
	if (prhasfp())
		prgetprfpregs(lwp, &sp->pr_fpreg);
	mutex_enter(&p->p_lock);
}

/*
 * Get the sigaction structure for the specified signal. The u-block
 * must already have been mapped in by the caller.
 */
void
prgetaction(proc_t *p, user_t *up, uint_t sig, struct sigaction *sp)
{
	int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;

	bzero(sp, sizeof (*sp));

	if (sig != 0 && (unsigned)sig < nsig) {
		sp->sa_handler = up->u_signal[sig-1];
		prassignset(&sp->sa_mask, &up->u_sigmask[sig-1]);
		if (sigismember(&up->u_sigonstack, sig))
			sp->sa_flags |= SA_ONSTACK;
		if (sigismember(&up->u_sigresethand, sig))
			sp->sa_flags |= SA_RESETHAND;
		if (sigismember(&up->u_sigrestart, sig))
			sp->sa_flags |= SA_RESTART;
		if (sigismember(&p->p_siginfo, sig))
			sp->sa_flags |= SA_SIGINFO;
		if (sigismember(&up->u_signodefer, sig))
			sp->sa_flags |= SA_NODEFER;
		if (sig == SIGCLD) {
			if (p->p_flag & SNOWAIT)
				sp->sa_flags |= SA_NOCLDWAIT;
			if ((p->p_flag & SJCTL) == 0)
				sp->sa_flags |= SA_NOCLDSTOP;
		}
	}
}

#ifdef _SYSCALL32_IMPL
void
prgetaction32(proc_t *p, user_t *up, uint_t sig, struct sigaction32 *sp)
{
	int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;

	bzero(sp, sizeof (*sp));

	if (sig != 0 && (unsigned)sig < nsig) {
		sp->sa_handler = (caddr32_t)(uintptr_t)up->u_signal[sig-1];
		prassignset(&sp->sa_mask, &up->u_sigmask[sig-1]);
		if (sigismember(&up->u_sigonstack, sig))
			sp->sa_flags |= SA_ONSTACK;
		if (sigismember(&up->u_sigresethand, sig))
			sp->sa_flags |= SA_RESETHAND;
		if (sigismember(&up->u_sigrestart, sig))
			sp->sa_flags |= SA_RESTART;
		if (sigismember(&p->p_siginfo, sig))
			sp->sa_flags |= SA_SIGINFO;
		if (sigismember(&up->u_signodefer, sig))
			sp->sa_flags |= SA_NODEFER;
		if (sig == SIGCLD) {
			if (p->p_flag & SNOWAIT)
				sp->sa_flags |= SA_NOCLDWAIT;
			if ((p->p_flag & SJCTL) == 0)
				sp->sa_flags |= SA_NOCLDSTOP;
		}
	}
}
#endif /* _SYSCALL32_IMPL */

/*
 * Count the number of segments in this process's address space.
 */
int
prnsegs(struct as *as, int reserved)
{
	int n = 0;
	struct seg *seg;

	ASSERT(as != &kas && AS_WRITE_HELD(as));

	for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
		caddr_t saddr, naddr;
		void *tmp = NULL;

		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			(void) pr_getprot(seg, reserved, &tmp,
			    &saddr, &naddr, eaddr);
			if (saddr != naddr)
				n++;
		}

		ASSERT(tmp == NULL);
	}

	return (n);
}

/*
 * Convert uint32_t to decimal string w/o leading zeros.
 * Add trailing null characters if 'len' is greater than string length.
 * Return the string length.
 */
int
pr_u32tos(uint32_t n, char *s, int len)
{
	char cbuf[11];	/* 32-bit unsigned integer fits in 10 digits */
	char *cp = cbuf;
	char *end = s + len;

	do {
		*cp++ = (char)(n % 10 + '0');
		n /= 10;
	} while (n);

	len = (int)(cp - cbuf);

	do {
		*s++ = *--cp;
	} while (cp > cbuf);

	while (s < end)		/* optional pad */
		*s++ = '\0';

	return (len);
}
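
/*
 * For example (values hypothetical), pr_u32tos(305, buf, 8) stores the
 * three characters "305" followed by five pad '\0' bytes in buf and
 * returns 3, the digit-string length exclusive of padding.
 */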

/*
 * Convert uint64_t to decimal string w/o leading zeros.
 * Return the string length.
 */
static int
pr_u64tos(uint64_t n, char *s)
{
	char cbuf[21];	/* 64-bit unsigned integer fits in 20 digits */
	char *cp = cbuf;
	int len;

	do {
		*cp++ = (char)(n % 10 + '0');
		n /= 10;
	} while (n);

	len = (int)(cp - cbuf);

	do {
		*s++ = *--cp;
	} while (cp > cbuf);

	return (len);
}

void
pr_object_name(char *name, vnode_t *vp, struct vattr *vattr)
{
	char *s = name;
	struct vfs *vfsp;
	struct vfssw *vfsswp;

	if ((vfsp = vp->v_vfsp) != NULL &&
	    ((vfsswp = vfssw + vfsp->vfs_fstype), vfsswp->vsw_name) &&
	    *vfsswp->vsw_name) {
		(void) strcpy(s, vfsswp->vsw_name);
		s += strlen(s);
		*s++ = '.';
	}
	s += pr_u32tos(getmajor(vattr->va_fsid), s, 0);
	*s++ = '.';
	s += pr_u32tos(getminor(vattr->va_fsid), s, 0);
	*s++ = '.';
	s += pr_u64tos(vattr->va_nodeid, s);
	*s++ = '\0';
}
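
/*
 * The generated name has the form [fstype.]major.minor.nodeid, e.g.
 * "ufs.136.7.12345" (numbers hypothetical); it serves as an entry name
 * in a process's /proc object directory.
 */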

struct seg *
break_seg(proc_t *p)
{
	caddr_t addr = p->p_brkbase;
	struct seg *seg;
	struct vnode *vp;

	if (p->p_brksize != 0)
		addr += p->p_brksize - 1;
	seg = as_segat(p->p_as, addr);
	if (seg != NULL && seg->s_ops == &segvn_ops &&
	    (SEGOP_GETVP(seg, seg->s_base, &vp) != 0 || vp == NULL))
		return (seg);
	return (NULL);
}

/*
 * Implementation of service functions to handle procfs generic chained
 * copyout buffers.
 */
typedef struct pr_iobuf_list {
	list_node_t piol_link;	/* buffer linkage */
	size_t piol_size;	/* total size (header + data) */
	size_t piol_usedsize;	/* amount to copy out from this buf */
} piol_t;

#define	MAPSIZE	(64 * 1024)
#define	PIOL_DATABUF(iol)	((void *)(&(iol)[1]))
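
/*
 * Each chained buffer is a piol_t header followed immediately by its
 * data area, so PIOL_DATABUF() is simply the address one past the
 * header. piol_size counts the header plus data; piol_usedsize counts
 * only the data bytes that will actually be copied out.
 */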

void
pr_iol_initlist(list_t *iolhead, size_t itemsize, int n)
{
	piol_t *iol;
	size_t initial_size = MIN(1, n) * itemsize;

	list_create(iolhead, sizeof (piol_t), offsetof(piol_t, piol_link));

	ASSERT(list_head(iolhead) == NULL);
	ASSERT(itemsize < MAPSIZE - sizeof (*iol));
	ASSERT(initial_size > 0);

	/*
	 * Someone creating chained copyout buffers may ask for less than
	 * MAPSIZE if the amount of data to be buffered is known to be
	 * smaller than that.
	 * But in order to prevent involuntary self-denial of service,
	 * the requested input size is clamped at MAPSIZE.
	 */
	initial_size = MIN(MAPSIZE, initial_size + sizeof (*iol));
	iol = kmem_alloc(initial_size, KM_SLEEP);
	list_insert_head(iolhead, iol);
	iol->piol_usedsize = 0;
	iol->piol_size = initial_size;
}

void *
pr_iol_newbuf(list_t *iolhead, size_t itemsize)
{
	piol_t *iol;
	char *new;

	ASSERT(itemsize < MAPSIZE - sizeof (*iol));
	ASSERT(list_head(iolhead) != NULL);

	iol = (piol_t *)list_tail(iolhead);

	if (iol->piol_size <
	    iol->piol_usedsize + sizeof (*iol) + itemsize) {
		/*
		 * Out of space in the current buffer. Allocate more.
		 */
		piol_t *newiol;

		newiol = kmem_alloc(MAPSIZE, KM_SLEEP);
		newiol->piol_size = MAPSIZE;
		newiol->piol_usedsize = 0;

		list_insert_after(iolhead, iol, newiol);
		iol = list_next(iolhead, iol);
		ASSERT(iol == newiol);
	}
	new = (char *)PIOL_DATABUF(iol) + iol->piol_usedsize;
	iol->piol_usedsize += itemsize;
	bzero(new, itemsize);
	return (new);
}

int
pr_iol_copyout_and_free(list_t *iolhead, caddr_t *tgt, int errin)
{
	int error = errin;
	piol_t *iol;

	while ((iol = list_head(iolhead)) != NULL) {
		list_remove(iolhead, iol);
		if (!error) {
			if (copyout(PIOL_DATABUF(iol), *tgt,
			    iol->piol_usedsize))
				error = EFAULT;
			*tgt += iol->piol_usedsize;
		}
		kmem_free(iol, iol->piol_size);
	}
	list_destroy(iolhead);

	return (error);
}

int
pr_iol_uiomove_and_free(list_t *iolhead, uio_t *uiop, int errin)
{
	offset_t off = uiop->uio_offset;
	char *base;
	size_t size;
	piol_t *iol;
	int error = errin;

	while ((iol = list_head(iolhead)) != NULL) {
		list_remove(iolhead, iol);
		base = PIOL_DATABUF(iol);
		size = iol->piol_usedsize;
		if (off <= size && error == 0 && uiop->uio_resid > 0)
			error = uiomove(base + off, size - off,
			    UIO_READ, uiop);
		off = MAX(0, off - (offset_t)size);
		kmem_free(iol, iol->piol_size);
	}
	list_destroy(iolhead);

	return (error);
}
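
/*
 * A minimal sketch of the intended calling sequence (compare prgetmap()
 * below, which is a real consumer):
 *
 *	list_t head;
 *	prmap_t *mp;
 *
 *	pr_iol_initlist(&head, sizeof (*mp), expected_count);
 *	(for each item to report:)
 *		mp = pr_iol_newbuf(&head, sizeof (*mp));
 *		(fill in *mp)
 *	error = pr_iol_copyout_and_free(&head, &uaddr, error);
 *
 * pr_iol_uiomove_and_free() is the uio-based alternative for
 * read(2)-style consumers.
 */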
1604
1605 /*
1606 * Return an array of structures with memory map information.
1607 * We allocate here; the caller must deallocate.
1608 */
1609 int
prgetmap(proc_t * p,int reserved,list_t * iolhead)1610 prgetmap(proc_t *p, int reserved, list_t *iolhead)
1611 {
1612 struct as *as = p->p_as;
1613 prmap_t *mp;
1614 struct seg *seg;
1615 struct seg *brkseg, *stkseg;
1616 struct vnode *vp;
1617 struct vattr vattr;
1618 uint_t prot;
1619
1620 ASSERT(as != &kas && AS_WRITE_HELD(as));
1621
1622 /*
1623 * Request an initial buffer size that doesn't waste memory
1624 * if the address space has only a small number of segments.
1625 */
1626 pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));
1627
1628 if ((seg = AS_SEGFIRST(as)) == NULL)
1629 return (0);
1630
1631 brkseg = break_seg(p);
1632 stkseg = as_segat(as, prgetstackbase(p));
1633
1634 do {
1635 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
1636 caddr_t saddr, naddr;
1637 void *tmp = NULL;
1638
1639 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1640 prot = pr_getprot(seg, reserved, &tmp,
1641 &saddr, &naddr, eaddr);
1642 if (saddr == naddr)
1643 continue;
1644
1645 mp = pr_iol_newbuf(iolhead, sizeof (*mp));
1646
1647 mp->pr_vaddr = (uintptr_t)saddr;
1648 mp->pr_size = naddr - saddr;
1649 mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
1650 mp->pr_mflags = 0;
1651 if (prot & PROT_READ)
1652 mp->pr_mflags |= MA_READ;
1653 if (prot & PROT_WRITE)
1654 mp->pr_mflags |= MA_WRITE;
1655 if (prot & PROT_EXEC)
1656 mp->pr_mflags |= MA_EXEC;
1657 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
1658 mp->pr_mflags |= MA_SHARED;
1659 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
1660 mp->pr_mflags |= MA_NORESERVE;
1661 if (seg->s_ops == &segspt_shmops ||
1662 (seg->s_ops == &segvn_ops &&
1663 (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
1664 mp->pr_mflags |= MA_ANON;
1665 if (seg == brkseg)
1666 mp->pr_mflags |= MA_BREAK;
1667 else if (seg == stkseg) {
1668 mp->pr_mflags |= MA_STACK;
1669 if (reserved) {
1670 size_t maxstack =
1671 ((size_t)p->p_stk_ctl +
1672 PAGEOFFSET) & PAGEMASK;
1673 mp->pr_vaddr =
1674 (uintptr_t)prgetstackbase(p) +
1675 p->p_stksize - maxstack;
1676 mp->pr_size = (uintptr_t)naddr -
1677 mp->pr_vaddr;
1678 }
1679 }
1680 if (seg->s_ops == &segspt_shmops)
1681 mp->pr_mflags |= MA_ISM | MA_SHM;
1682 mp->pr_pagesize = PAGESIZE;
1683
1684 /*
1685 * Manufacture a filename for the "object" directory.
1686 */
1687 vattr.va_mask = AT_FSID|AT_NODEID;
1688 if (seg->s_ops == &segvn_ops &&
1689 SEGOP_GETVP(seg, saddr, &vp) == 0 &&
1690 vp != NULL && vp->v_type == VREG &&
1691 VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
1692 if (vp == p->p_exec)
1693 (void) strcpy(mp->pr_mapname, "a.out");
1694 else
1695 pr_object_name(mp->pr_mapname,
1696 vp, &vattr);
1697 }
1698
1699 /*
1700 * Get the SysV shared memory id, if any.
1701 */
1702 if ((mp->pr_mflags & MA_SHARED) && p->p_segacct &&
1703 (mp->pr_shmid = shmgetid(p, seg->s_base)) !=
1704 SHMID_NONE) {
1705 if (mp->pr_shmid == SHMID_FREE)
1706 mp->pr_shmid = -1;
1707
1708 mp->pr_mflags |= MA_SHM;
1709 } else {
1710 mp->pr_shmid = -1;
1711 }
1712 }
1713 ASSERT(tmp == NULL);
1714 } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
1715
1716 return (0);
1717 }
1718
1719 #ifdef _SYSCALL32_IMPL
1720 int
prgetmap32(proc_t * p,int reserved,list_t * iolhead)1721 prgetmap32(proc_t *p, int reserved, list_t *iolhead)
1722 {
1723 struct as *as = p->p_as;
1724 prmap32_t *mp;
1725 struct seg *seg;
1726 struct seg *brkseg, *stkseg;
1727 struct vnode *vp;
1728 struct vattr vattr;
1729 uint_t prot;
1730
1731 ASSERT(as != &kas && AS_WRITE_HELD(as));
1732
1733 /*
1734 * Request an initial buffer size that doesn't waste memory
1735 * if the address space has only a small number of segments.
1736 */
1737 pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));
1738
1739 if ((seg = AS_SEGFIRST(as)) == NULL)
1740 return (0);
1741
1742 brkseg = break_seg(p);
1743 stkseg = as_segat(as, prgetstackbase(p));
1744
1745 do {
1746 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
1747 caddr_t saddr, naddr;
1748 void *tmp = NULL;
1749
1750 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1751 prot = pr_getprot(seg, reserved, &tmp,
1752 &saddr, &naddr, eaddr);
1753 if (saddr == naddr)
1754 continue;
1755
1756 mp = pr_iol_newbuf(iolhead, sizeof (*mp));
1757
1758 mp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
1759 mp->pr_size = (size32_t)(naddr - saddr);
1760 mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
1761 mp->pr_mflags = 0;
1762 if (prot & PROT_READ)
1763 mp->pr_mflags |= MA_READ;
1764 if (prot & PROT_WRITE)
1765 mp->pr_mflags |= MA_WRITE;
1766 if (prot & PROT_EXEC)
1767 mp->pr_mflags |= MA_EXEC;
1768 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
1769 mp->pr_mflags |= MA_SHARED;
1770 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
1771 mp->pr_mflags |= MA_NORESERVE;
1772 if (seg->s_ops == &segspt_shmops ||
1773 (seg->s_ops == &segvn_ops &&
1774 (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
1775 mp->pr_mflags |= MA_ANON;
1776 if (seg == brkseg)
1777 mp->pr_mflags |= MA_BREAK;
1778 else if (seg == stkseg) {
1779 mp->pr_mflags |= MA_STACK;
1780 if (reserved) {
1781 size_t maxstack =
1782 ((size_t)p->p_stk_ctl +
1783 PAGEOFFSET) & PAGEMASK;
1784 uintptr_t vaddr =
1785 (uintptr_t)prgetstackbase(p) +
1786 p->p_stksize - maxstack;
1787 mp->pr_vaddr = (caddr32_t)vaddr;
1788 mp->pr_size = (size32_t)
1789 ((uintptr_t)naddr - vaddr);
1790 }
1791 }
1792 if (seg->s_ops == &segspt_shmops)
1793 mp->pr_mflags |= MA_ISM | MA_SHM;
1794 mp->pr_pagesize = PAGESIZE;
1795
1796 /*
1797 * Manufacture a filename for the "object" directory.
1798 */
1799 vattr.va_mask = AT_FSID|AT_NODEID;
1800 if (seg->s_ops == &segvn_ops &&
1801 SEGOP_GETVP(seg, saddr, &vp) == 0 &&
1802 vp != NULL && vp->v_type == VREG &&
1803 VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
1804 if (vp == p->p_exec)
1805 (void) strcpy(mp->pr_mapname, "a.out");
1806 else
1807 pr_object_name(mp->pr_mapname,
1808 vp, &vattr);
1809 }
1810
1811 /*
1812 * Get the SysV shared memory id, if any.
1813 */
1814 if ((mp->pr_mflags & MA_SHARED) && p->p_segacct &&
1815 (mp->pr_shmid = shmgetid(p, seg->s_base)) !=
1816 SHMID_NONE) {
1817 if (mp->pr_shmid == SHMID_FREE)
1818 mp->pr_shmid = -1;
1819
1820 mp->pr_mflags |= MA_SHM;
1821 } else {
1822 mp->pr_shmid = -1;
1823 }
1824 }
1825 ASSERT(tmp == NULL);
1826 } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
1827
1828 return (0);
1829 }
1830 #endif /* _SYSCALL32_IMPL */
1831
1832 /*
1833 * Return the size of the /proc page data file.
1834 */
1835 size_t
prpdsize(struct as * as)1836 prpdsize(struct as *as)
1837 {
1838 struct seg *seg;
1839 size_t size;
1840
1841 ASSERT(as != &kas && AS_WRITE_HELD(as));
1842
1843 if ((seg = AS_SEGFIRST(as)) == NULL)
1844 return (0);
1845
1846 size = sizeof (prpageheader_t);
1847 do {
1848 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
1849 caddr_t saddr, naddr;
1850 void *tmp = NULL;
1851 size_t npage;
1852
1853 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1854 (void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
1855 if ((npage = (naddr - saddr) / PAGESIZE) != 0)
1856 size += sizeof (prasmap_t) + round8(npage);
1857 }
1858 ASSERT(tmp == NULL);
1859 } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
1860
1861 return (size);
1862 }
1863
1864 #ifdef _SYSCALL32_IMPL
1865 size_t
prpdsize32(struct as * as)1866 prpdsize32(struct as *as)
1867 {
1868 struct seg *seg;
1869 size_t size;
1870
1871 ASSERT(as != &kas && AS_WRITE_HELD(as));
1872
1873 if ((seg = AS_SEGFIRST(as)) == NULL)
1874 return (0);
1875
1876 size = sizeof (prpageheader32_t);
1877 do {
1878 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
1879 caddr_t saddr, naddr;
1880 void *tmp = NULL;
1881 size_t npage;
1882
1883 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1884 (void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
1885 if ((npage = (naddr - saddr) / PAGESIZE) != 0)
1886 size += sizeof (prasmap32_t) + round8(npage);
1887 }
1888 ASSERT(tmp == NULL);
1889 } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
1890
1891 return (size);
1892 }
1893 #endif /* _SYSCALL32_IMPL */
1894
1895 /*
1896 * Read page data information.
1897 */
1898 int
prpdread(proc_t * p,uint_t hatid,struct uio * uiop)1899 prpdread(proc_t *p, uint_t hatid, struct uio *uiop)
1900 {
1901 struct as *as = p->p_as;
1902 caddr_t buf;
1903 size_t size;
1904 prpageheader_t *php;
1905 prasmap_t *pmp;
1906 struct seg *seg;
1907 int error;
1908
1909 again:
1910 AS_LOCK_ENTER(as, RW_WRITER);
1911
1912 if ((seg = AS_SEGFIRST(as)) == NULL) {
1913 AS_LOCK_EXIT(as);
1914 return (0);
1915 }
1916 size = prpdsize(as);
1917 if (uiop->uio_resid < size) {
1918 AS_LOCK_EXIT(as);
1919 return (E2BIG);
1920 }
1921
1922 buf = kmem_zalloc(size, KM_SLEEP);
1923 php = (prpageheader_t *)buf;
1924 pmp = (prasmap_t *)(buf + sizeof (prpageheader_t));
1925
1926 hrt2ts(gethrtime(), &php->pr_tstamp);
1927 php->pr_nmap = 0;
1928 php->pr_npage = 0;
1929 do {
1930 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
1931 caddr_t saddr, naddr;
1932 void *tmp = NULL;
1933
1934 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1935 struct vnode *vp;
1936 struct vattr vattr;
1937 size_t len;
1938 size_t npage;
1939 uint_t prot;
1940 uintptr_t next;
1941
1942 prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
1943 if ((len = (size_t)(naddr - saddr)) == 0)
1944 continue;
1945 npage = len / PAGESIZE;
1946 next = (uintptr_t)(pmp + 1) + round8(npage);
1947 /*
1948 * It's possible that the address space can change
1949 * subtlely even though we're holding as->a_lock
1950 * due to the nondeterminism of page_exists() in
1951 * the presence of asychronously flushed pages or
1952 * mapped files whose sizes are changing.
1953 * page_exists() may be called indirectly from
1954 * pr_getprot() by a SEGOP_INCORE() routine.
1955 * If this happens we need to make sure we don't
1956 * overrun the buffer whose size we computed based
1957 * on the initial iteration through the segments.
1958 * Once we've detected an overflow, we need to clean
1959 * up the temporary memory allocated in pr_getprot()
1960 * and retry. If there's a pending signal, we return
1961 * EINTR so that this thread can be dislodged if
1962 * a latent bug causes us to spin indefinitely.
1963 */
1964 if (next > (uintptr_t)buf + size) {
1965 pr_getprot_done(&tmp);
1966 AS_LOCK_EXIT(as);
1967
1968 kmem_free(buf, size);
1969
1970 if (ISSIG(curthread, JUSTLOOKING))
1971 return (EINTR);
1972
1973 goto again;
1974 }
1975
1976 php->pr_nmap++;
1977 php->pr_npage += npage;
1978 pmp->pr_vaddr = (uintptr_t)saddr;
1979 pmp->pr_npage = npage;
1980 pmp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
1981 pmp->pr_mflags = 0;
1982 if (prot & PROT_READ)
1983 pmp->pr_mflags |= MA_READ;
1984 if (prot & PROT_WRITE)
1985 pmp->pr_mflags |= MA_WRITE;
1986 if (prot & PROT_EXEC)
1987 pmp->pr_mflags |= MA_EXEC;
1988 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
1989 pmp->pr_mflags |= MA_SHARED;
1990 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
1991 pmp->pr_mflags |= MA_NORESERVE;
1992 if (seg->s_ops == &segspt_shmops ||
1993 (seg->s_ops == &segvn_ops &&
1994 (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
1995 pmp->pr_mflags |= MA_ANON;
1996 if (seg->s_ops == &segspt_shmops)
1997 pmp->pr_mflags |= MA_ISM | MA_SHM;
1998 pmp->pr_pagesize = PAGESIZE;
1999 /*
2000 * Manufacture a filename for the "object" directory.
2001 */
2002 vattr.va_mask = AT_FSID|AT_NODEID;
2003 if (seg->s_ops == &segvn_ops &&
2004 SEGOP_GETVP(seg, saddr, &vp) == 0 &&
2005 vp != NULL && vp->v_type == VREG &&
2006 VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
2007 if (vp == p->p_exec)
2008 (void) strcpy(pmp->pr_mapname, "a.out");
2009 else
2010 pr_object_name(pmp->pr_mapname,
2011 vp, &vattr);
2012 }
2013
2014 /*
2015 * Get the SysV shared memory id, if any.
2016 */
2017 if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct &&
2018 (pmp->pr_shmid = shmgetid(p, seg->s_base)) !=
2019 SHMID_NONE) {
2020 if (pmp->pr_shmid == SHMID_FREE)
2021 pmp->pr_shmid = -1;
2022
2023 pmp->pr_mflags |= MA_SHM;
2024 } else {
2025 pmp->pr_shmid = -1;
2026 }
2027
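			/*
			 * hat_getstat() deposits one byte of referenced/
			 * modified state per page immediately after the
			 * prasmap_t header; HAT_SYNC_ZERORM clears the
			 * hardware ref/mod bits as they are sampled, so
			 * each read reports activity since the last one.
			 */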
2028 hat_getstat(as, saddr, len, hatid,
2029 (char *)(pmp + 1), HAT_SYNC_ZERORM);
2030 pmp = (prasmap_t *)next;
2031 }
2032 ASSERT(tmp == NULL);
2033 } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
2034
2035 AS_LOCK_EXIT(as);
2036
2037 ASSERT((uintptr_t)pmp <= (uintptr_t)buf + size);
2038 error = uiomove(buf, (caddr_t)pmp - buf, UIO_READ, uiop);
2039 kmem_free(buf, size);
2040
2041 return (error);
2042 }
2043
2044 #ifdef _SYSCALL32_IMPL
2045 int
2046 prpdread32(proc_t *p, uint_t hatid, struct uio *uiop)
2047 {
2048 struct as *as = p->p_as;
2049 caddr_t buf;
2050 size_t size;
2051 prpageheader32_t *php;
2052 prasmap32_t *pmp;
2053 struct seg *seg;
2054 int error;
2055
2056 again:
2057 AS_LOCK_ENTER(as, RW_WRITER);
2058
2059 if ((seg = AS_SEGFIRST(as)) == NULL) {
2060 AS_LOCK_EXIT(as);
2061 return (0);
2062 }
2063 size = prpdsize32(as);
2064 if (uiop->uio_resid < size) {
2065 AS_LOCK_EXIT(as);
2066 return (E2BIG);
2067 }
2068
2069 buf = kmem_zalloc(size, KM_SLEEP);
2070 php = (prpageheader32_t *)buf;
2071 pmp = (prasmap32_t *)(buf + sizeof (prpageheader32_t));
2072
2073 hrt2ts32(gethrtime(), &php->pr_tstamp);
2074 php->pr_nmap = 0;
2075 php->pr_npage = 0;
2076 do {
2077 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
2078 caddr_t saddr, naddr;
2079 void *tmp = NULL;
2080
2081 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
2082 struct vnode *vp;
2083 struct vattr vattr;
2084 size_t len;
2085 size_t npage;
2086 uint_t prot;
2087 uintptr_t next;
2088
2089 prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
2090 if ((len = (size_t)(naddr - saddr)) == 0)
2091 continue;
2092 npage = len / PAGESIZE;
2093 next = (uintptr_t)(pmp + 1) + round8(npage);
2094 /*
2095 * It's possible that the address space can change
2096 			 * subtly even though we're holding as->a_lock
2097 			 * due to the nondeterminism of page_exists() in
2098 			 * the presence of asynchronously flushed pages or
2099 * mapped files whose sizes are changing.
2100 * page_exists() may be called indirectly from
2101 * pr_getprot() by a SEGOP_INCORE() routine.
2102 * If this happens we need to make sure we don't
2103 * overrun the buffer whose size we computed based
2104 * on the initial iteration through the segments.
2105 * Once we've detected an overflow, we need to clean
2106 * up the temporary memory allocated in pr_getprot()
2107 * and retry. If there's a pending signal, we return
2108 * EINTR so that this thread can be dislodged if
2109 * a latent bug causes us to spin indefinitely.
2110 */
2111 if (next > (uintptr_t)buf + size) {
2112 pr_getprot_done(&tmp);
2113 AS_LOCK_EXIT(as);
2114
2115 kmem_free(buf, size);
2116
2117 if (ISSIG(curthread, JUSTLOOKING))
2118 return (EINTR);
2119
2120 goto again;
2121 }
2122
2123 php->pr_nmap++;
2124 php->pr_npage += npage;
2125 pmp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
2126 pmp->pr_npage = (size32_t)npage;
2127 pmp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
2128 pmp->pr_mflags = 0;
2129 if (prot & PROT_READ)
2130 pmp->pr_mflags |= MA_READ;
2131 if (prot & PROT_WRITE)
2132 pmp->pr_mflags |= MA_WRITE;
2133 if (prot & PROT_EXEC)
2134 pmp->pr_mflags |= MA_EXEC;
2135 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
2136 pmp->pr_mflags |= MA_SHARED;
2137 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
2138 pmp->pr_mflags |= MA_NORESERVE;
2139 if (seg->s_ops == &segspt_shmops ||
2140 (seg->s_ops == &segvn_ops &&
2141 (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
2142 pmp->pr_mflags |= MA_ANON;
2143 if (seg->s_ops == &segspt_shmops)
2144 pmp->pr_mflags |= MA_ISM | MA_SHM;
2145 pmp->pr_pagesize = PAGESIZE;
2146 /*
2147 * Manufacture a filename for the "object" directory.
2148 */
2149 vattr.va_mask = AT_FSID|AT_NODEID;
2150 if (seg->s_ops == &segvn_ops &&
2151 SEGOP_GETVP(seg, saddr, &vp) == 0 &&
2152 vp != NULL && vp->v_type == VREG &&
2153 VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
2154 if (vp == p->p_exec)
2155 (void) strcpy(pmp->pr_mapname, "a.out");
2156 else
2157 pr_object_name(pmp->pr_mapname,
2158 vp, &vattr);
2159 }
2160
2161 /*
2162 * Get the SysV shared memory id, if any.
2163 */
2164 if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct &&
2165 (pmp->pr_shmid = shmgetid(p, seg->s_base)) !=
2166 SHMID_NONE) {
2167 if (pmp->pr_shmid == SHMID_FREE)
2168 pmp->pr_shmid = -1;
2169
2170 pmp->pr_mflags |= MA_SHM;
2171 } else {
2172 pmp->pr_shmid = -1;
2173 }
2174
2175 hat_getstat(as, saddr, len, hatid,
2176 (char *)(pmp + 1), HAT_SYNC_ZERORM);
2177 pmp = (prasmap32_t *)next;
2178 }
2179 ASSERT(tmp == NULL);
2180 } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
2181
2182 AS_LOCK_EXIT(as);
2183
2184 ASSERT((uintptr_t)pmp <= (uintptr_t)buf + size);
2185 error = uiomove(buf, (caddr_t)pmp - buf, UIO_READ, uiop);
2186 kmem_free(buf, size);
2187
2188 return (error);
2189 }
2190 #endif /* _SYSCALL32_IMPL */
2191
2192 ushort_t
2193 prgetpctcpu(uint64_t pct)
2194 {
2195 /*
2196 * The value returned will be relevant in the zone of the examiner,
2197 * which may not be the same as the zone which performed the procfs
2198 * mount.
2199 */
2200 int nonline = zone_ncpus_online_get(curproc->p_zone);
2201
2202 /*
2203 * Prorate over online cpus so we don't exceed 100%
2204 */
2205 if (nonline > 1)
2206 pct /= nonline;
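	/*
	 * The summed value is a 32-bit binary fraction in which 0x80000000
	 * corresponds to 100%; shifting right by 16 yields the 16-bit
	 * fraction (0x8000 == 100%) that pr_pctcpu reports.  For example,
	 * a thread consuming about a quarter of one CPU arrives here as
	 * roughly 0x20000000 and is returned as roughly 0x2000.
	 */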
2207 pct >>= 16; /* convert to 16-bit scaled integer */
2208 if (pct > 0x8000) /* might happen, due to rounding */
2209 pct = 0x8000;
2210 return ((ushort_t)pct);
2211 }
2212
2213 /*
2214 * Return information used by ps(1).
2215 */
2216 void
2217 prgetpsinfo(proc_t *p, psinfo_t *psp)
2218 {
2219 kthread_t *t;
2220 struct cred *cred;
2221 hrtime_t hrutime, hrstime;
2222
2223 ASSERT(MUTEX_HELD(&p->p_lock));
2224
2225 if ((t = prchoose(p)) == NULL) /* returns locked thread */
2226 bzero(psp, sizeof (*psp));
2227 else {
2228 thread_unlock(t);
2229 bzero(psp, sizeof (*psp) - sizeof (psp->pr_lwp));
2230 }
2231
2232 /*
2233 * only export SSYS and SMSACCT; everything else is off-limits to
2234 * userland apps.
2235 */
2236 psp->pr_flag = p->p_flag & (SSYS | SMSACCT);
2237 psp->pr_nlwp = p->p_lwpcnt;
2238 psp->pr_nzomb = p->p_zombcnt;
2239 mutex_enter(&p->p_crlock);
2240 cred = p->p_cred;
2241 psp->pr_uid = crgetruid(cred);
2242 psp->pr_euid = crgetuid(cred);
2243 psp->pr_gid = crgetrgid(cred);
2244 psp->pr_egid = crgetgid(cred);
2245 mutex_exit(&p->p_crlock);
2246 psp->pr_pid = p->p_pid;
2247 if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
2248 (p->p_flag & SZONETOP)) {
2249 ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
2250 /*
2251 * Inside local zones, fake zsched's pid as parent pids for
2252 * processes which reference processes outside of the zone.
2253 */
2254 psp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
2255 } else {
2256 psp->pr_ppid = p->p_ppid;
2257 }
2258 psp->pr_pgid = p->p_pgrp;
2259 psp->pr_sid = p->p_sessp->s_sid;
2260 psp->pr_taskid = p->p_task->tk_tkid;
2261 psp->pr_projid = p->p_task->tk_proj->kpj_id;
2262 psp->pr_poolid = p->p_pool->pool_id;
2263 psp->pr_zoneid = p->p_zone->zone_id;
2264 if ((psp->pr_contract = PRCTID(p)) == 0)
2265 psp->pr_contract = -1;
2266 psp->pr_addr = (uintptr_t)prgetpsaddr(p);
2267 switch (p->p_model) {
2268 case DATAMODEL_ILP32:
2269 psp->pr_dmodel = PR_MODEL_ILP32;
2270 break;
2271 case DATAMODEL_LP64:
2272 psp->pr_dmodel = PR_MODEL_LP64;
2273 break;
2274 }
2275 hrutime = mstate_aggr_state(p, LMS_USER);
2276 hrstime = mstate_aggr_state(p, LMS_SYSTEM);
2277 hrt2ts((hrutime + hrstime), &psp->pr_time);
2278 TICK_TO_TIMESTRUC(p->p_cutime + p->p_cstime, &psp->pr_ctime);
2279
2280 if (t == NULL) {
2281 int wcode = p->p_wcode; /* must be atomic read */
2282
2283 if (wcode)
2284 psp->pr_wstat = wstat(wcode, p->p_wdata);
2285 psp->pr_ttydev = PRNODEV;
2286 psp->pr_lwp.pr_state = SZOMB;
2287 psp->pr_lwp.pr_sname = 'Z';
2288 psp->pr_lwp.pr_bindpro = PBIND_NONE;
2289 psp->pr_lwp.pr_bindpset = PS_NONE;
2290 } else {
2291 user_t *up = PTOU(p);
2292 struct as *as;
2293 dev_t d;
2294 extern dev_t rwsconsdev, rconsdev, uconsdev;
2295
2296 d = cttydev(p);
2297 /*
2298 * If the controlling terminal is the real
2299 * or workstation console device, map to what the
2300 * user thinks is the console device. Handle case when
2301 * rwsconsdev or rconsdev is set to NODEV for Starfire.
2302 */
2303 if ((d == rwsconsdev || d == rconsdev) && d != NODEV)
2304 d = uconsdev;
2305 psp->pr_ttydev = (d == NODEV) ? PRNODEV : d;
2306 psp->pr_start = up->u_start;
2307 bcopy(up->u_comm, psp->pr_fname,
2308 MIN(sizeof (up->u_comm), sizeof (psp->pr_fname)-1));
2309 bcopy(up->u_psargs, psp->pr_psargs,
2310 MIN(PRARGSZ-1, PSARGSZ));
2311 psp->pr_argc = up->u_argc;
2312 psp->pr_argv = up->u_argv;
2313 psp->pr_envp = up->u_envp;
2314
2315 /* get the chosen lwp's lwpsinfo */
2316 prgetlwpsinfo(t, &psp->pr_lwp);
2317
2318 /* compute %cpu for the process */
2319 if (p->p_lwpcnt == 1)
2320 psp->pr_pctcpu = psp->pr_lwp.pr_pctcpu;
2321 else {
2322 uint64_t pct = 0;
2323 hrtime_t cur_time = gethrtime_unscaled();
2324
2325 t = p->p_tlist;
2326 do {
2327 pct += cpu_update_pct(t, cur_time);
2328 } while ((t = t->t_forw) != p->p_tlist);
2329
2330 psp->pr_pctcpu = prgetpctcpu(pct);
2331 }
2332 if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) {
2333 psp->pr_size = 0;
2334 psp->pr_rssize = 0;
2335 } else {
2336 mutex_exit(&p->p_lock);
2337 AS_LOCK_ENTER(as, RW_READER);
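			/* pr_size and pr_rssize are reported in kilobytes */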
2338 psp->pr_size = btopr(as->a_resvsize) *
2339 (PAGESIZE / 1024);
2340 psp->pr_rssize = rm_asrss(as) * (PAGESIZE / 1024);
2341 psp->pr_pctmem = rm_pctmemory(as);
2342 AS_LOCK_EXIT(as);
2343 mutex_enter(&p->p_lock);
2344 }
2345 }
2346 }
2347
2348 #ifdef _SYSCALL32_IMPL
2349 void
2350 prgetpsinfo32(proc_t *p, psinfo32_t *psp)
2351 {
2352 kthread_t *t;
2353 struct cred *cred;
2354 hrtime_t hrutime, hrstime;
2355
2356 ASSERT(MUTEX_HELD(&p->p_lock));
2357
2358 if ((t = prchoose(p)) == NULL) /* returns locked thread */
2359 bzero(psp, sizeof (*psp));
2360 else {
2361 thread_unlock(t);
2362 bzero(psp, sizeof (*psp) - sizeof (psp->pr_lwp));
2363 }
2364
2365 /*
2366 * only export SSYS and SMSACCT; everything else is off-limits to
2367 * userland apps.
2368 */
2369 psp->pr_flag = p->p_flag & (SSYS | SMSACCT);
2370 psp->pr_nlwp = p->p_lwpcnt;
2371 psp->pr_nzomb = p->p_zombcnt;
2372 mutex_enter(&p->p_crlock);
2373 cred = p->p_cred;
2374 psp->pr_uid = crgetruid(cred);
2375 psp->pr_euid = crgetuid(cred);
2376 psp->pr_gid = crgetrgid(cred);
2377 psp->pr_egid = crgetgid(cred);
2378 mutex_exit(&p->p_crlock);
2379 psp->pr_pid = p->p_pid;
2380 if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
2381 (p->p_flag & SZONETOP)) {
2382 ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
2383 /*
2384 * Inside local zones, fake zsched's pid as parent pids for
2385 * processes which reference processes outside of the zone.
2386 */
2387 psp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
2388 } else {
2389 psp->pr_ppid = p->p_ppid;
2390 }
2391 psp->pr_pgid = p->p_pgrp;
2392 psp->pr_sid = p->p_sessp->s_sid;
2393 psp->pr_taskid = p->p_task->tk_tkid;
2394 psp->pr_projid = p->p_task->tk_proj->kpj_id;
2395 psp->pr_poolid = p->p_pool->pool_id;
2396 psp->pr_zoneid = p->p_zone->zone_id;
2397 if ((psp->pr_contract = PRCTID(p)) == 0)
2398 psp->pr_contract = -1;
2399 psp->pr_addr = 0; /* cannot represent 64-bit addr in 32 bits */
2400 switch (p->p_model) {
2401 case DATAMODEL_ILP32:
2402 psp->pr_dmodel = PR_MODEL_ILP32;
2403 break;
2404 case DATAMODEL_LP64:
2405 psp->pr_dmodel = PR_MODEL_LP64;
2406 break;
2407 }
2408 hrutime = mstate_aggr_state(p, LMS_USER);
2409 hrstime = mstate_aggr_state(p, LMS_SYSTEM);
2410 hrt2ts32(hrutime + hrstime, &psp->pr_time);
2411 TICK_TO_TIMESTRUC32(p->p_cutime + p->p_cstime, &psp->pr_ctime);
2412
2413 if (t == NULL) {
2414 extern int wstat(int, int); /* needs a header file */
2415 int wcode = p->p_wcode; /* must be atomic read */
2416
2417 if (wcode)
2418 psp->pr_wstat = wstat(wcode, p->p_wdata);
2419 psp->pr_ttydev = PRNODEV32;
2420 psp->pr_lwp.pr_state = SZOMB;
2421 psp->pr_lwp.pr_sname = 'Z';
2422 } else {
2423 user_t *up = PTOU(p);
2424 struct as *as;
2425 dev_t d;
2426 extern dev_t rwsconsdev, rconsdev, uconsdev;
2427
2428 d = cttydev(p);
2429 /*
2430 * If the controlling terminal is the real
2431 * or workstation console device, map to what the
2432 * user thinks is the console device. Handle case when
2433 * rwsconsdev or rconsdev is set to NODEV for Starfire.
2434 */
2435 if ((d == rwsconsdev || d == rconsdev) && d != NODEV)
2436 d = uconsdev;
2437 (void) cmpldev(&psp->pr_ttydev, d);
2438 TIMESPEC_TO_TIMESPEC32(&psp->pr_start, &up->u_start);
2439 bcopy(up->u_comm, psp->pr_fname,
2440 MIN(sizeof (up->u_comm), sizeof (psp->pr_fname)-1));
2441 bcopy(up->u_psargs, psp->pr_psargs,
2442 MIN(PRARGSZ-1, PSARGSZ));
2443 psp->pr_argc = up->u_argc;
2444 psp->pr_argv = (caddr32_t)up->u_argv;
2445 psp->pr_envp = (caddr32_t)up->u_envp;
2446
2447 /* get the chosen lwp's lwpsinfo */
2448 prgetlwpsinfo32(t, &psp->pr_lwp);
2449
2450 /* compute %cpu for the process */
2451 if (p->p_lwpcnt == 1)
2452 psp->pr_pctcpu = psp->pr_lwp.pr_pctcpu;
2453 else {
2454 uint64_t pct = 0;
2455 hrtime_t cur_time;
2456
2457 t = p->p_tlist;
2458 cur_time = gethrtime_unscaled();
2459 do {
2460 pct += cpu_update_pct(t, cur_time);
2461 } while ((t = t->t_forw) != p->p_tlist);
2462
2463 psp->pr_pctcpu = prgetpctcpu(pct);
2464 }
2465 if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) {
2466 psp->pr_size = 0;
2467 psp->pr_rssize = 0;
2468 } else {
2469 mutex_exit(&p->p_lock);
2470 AS_LOCK_ENTER(as, RW_READER);
2471 psp->pr_size = (size32_t)
2472 (btopr(as->a_resvsize) * (PAGESIZE / 1024));
2473 psp->pr_rssize = (size32_t)
2474 (rm_asrss(as) * (PAGESIZE / 1024));
2475 psp->pr_pctmem = rm_pctmemory(as);
2476 AS_LOCK_EXIT(as);
2477 mutex_enter(&p->p_lock);
2478 }
2479 }
2480
2481 /*
2482 * If we are looking at an LP64 process, zero out
2483 * the fields that cannot be represented in ILP32.
2484 */
2485 if (p->p_model != DATAMODEL_ILP32) {
2486 psp->pr_size = 0;
2487 psp->pr_rssize = 0;
2488 psp->pr_argv = 0;
2489 psp->pr_envp = 0;
2490 }
2491 }
2492
2493 #endif /* _SYSCALL32_IMPL */
2494
2495 void
2496 prgetlwpsinfo(kthread_t *t, lwpsinfo_t *psp)
2497 {
2498 klwp_t *lwp = ttolwp(t);
2499 sobj_ops_t *sobj;
2500 char c, state;
2501 uint64_t pct;
2502 int retval, niceval;
2503 hrtime_t hrutime, hrstime;
2504
2505 ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));
2506
2507 bzero(psp, sizeof (*psp));
2508
2509 psp->pr_flag = 0; /* lwpsinfo_t.pr_flag is deprecated */
2510 psp->pr_lwpid = t->t_tid;
2511 psp->pr_addr = (uintptr_t)t;
2512 psp->pr_wchan = (uintptr_t)t->t_wchan;
2513
2514 /* map the thread state enum into a process state enum */
2515 state = VSTOPPED(t) ? TS_STOPPED : t->t_state;
2516 switch (state) {
2517 case TS_SLEEP: state = SSLEEP; c = 'S'; break;
2518 case TS_RUN: state = SRUN; c = 'R'; break;
2519 case TS_ONPROC: state = SONPROC; c = 'O'; break;
2520 case TS_ZOMB: state = SZOMB; c = 'Z'; break;
2521 case TS_STOPPED: state = SSTOP; c = 'T'; break;
2522 case TS_WAIT: state = SWAIT; c = 'W'; break;
2523 default: state = 0; c = '?'; break;
2524 }
2525 psp->pr_state = state;
2526 psp->pr_sname = c;
2527 if ((sobj = t->t_sobj_ops) != NULL)
2528 psp->pr_stype = SOBJ_TYPE(sobj);
2529 retval = CL_DONICE(t, NULL, 0, &niceval);
2530 if (retval == 0) {
2531 psp->pr_oldpri = v.v_maxsyspri - t->t_pri;
2532 psp->pr_nice = niceval + NZERO;
2533 }
2534 psp->pr_syscall = t->t_sysnum;
2535 psp->pr_pri = t->t_pri;
2536 psp->pr_start.tv_sec = t->t_start;
2537 psp->pr_start.tv_nsec = 0L;
2538 hrutime = lwp->lwp_mstate.ms_acct[LMS_USER];
2539 scalehrtime(&hrutime);
2540 hrstime = lwp->lwp_mstate.ms_acct[LMS_SYSTEM] +
2541 lwp->lwp_mstate.ms_acct[LMS_TRAP];
2542 scalehrtime(&hrstime);
2543 hrt2ts(hrutime + hrstime, &psp->pr_time);
2544 /* compute %cpu for the lwp */
2545 pct = cpu_update_pct(t, gethrtime_unscaled());
2546 psp->pr_pctcpu = prgetpctcpu(pct);
2547 psp->pr_cpu = (psp->pr_pctcpu*100 + 0x6000) >> 15; /* [0..99] */
2548 if (psp->pr_cpu > 99)
2549 psp->pr_cpu = 99;
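	/*
	 * The conversion above maps the 16-bit fraction to an integer
	 * percentage: multiply by 100, add 0x6000 (0.75 in the same
	 * fixed-point scale) to bias the truncation toward rounding,
	 * then divide by 0x8000.  E.g., a pr_pctcpu of 0x4000 (50%)
	 * yields (0x4000 * 100 + 0x6000) >> 15 == 50.
	 */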
2550
2551 (void) strncpy(psp->pr_clname, sclass[t->t_cid].cl_name,
2552 sizeof (psp->pr_clname) - 1);
2553 bzero(psp->pr_name, sizeof (psp->pr_name)); /* XXX ??? */
2554 psp->pr_onpro = t->t_cpu->cpu_id;
2555 psp->pr_bindpro = t->t_bind_cpu;
2556 psp->pr_bindpset = t->t_bind_pset;
2557 psp->pr_lgrp = t->t_lpl->lpl_lgrpid;
2558 }
2559
2560 #ifdef _SYSCALL32_IMPL
2561 void
2562 prgetlwpsinfo32(kthread_t *t, lwpsinfo32_t *psp)
2563 {
2564 proc_t *p = ttoproc(t);
2565 klwp_t *lwp = ttolwp(t);
2566 sobj_ops_t *sobj;
2567 char c, state;
2568 uint64_t pct;
2569 int retval, niceval;
2570 hrtime_t hrutime, hrstime;
2571
2572 ASSERT(MUTEX_HELD(&p->p_lock));
2573
2574 bzero(psp, sizeof (*psp));
2575
2576 psp->pr_flag = 0; /* lwpsinfo_t.pr_flag is deprecated */
2577 psp->pr_lwpid = t->t_tid;
2578 psp->pr_addr = 0; /* cannot represent 64-bit addr in 32 bits */
2579 psp->pr_wchan = 0; /* cannot represent 64-bit addr in 32 bits */
2580
2581 /* map the thread state enum into a process state enum */
2582 state = VSTOPPED(t) ? TS_STOPPED : t->t_state;
2583 switch (state) {
2584 case TS_SLEEP: state = SSLEEP; c = 'S'; break;
2585 case TS_RUN: state = SRUN; c = 'R'; break;
2586 case TS_ONPROC: state = SONPROC; c = 'O'; break;
2587 case TS_ZOMB: state = SZOMB; c = 'Z'; break;
2588 case TS_STOPPED: state = SSTOP; c = 'T'; break;
2589 case TS_WAIT: state = SWAIT; c = 'W'; break;
2590 default: state = 0; c = '?'; break;
2591 }
2592 psp->pr_state = state;
2593 psp->pr_sname = c;
2594 if ((sobj = t->t_sobj_ops) != NULL)
2595 psp->pr_stype = SOBJ_TYPE(sobj);
2596 retval = CL_DONICE(t, NULL, 0, &niceval);
2597 if (retval == 0) {
2598 psp->pr_oldpri = v.v_maxsyspri - t->t_pri;
2599 psp->pr_nice = niceval + NZERO;
2600 } else {
2601 psp->pr_oldpri = 0;
2602 psp->pr_nice = 0;
2603 }
2604 psp->pr_syscall = t->t_sysnum;
2605 psp->pr_pri = t->t_pri;
2606 psp->pr_start.tv_sec = (time32_t)t->t_start;
2607 psp->pr_start.tv_nsec = 0L;
2608 hrutime = lwp->lwp_mstate.ms_acct[LMS_USER];
2609 scalehrtime(&hrutime);
2610 hrstime = lwp->lwp_mstate.ms_acct[LMS_SYSTEM] +
2611 lwp->lwp_mstate.ms_acct[LMS_TRAP];
2612 scalehrtime(&hrstime);
2613 hrt2ts32(hrutime + hrstime, &psp->pr_time);
2614 /* compute %cpu for the lwp */
2615 pct = cpu_update_pct(t, gethrtime_unscaled());
2616 psp->pr_pctcpu = prgetpctcpu(pct);
2617 psp->pr_cpu = (psp->pr_pctcpu*100 + 0x6000) >> 15; /* [0..99] */
2618 if (psp->pr_cpu > 99)
2619 psp->pr_cpu = 99;
2620
2621 (void) strncpy(psp->pr_clname, sclass[t->t_cid].cl_name,
2622 sizeof (psp->pr_clname) - 1);
2623 bzero(psp->pr_name, sizeof (psp->pr_name)); /* XXX ??? */
2624 psp->pr_onpro = t->t_cpu->cpu_id;
2625 psp->pr_bindpro = t->t_bind_cpu;
2626 psp->pr_bindpset = t->t_bind_pset;
2627 psp->pr_lgrp = t->t_lpl->lpl_lgrpid;
2628 }
2629 #endif /* _SYSCALL32_IMPL */
2630
2631 #ifdef _SYSCALL32_IMPL
2632
2633 #define PR_COPY_FIELD(s, d, field) d->field = s->field
2634
2635 #define PR_COPY_FIELD_ILP32(s, d, field) \
2636 if (s->pr_dmodel == PR_MODEL_ILP32) { \
2637 d->field = s->field; \
2638 }
2639
2640 #define PR_COPY_TIMESPEC(s, d, field) \
2641 TIMESPEC_TO_TIMESPEC32(&d->field, &s->field);
2642
2643 #define PR_COPY_BUF(s, d, field) \
2644 bcopy(s->field, d->field, sizeof (d->field));
2645
2646 #define PR_IGNORE_FIELD(s, d, field)
2647
2648 void
2649 lwpsinfo_kto32(const struct lwpsinfo *src, struct lwpsinfo32 *dest)
2650 {
2651 bzero(dest, sizeof (*dest));
2652
2653 PR_COPY_FIELD(src, dest, pr_flag);
2654 PR_COPY_FIELD(src, dest, pr_lwpid);
2655 PR_IGNORE_FIELD(src, dest, pr_addr);
2656 PR_IGNORE_FIELD(src, dest, pr_wchan);
2657 PR_COPY_FIELD(src, dest, pr_stype);
2658 PR_COPY_FIELD(src, dest, pr_state);
2659 PR_COPY_FIELD(src, dest, pr_sname);
2660 PR_COPY_FIELD(src, dest, pr_nice);
2661 PR_COPY_FIELD(src, dest, pr_syscall);
2662 PR_COPY_FIELD(src, dest, pr_oldpri);
2663 PR_COPY_FIELD(src, dest, pr_cpu);
2664 PR_COPY_FIELD(src, dest, pr_pri);
2665 PR_COPY_FIELD(src, dest, pr_pctcpu);
2666 PR_COPY_TIMESPEC(src, dest, pr_start);
2667 PR_COPY_BUF(src, dest, pr_clname);
2668 PR_COPY_BUF(src, dest, pr_name);
2669 PR_COPY_FIELD(src, dest, pr_onpro);
2670 PR_COPY_FIELD(src, dest, pr_bindpro);
2671 PR_COPY_FIELD(src, dest, pr_bindpset);
2672 PR_COPY_FIELD(src, dest, pr_lgrp);
2673 }
2674
2675 void
2676 psinfo_kto32(const struct psinfo *src, struct psinfo32 *dest)
2677 {
2678 bzero(dest, sizeof (*dest));
2679
2680 PR_COPY_FIELD(src, dest, pr_flag);
2681 PR_COPY_FIELD(src, dest, pr_nlwp);
2682 PR_COPY_FIELD(src, dest, pr_pid);
2683 PR_COPY_FIELD(src, dest, pr_ppid);
2684 PR_COPY_FIELD(src, dest, pr_pgid);
2685 PR_COPY_FIELD(src, dest, pr_sid);
2686 PR_COPY_FIELD(src, dest, pr_uid);
2687 PR_COPY_FIELD(src, dest, pr_euid);
2688 PR_COPY_FIELD(src, dest, pr_gid);
2689 PR_COPY_FIELD(src, dest, pr_egid);
2690 PR_IGNORE_FIELD(src, dest, pr_addr);
2691 PR_COPY_FIELD_ILP32(src, dest, pr_size);
2692 PR_COPY_FIELD_ILP32(src, dest, pr_rssize);
2693 PR_COPY_FIELD(src, dest, pr_ttydev);
2694 PR_COPY_FIELD(src, dest, pr_pctcpu);
2695 PR_COPY_FIELD(src, dest, pr_pctmem);
2696 PR_COPY_TIMESPEC(src, dest, pr_start);
2697 PR_COPY_TIMESPEC(src, dest, pr_time);
2698 PR_COPY_TIMESPEC(src, dest, pr_ctime);
2699 PR_COPY_BUF(src, dest, pr_fname);
2700 PR_COPY_BUF(src, dest, pr_psargs);
2701 PR_COPY_FIELD(src, dest, pr_wstat);
2702 PR_COPY_FIELD(src, dest, pr_argc);
2703 PR_COPY_FIELD_ILP32(src, dest, pr_argv);
2704 PR_COPY_FIELD_ILP32(src, dest, pr_envp);
2705 PR_COPY_FIELD(src, dest, pr_dmodel);
2706 PR_COPY_FIELD(src, dest, pr_taskid);
2707 PR_COPY_FIELD(src, dest, pr_projid);
2708 PR_COPY_FIELD(src, dest, pr_nzomb);
2709 PR_COPY_FIELD(src, dest, pr_poolid);
2710 PR_COPY_FIELD(src, dest, pr_contract);
2711 	PR_COPY_FIELD(src, dest, pr_zoneid);
2713
2714 lwpsinfo_kto32(&src->pr_lwp, &dest->pr_lwp);
2715 }
2716
2717 #undef PR_COPY_FIELD
2718 #undef PR_COPY_FIELD_ILP32
2719 #undef PR_COPY_TIMESPEC
2720 #undef PR_COPY_BUF
2721 #undef PR_IGNORE_FIELD
2722
2723 #endif /* _SYSCALL32_IMPL */
2724
2725 /*
2726 * This used to get called when microstate accounting was disabled but
2727  * microstate information was requested. Since microstate accounting is on
2728 * regardless of the proc flags, this simply makes it appear to procfs that
2729 * microstate accounting is on. This is relatively meaningless since you
2730 * can't turn it off, but this is here for the sake of appearances.
2731 */
2732
2733 /*ARGSUSED*/
2734 void
2735 estimate_msacct(kthread_t *t, hrtime_t curtime)
2736 {
2737 proc_t *p;
2738
2739 if (t == NULL)
2740 return;
2741
2742 p = ttoproc(t);
2743 ASSERT(MUTEX_HELD(&p->p_lock));
2744
2745 /*
2746 * A system process (p0) could be referenced if the thread is
2747 * in the process of exiting. Don't turn on microstate accounting
2748 * in that case.
2749 */
2750 if (p->p_flag & SSYS)
2751 return;
2752
2753 /*
2754 * Loop through all the LWPs (kernel threads) in the process.
2755 */
2756 t = p->p_tlist;
2757 do {
2758 t->t_proc_flag |= TP_MSACCT;
2759 } while ((t = t->t_forw) != p->p_tlist);
2760
2761 p->p_flag |= SMSACCT; /* set process-wide MSACCT */
2762 }
2763
2764 /*
2765 * It's not really possible to disable microstate accounting anymore.
2766  * However, this routine simply turns off the ms accounting flags in a process.
2767 * This way procfs can still pretend to turn microstate accounting on and
2768 * off for a process, but it actually doesn't do anything. This is
2769 * a neutered form of preemptive idiot-proofing.
2770 */
2771 void
2772 disable_msacct(proc_t *p)
2773 {
2774 kthread_t *t;
2775
2776 ASSERT(MUTEX_HELD(&p->p_lock));
2777
2778 p->p_flag &= ~SMSACCT; /* clear process-wide MSACCT */
2779 /*
2780 * Loop through all the LWPs (kernel threads) in the process.
2781 */
2782 if ((t = p->p_tlist) != NULL) {
2783 do {
2784 /* clear per-thread flag */
2785 t->t_proc_flag &= ~TP_MSACCT;
2786 } while ((t = t->t_forw) != p->p_tlist);
2787 }
2788 }
2789
2790 /*
2791 * Return resource usage information.
2792 */
2793 void
2794 prgetusage(kthread_t *t, prhusage_t *pup)
2795 {
2796 klwp_t *lwp = ttolwp(t);
2797 hrtime_t *mstimep;
2798 struct mstate *ms = &lwp->lwp_mstate;
2799 int state;
2800 int i;
2801 hrtime_t curtime;
2802 hrtime_t waitrq;
2803 hrtime_t tmp1;
2804
2805 curtime = gethrtime_unscaled();
2806
2807 pup->pr_lwpid = t->t_tid;
2808 pup->pr_count = 1;
2809 pup->pr_create = ms->ms_start;
2810 pup->pr_term = ms->ms_term;
2811 scalehrtime(&pup->pr_create);
2812 scalehrtime(&pup->pr_term);
2813 if (ms->ms_term == 0) {
2814 pup->pr_rtime = curtime - ms->ms_start;
2815 scalehrtime(&pup->pr_rtime);
2816 } else {
2817 pup->pr_rtime = ms->ms_term - ms->ms_start;
2818 scalehrtime(&pup->pr_rtime);
2819 }
2820
2821
2822 pup->pr_utime = ms->ms_acct[LMS_USER];
2823 pup->pr_stime = ms->ms_acct[LMS_SYSTEM];
2824 pup->pr_ttime = ms->ms_acct[LMS_TRAP];
2825 pup->pr_tftime = ms->ms_acct[LMS_TFAULT];
2826 pup->pr_dftime = ms->ms_acct[LMS_DFAULT];
2827 pup->pr_kftime = ms->ms_acct[LMS_KFAULT];
2828 pup->pr_ltime = ms->ms_acct[LMS_USER_LOCK];
2829 pup->pr_slptime = ms->ms_acct[LMS_SLEEP];
2830 pup->pr_wtime = ms->ms_acct[LMS_WAIT_CPU];
2831 pup->pr_stoptime = ms->ms_acct[LMS_STOPPED];
2832
2833 prscaleusage(pup);
2834
2835 /*
2836 * Adjust for time waiting in the dispatcher queue.
2837 */
2838 waitrq = t->t_waitrq; /* hopefully atomic */
2839 if (waitrq != 0) {
2840 if (waitrq > curtime) {
2841 curtime = gethrtime_unscaled();
2842 }
2843 tmp1 = curtime - waitrq;
2844 scalehrtime(&tmp1);
2845 pup->pr_wtime += tmp1;
2846 curtime = waitrq;
2847 }
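	/*
	 * Note that curtime was wound back to waitrq above, so the
	 * current-microstate adjustment below covers only the interval
	 * up to the moment the thread went onto the dispatch queue;
	 * the time from waitrq to now is already counted in pr_wtime.
	 */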
2848
2849 /*
2850 * Adjust for time spent in current microstate.
2851 */
2852 if (ms->ms_state_start > curtime) {
2853 curtime = gethrtime_unscaled();
2854 }
2855
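	/*
	 * The loop below can compute a negative interval when the lwp is
	 * running and advances ms_state_start concurrently (t_mstate and
	 * ms_state_start are read without locks).  In that case we re-read
	 * the clock and retry, giving up after MAX_ITERS_SPIN passes so a
	 * latent bug cannot leave us spinning indefinitely.
	 */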
2856 i = 0;
2857 do {
2858 switch (state = t->t_mstate) {
2859 case LMS_SLEEP:
2860 /*
2861 * Update the timer for the current sleep state.
2862 */
2863 switch (state = ms->ms_prev) {
2864 case LMS_TFAULT:
2865 case LMS_DFAULT:
2866 case LMS_KFAULT:
2867 case LMS_USER_LOCK:
2868 break;
2869 default:
2870 state = LMS_SLEEP;
2871 break;
2872 }
2873 break;
2874 case LMS_TFAULT:
2875 case LMS_DFAULT:
2876 case LMS_KFAULT:
2877 case LMS_USER_LOCK:
2878 state = LMS_SYSTEM;
2879 break;
2880 }
2881 switch (state) {
2882 case LMS_USER: mstimep = &pup->pr_utime; break;
2883 case LMS_SYSTEM: mstimep = &pup->pr_stime; break;
2884 case LMS_TRAP: mstimep = &pup->pr_ttime; break;
2885 case LMS_TFAULT: mstimep = &pup->pr_tftime; break;
2886 case LMS_DFAULT: mstimep = &pup->pr_dftime; break;
2887 case LMS_KFAULT: mstimep = &pup->pr_kftime; break;
2888 case LMS_USER_LOCK: mstimep = &pup->pr_ltime; break;
2889 case LMS_SLEEP: mstimep = &pup->pr_slptime; break;
2890 case LMS_WAIT_CPU: mstimep = &pup->pr_wtime; break;
2891 case LMS_STOPPED: mstimep = &pup->pr_stoptime; break;
2892 default: panic("prgetusage: unknown microstate");
2893 }
2894 tmp1 = curtime - ms->ms_state_start;
2895 if (tmp1 < 0) {
2896 curtime = gethrtime_unscaled();
2897 i++;
2898 continue;
2899 }
2900 scalehrtime(&tmp1);
2901 } while (tmp1 < 0 && i < MAX_ITERS_SPIN);
2902
2903 *mstimep += tmp1;
2904
2905 /* update pup timestamp */
2906 pup->pr_tstamp = curtime;
2907 scalehrtime(&pup->pr_tstamp);
2908
2909 /*
2910 * Resource usage counters.
2911 */
2912 pup->pr_minf = lwp->lwp_ru.minflt;
2913 pup->pr_majf = lwp->lwp_ru.majflt;
2914 pup->pr_nswap = lwp->lwp_ru.nswap;
2915 pup->pr_inblk = lwp->lwp_ru.inblock;
2916 pup->pr_oublk = lwp->lwp_ru.oublock;
2917 pup->pr_msnd = lwp->lwp_ru.msgsnd;
2918 pup->pr_mrcv = lwp->lwp_ru.msgrcv;
2919 pup->pr_sigs = lwp->lwp_ru.nsignals;
2920 pup->pr_vctx = lwp->lwp_ru.nvcsw;
2921 pup->pr_ictx = lwp->lwp_ru.nivcsw;
2922 pup->pr_sysc = lwp->lwp_ru.sysc;
2923 pup->pr_ioch = lwp->lwp_ru.ioch;
2924 }
2925
2926 /*
2927 * Convert ms_acct stats from unscaled high-res time to nanoseconds
2928 */
2929 void
2930 prscaleusage(prhusage_t *usg)
2931 {
2932 scalehrtime(&usg->pr_utime);
2933 scalehrtime(&usg->pr_stime);
2934 scalehrtime(&usg->pr_ttime);
2935 scalehrtime(&usg->pr_tftime);
2936 scalehrtime(&usg->pr_dftime);
2937 scalehrtime(&usg->pr_kftime);
2938 scalehrtime(&usg->pr_ltime);
2939 scalehrtime(&usg->pr_slptime);
2940 scalehrtime(&usg->pr_wtime);
2941 scalehrtime(&usg->pr_stoptime);
2942 }
2943
2944
2945 /*
2946 * Sum resource usage information.
2947 */
2948 void
2949 praddusage(kthread_t *t, prhusage_t *pup)
2950 {
2951 klwp_t *lwp = ttolwp(t);
2952 hrtime_t *mstimep;
2953 struct mstate *ms = &lwp->lwp_mstate;
2954 int state;
2955 int i;
2956 hrtime_t curtime;
2957 hrtime_t waitrq;
2958 hrtime_t tmp;
2959 prhusage_t conv;
2960
2961 curtime = gethrtime_unscaled();
2962
2963 if (ms->ms_term == 0) {
2964 tmp = curtime - ms->ms_start;
2965 scalehrtime(&tmp);
2966 pup->pr_rtime += tmp;
2967 } else {
2968 tmp = ms->ms_term - ms->ms_start;
2969 scalehrtime(&tmp);
2970 pup->pr_rtime += tmp;
2971 }
2972
2973 conv.pr_utime = ms->ms_acct[LMS_USER];
2974 conv.pr_stime = ms->ms_acct[LMS_SYSTEM];
2975 conv.pr_ttime = ms->ms_acct[LMS_TRAP];
2976 conv.pr_tftime = ms->ms_acct[LMS_TFAULT];
2977 conv.pr_dftime = ms->ms_acct[LMS_DFAULT];
2978 conv.pr_kftime = ms->ms_acct[LMS_KFAULT];
2979 conv.pr_ltime = ms->ms_acct[LMS_USER_LOCK];
2980 conv.pr_slptime = ms->ms_acct[LMS_SLEEP];
2981 conv.pr_wtime = ms->ms_acct[LMS_WAIT_CPU];
2982 conv.pr_stoptime = ms->ms_acct[LMS_STOPPED];
2983
2984 prscaleusage(&conv);
2985
2986 pup->pr_utime += conv.pr_utime;
2987 pup->pr_stime += conv.pr_stime;
2988 pup->pr_ttime += conv.pr_ttime;
2989 pup->pr_tftime += conv.pr_tftime;
2990 pup->pr_dftime += conv.pr_dftime;
2991 pup->pr_kftime += conv.pr_kftime;
2992 pup->pr_ltime += conv.pr_ltime;
2993 pup->pr_slptime += conv.pr_slptime;
2994 pup->pr_wtime += conv.pr_wtime;
2995 pup->pr_stoptime += conv.pr_stoptime;
2996
2997 /*
2998 * Adjust for time waiting in the dispatcher queue.
2999 */
3000 waitrq = t->t_waitrq; /* hopefully atomic */
3001 if (waitrq != 0) {
3002 if (waitrq > curtime) {
3003 curtime = gethrtime_unscaled();
3004 }
3005 tmp = curtime - waitrq;
3006 scalehrtime(&tmp);
3007 pup->pr_wtime += tmp;
3008 curtime = waitrq;
3009 }
3010
3011 /*
3012 * Adjust for time spent in current microstate.
3013 */
3014 if (ms->ms_state_start > curtime) {
3015 curtime = gethrtime_unscaled();
3016 }
3017
3018 i = 0;
3019 do {
3020 switch (state = t->t_mstate) {
3021 case LMS_SLEEP:
3022 /*
3023 * Update the timer for the current sleep state.
3024 */
3025 switch (state = ms->ms_prev) {
3026 case LMS_TFAULT:
3027 case LMS_DFAULT:
3028 case LMS_KFAULT:
3029 case LMS_USER_LOCK:
3030 break;
3031 default:
3032 state = LMS_SLEEP;
3033 break;
3034 }
3035 break;
3036 case LMS_TFAULT:
3037 case LMS_DFAULT:
3038 case LMS_KFAULT:
3039 case LMS_USER_LOCK:
3040 state = LMS_SYSTEM;
3041 break;
3042 }
3043 switch (state) {
3044 case LMS_USER: mstimep = &pup->pr_utime; break;
3045 case LMS_SYSTEM: mstimep = &pup->pr_stime; break;
3046 case LMS_TRAP: mstimep = &pup->pr_ttime; break;
3047 case LMS_TFAULT: mstimep = &pup->pr_tftime; break;
3048 case LMS_DFAULT: mstimep = &pup->pr_dftime; break;
3049 case LMS_KFAULT: mstimep = &pup->pr_kftime; break;
3050 case LMS_USER_LOCK: mstimep = &pup->pr_ltime; break;
3051 case LMS_SLEEP: mstimep = &pup->pr_slptime; break;
3052 case LMS_WAIT_CPU: mstimep = &pup->pr_wtime; break;
3053 case LMS_STOPPED: mstimep = &pup->pr_stoptime; break;
3054 default: panic("praddusage: unknown microstate");
3055 }
3056 tmp = curtime - ms->ms_state_start;
3057 if (tmp < 0) {
3058 curtime = gethrtime_unscaled();
3059 i++;
3060 continue;
3061 }
3062 scalehrtime(&tmp);
3063 } while (tmp < 0 && i < MAX_ITERS_SPIN);
3064
3065 *mstimep += tmp;
3066
3067 /* update pup timestamp */
3068 pup->pr_tstamp = curtime;
3069 scalehrtime(&pup->pr_tstamp);
3070
3071 /*
3072 * Resource usage counters.
3073 */
3074 pup->pr_minf += lwp->lwp_ru.minflt;
3075 pup->pr_majf += lwp->lwp_ru.majflt;
3076 pup->pr_nswap += lwp->lwp_ru.nswap;
3077 pup->pr_inblk += lwp->lwp_ru.inblock;
3078 pup->pr_oublk += lwp->lwp_ru.oublock;
3079 pup->pr_msnd += lwp->lwp_ru.msgsnd;
3080 pup->pr_mrcv += lwp->lwp_ru.msgrcv;
3081 pup->pr_sigs += lwp->lwp_ru.nsignals;
3082 pup->pr_vctx += lwp->lwp_ru.nvcsw;
3083 pup->pr_ictx += lwp->lwp_ru.nivcsw;
3084 pup->pr_sysc += lwp->lwp_ru.sysc;
3085 pup->pr_ioch += lwp->lwp_ru.ioch;
3086 }
3087
3088 /*
3089 * Convert a prhusage_t to a prusage_t.
3090 * This means convert each hrtime_t to a timestruc_t
3091 * and copy the count fields uint64_t => ulong_t.
3092 */
3093 void
3094 prcvtusage(prhusage_t *pup, prusage_t *upup)
3095 {
3096 uint64_t *ullp;
3097 ulong_t *ulp;
3098 int i;
3099
3100 upup->pr_lwpid = pup->pr_lwpid;
3101 upup->pr_count = pup->pr_count;
3102
3103 hrt2ts(pup->pr_tstamp, &upup->pr_tstamp);
3104 hrt2ts(pup->pr_create, &upup->pr_create);
3105 hrt2ts(pup->pr_term, &upup->pr_term);
3106 hrt2ts(pup->pr_rtime, &upup->pr_rtime);
3107 hrt2ts(pup->pr_utime, &upup->pr_utime);
3108 hrt2ts(pup->pr_stime, &upup->pr_stime);
3109 hrt2ts(pup->pr_ttime, &upup->pr_ttime);
3110 hrt2ts(pup->pr_tftime, &upup->pr_tftime);
3111 hrt2ts(pup->pr_dftime, &upup->pr_dftime);
3112 hrt2ts(pup->pr_kftime, &upup->pr_kftime);
3113 hrt2ts(pup->pr_ltime, &upup->pr_ltime);
3114 hrt2ts(pup->pr_slptime, &upup->pr_slptime);
3115 hrt2ts(pup->pr_wtime, &upup->pr_wtime);
3116 hrt2ts(pup->pr_stoptime, &upup->pr_stoptime);
3117 bzero(upup->filltime, sizeof (upup->filltime));
3118
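	/*
	 * Guilty knowledge: the 12 counters from pr_minf through pr_ioch,
	 * plus the 10 reserved filler slots that follow them, are assumed
	 * to be contiguous and identically ordered in both prhusage_t and
	 * prusage_t, so the 22 fields can be copied by walking pointers.
	 */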
3119 ullp = &pup->pr_minf;
3120 ulp = &upup->pr_minf;
3121 for (i = 0; i < 22; i++)
3122 *ulp++ = (ulong_t)*ullp++;
3123 }
3124
3125 #ifdef _SYSCALL32_IMPL
3126 void
3127 prcvtusage32(prhusage_t *pup, prusage32_t *upup)
3128 {
3129 uint64_t *ullp;
3130 uint32_t *ulp;
3131 int i;
3132
3133 upup->pr_lwpid = pup->pr_lwpid;
3134 upup->pr_count = pup->pr_count;
3135
3136 hrt2ts32(pup->pr_tstamp, &upup->pr_tstamp);
3137 hrt2ts32(pup->pr_create, &upup->pr_create);
3138 hrt2ts32(pup->pr_term, &upup->pr_term);
3139 hrt2ts32(pup->pr_rtime, &upup->pr_rtime);
3140 hrt2ts32(pup->pr_utime, &upup->pr_utime);
3141 hrt2ts32(pup->pr_stime, &upup->pr_stime);
3142 hrt2ts32(pup->pr_ttime, &upup->pr_ttime);
3143 hrt2ts32(pup->pr_tftime, &upup->pr_tftime);
3144 hrt2ts32(pup->pr_dftime, &upup->pr_dftime);
3145 hrt2ts32(pup->pr_kftime, &upup->pr_kftime);
3146 hrt2ts32(pup->pr_ltime, &upup->pr_ltime);
3147 hrt2ts32(pup->pr_slptime, &upup->pr_slptime);
3148 hrt2ts32(pup->pr_wtime, &upup->pr_wtime);
3149 hrt2ts32(pup->pr_stoptime, &upup->pr_stoptime);
3150 bzero(upup->filltime, sizeof (upup->filltime));
3151
3152 ullp = &pup->pr_minf;
3153 ulp = &upup->pr_minf;
3154 for (i = 0; i < 22; i++)
3155 *ulp++ = (uint32_t)*ullp++;
3156 }
3157 #endif /* _SYSCALL32_IMPL */
3158
3159 /*
3160 * Determine whether a set is empty.
3161 */
3162 int
3163 setisempty(uint32_t *sp, uint_t n)
3164 {
3165 while (n--)
3166 if (*sp++)
3167 return (0);
3168 return (1);
3169 }
3170
3171 /*
3172 * Utility routine for establishing a watched area in the process.
3173 * Keep the list of watched areas sorted by virtual address.
3174 */
3175 int
3176 set_watched_area(proc_t *p, struct watched_area *pwa)
3177 {
3178 caddr_t vaddr = pwa->wa_vaddr;
3179 caddr_t eaddr = pwa->wa_eaddr;
3180 ulong_t flags = pwa->wa_flags;
3181 struct watched_area *target;
3182 avl_index_t where;
3183 int error = 0;
3184
3185 /* we must not be holding p->p_lock, but the process must be locked */
3186 ASSERT(MUTEX_NOT_HELD(&p->p_lock));
3187 ASSERT(p->p_proc_flag & P_PR_LOCK);
3188
3189 /*
3190 * If this is our first watchpoint, enable watchpoints for the process.
3191 */
3192 if (!pr_watch_active(p)) {
3193 kthread_t *t;
3194
3195 mutex_enter(&p->p_lock);
3196 if ((t = p->p_tlist) != NULL) {
3197 do {
3198 watch_enable(t);
3199 } while ((t = t->t_forw) != p->p_tlist);
3200 }
3201 mutex_exit(&p->p_lock);
3202 }
3203
3204 target = pr_find_watched_area(p, pwa, &where);
3205 if (target != NULL) {
3206 /*
3207 * We discovered an existing, overlapping watched area.
3208 * Allow it only if it is an exact match.
3209 */
3210 if (target->wa_vaddr != vaddr ||
3211 target->wa_eaddr != eaddr)
3212 error = EINVAL;
3213 else if (target->wa_flags != flags) {
3214 error = set_watched_page(p, vaddr, eaddr,
3215 flags, target->wa_flags);
3216 target->wa_flags = flags;
3217 }
3218 kmem_free(pwa, sizeof (struct watched_area));
3219 } else {
3220 avl_insert(&p->p_warea, pwa, where);
3221 error = set_watched_page(p, vaddr, eaddr, flags, 0);
3222 }
3223
3224 return (error);
3225 }
3226
3227 /*
3228 * Utility routine for clearing a watched area in the process.
3229 * Must be an exact match of the virtual address.
3230 * size and flags don't matter.
3231 */
3232 int
3233 clear_watched_area(proc_t *p, struct watched_area *pwa)
3234 {
3235 struct watched_area *found;
3236
3237 /* we must not be holding p->p_lock, but the process must be locked */
3238 ASSERT(MUTEX_NOT_HELD(&p->p_lock));
3239 ASSERT(p->p_proc_flag & P_PR_LOCK);
3240
3241
3242 if (!pr_watch_active(p)) {
3243 kmem_free(pwa, sizeof (struct watched_area));
3244 return (0);
3245 }
3246
3247 /*
3248 * Look for a matching address in the watched areas. If a match is
3249 * found, clear the old watched area and adjust the watched page(s). It
3250 * is not an error if there is no match.
3251 */
3252 if ((found = pr_find_watched_area(p, pwa, NULL)) != NULL &&
3253 found->wa_vaddr == pwa->wa_vaddr) {
3254 clear_watched_page(p, found->wa_vaddr, found->wa_eaddr,
3255 found->wa_flags);
3256 avl_remove(&p->p_warea, found);
3257 kmem_free(found, sizeof (struct watched_area));
3258 }
3259
3260 kmem_free(pwa, sizeof (struct watched_area));
3261
3262 /*
3263 * If we removed the last watched area from the process, disable
3264 * watchpoints.
3265 */
3266 if (!pr_watch_active(p)) {
3267 kthread_t *t;
3268
3269 mutex_enter(&p->p_lock);
3270 if ((t = p->p_tlist) != NULL) {
3271 do {
3272 watch_disable(t);
3273 } while ((t = t->t_forw) != p->p_tlist);
3274 }
3275 mutex_exit(&p->p_lock);
3276 }
3277
3278 return (0);
3279 }
3280
3281 /*
3282 * Frees all the watched_area structures
3283 */
3284 void
3285 pr_free_watchpoints(proc_t *p)
3286 {
3287 struct watched_area *delp;
3288 void *cookie;
3289
3290 cookie = NULL;
3291 while ((delp = avl_destroy_nodes(&p->p_warea, &cookie)) != NULL)
3292 kmem_free(delp, sizeof (struct watched_area));
3293
3294 avl_destroy(&p->p_warea);
3295 }
3296
3297 /*
3298 * This one is called by the traced process to unwatch all the
3299 * pages while deallocating the list of watched_page structs.
3300 */
3301 void
3302 pr_free_watched_pages(proc_t *p)
3303 {
3304 struct as *as = p->p_as;
3305 struct watched_page *pwp;
3306 uint_t prot;
3307 int retrycnt, err;
3308 void *cookie;
3309
3310 if (as == NULL || avl_numnodes(&as->a_wpage) == 0)
3311 return;
3312
3313 ASSERT(MUTEX_NOT_HELD(&curproc->p_lock));
3314 AS_LOCK_ENTER(as, RW_WRITER);
3315
3318 cookie = NULL;
3319 while ((pwp = avl_destroy_nodes(&as->a_wpage, &cookie)) != NULL) {
3320 retrycnt = 0;
3321 if ((prot = pwp->wp_oprot) != 0) {
3322 caddr_t addr = pwp->wp_vaddr;
3323 struct seg *seg;
3324 retry:
3325
3326 if ((pwp->wp_prot != prot ||
3327 (pwp->wp_flags & WP_NOWATCH)) &&
3328 (seg = as_segat(as, addr)) != NULL) {
3329 err = SEGOP_SETPROT(seg, addr, PAGESIZE, prot);
3330 if (err == IE_RETRY) {
3331 ASSERT(retrycnt == 0);
3332 retrycnt++;
3333 goto retry;
3334 }
3335 }
3336 }
3337 kmem_free(pwp, sizeof (struct watched_page));
3338 }
3339
3340 avl_destroy(&as->a_wpage);
3341 p->p_wprot = NULL;
3342
3343 AS_LOCK_EXIT(as);
3344 }
3345
3346 /*
3347 * Insert a watched area into the list of watched pages.
3348 * If oflags is zero then we are adding a new watched area.
3349 * Otherwise we are changing the flags of an existing watched area.
3350 */
3351 static int
3352 set_watched_page(proc_t *p, caddr_t vaddr, caddr_t eaddr,
3353 ulong_t flags, ulong_t oflags)
3354 {
3355 struct as *as = p->p_as;
3356 avl_tree_t *pwp_tree;
3357 struct watched_page *pwp, *newpwp;
3358 struct watched_page tpw;
3359 avl_index_t where;
3360 struct seg *seg;
3361 uint_t prot;
3362 caddr_t addr;
3363
3364 /*
3365 * We need to pre-allocate a list of structures before we grab the
3366 * address space lock to avoid calling kmem_alloc(KM_SLEEP) with locks
3367 * held.
3368 */
3369 newpwp = NULL;
3370 for (addr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
3371 addr < eaddr; addr += PAGESIZE) {
3372 pwp = kmem_zalloc(sizeof (struct watched_page), KM_SLEEP);
3373 pwp->wp_list = newpwp;
3374 newpwp = pwp;
3375 }
3376
3377 AS_LOCK_ENTER(as, RW_WRITER);
3378
3379 /*
3380 * Search for an existing watched page to contain the watched area.
3381 * If none is found, grab a new one from the available list
3382 * and insert it in the active list, keeping the list sorted
3383 * by user-level virtual address.
3384 */
3385 if (p->p_flag & SVFWAIT)
3386 pwp_tree = &p->p_wpage;
3387 else
3388 pwp_tree = &as->a_wpage;
3389
3390 again:
3391 if (avl_numnodes(pwp_tree) > prnwatch) {
3392 AS_LOCK_EXIT(as);
3393 while (newpwp != NULL) {
3394 pwp = newpwp->wp_list;
3395 kmem_free(newpwp, sizeof (struct watched_page));
3396 newpwp = pwp;
3397 }
3398 return (E2BIG);
3399 }
3400
3401 tpw.wp_vaddr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
3402 if ((pwp = avl_find(pwp_tree, &tpw, &where)) == NULL) {
3403 pwp = newpwp;
3404 newpwp = newpwp->wp_list;
3405 pwp->wp_list = NULL;
3406 pwp->wp_vaddr = (caddr_t)((uintptr_t)vaddr &
3407 (uintptr_t)PAGEMASK);
3408 avl_insert(pwp_tree, pwp, where);
3409 }
3410
3411 ASSERT(vaddr >= pwp->wp_vaddr && vaddr < pwp->wp_vaddr + PAGESIZE);
3412
3413 if (oflags & WA_READ)
3414 pwp->wp_read--;
3415 if (oflags & WA_WRITE)
3416 pwp->wp_write--;
3417 if (oflags & WA_EXEC)
3418 pwp->wp_exec--;
3419
3420 ASSERT(pwp->wp_read >= 0);
3421 ASSERT(pwp->wp_write >= 0);
3422 ASSERT(pwp->wp_exec >= 0);
3423
3424 if (flags & WA_READ)
3425 pwp->wp_read++;
3426 if (flags & WA_WRITE)
3427 pwp->wp_write++;
3428 if (flags & WA_EXEC)
3429 pwp->wp_exec++;
3430
3431 if (!(p->p_flag & SVFWAIT)) {
3432 vaddr = pwp->wp_vaddr;
3433 if (pwp->wp_oprot == 0 &&
3434 (seg = as_segat(as, vaddr)) != NULL) {
3435 SEGOP_GETPROT(seg, vaddr, 0, &prot);
3436 pwp->wp_oprot = (uchar_t)prot;
3437 pwp->wp_prot = (uchar_t)prot;
3438 }
3439 if (pwp->wp_oprot != 0) {
3440 prot = pwp->wp_oprot;
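			/*
			 * A write watchpoint needs only PROT_WRITE removed,
			 * but read and execute watchpoints must make the
			 * page wholly inaccessible: typical MMUs cannot
			 * fault on a read or an instruction fetch while
			 * still permitting the other kinds of access.
			 */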
3441 if (pwp->wp_read)
3442 prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3443 if (pwp->wp_write)
3444 prot &= ~PROT_WRITE;
3445 if (pwp->wp_exec)
3446 prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3447 if (!(pwp->wp_flags & WP_NOWATCH) &&
3448 pwp->wp_prot != prot &&
3449 (pwp->wp_flags & WP_SETPROT) == 0) {
3450 pwp->wp_flags |= WP_SETPROT;
3451 pwp->wp_list = p->p_wprot;
3452 p->p_wprot = pwp;
3453 }
3454 pwp->wp_prot = (uchar_t)prot;
3455 }
3456 }
3457
3458 /*
3459 * If the watched area extends into the next page then do
3460 * it over again with the virtual address of the next page.
3461 */
3462 if ((vaddr = pwp->wp_vaddr + PAGESIZE) < eaddr)
3463 goto again;
3464
3465 AS_LOCK_EXIT(as);
3466
3467 /*
3468 * Free any pages we may have over-allocated
3469 */
3470 while (newpwp != NULL) {
3471 pwp = newpwp->wp_list;
3472 kmem_free(newpwp, sizeof (struct watched_page));
3473 newpwp = pwp;
3474 }
3475
3476 return (0);
3477 }
3478
3479 /*
3480 * Remove a watched area from the list of watched pages.
3481 * A watched area may extend over more than one page.
3482 */
3483 static void
3484 clear_watched_page(proc_t *p, caddr_t vaddr, caddr_t eaddr, ulong_t flags)
3485 {
3486 struct as *as = p->p_as;
3487 struct watched_page *pwp;
3488 struct watched_page tpw;
3489 avl_tree_t *tree;
3490 avl_index_t where;
3491
3492 AS_LOCK_ENTER(as, RW_WRITER);
3493
3494 if (p->p_flag & SVFWAIT)
3495 tree = &p->p_wpage;
3496 else
3497 tree = &as->a_wpage;
3498
3499 tpw.wp_vaddr = vaddr =
3500 (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
3501 pwp = avl_find(tree, &tpw, &where);
3502 if (pwp == NULL)
3503 pwp = avl_nearest(tree, where, AVL_AFTER);
3504
3505 while (pwp != NULL && pwp->wp_vaddr < eaddr) {
3506 ASSERT(vaddr <= pwp->wp_vaddr);
3507
3508 if (flags & WA_READ)
3509 pwp->wp_read--;
3510 if (flags & WA_WRITE)
3511 pwp->wp_write--;
3512 if (flags & WA_EXEC)
3513 pwp->wp_exec--;
3514
3515 if (pwp->wp_read + pwp->wp_write + pwp->wp_exec != 0) {
3516 /*
3517 * Reset the hat layer's protections on this page.
3518 */
3519 if (pwp->wp_oprot != 0) {
3520 uint_t prot = pwp->wp_oprot;
3521
3522 if (pwp->wp_read)
3523 prot &=
3524 ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3525 if (pwp->wp_write)
3526 prot &= ~PROT_WRITE;
3527 if (pwp->wp_exec)
3528 prot &=
3529 ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3530 if (!(pwp->wp_flags & WP_NOWATCH) &&
3531 pwp->wp_prot != prot &&
3532 (pwp->wp_flags & WP_SETPROT) == 0) {
3533 pwp->wp_flags |= WP_SETPROT;
3534 pwp->wp_list = p->p_wprot;
3535 p->p_wprot = pwp;
3536 }
3537 pwp->wp_prot = (uchar_t)prot;
3538 }
3539 } else {
3540 /*
3541 * No watched areas remain in this page.
3542 * Reset everything to normal.
3543 */
3544 if (pwp->wp_oprot != 0) {
3545 pwp->wp_prot = pwp->wp_oprot;
3546 if ((pwp->wp_flags & WP_SETPROT) == 0) {
3547 pwp->wp_flags |= WP_SETPROT;
3548 pwp->wp_list = p->p_wprot;
3549 p->p_wprot = pwp;
3550 }
3551 }
3552 }
3553
3554 pwp = AVL_NEXT(tree, pwp);
3555 }
3556
3557 AS_LOCK_EXIT(as);
3558 }
3559
3560 /*
3561 * Return the original protections for the specified page.
3562 */
3563 static void
3564 getwatchprot(struct as *as, caddr_t addr, uint_t *prot)
3565 {
3566 struct watched_page *pwp;
3567 struct watched_page tpw;
3568
3569 ASSERT(AS_LOCK_HELD(as));
3570
3571 tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
3572 if ((pwp = avl_find(&as->a_wpage, &tpw, NULL)) != NULL)
3573 *prot = pwp->wp_oprot;
3574 }
3575
3576 static prpagev_t *
3577 pr_pagev_create(struct seg *seg, int check_noreserve)
3578 {
3579 prpagev_t *pagev = kmem_alloc(sizeof (prpagev_t), KM_SLEEP);
3580 size_t total_pages = seg_pages(seg);
3581
3582 /*
3583 * Limit the size of our vectors to pagev_lim pages at a time. We need
3584 	 * 4 or 5 bytes of storage per page, so this means we limit ourselves
3585 * to about a megabyte of kernel heap by default.
3586 */
3587 pagev->pg_npages = MIN(total_pages, pagev_lim);
3588 pagev->pg_pnbase = 0;
3589
3590 pagev->pg_protv =
3591 kmem_alloc(pagev->pg_npages * sizeof (uint_t), KM_SLEEP);
3592
3593 if (check_noreserve)
3594 pagev->pg_incore =
3595 kmem_alloc(pagev->pg_npages * sizeof (char), KM_SLEEP);
3596 else
3597 pagev->pg_incore = NULL;
3598
3599 return (pagev);
3600 }
3601
3602 static void
3603 pr_pagev_destroy(prpagev_t *pagev)
3604 {
3605 if (pagev->pg_incore != NULL)
3606 kmem_free(pagev->pg_incore, pagev->pg_npages * sizeof (char));
3607
3608 kmem_free(pagev->pg_protv, pagev->pg_npages * sizeof (uint_t));
3609 kmem_free(pagev, sizeof (prpagev_t));
3610 }
3611
3612 static caddr_t
3613 pr_pagev_fill(prpagev_t *pagev, struct seg *seg, caddr_t addr, caddr_t eaddr)
3614 {
3615 ulong_t lastpg = seg_page(seg, eaddr - 1);
3616 ulong_t pn, pnlim;
3617 caddr_t saddr;
3618 size_t len;
3619
3620 ASSERT(addr >= seg->s_base && addr <= eaddr);
3621
3622 if (addr == eaddr)
3623 return (eaddr);
3624
3625 refill:
3626 ASSERT(addr < eaddr);
3627 pagev->pg_pnbase = seg_page(seg, addr);
3628 pnlim = pagev->pg_pnbase + pagev->pg_npages;
3629 saddr = addr;
3630
3631 if (lastpg < pnlim)
3632 len = (size_t)(eaddr - addr);
3633 else
3634 len = pagev->pg_npages * PAGESIZE;
3635
3636 if (pagev->pg_incore != NULL) {
3637 /*
3638 * INCORE cleverly has different semantics than GETPROT:
3639 * it returns info on pages up to but NOT including addr + len.
3640 */
3641 SEGOP_INCORE(seg, addr, len, pagev->pg_incore);
3642 pn = pagev->pg_pnbase;
3643
3644 do {
3645 /*
3646 * Guilty knowledge here: We know that segvn_incore
3647 * returns more than just the low-order bit that
3648 * indicates the page is actually in memory. If any
3649 * bits are set, then the page has backing store.
3650 */
3651 if (pagev->pg_incore[pn++ - pagev->pg_pnbase])
3652 goto out;
3653
3654 } while ((addr += PAGESIZE) < eaddr && pn < pnlim);
3655
3656 /*
3657 * If we examined all the pages in the vector but we're not
3658 * at the end of the segment, take another lap.
3659 */
3660 if (addr < eaddr)
3661 goto refill;
3662 }
3663
3664 /*
3665 * Need to take len - 1 because addr + len is the address of the
3666 * first byte of the page just past the end of what we want.
3667 */
3668 out:
3669 SEGOP_GETPROT(seg, saddr, len - 1, pagev->pg_protv);
3670 return (addr);
3671 }
3672
3673 static caddr_t
3674 pr_pagev_nextprot(prpagev_t *pagev, struct seg *seg,
3675 caddr_t *saddrp, caddr_t eaddr, uint_t *protp)
3676 {
3677 /*
3678 * Our starting address is either the specified address, or the base
3679 * address from the start of the pagev. If the latter is greater,
3680 * this means a previous call to pr_pagev_fill has already scanned
3681 * further than the end of the previous mapping.
3682 */
3683 caddr_t base = seg->s_base + pagev->pg_pnbase * PAGESIZE;
3684 caddr_t addr = MAX(*saddrp, base);
3685 ulong_t pn = seg_page(seg, addr);
3686 uint_t prot, nprot;
3687
3688 /*
3689 * If we're dealing with noreserve pages, then advance addr to
3690 * the address of the next page which has backing store.
3691 */
3692 if (pagev->pg_incore != NULL) {
3693 while (pagev->pg_incore[pn - pagev->pg_pnbase] == 0) {
3694 if ((addr += PAGESIZE) == eaddr) {
3695 *saddrp = addr;
3696 prot = 0;
3697 goto out;
3698 }
3699 if (++pn == pagev->pg_pnbase + pagev->pg_npages) {
3700 addr = pr_pagev_fill(pagev, seg, addr, eaddr);
3701 if (addr == eaddr) {
3702 *saddrp = addr;
3703 prot = 0;
3704 goto out;
3705 }
3706 pn = seg_page(seg, addr);
3707 }
3708 }
3709 }
3710
3711 /*
3712 * Get the protections on the page corresponding to addr.
3713 */
3714 pn = seg_page(seg, addr);
3715 ASSERT(pn >= pagev->pg_pnbase);
3716 ASSERT(pn < (pagev->pg_pnbase + pagev->pg_npages));
3717
3718 prot = pagev->pg_protv[pn - pagev->pg_pnbase];
3719 getwatchprot(seg->s_as, addr, &prot);
3720 *saddrp = addr;
3721
3722 /*
3723 * Now loop until we find a backed page with different protections
3724 * or we reach the end of this segment.
3725 */
3726 while ((addr += PAGESIZE) < eaddr) {
3727 /*
3728 * If pn has advanced to the page number following what we
3729 * have information on, refill the page vector and reset
3730 * addr and pn. If pr_pagev_fill does not return the
3731 * address of the next page, we have a discontiguity and
3732 * thus have reached the end of the current mapping.
3733 */
3734 if (++pn == pagev->pg_pnbase + pagev->pg_npages) {
3735 caddr_t naddr = pr_pagev_fill(pagev, seg, addr, eaddr);
3736 if (naddr != addr)
3737 goto out;
3738 pn = seg_page(seg, addr);
3739 }
3740
3741 /*
3742 * The previous page's protections are in prot, and it has
3743 * backing. If this page is MAP_NORESERVE and has no backing,
3744 * then end this mapping and return the previous protections.
3745 */
3746 if (pagev->pg_incore != NULL &&
3747 pagev->pg_incore[pn - pagev->pg_pnbase] == 0)
3748 break;
3749
3750 /*
3751 * Otherwise end the mapping if this page's protections (nprot)
3752 * are different than those in the previous page (prot).
3753 */
3754 nprot = pagev->pg_protv[pn - pagev->pg_pnbase];
3755 getwatchprot(seg->s_as, addr, &nprot);
3756
3757 if (nprot != prot)
3758 break;
3759 }
3760
3761 out:
3762 *protp = prot;
3763 return (addr);
3764 }
3765
3766 size_t
3767 pr_getsegsize(struct seg *seg, int reserved)
3768 {
3769 size_t size = seg->s_size;
3770
3771 /*
3772 * If we're interested in the reserved space, return the size of the
3773 * segment itself. Everything else in this function is a special case
3774 * to determine the actual underlying size of various segment types.
3775 */
3776 if (reserved)
3777 return (size);
3778
3779 /*
3780 * If this is a segvn mapping of a regular file, return the smaller
3781 * of the segment size and the remaining size of the file beyond
3782 * the file offset corresponding to seg->s_base.
3783 */
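	/*
	 * For example, with 4K pages, an eight-page segment mapping a
	 * 10000-byte regular file at offset 0 reports 12288 bytes here
	 * (the file size rounded up to three pages) rather than the 32K
	 * of address space the segment itself reserves.
	 */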
3784 if (seg->s_ops == &segvn_ops) {
3785 vattr_t vattr;
3786 vnode_t *vp;
3787
3788 vattr.va_mask = AT_SIZE;
3789
3790 if (SEGOP_GETVP(seg, seg->s_base, &vp) == 0 &&
3791 vp != NULL && vp->v_type == VREG &&
3792 VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
3793
3794 u_offset_t fsize = vattr.va_size;
3795 u_offset_t offset = SEGOP_GETOFFSET(seg, seg->s_base);
3796
3797 if (fsize < offset)
3798 fsize = 0;
3799 else
3800 fsize -= offset;
3801
3802 fsize = roundup(fsize, (u_offset_t)PAGESIZE);
3803
3804 if (fsize < (u_offset_t)size)
3805 size = (size_t)fsize;
3806 }
3807
3808 return (size);
3809 }
3810
3811 /*
3812 * If this is an ISM shared segment, don't include pages that are
3813 * beyond the real size of the spt segment that backs it.
3814 */
3815 if (seg->s_ops == &segspt_shmops)
3816 return (MIN(spt_realsize(seg), size));
3817
3818 /*
3819 	 * If this segment is a mapping from /dev/null, then this is a
3820 * reservation of virtual address space and has no actual size.
3821 * Such segments are backed by segdev and have type set to neither
3822 * MAP_SHARED nor MAP_PRIVATE.
3823 */
3824 if (seg->s_ops == &segdev_ops &&
3825 ((SEGOP_GETTYPE(seg, seg->s_base) &
3826 (MAP_SHARED | MAP_PRIVATE)) == 0))
3827 return (0);
3828
3829 /*
3830 * If this segment doesn't match one of the special types we handle,
3831 * just return the size of the segment itself.
3832 */
3833 return (size);
3834 }
3835
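/*
 * Return the protections of the run of pages beginning at *saddrp in seg,
 * setting *naddrp to the first address past that run.  Callers walk a
 * segment in the style sketched below (cf. prpdsize() above); *tmp carries
 * a prpagev_t between calls and must be released with pr_getprot_done()
 * if the walk is abandoned before naddr reaches eaddr:
 *
 *	void *tmp = NULL;
 *	for (saddr = seg->s_base; saddr < eaddr; saddr = naddr)
 *		prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
 *	ASSERT(tmp == NULL);
 */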
3836 uint_t
3837 pr_getprot(struct seg *seg, int reserved, void **tmp,
3838 caddr_t *saddrp, caddr_t *naddrp, caddr_t eaddr)
3839 {
3840 struct as *as = seg->s_as;
3841
3842 caddr_t saddr = *saddrp;
3843 caddr_t naddr;
3844
3845 int check_noreserve;
3846 uint_t prot;
3847
3848 union {
3849 struct segvn_data *svd;
3850 struct segdev_data *sdp;
3851 void *data;
3852 } s;
3853
3854 s.data = seg->s_data;
3855
3856 ASSERT(AS_WRITE_HELD(as));
3857 ASSERT(saddr >= seg->s_base && saddr < eaddr);
3858 ASSERT(eaddr <= seg->s_base + seg->s_size);
3859
3860 /*
3861 * Don't include MAP_NORESERVE pages in the address range
3862 * unless their mappings have actually materialized.
3863 * We cheat by knowing that segvn is the only segment
3864 * driver that supports MAP_NORESERVE.
3865 */
3866 check_noreserve =
3867 (!reserved && seg->s_ops == &segvn_ops && s.svd != NULL &&
3868 (s.svd->vp == NULL || s.svd->vp->v_type != VREG) &&
3869 (s.svd->flags & MAP_NORESERVE));
3870
3871 /*
3872 * Examine every page only as a last resort. We use guilty knowledge
3873 * of segvn and segdev to avoid this: if there are no per-page
3874 * protections present in the segment and we don't care about
3875 * MAP_NORESERVE, then s_data->prot is the prot for the whole segment.
3876 */
3877 if (!check_noreserve && saddr == seg->s_base &&
3878 seg->s_ops == &segvn_ops && s.svd != NULL && s.svd->pageprot == 0) {
3879 prot = s.svd->prot;
3880 getwatchprot(as, saddr, &prot);
3881 naddr = eaddr;
3882
3883 } else if (saddr == seg->s_base && seg->s_ops == &segdev_ops &&
3884 s.sdp != NULL && s.sdp->pageprot == 0) {
3885 prot = s.sdp->prot;
3886 getwatchprot(as, saddr, &prot);
3887 naddr = eaddr;
3888
3889 } else {
3890 prpagev_t *pagev;
3891
3892 /*
3893 * If addr is sitting at the start of the segment, then
3894 * create a page vector to store protection and incore
3895 * information for pages in the segment, and fill it.
3896 * Otherwise, we expect *tmp to address the prpagev_t
3897 * allocated by a previous call to this function.
3898 */
3899 if (saddr == seg->s_base) {
3900 pagev = pr_pagev_create(seg, check_noreserve);
3901 saddr = pr_pagev_fill(pagev, seg, saddr, eaddr);
3902
3903 ASSERT(*tmp == NULL);
3904 *tmp = pagev;
3905
3906 ASSERT(saddr <= eaddr);
3907 *saddrp = saddr;
3908
3909 if (saddr == eaddr) {
3910 naddr = saddr;
3911 prot = 0;
3912 goto out;
3913 }
3914
3915 } else {
3916 ASSERT(*tmp != NULL);
3917 pagev = (prpagev_t *)*tmp;
3918 }
3919
3920 naddr = pr_pagev_nextprot(pagev, seg, saddrp, eaddr, &prot);
3921 ASSERT(naddr <= eaddr);
3922 }
3923
3924 out:
3925 if (naddr == eaddr)
3926 pr_getprot_done(tmp);
3927 *naddrp = naddr;
3928 return (prot);
3929 }

void
pr_getprot_done(void **tmp)
{
	if (*tmp != NULL) {
		pr_pagev_destroy((prpagev_t *)*tmp);
		*tmp = NULL;
	}
}
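
/*
 * Usage sketch for the pr_getprot()/pr_getprot_done() protocol
 * (illustrative, not compiled; it mirrors the loops in prgetxmap()
 * below).  The caller passes the same tmp cookie back in on every
 * iteration; pr_getprot() frees the page vector automatically once
 * naddr reaches eaddr, so a walk run to completion need not call
 * pr_getprot_done() itself:
 *
 *	void *tmp = NULL;
 *	for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
 *		prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
 *		(process the range [saddr, naddr) with protections prot)
 *	}
 *	ASSERT(tmp == NULL);
 *
 * A caller that abandons the walk early must call pr_getprot_done(&tmp)
 * to free the page vector.
 */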

/*
 * Return true iff the vnode is a /proc file from the object directory.
 */
int
pr_isobject(vnode_t *vp)
{
	return (vn_matchops(vp, prvnodeops) && VTOP(vp)->pr_type == PR_OBJECT);
}

/*
 * Return true iff the vnode is a /proc file opened by the process itself.
 */
int
pr_isself(vnode_t *vp)
{
	/*
	 * XXX: To retain binary compatibility with the old
	 * ioctl()-based version of /proc, we exempt self-opens
	 * of /proc/<pid> from being marked close-on-exec.
	 */
	return (vn_matchops(vp, prvnodeops) &&
	    (VTOP(vp)->pr_flags & PR_ISSELF) &&
	    VTOP(vp)->pr_type != PR_PIDDIR);
}

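/*
 * Return the HAT page size in effect at saddr (-1 if no translation
 * is loaded there) and set *naddrp to the end, capped at eaddr, of
 * the run of pages sharing that same page size.
 */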
static ssize_t
pr_getpagesize(struct seg *seg, caddr_t saddr, caddr_t *naddrp, caddr_t eaddr)
{
	ssize_t pagesize, hatsize;

	ASSERT(AS_WRITE_HELD(seg->s_as));
	ASSERT(IS_P2ALIGNED(saddr, PAGESIZE));
	ASSERT(IS_P2ALIGNED(eaddr, PAGESIZE));
	ASSERT(saddr < eaddr);

	pagesize = hatsize = hat_getpagesize(seg->s_as->a_hat, saddr);
	ASSERT(pagesize == -1 || IS_P2ALIGNED(pagesize, pagesize));
	ASSERT(pagesize != 0);

	if (pagesize == -1)
		pagesize = PAGESIZE;

	saddr += P2NPHASE((uintptr_t)saddr, pagesize);

	while (saddr < eaddr) {
		if (hatsize != hat_getpagesize(seg->s_as->a_hat, saddr))
			break;
		ASSERT(IS_P2ALIGNED(saddr, pagesize));
		saddr += pagesize;
	}

	*naddrp = ((saddr < eaddr) ? saddr : eaddr);
	return (hatsize);
}

/*
 * Return an array of structures with extended memory map information.
 * We allocate here; the caller must deallocate.
 */
int
prgetxmap(proc_t *p, list_t *iolhead)
{
	struct as *as = p->p_as;
	prxmap_t *mp;
	struct seg *seg;
	struct seg *brkseg, *stkseg;
	struct vnode *vp;
	struct vattr vattr;
	uint_t prot;

	ASSERT(as != &kas && AS_WRITE_HELD(as));

	/*
	 * Request an initial buffer size that doesn't waste memory
	 * if the address space has only a small number of segments.
	 */
	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));

	if ((seg = AS_SEGFIRST(as)) == NULL)
		return (0);

	brkseg = break_seg(p);
	stkseg = as_segat(as, prgetstackbase(p));

	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
		caddr_t saddr, naddr, baddr;
		void *tmp = NULL;
		ssize_t psz;
		char *parr;
		uint64_t npages;
		uint64_t pagenum;

		/*
		 * Segment loop part one: iterate from the base of the segment
		 * to its end, pausing at each address boundary (baddr) between
		 * ranges that have different virtual memory protections.
		 */
		for (saddr = seg->s_base; saddr < eaddr; saddr = baddr) {
			prot = pr_getprot(seg, 0, &tmp, &saddr, &baddr, eaddr);
			ASSERT(baddr >= saddr && baddr <= eaddr);

			/*
			 * Segment loop part two: iterate from the current
			 * position to the end of the protection boundary,
			 * pausing at each address boundary (naddr) between
			 * ranges that have different underlying page sizes.
			 */
			for (; saddr < baddr; saddr = naddr) {
				psz = pr_getpagesize(seg, saddr, &naddr, baddr);
				ASSERT(naddr >= saddr && naddr <= baddr);

				mp = pr_iol_newbuf(iolhead, sizeof (*mp));

				mp->pr_vaddr = (uintptr_t)saddr;
				mp->pr_size = naddr - saddr;
				mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
				mp->pr_mflags = 0;
				if (prot & PROT_READ)
					mp->pr_mflags |= MA_READ;
				if (prot & PROT_WRITE)
					mp->pr_mflags |= MA_WRITE;
				if (prot & PROT_EXEC)
					mp->pr_mflags |= MA_EXEC;
				if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
					mp->pr_mflags |= MA_SHARED;
				if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
					mp->pr_mflags |= MA_NORESERVE;
				if (seg->s_ops == &segspt_shmops ||
				    (seg->s_ops == &segvn_ops &&
				    (SEGOP_GETVP(seg, saddr, &vp) != 0 ||
				    vp == NULL)))
					mp->pr_mflags |= MA_ANON;
				if (seg == brkseg)
					mp->pr_mflags |= MA_BREAK;
				else if (seg == stkseg)
					mp->pr_mflags |= MA_STACK;
				if (seg->s_ops == &segspt_shmops)
					mp->pr_mflags |= MA_ISM | MA_SHM;

				mp->pr_pagesize = PAGESIZE;
				if (psz == -1) {
					mp->pr_hatpagesize = 0;
				} else {
					mp->pr_hatpagesize = psz;
				}

				/*
				 * Manufacture a filename for the "object" dir.
				 */
				mp->pr_dev = PRNODEV;
				vattr.va_mask = AT_FSID|AT_NODEID;
				if (seg->s_ops == &segvn_ops &&
				    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
				    vp != NULL && vp->v_type == VREG &&
				    VOP_GETATTR(vp, &vattr, 0, CRED(),
				    NULL) == 0) {
					mp->pr_dev = vattr.va_fsid;
					mp->pr_ino = vattr.va_nodeid;
					if (vp == p->p_exec)
						(void) strcpy(mp->pr_mapname,
						    "a.out");
					else
						pr_object_name(mp->pr_mapname,
						    vp, &vattr);
				}

				/*
				 * Get the SysV shared memory id, if any.
				 */
				if ((mp->pr_mflags & MA_SHARED) &&
				    p->p_segacct && (mp->pr_shmid = shmgetid(p,
				    seg->s_base)) != SHMID_NONE) {
					if (mp->pr_shmid == SHMID_FREE)
						mp->pr_shmid = -1;

					mp->pr_mflags |= MA_SHM;
				} else {
					mp->pr_shmid = -1;
				}

				npages = ((uintptr_t)(naddr - saddr)) >>
				    PAGESHIFT;
				parr = kmem_zalloc(npages, KM_SLEEP);

				SEGOP_INCORE(seg, saddr, naddr - saddr, parr);

				for (pagenum = 0; pagenum < npages;
				    pagenum++) {
					if (parr[pagenum] & SEG_PAGE_INCORE)
						mp->pr_rss++;
					if (parr[pagenum] & SEG_PAGE_ANON)
						mp->pr_anon++;
					if (parr[pagenum] & SEG_PAGE_LOCKED)
						mp->pr_locked++;
				}
				kmem_free(parr, npages);
			}
		}
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	return (0);
}
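
/*
 * Note on deallocation: the prxmap_t buffers chained on iolhead above
 * belong to the caller, which typically drains and frees the list with
 * pr_iol_copyout_and_free(), the companion of the pr_iol_* helpers used
 * here (an assumption about the consumer; any caller that frees every
 * list entry satisfies the contract).
 */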

/*
 * Return the process's credentials.  We don't need a 32-bit equivalent of
 * this function because prcred_t and prcred32_t are actually the same.
 */
void
prgetcred(proc_t *p, prcred_t *pcrp)
{
	mutex_enter(&p->p_crlock);
	cred2prcred(p->p_cred, pcrp);
	mutex_exit(&p->p_crlock);
}

/*
 * Compute the actual size of the prpriv_t structure.
 */
size_t
prgetprivsize(void)
{
	return (priv_prgetprivsize(NULL));
}

/*
 * Return the process's privileges.  We don't need a 32-bit equivalent of
 * this function because prpriv_t and prpriv32_t are actually the same.
 */
void
prgetpriv(proc_t *p, prpriv_t *pprp)
{
	mutex_enter(&p->p_crlock);
	cred2prpriv(p->p_cred, pprp);
	mutex_exit(&p->p_crlock);
}
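
/*
 * Usage sketch (illustrative, not compiled): prpriv_t is variable
 * length, so callers size the buffer with prgetprivsize() before
 * filling it with prgetpriv():
 *
 *	size_t sz = prgetprivsize();
 *	prpriv_t *pp = kmem_alloc(sz, KM_SLEEP);
 *	prgetpriv(p, pp);
 *	...
 *	kmem_free(pp, sz);
 */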

#ifdef _SYSCALL32_IMPL
/*
 * Return an array of structures with 32-bit extended memory map
 * information.  We allocate here; the caller must deallocate.
 */
int
prgetxmap32(proc_t *p, list_t *iolhead)
{
	struct as *as = p->p_as;
	prxmap32_t *mp;
	struct seg *seg;
	struct seg *brkseg, *stkseg;
	struct vnode *vp;
	struct vattr vattr;
	uint_t prot;

	ASSERT(as != &kas && AS_WRITE_HELD(as));

	/*
	 * Request an initial buffer size that doesn't waste memory
	 * if the address space has only a small number of segments.
	 */
	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));

	if ((seg = AS_SEGFIRST(as)) == NULL)
		return (0);

	brkseg = break_seg(p);
	stkseg = as_segat(as, prgetstackbase(p));

	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
		caddr_t saddr, naddr, baddr;
		void *tmp = NULL;
		ssize_t psz;
		char *parr;
		uint64_t npages;
		uint64_t pagenum;

		/*
		 * Segment loop part one: iterate from the base of the segment
		 * to its end, pausing at each address boundary (baddr) between
		 * ranges that have different virtual memory protections.
		 */
		for (saddr = seg->s_base; saddr < eaddr; saddr = baddr) {
			prot = pr_getprot(seg, 0, &tmp, &saddr, &baddr, eaddr);
			ASSERT(baddr >= saddr && baddr <= eaddr);

			/*
			 * Segment loop part two: iterate from the current
			 * position to the end of the protection boundary,
			 * pausing at each address boundary (naddr) between
			 * ranges that have different underlying page sizes.
			 */
			for (; saddr < baddr; saddr = naddr) {
				psz = pr_getpagesize(seg, saddr, &naddr, baddr);
				ASSERT(naddr >= saddr && naddr <= baddr);

				mp = pr_iol_newbuf(iolhead, sizeof (*mp));

				mp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
				mp->pr_size = (size32_t)(naddr - saddr);
				mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
				mp->pr_mflags = 0;
				if (prot & PROT_READ)
					mp->pr_mflags |= MA_READ;
				if (prot & PROT_WRITE)
					mp->pr_mflags |= MA_WRITE;
				if (prot & PROT_EXEC)
					mp->pr_mflags |= MA_EXEC;
				if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
					mp->pr_mflags |= MA_SHARED;
				if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
					mp->pr_mflags |= MA_NORESERVE;
				if (seg->s_ops == &segspt_shmops ||
				    (seg->s_ops == &segvn_ops &&
				    (SEGOP_GETVP(seg, saddr, &vp) != 0 ||
				    vp == NULL)))
					mp->pr_mflags |= MA_ANON;
				if (seg == brkseg)
					mp->pr_mflags |= MA_BREAK;
				else if (seg == stkseg)
					mp->pr_mflags |= MA_STACK;
				if (seg->s_ops == &segspt_shmops)
					mp->pr_mflags |= MA_ISM | MA_SHM;

				mp->pr_pagesize = PAGESIZE;
				if (psz == -1) {
					mp->pr_hatpagesize = 0;
				} else {
					mp->pr_hatpagesize = psz;
				}

				/*
				 * Manufacture a filename for the "object" dir.
				 */
				mp->pr_dev = PRNODEV32;
				vattr.va_mask = AT_FSID|AT_NODEID;
				if (seg->s_ops == &segvn_ops &&
				    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
				    vp != NULL && vp->v_type == VREG &&
				    VOP_GETATTR(vp, &vattr, 0, CRED(),
				    NULL) == 0) {
					(void) cmpldev(&mp->pr_dev,
					    vattr.va_fsid);
					mp->pr_ino = vattr.va_nodeid;
					if (vp == p->p_exec)
						(void) strcpy(mp->pr_mapname,
						    "a.out");
					else
						pr_object_name(mp->pr_mapname,
						    vp, &vattr);
				}

				/*
				 * Get the SysV shared memory id, if any.
				 */
				if ((mp->pr_mflags & MA_SHARED) &&
				    p->p_segacct && (mp->pr_shmid = shmgetid(p,
				    seg->s_base)) != SHMID_NONE) {
					if (mp->pr_shmid == SHMID_FREE)
						mp->pr_shmid = -1;

					mp->pr_mflags |= MA_SHM;
				} else {
					mp->pr_shmid = -1;
				}

				npages = ((uintptr_t)(naddr - saddr)) >>
				    PAGESHIFT;
				parr = kmem_zalloc(npages, KM_SLEEP);

				SEGOP_INCORE(seg, saddr, naddr - saddr, parr);

				for (pagenum = 0; pagenum < npages;
				    pagenum++) {
					if (parr[pagenum] & SEG_PAGE_INCORE)
						mp->pr_rss++;
					if (parr[pagenum] & SEG_PAGE_ANON)
						mp->pr_anon++;
					if (parr[pagenum] & SEG_PAGE_LOCKED)
						mp->pr_locked++;
				}
				kmem_free(parr, npages);
			}
		}
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	return (0);
}
#endif /* _SYSCALL32_IMPL */