/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2019 Joyent, Inc.
 * Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
 * Copyright 2022 MNX Cloud, Inc.
 * Copyright 2022 Oxide Computer Company
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/cmn_err.h>
#include <sys/cred.h>
#include <sys/priv.h>
#include <sys/debug.h>
#include <sys/errno.h>
#include <sys/inline.h>
#include <sys/kmem.h>
#include <sys/mman.h>
#include <sys/proc.h>
#include <sys/brand.h>
#include <sys/sobject.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/uio.h>
#include <sys/var.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/session.h>
#include <sys/pcb.h>
#include <sys/signal.h>
#include <sys/user.h>
#include <sys/disp.h>
#include <sys/class.h>
#include <sys/ts.h>
#include <sys/bitmap.h>
#include <sys/poll.h>
#include <sys/shm_impl.h>
#include <sys/fault.h>
#include <sys/syscall.h>
#include <sys/procfs.h>
#include <sys/processor.h>
#include <sys/cpuvar.h>
#include <sys/copyops.h>
#include <sys/time.h>
#include <sys/msacct.h>
#include <sys/flock_impl.h>
#include <sys/stropts.h>
#include <sys/strsubr.h>
#include <sys/pathname.h>
#include <sys/mode.h>
#include <sys/socketvar.h>
#include <sys/autoconf.h>
#include <sys/dtrace.h>
#include <sys/timod.h>
#include <sys/fs/namenode.h>
#include <netinet/udp.h>
#include <netinet/tcp.h>
#include <inet/cc.h>
#include <vm/as.h>
#include <vm/rm.h>
#include <vm/seg.h>
#include <vm/seg_vn.h>
#include <vm/seg_dev.h>
#include <vm/seg_spt.h>
#include <vm/page.h>
#include <sys/vmparam.h>
#include <sys/swap.h>
#include <fs/proc/prdata.h>
#include <sys/task.h>
#include <sys/project.h>
#include <sys/contract_impl.h>
#include <sys/contract/process.h>
#include <sys/contract/process_impl.h>
#include <sys/schedctl.h>
#include <sys/pool.h>
#include <sys/zone.h>
#include <sys/atomic.h>
#include <sys/sdt.h>

#define	MAX_ITERS_SPIN	5

typedef struct prpagev {
	uint_t *pg_protv;	/* vector of page permissions */
	char *pg_incore;	/* vector of incore flags */
	size_t pg_npages;	/* number of pages in protv and incore */
	ulong_t pg_pnbase;	/* pn within segment of first protv element */
} prpagev_t;

size_t pagev_lim = 256 * 1024;	/* limit on number of pages in prpagev_t */

extern struct seg_ops segdev_ops;	/* needs a header file */
extern struct seg_ops segspt_shmops;	/* needs a header file */

static	int	set_watched_page(proc_t *, caddr_t, caddr_t, ulong_t, ulong_t);
static	void	clear_watched_page(proc_t *, caddr_t, caddr_t, ulong_t);

/*
 * Choose an lwp from the complete set of lwps for the process.
 * This is called for any operation applied to the process
 * file descriptor that requires an lwp to operate upon.
 *
 * Returns a pointer to the thread for the selected LWP,
 * and with the dispatcher lock held for the thread.
 *
 * The algorithm for choosing an lwp is critical for /proc semantics;
 * don't touch this code unless you know all of the implications.
 */
kthread_t *
prchoose(proc_t *p)
{
	kthread_t *t;
	kthread_t *t_onproc = NULL;	/* running on processor */
	kthread_t *t_run = NULL;	/* runnable, on disp queue */
	kthread_t *t_sleep = NULL;	/* sleeping */
	kthread_t *t_hold = NULL;	/* sleeping, performing hold */
	kthread_t *t_susp = NULL;	/* suspended stop */
	kthread_t *t_jstop = NULL;	/* jobcontrol stop, w/o directed stop */
	kthread_t *t_jdstop = NULL;	/* jobcontrol stop with directed stop */
	kthread_t *t_req = NULL;	/* requested stop */
	kthread_t *t_istop = NULL;	/* event-of-interest stop */
	kthread_t *t_dtrace = NULL;	/* DTrace stop */

	ASSERT(MUTEX_HELD(&p->p_lock));

	/*
	 * If the agent lwp exists, it takes precedence over all others.
	 */
	if ((t = p->p_agenttp) != NULL) {
		thread_lock(t);
		return (t);
	}

	if ((t = p->p_tlist) == NULL)	/* start at the head of the list */
		return (t);
	do {		/* for each lwp in the process */
		if (VSTOPPED(t)) {	/* virtually stopped */
			if (t_req == NULL)
				t_req = t;
			continue;
		}

		/* If this is a process kernel thread, ignore it. */
		if ((t->t_proc_flag & TP_KTHREAD) != 0) {
			continue;
		}

		thread_lock(t);		/* make sure thread is in good state */
		switch (t->t_state) {
		default:
			panic("prchoose: bad thread state %d, thread 0x%p",
			    t->t_state, (void *)t);
			/*NOTREACHED*/
		case TS_SLEEP:
			/* this is filthy */
			if (t->t_wchan == (caddr_t)&p->p_holdlwps &&
			    t->t_wchan0 == NULL) {
				if (t_hold == NULL)
					t_hold = t;
			} else {
				if (t_sleep == NULL)
					t_sleep = t;
			}
			break;
		case TS_RUN:
		case TS_WAIT:
			if (t_run == NULL)
				t_run = t;
			break;
		case TS_ONPROC:
			if (t_onproc == NULL)
				t_onproc = t;
			break;
		case TS_ZOMB:		/* last possible choice */
			break;
		case TS_STOPPED:
			switch (t->t_whystop) {
			case PR_SUSPENDED:
				if (t_susp == NULL)
					t_susp = t;
				break;
			case PR_JOBCONTROL:
				if (t->t_proc_flag & TP_PRSTOP) {
					if (t_jdstop == NULL)
						t_jdstop = t;
				} else {
					if (t_jstop == NULL)
						t_jstop = t;
				}
				break;
			case PR_REQUESTED:
				if (t->t_dtrace_stop && t_dtrace == NULL)
					t_dtrace = t;
				else if (t_req == NULL)
					t_req = t;
				break;
			case PR_SYSENTRY:
			case PR_SYSEXIT:
			case PR_SIGNALLED:
			case PR_FAULTED:
				/*
				 * Make an lwp calling exit() be the
				 * last lwp seen in the process.
				 */
				if (t_istop == NULL ||
				    (t_istop->t_whystop == PR_SYSENTRY &&
				    t_istop->t_whatstop == SYS_exit))
					t_istop = t;
				break;
			case PR_CHECKPOINT:	/* can't happen? */
				break;
			default:
				panic("prchoose: bad t_whystop %d, thread 0x%p",
				    t->t_whystop, (void *)t);
				/*NOTREACHED*/
			}
			break;
		}
		thread_unlock(t);
	} while ((t = t->t_forw) != p->p_tlist);

	if (t_onproc)
		t = t_onproc;
	else if (t_run)
		t = t_run;
	else if (t_sleep)
		t = t_sleep;
	else if (t_jstop)
		t = t_jstop;
	else if (t_jdstop)
		t = t_jdstop;
	else if (t_istop)
		t = t_istop;
	else if (t_dtrace)
		t = t_dtrace;
	else if (t_req)
		t = t_req;
	else if (t_hold)
		t = t_hold;
	else if (t_susp)
		t = t_susp;
	else			/* TS_ZOMB */
		t = p->p_tlist;

	if (t != NULL)
		thread_lock(t);
	return (t);
}

/*
 * Wakeup anyone sleeping on the /proc vnode for the process/lwp to stop.
 * Also call pollwakeup() if any lwps are waiting in poll() for POLLPRI
 * on the /proc file descriptor.  Called from stop() when a traced
 * process stops on an event of interest.  Also called from exit()
 * and prinvalidate() to indicate POLLHUP and POLLERR respectively.
 */
void
prnotify(struct vnode *vp)
{
	prcommon_t *pcp = VTOP(vp)->pr_common;

	mutex_enter(&pcp->prc_mutex);
	cv_broadcast(&pcp->prc_wait);
	mutex_exit(&pcp->prc_mutex);
	if (pcp->prc_flags & PRC_POLL) {
		/*
		 * We call pollwakeup() with POLLHUP to ensure that
		 * the pollers are awakened even if they are polling
		 * for nothing (i.e., waiting for the process to exit).
		 * This enables the use of the PRC_POLL flag for optimization
		 * (we can turn off PRC_POLL only if we know no pollers remain).
		 */
		pcp->prc_flags &= ~PRC_POLL;
		pollwakeup(&pcp->prc_pollhead, POLLHUP);
	}
}

/* called immediately below, in prfree() */
static void
prfreenotify(vnode_t *vp)
{
	prnode_t *pnp;
	prcommon_t *pcp;

	while (vp != NULL) {
		pnp = VTOP(vp);
		pcp = pnp->pr_common;
		ASSERT(pcp->prc_thread == NULL);
		pcp->prc_proc = NULL;
		/*
		 * We can't call prnotify() here because we are holding
		 * pidlock.  We assert that there is no need to.
		 */
		mutex_enter(&pcp->prc_mutex);
		cv_broadcast(&pcp->prc_wait);
		mutex_exit(&pcp->prc_mutex);
		ASSERT(!(pcp->prc_flags & PRC_POLL));

		vp = pnp->pr_next;
		pnp->pr_next = NULL;
	}
}

/*
 * Called from a hook in freeproc() when a traced process is removed
 * from the process table.  The proc-table pointers of all associated
 * /proc vnodes are cleared to indicate that the process has gone away.
 */
void
prfree(proc_t *p)
{
	uint_t slot = p->p_slot;

	ASSERT(MUTEX_HELD(&pidlock));

	/*
	 * Block the process against /proc so it can be freed.
	 * It cannot be freed while locked by some controlling process.
	 * Lock ordering:
	 *	pidlock -> pr_pidlock -> p->p_lock -> pcp->prc_mutex
	 */
	mutex_enter(&pr_pidlock);	/* protects pcp->prc_proc */
	mutex_enter(&p->p_lock);
	while (p->p_proc_flag & P_PR_LOCK) {
		mutex_exit(&pr_pidlock);
		cv_wait(&pr_pid_cv[slot], &p->p_lock);
		mutex_exit(&p->p_lock);
		mutex_enter(&pr_pidlock);
		mutex_enter(&p->p_lock);
	}

	ASSERT(p->p_tlist == NULL);

	prfreenotify(p->p_plist);
	p->p_plist = NULL;

	prfreenotify(p->p_trace);
	p->p_trace = NULL;

	/*
	 * We broadcast to wake up everyone waiting for this process.
	 * No one can reach this process from this point on.
	 */
	cv_broadcast(&pr_pid_cv[slot]);

	mutex_exit(&p->p_lock);
	mutex_exit(&pr_pidlock);
}

/*
 * Called from a hook in exit() when a traced process is becoming a zombie.
 */
void
prexit(proc_t *p)
{
	ASSERT(MUTEX_HELD(&p->p_lock));

	if (pr_watch_active(p)) {
		pr_free_watchpoints(p);
		watch_disable(curthread);
	}
	/* pr_free_watched_pages() is called in exit(), after dropping p_lock */
	if (p->p_trace) {
		VTOP(p->p_trace)->pr_common->prc_flags |= PRC_DESTROY;
		prnotify(p->p_trace);
	}
	cv_broadcast(&pr_pid_cv[p->p_slot]);	/* pauselwps() */
}

/*
 * Called when a thread calls lwp_exit().
 */
void
prlwpexit(kthread_t *t)
{
	vnode_t *vp;
	prnode_t *pnp;
	prcommon_t *pcp;
	proc_t *p = ttoproc(t);
	lwpent_t *lep = p->p_lwpdir[t->t_dslot].ld_entry;

	ASSERT(t == curthread);
	ASSERT(MUTEX_HELD(&p->p_lock));

	/*
	 * The process must be blocked against /proc to do this safely.
	 * The lwp must not disappear while the process is marked P_PR_LOCK.
	 * It is the caller's responsibility to have called prbarrier(p).
	 */
	ASSERT(!(p->p_proc_flag & P_PR_LOCK));

	for (vp = p->p_plist; vp != NULL; vp = pnp->pr_next) {
		pnp = VTOP(vp);
		pcp = pnp->pr_common;
		if (pcp->prc_thread == t) {
			pcp->prc_thread = NULL;
			pcp->prc_flags |= PRC_DESTROY;
		}
	}

	for (vp = lep->le_trace; vp != NULL; vp = pnp->pr_next) {
		pnp = VTOP(vp);
		pcp = pnp->pr_common;
		pcp->prc_thread = NULL;
		pcp->prc_flags |= PRC_DESTROY;
		prnotify(vp);
	}

	if (p->p_trace)
		prnotify(p->p_trace);
}

/*
 * Called when a zombie thread is joined or when a
 * detached lwp exits.  Called from lwp_hash_out().
 */
void
prlwpfree(proc_t *p, lwpent_t *lep)
{
	vnode_t *vp;
	prnode_t *pnp;
	prcommon_t *pcp;

	ASSERT(MUTEX_HELD(&p->p_lock));

	/*
	 * The process must be blocked against /proc to do this safely.
	 * The lwp must not disappear while the process is marked P_PR_LOCK.
	 * It is the caller's responsibility to have called prbarrier(p).
	 */
	ASSERT(!(p->p_proc_flag & P_PR_LOCK));

	vp = lep->le_trace;
	lep->le_trace = NULL;
	while (vp) {
		prnotify(vp);
		pnp = VTOP(vp);
		pcp = pnp->pr_common;
		ASSERT(pcp->prc_thread == NULL &&
		    (pcp->prc_flags & PRC_DESTROY));
		pcp->prc_tslot = -1;
		vp = pnp->pr_next;
		pnp->pr_next = NULL;
	}

	if (p->p_trace)
		prnotify(p->p_trace);
}

/*
 * Called from a hook in exec() when a thread starts exec().
 */
void
prexecstart(void)
{
	proc_t *p = ttoproc(curthread);
	klwp_t *lwp = ttolwp(curthread);

	/*
	 * The P_PR_EXEC flag blocks /proc operations for
	 * the duration of the exec().
	 * We can't start exec() while the process is
	 * locked by /proc, so we call prbarrier().
	 * lwp_nostop keeps the process from being stopped
	 * via job control for the duration of the exec().
	 */

	ASSERT(MUTEX_HELD(&p->p_lock));
	prbarrier(p);
	lwp->lwp_nostop++;
	p->p_proc_flag |= P_PR_EXEC;
}

/*
 * Called from a hook in exec() when a thread finishes exec().
 * The thread may or may not have succeeded.  Some other thread
 * may have beat it to the punch.
 */
void
prexecend(void)
{
	proc_t *p = ttoproc(curthread);
	klwp_t *lwp = ttolwp(curthread);
	vnode_t *vp;
	prnode_t *pnp;
	prcommon_t *pcp;
	model_t model = p->p_model;
	id_t tid = curthread->t_tid;
	int tslot = curthread->t_dslot;

	ASSERT(MUTEX_HELD(&p->p_lock));

	lwp->lwp_nostop--;
	if (p->p_flag & SEXITLWPS) {
		/*
		 * We are on our way to exiting because some
		 * other thread beat us in the race to exec().
		 * Don't clear the P_PR_EXEC flag in this case.
		 */
		return;
	}

	/*
	 * Wake up anyone waiting in /proc for the process to complete exec().
	 */
	p->p_proc_flag &= ~P_PR_EXEC;
	if ((vp = p->p_trace) != NULL) {
		pcp = VTOP(vp)->pr_common;
		mutex_enter(&pcp->prc_mutex);
		cv_broadcast(&pcp->prc_wait);
		mutex_exit(&pcp->prc_mutex);
		for (; vp != NULL; vp = pnp->pr_next) {
			pnp = VTOP(vp);
			pnp->pr_common->prc_datamodel = model;
		}
	}
	if ((vp = p->p_lwpdir[tslot].ld_entry->le_trace) != NULL) {
		/*
		 * We dealt with the process common above.
		 */
		ASSERT(p->p_trace != NULL);
		pcp = VTOP(vp)->pr_common;
		mutex_enter(&pcp->prc_mutex);
		cv_broadcast(&pcp->prc_wait);
		mutex_exit(&pcp->prc_mutex);
		for (; vp != NULL; vp = pnp->pr_next) {
			pnp = VTOP(vp);
			pcp = pnp->pr_common;
			pcp->prc_datamodel = model;
			pcp->prc_tid = tid;
			pcp->prc_tslot = tslot;
		}
	}
}

/*
 * Called from a hook in relvm() just before freeing the address space.
 * We free all the watched areas now.
 */
void
prrelvm(void)
{
	proc_t *p = ttoproc(curthread);

	mutex_enter(&p->p_lock);
	prbarrier(p);	/* block all other /proc operations */
	if (pr_watch_active(p)) {
		pr_free_watchpoints(p);
		watch_disable(curthread);
	}
	mutex_exit(&p->p_lock);
	pr_free_watched_pages(p);
}

/*
 * Called from hooks in exec-related code when a traced process
 * attempts to exec(2) a setuid/setgid program or an unreadable
 * file.  Rather than fail the exec we invalidate the associated
 * /proc vnodes so that subsequent attempts to use them will fail.
 *
 * All /proc vnodes, except directory vnodes, are retained on a linked
 * list (rooted at p_plist in the process structure) until last close.
 *
 * A controlling process must re-open the /proc files in order to
 * regain control.
 */
void
prinvalidate(struct user *up)
{
	kthread_t *t = curthread;
	proc_t *p = ttoproc(t);
	vnode_t *vp;
	prnode_t *pnp;
	int writers = 0;

	mutex_enter(&p->p_lock);
	prbarrier(p);	/* block all other /proc operations */

	/*
	 * At this moment, there can be only one lwp in the process.
	 */
	ASSERT(p->p_lwpcnt == 1 && p->p_zombcnt == 0);

	/*
	 * Invalidate any currently active /proc vnodes.
	 */
	for (vp = p->p_plist; vp != NULL; vp = pnp->pr_next) {
		pnp = VTOP(vp);
		switch (pnp->pr_type) {
		case PR_PSINFO:		/* these files can be read by anyone */
		case PR_LPSINFO:
		case PR_LWPSINFO:
		case PR_LWPDIR:
		case PR_LWPIDDIR:
		case PR_USAGE:
		case PR_LUSAGE:
		case PR_LWPUSAGE:
			break;
		default:
			pnp->pr_flags |= PR_INVAL;
			break;
		}
	}
	/*
	 * Wake up anyone waiting for the process or lwp.
	 * p->p_trace is guaranteed to be non-NULL if there
	 * are any open /proc files for this process.
	 */
	if ((vp = p->p_trace) != NULL) {
		prcommon_t *pcp = VTOP(vp)->pr_pcommon;

		prnotify(vp);
		/*
		 * Are there any writers?
		 */
		if ((writers = pcp->prc_writers) != 0) {
			/*
			 * Clear the exclusive open flag (old /proc interface).
			 * Set prc_selfopens equal to prc_writers so that
			 * the next O_EXCL|O_WRITE open will succeed
			 * even with existing (though invalid) writers.
			 * prclose() must decrement prc_selfopens when
			 * the invalid files are closed.
			 */
			pcp->prc_flags &= ~PRC_EXCL;
			ASSERT(pcp->prc_selfopens <= writers);
			pcp->prc_selfopens = writers;
		}
	}
	vp = p->p_lwpdir[t->t_dslot].ld_entry->le_trace;
	while (vp != NULL) {
		/*
		 * We should not invalidate the lwpiddir vnodes,
		 * but the necessities of maintaining the old
		 * ioctl()-based version of /proc require it.
		 */
		pnp = VTOP(vp);
		pnp->pr_flags |= PR_INVAL;
		prnotify(vp);
		vp = pnp->pr_next;
	}

	/*
	 * If any tracing flags are in effect and any vnodes are open for
	 * writing then set the requested-stop and run-on-last-close flags.
	 * Otherwise, clear all tracing flags.
	 */
	t->t_proc_flag &= ~TP_PAUSE;
	if ((p->p_proc_flag & P_PR_TRACE) && writers) {
		t->t_proc_flag |= TP_PRSTOP;
		aston(t);	/* so ISSIG will see the flag */
		p->p_proc_flag |= P_PR_RUNLCL;
	} else {
		premptyset(&up->u_entrymask);		/* syscalls */
		premptyset(&up->u_exitmask);
		up->u_systrap = 0;
		premptyset(&p->p_sigmask);		/* signals */
		premptyset(&p->p_fltmask);		/* faults */
		t->t_proc_flag &= ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
		p->p_proc_flag &= ~(P_PR_RUNLCL|P_PR_KILLCL|P_PR_TRACE);
		prnostep(ttolwp(t));
	}

	mutex_exit(&p->p_lock);
}

/*
 * Acquire the controlled process's p_lock and mark it P_PR_LOCK.
 * Return with pr_pidlock held in all cases.
 * Return with p_lock held if the process still exists.
 * Return value is the process pointer if the process still exists, else NULL.
 * If we lock the process, give ourselves kernel priority to avoid deadlocks;
 * this is undone in prunlock().
 */
proc_t *
pr_p_lock(prnode_t *pnp)
{
	proc_t *p;
	prcommon_t *pcp;

	mutex_enter(&pr_pidlock);
	if ((pcp = pnp->pr_pcommon) == NULL || (p = pcp->prc_proc) == NULL)
		return (NULL);
	mutex_enter(&p->p_lock);
	while (p->p_proc_flag & P_PR_LOCK) {
		/*
		 * This cv/mutex pair is persistent even if
		 * the process disappears while we sleep.
		 */
		kcondvar_t *cv = &pr_pid_cv[p->p_slot];
		kmutex_t *mp = &p->p_lock;

		mutex_exit(&pr_pidlock);
		cv_wait(cv, mp);
		mutex_exit(mp);
		mutex_enter(&pr_pidlock);
		if (pcp->prc_proc == NULL)
			return (NULL);
		ASSERT(p == pcp->prc_proc);
		mutex_enter(&p->p_lock);
	}
	p->p_proc_flag |= P_PR_LOCK;
	return (p);
}

/*
 * Lock the target process by setting P_PR_LOCK and grabbing p->p_lock.
 * This prevents any lwp of the process from disappearing and
 * blocks most operations that a process can perform on itself.
 * Returns 0 on success, a non-zero error number on failure.
 *
 * 'zdisp' is ZYES or ZNO to indicate whether prlock() should succeed when
 * the subject process is a zombie (ZYES) or fail for zombies (ZNO).
 *
 * error returns:
 *	ENOENT: process or lwp has disappeared or process is exiting
 *		(or has become a zombie and zdisp == ZNO).
 *	EAGAIN: procfs vnode has become invalid.
 *	EINTR: signal arrived while waiting for exec to complete.
 */
int
prlock(prnode_t *pnp, int zdisp)
{
	prcommon_t *pcp;
	proc_t *p;

again:
	pcp = pnp->pr_common;
	p = pr_p_lock(pnp);
	mutex_exit(&pr_pidlock);

	/*
	 * Return ENOENT immediately if there is no process.
	 */
	if (p == NULL)
		return (ENOENT);

	ASSERT(p == pcp->prc_proc && p->p_stat != 0 && p->p_stat != SIDL);

	/*
	 * Return ENOENT if process entered zombie state or is exiting
	 * and the 'zdisp' flag is set to ZNO indicating not to lock zombies.
	 */
	if (zdisp == ZNO &&
	    ((pcp->prc_flags & PRC_DESTROY) || (p->p_flag & SEXITING))) {
		prunlock(pnp);
		return (ENOENT);
	}

	/*
	 * If lwp-specific, check to see if lwp has disappeared.
	 */
	if (pcp->prc_flags & PRC_LWP) {
		if ((zdisp == ZNO && (pcp->prc_flags & PRC_DESTROY)) ||
		    pcp->prc_tslot == -1) {
			prunlock(pnp);
			return (ENOENT);
		}
	}

	/*
	 * Return EAGAIN if we have encountered a security violation.
	 * (The process exec'd a set-id or unreadable executable file.)
	 */
	if (pnp->pr_flags & PR_INVAL) {
		prunlock(pnp);
		return (EAGAIN);
	}

	/*
	 * If process is undergoing an exec(), wait for
	 * completion and then start all over again.
	 */
	if (p->p_proc_flag & P_PR_EXEC) {
		pcp = pnp->pr_pcommon;	/* Put on the correct sleep queue */
		mutex_enter(&pcp->prc_mutex);
		prunlock(pnp);
		if (!cv_wait_sig(&pcp->prc_wait, &pcp->prc_mutex)) {
			mutex_exit(&pcp->prc_mutex);
			return (EINTR);
		}
		mutex_exit(&pcp->prc_mutex);
		goto again;
	}

	/*
	 * We return holding p->p_lock.
	 */
	return (0);
}
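
/*
 * A minimal usage sketch (illustrative only; real callers live elsewhere
 * in procfs): entry points typically bracket their work with prlock()
 * and prunlock(), e.g.
 *
 *	int error;
 *
 *	if ((error = prlock(pnp, ZNO)) != 0)
 *		return (error);		-- gone, invalid, or interrupted
 *	... operate on pnp->pr_common->prc_proc with p->p_lock held ...
 *	prunlock(pnp);
 */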

/*
 * Undo prlock() and pr_p_lock().
 * p->p_lock is still held; pr_pidlock is no longer held.
 *
 * prunmark() drops the P_PR_LOCK flag and wakes up another thread,
 * if any, waiting for the flag to be dropped; it retains p->p_lock.
 *
 * prunlock() calls prunmark() and then drops p->p_lock.
 */
void
prunmark(proc_t *p)
{
	ASSERT(p->p_proc_flag & P_PR_LOCK);
	ASSERT(MUTEX_HELD(&p->p_lock));

	cv_signal(&pr_pid_cv[p->p_slot]);
	p->p_proc_flag &= ~P_PR_LOCK;
}

void
prunlock(prnode_t *pnp)
{
	prcommon_t *pcp = pnp->pr_common;
	proc_t *p = pcp->prc_proc;

	/*
	 * If we (or someone) gave it a SIGKILL, and it is not
	 * already a zombie, set it running unconditionally.
	 */
	if ((p->p_flag & SKILLED) &&
	    !(p->p_flag & SEXITING) &&
	    !(pcp->prc_flags & PRC_DESTROY) &&
	    !((pcp->prc_flags & PRC_LWP) && pcp->prc_tslot == -1))
		(void) pr_setrun(pnp, 0);
	prunmark(p);
	mutex_exit(&p->p_lock);
}

/*
 * Called while holding p->p_lock to delay until the process is unlocked.
 * We enter holding p->p_lock; p->p_lock is dropped and reacquired.
 * The process cannot become locked again until p->p_lock is dropped.
 */
void
prbarrier(proc_t *p)
{
	ASSERT(MUTEX_HELD(&p->p_lock));

	if (p->p_proc_flag & P_PR_LOCK) {
		/* The process is locked; delay until not locked */
		uint_t slot = p->p_slot;

		while (p->p_proc_flag & P_PR_LOCK)
			cv_wait(&pr_pid_cv[slot], &p->p_lock);
		cv_signal(&pr_pid_cv[slot]);
	}
}
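
/*
 * Illustrative pattern (see prrelvm() and prinvalidate() above): a thread
 * changing its own process state serializes against /proc like this:
 *
 *	mutex_enter(&p->p_lock);
 *	prbarrier(p);		-- wait out any P_PR_LOCK holder
 *	... modify the process state ...
 *	mutex_exit(&p->p_lock);
 */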

/*
 * Return process/lwp status.
 * The u-block is mapped in by this routine and unmapped at the end.
 */
void
prgetstatus(proc_t *p, pstatus_t *sp, zone_t *zp)
{
	kthread_t *t;

	ASSERT(MUTEX_HELD(&p->p_lock));

	t = prchoose(p);	/* returns locked thread */
	ASSERT(t != NULL);
	thread_unlock(t);

	/* just bzero the process part, prgetlwpstatus() does the rest */
	bzero(sp, sizeof (pstatus_t) - sizeof (lwpstatus_t));
	sp->pr_nlwp = p->p_lwpcnt;
	sp->pr_nzomb = p->p_zombcnt;
	prassignset(&sp->pr_sigpend, &p->p_sig);
	sp->pr_brkbase = (uintptr_t)p->p_brkbase;
	sp->pr_brksize = p->p_brksize;
	sp->pr_stkbase = (uintptr_t)prgetstackbase(p);
	sp->pr_stksize = p->p_stksize;
	sp->pr_pid = p->p_pid;
	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
	    (p->p_flag & SZONETOP)) {
		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
		/*
		 * Inside local zones, fake zsched's pid as parent pids for
		 * processes which reference processes outside of the zone.
		 */
		sp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
	} else {
		sp->pr_ppid = p->p_ppid;
	}
	sp->pr_pgid = p->p_pgrp;
	sp->pr_sid = p->p_sessp->s_sid;
	sp->pr_taskid = p->p_task->tk_tkid;
	sp->pr_projid = p->p_task->tk_proj->kpj_id;
	sp->pr_zoneid = p->p_zone->zone_id;
	hrt2ts(mstate_aggr_state(p, LMS_USER), &sp->pr_utime);
	hrt2ts(mstate_aggr_state(p, LMS_SYSTEM), &sp->pr_stime);
	TICK_TO_TIMESTRUC(p->p_cutime, &sp->pr_cutime);
	TICK_TO_TIMESTRUC(p->p_cstime, &sp->pr_cstime);
	prassignset(&sp->pr_sigtrace, &p->p_sigmask);
	prassignset(&sp->pr_flttrace, &p->p_fltmask);
	prassignset(&sp->pr_sysentry, &PTOU(p)->u_entrymask);
	prassignset(&sp->pr_sysexit, &PTOU(p)->u_exitmask);
	switch (p->p_model) {
	case DATAMODEL_ILP32:
		sp->pr_dmodel = PR_MODEL_ILP32;
		break;
	case DATAMODEL_LP64:
		sp->pr_dmodel = PR_MODEL_LP64;
		break;
	}
	if (p->p_agenttp)
		sp->pr_agentid = p->p_agenttp->t_tid;

	/* get the chosen lwp's status */
	prgetlwpstatus(t, &sp->pr_lwp, zp);

	/* replicate the flags */
	sp->pr_flags = sp->pr_lwp.pr_flags;
}

/*
 * Query mask of held signals for a given thread.
 *
 * This makes use of schedctl_sigblock() to query if userspace has requested
 * that all maskable signals be held.  While it would be tempting to call
 * schedctl_finish_sigblock() and apply that update to t->t_hold, it cannot be
 * done safely without the risk of racing with the thread under consideration.
 */
void
prgethold(kthread_t *t, sigset_t *sp)
{
	k_sigset_t set;

	if (schedctl_sigblock(t)) {
		set.__sigbits[0] = FILLSET0 & ~CANTMASK0;
		set.__sigbits[1] = FILLSET1 & ~CANTMASK1;
		set.__sigbits[2] = FILLSET2 & ~CANTMASK2;
	} else {
		set = t->t_hold;
	}
	sigktou(&set, sp);
}

#ifdef _SYSCALL32_IMPL
void
prgetlwpstatus32(kthread_t *t, lwpstatus32_t *sp, zone_t *zp)
{
	proc_t *p = ttoproc(t);
	klwp_t *lwp = ttolwp(t);
	struct mstate *ms = &lwp->lwp_mstate;
	hrtime_t usr, sys;
	int flags;
	ulong_t instr;

	ASSERT(MUTEX_HELD(&p->p_lock));

	bzero(sp, sizeof (*sp));
	flags = 0L;
	if (t->t_state == TS_STOPPED) {
		flags |= PR_STOPPED;
		if ((t->t_schedflag & TS_PSTART) == 0)
			flags |= PR_ISTOP;
	} else if (VSTOPPED(t)) {
		flags |= PR_STOPPED|PR_ISTOP;
	}
	if (!(flags & PR_ISTOP) && (t->t_proc_flag & TP_PRSTOP))
		flags |= PR_DSTOP;
	if (lwp->lwp_asleep)
		flags |= PR_ASLEEP;
	if (t == p->p_agenttp)
		flags |= PR_AGENT;
	if (!(t->t_proc_flag & TP_TWAIT))
		flags |= PR_DETACH;
	if (t->t_proc_flag & TP_DAEMON)
		flags |= PR_DAEMON;
	if (p->p_proc_flag & P_PR_FORK)
		flags |= PR_FORK;
	if (p->p_proc_flag & P_PR_RUNLCL)
		flags |= PR_RLC;
	if (p->p_proc_flag & P_PR_KILLCL)
		flags |= PR_KLC;
	if (p->p_proc_flag & P_PR_ASYNC)
		flags |= PR_ASYNC;
	if (p->p_proc_flag & P_PR_BPTADJ)
		flags |= PR_BPTADJ;
	if (p->p_proc_flag & P_PR_PTRACE)
		flags |= PR_PTRACE;
	if (p->p_flag & SMSACCT)
		flags |= PR_MSACCT;
	if (p->p_flag & SMSFORK)
		flags |= PR_MSFORK;
	if (p->p_flag & SVFWAIT)
		flags |= PR_VFORKP;
	sp->pr_flags = flags;
	if (VSTOPPED(t)) {
		sp->pr_why = PR_REQUESTED;
		sp->pr_what = 0;
	} else {
		sp->pr_why = t->t_whystop;
		sp->pr_what = t->t_whatstop;
	}
	sp->pr_lwpid = t->t_tid;
	sp->pr_cursig = lwp->lwp_cursig;
	prassignset(&sp->pr_lwppend, &t->t_sig);
	prgethold(t, &sp->pr_lwphold);
	if (t->t_whystop == PR_FAULTED) {
		siginfo_kto32(&lwp->lwp_siginfo, &sp->pr_info);
		if (t->t_whatstop == FLTPAGE)
			sp->pr_info.si_addr =
			    (caddr32_t)(uintptr_t)lwp->lwp_siginfo.si_addr;
	} else if (lwp->lwp_curinfo)
		siginfo_kto32(&lwp->lwp_curinfo->sq_info, &sp->pr_info);
	if (SI_FROMUSER(&lwp->lwp_siginfo) && zp->zone_id != GLOBAL_ZONEID &&
	    sp->pr_info.si_zoneid != zp->zone_id) {
		sp->pr_info.si_pid = zp->zone_zsched->p_pid;
		sp->pr_info.si_uid = 0;
		sp->pr_info.si_ctid = -1;
		sp->pr_info.si_zoneid = zp->zone_id;
	}
	sp->pr_altstack.ss_sp =
	    (caddr32_t)(uintptr_t)lwp->lwp_sigaltstack.ss_sp;
	sp->pr_altstack.ss_size = (size32_t)lwp->lwp_sigaltstack.ss_size;
	sp->pr_altstack.ss_flags = (int32_t)lwp->lwp_sigaltstack.ss_flags;
	prgetaction32(p, PTOU(p), lwp->lwp_cursig, &sp->pr_action);
	sp->pr_oldcontext = (caddr32_t)lwp->lwp_oldcontext;
	sp->pr_ustack = (caddr32_t)lwp->lwp_ustack;
	(void) strncpy(sp->pr_clname, sclass[t->t_cid].cl_name,
	    sizeof (sp->pr_clname) - 1);
	if (flags & PR_STOPPED)
		hrt2ts32(t->t_stoptime, &sp->pr_tstamp);
	usr = ms->ms_acct[LMS_USER];
	sys = ms->ms_acct[LMS_SYSTEM] + ms->ms_acct[LMS_TRAP];
	scalehrtime(&usr);
	scalehrtime(&sys);
	hrt2ts32(usr, &sp->pr_utime);
	hrt2ts32(sys, &sp->pr_stime);

	/*
	 * Fetch the current instruction, if not a system process.
	 * We don't attempt this unless the lwp is stopped.
	 */
	if ((p->p_flag & SSYS) || p->p_as == &kas)
		sp->pr_flags |= (PR_ISSYS|PR_PCINVAL);
	else if (!(flags & PR_STOPPED))
		sp->pr_flags |= PR_PCINVAL;
	else if (!prfetchinstr(lwp, &instr))
		sp->pr_flags |= PR_PCINVAL;
	else
		sp->pr_instr = (uint32_t)instr;

	/*
	 * Drop p_lock while touching the lwp's stack.
	 */
	mutex_exit(&p->p_lock);
	if (prisstep(lwp))
		sp->pr_flags |= PR_STEP;
	if ((flags & (PR_STOPPED|PR_ASLEEP)) && t->t_sysnum) {
		int i;

		sp->pr_syscall = get_syscall32_args(lwp,
		    (int *)sp->pr_sysarg, &i);
		sp->pr_nsysarg = (ushort_t)i;
	}
	if ((flags & PR_STOPPED) || t == curthread)
		prgetprregs32(lwp, sp->pr_reg);
	if ((t->t_state == TS_STOPPED && t->t_whystop == PR_SYSEXIT) ||
	    (flags & PR_VFORKP)) {
		long r1, r2;
		user_t *up;
		auxv_t *auxp;
		int i;

		sp->pr_errno = prgetrvals(lwp, &r1, &r2);
		if (sp->pr_errno == 0) {
			sp->pr_rval1 = (int32_t)r1;
			sp->pr_rval2 = (int32_t)r2;
			sp->pr_errpriv = PRIV_NONE;
		} else
			sp->pr_errpriv = lwp->lwp_badpriv;

		if (t->t_sysnum == SYS_execve) {
			up = PTOU(p);
			sp->pr_sysarg[0] = 0;
			sp->pr_sysarg[1] = (caddr32_t)up->u_argv;
			sp->pr_sysarg[2] = (caddr32_t)up->u_envp;
			sp->pr_sysarg[3] = 0;
			for (i = 0, auxp = up->u_auxv;
			    i < sizeof (up->u_auxv) / sizeof (up->u_auxv[0]);
			    i++, auxp++) {
				if (auxp->a_type == AT_SUN_EXECNAME) {
					sp->pr_sysarg[0] =
					    (caddr32_t)
					    (uintptr_t)auxp->a_un.a_ptr;
					break;
				}
			}
		}
	}
	if (prhasfp())
		prgetprfpregs32(lwp, &sp->pr_fpreg);
	mutex_enter(&p->p_lock);
}

void
prgetstatus32(proc_t *p, pstatus32_t *sp, zone_t *zp)
{
	kthread_t *t;

	ASSERT(MUTEX_HELD(&p->p_lock));

	t = prchoose(p);	/* returns locked thread */
	ASSERT(t != NULL);
	thread_unlock(t);

	/* just bzero the process part, prgetlwpstatus32() does the rest */
	bzero(sp, sizeof (pstatus32_t) - sizeof (lwpstatus32_t));
	sp->pr_nlwp = p->p_lwpcnt;
	sp->pr_nzomb = p->p_zombcnt;
	prassignset(&sp->pr_sigpend, &p->p_sig);
	sp->pr_brkbase = (uint32_t)(uintptr_t)p->p_brkbase;
	sp->pr_brksize = (uint32_t)p->p_brksize;
	sp->pr_stkbase = (uint32_t)(uintptr_t)prgetstackbase(p);
	sp->pr_stksize = (uint32_t)p->p_stksize;
	sp->pr_pid = p->p_pid;
	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
	    (p->p_flag & SZONETOP)) {
		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
		/*
		 * Inside local zones, fake zsched's pid as parent pids for
		 * processes which reference processes outside of the zone.
		 */
		sp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
	} else {
		sp->pr_ppid = p->p_ppid;
	}
	sp->pr_pgid = p->p_pgrp;
	sp->pr_sid = p->p_sessp->s_sid;
	sp->pr_taskid = p->p_task->tk_tkid;
	sp->pr_projid = p->p_task->tk_proj->kpj_id;
	sp->pr_zoneid = p->p_zone->zone_id;
	hrt2ts32(mstate_aggr_state(p, LMS_USER), &sp->pr_utime);
	hrt2ts32(mstate_aggr_state(p, LMS_SYSTEM), &sp->pr_stime);
	TICK_TO_TIMESTRUC32(p->p_cutime, &sp->pr_cutime);
	TICK_TO_TIMESTRUC32(p->p_cstime, &sp->pr_cstime);
	prassignset(&sp->pr_sigtrace, &p->p_sigmask);
	prassignset(&sp->pr_flttrace, &p->p_fltmask);
	prassignset(&sp->pr_sysentry, &PTOU(p)->u_entrymask);
	prassignset(&sp->pr_sysexit, &PTOU(p)->u_exitmask);
	switch (p->p_model) {
	case DATAMODEL_ILP32:
		sp->pr_dmodel = PR_MODEL_ILP32;
		break;
	case DATAMODEL_LP64:
		sp->pr_dmodel = PR_MODEL_LP64;
		break;
	}
	if (p->p_agenttp)
		sp->pr_agentid = p->p_agenttp->t_tid;

	/* get the chosen lwp's status */
	prgetlwpstatus32(t, &sp->pr_lwp, zp);

	/* replicate the flags */
	sp->pr_flags = sp->pr_lwp.pr_flags;
}
#endif	/* _SYSCALL32_IMPL */

/*
 * Return lwp status.
 */
void
prgetlwpstatus(kthread_t *t, lwpstatus_t *sp, zone_t *zp)
{
	proc_t *p = ttoproc(t);
	klwp_t *lwp = ttolwp(t);
	struct mstate *ms = &lwp->lwp_mstate;
	hrtime_t usr, sys;
	int flags;
	ulong_t instr;

	ASSERT(MUTEX_HELD(&p->p_lock));

	bzero(sp, sizeof (*sp));
	flags = 0L;
	if (t->t_state == TS_STOPPED) {
		flags |= PR_STOPPED;
		if ((t->t_schedflag & TS_PSTART) == 0)
			flags |= PR_ISTOP;
	} else if (VSTOPPED(t)) {
		flags |= PR_STOPPED|PR_ISTOP;
	}
	if (!(flags & PR_ISTOP) && (t->t_proc_flag & TP_PRSTOP))
		flags |= PR_DSTOP;
	if (lwp->lwp_asleep)
		flags |= PR_ASLEEP;
	if (t == p->p_agenttp)
		flags |= PR_AGENT;
	if (!(t->t_proc_flag & TP_TWAIT))
		flags |= PR_DETACH;
	if (t->t_proc_flag & TP_DAEMON)
		flags |= PR_DAEMON;
	if (p->p_proc_flag & P_PR_FORK)
		flags |= PR_FORK;
	if (p->p_proc_flag & P_PR_RUNLCL)
		flags |= PR_RLC;
	if (p->p_proc_flag & P_PR_KILLCL)
		flags |= PR_KLC;
	if (p->p_proc_flag & P_PR_ASYNC)
		flags |= PR_ASYNC;
	if (p->p_proc_flag & P_PR_BPTADJ)
		flags |= PR_BPTADJ;
	if (p->p_proc_flag & P_PR_PTRACE)
		flags |= PR_PTRACE;
	if (p->p_flag & SMSACCT)
		flags |= PR_MSACCT;
	if (p->p_flag & SMSFORK)
		flags |= PR_MSFORK;
	if (p->p_flag & SVFWAIT)
		flags |= PR_VFORKP;
	if (p->p_pgidp->pid_pgorphaned)
		flags |= PR_ORPHAN;
	if (p->p_pidflag & CLDNOSIGCHLD)
		flags |= PR_NOSIGCHLD;
	if (p->p_pidflag & CLDWAITPID)
		flags |= PR_WAITPID;
	sp->pr_flags = flags;
	if (VSTOPPED(t)) {
		sp->pr_why = PR_REQUESTED;
		sp->pr_what = 0;
	} else {
		sp->pr_why = t->t_whystop;
		sp->pr_what = t->t_whatstop;
	}
	sp->pr_lwpid = t->t_tid;
	sp->pr_cursig = lwp->lwp_cursig;
	prassignset(&sp->pr_lwppend, &t->t_sig);
	prgethold(t, &sp->pr_lwphold);
	if (t->t_whystop == PR_FAULTED)
		bcopy(&lwp->lwp_siginfo,
		    &sp->pr_info, sizeof (k_siginfo_t));
	else if (lwp->lwp_curinfo)
		bcopy(&lwp->lwp_curinfo->sq_info,
		    &sp->pr_info, sizeof (k_siginfo_t));
	if (SI_FROMUSER(&lwp->lwp_siginfo) && zp->zone_id != GLOBAL_ZONEID &&
	    sp->pr_info.si_zoneid != zp->zone_id) {
		sp->pr_info.si_pid = zp->zone_zsched->p_pid;
		sp->pr_info.si_uid = 0;
		sp->pr_info.si_ctid = -1;
		sp->pr_info.si_zoneid = zp->zone_id;
	}
	sp->pr_altstack = lwp->lwp_sigaltstack;
	prgetaction(p, PTOU(p), lwp->lwp_cursig, &sp->pr_action);
	sp->pr_oldcontext = (uintptr_t)lwp->lwp_oldcontext;
	sp->pr_ustack = lwp->lwp_ustack;
	(void) strncpy(sp->pr_clname, sclass[t->t_cid].cl_name,
	    sizeof (sp->pr_clname) - 1);
	if (flags & PR_STOPPED)
		hrt2ts(t->t_stoptime, &sp->pr_tstamp);
	usr = ms->ms_acct[LMS_USER];
	sys = ms->ms_acct[LMS_SYSTEM] + ms->ms_acct[LMS_TRAP];
	scalehrtime(&usr);
	scalehrtime(&sys);
	hrt2ts(usr, &sp->pr_utime);
	hrt2ts(sys, &sp->pr_stime);

	/*
	 * Fetch the current instruction, if not a system process.
	 * We don't attempt this unless the lwp is stopped.
	 */
	if ((p->p_flag & SSYS) || p->p_as == &kas)
		sp->pr_flags |= (PR_ISSYS|PR_PCINVAL);
	else if (!(flags & PR_STOPPED))
		sp->pr_flags |= PR_PCINVAL;
	else if (!prfetchinstr(lwp, &instr))
		sp->pr_flags |= PR_PCINVAL;
	else
		sp->pr_instr = instr;

	/*
	 * Drop p_lock while touching the lwp's stack.
	 */
	mutex_exit(&p->p_lock);
	if (prisstep(lwp))
		sp->pr_flags |= PR_STEP;
	if ((flags & (PR_STOPPED|PR_ASLEEP)) && t->t_sysnum) {
		int i;

		sp->pr_syscall = get_syscall_args(lwp,
		    (long *)sp->pr_sysarg, &i);
		sp->pr_nsysarg = (ushort_t)i;
	}
	if ((flags & PR_STOPPED) || t == curthread)
		prgetprregs(lwp, sp->pr_reg);
	if ((t->t_state == TS_STOPPED && t->t_whystop == PR_SYSEXIT) ||
	    (flags & PR_VFORKP)) {
		user_t *up;
		auxv_t *auxp;
		int i;

		sp->pr_errno = prgetrvals(lwp, &sp->pr_rval1, &sp->pr_rval2);
		if (sp->pr_errno == 0)
			sp->pr_errpriv = PRIV_NONE;
		else
			sp->pr_errpriv = lwp->lwp_badpriv;

		if (t->t_sysnum == SYS_execve) {
			up = PTOU(p);
			sp->pr_sysarg[0] = 0;
			sp->pr_sysarg[1] = (uintptr_t)up->u_argv;
			sp->pr_sysarg[2] = (uintptr_t)up->u_envp;
			sp->pr_sysarg[3] = 0;
			for (i = 0, auxp = up->u_auxv;
			    i < sizeof (up->u_auxv) / sizeof (up->u_auxv[0]);
			    i++, auxp++) {
				if (auxp->a_type == AT_SUN_EXECNAME) {
					sp->pr_sysarg[0] =
					    (uintptr_t)auxp->a_un.a_ptr;
					break;
				}
			}
		}
	}
	if (prhasfp())
		prgetprfpregs(lwp, &sp->pr_fpreg);
	mutex_enter(&p->p_lock);
}

/*
 * Get the sigaction structure for the specified signal.  The u-block
 * must already have been mapped in by the caller.
 */
void
prgetaction(proc_t *p, user_t *up, uint_t sig, struct sigaction *sp)
{
	int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;

	bzero(sp, sizeof (*sp));

	if (sig != 0 && (unsigned)sig < nsig) {
		sp->sa_handler = up->u_signal[sig-1];
		prassignset(&sp->sa_mask, &up->u_sigmask[sig-1]);
		if (sigismember(&up->u_sigonstack, sig))
			sp->sa_flags |= SA_ONSTACK;
		if (sigismember(&up->u_sigresethand, sig))
			sp->sa_flags |= SA_RESETHAND;
		if (sigismember(&up->u_sigrestart, sig))
			sp->sa_flags |= SA_RESTART;
		if (sigismember(&p->p_siginfo, sig))
			sp->sa_flags |= SA_SIGINFO;
		if (sigismember(&up->u_signodefer, sig))
			sp->sa_flags |= SA_NODEFER;
		if (sig == SIGCLD) {
			if (p->p_flag & SNOWAIT)
				sp->sa_flags |= SA_NOCLDWAIT;
			if ((p->p_flag & SJCTL) == 0)
				sp->sa_flags |= SA_NOCLDSTOP;
		}
	}
}

#ifdef _SYSCALL32_IMPL
void
prgetaction32(proc_t *p, user_t *up, uint_t sig, struct sigaction32 *sp)
{
	int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;

	bzero(sp, sizeof (*sp));

	if (sig != 0 && (unsigned)sig < nsig) {
		sp->sa_handler = (caddr32_t)(uintptr_t)up->u_signal[sig-1];
		prassignset(&sp->sa_mask, &up->u_sigmask[sig-1]);
		if (sigismember(&up->u_sigonstack, sig))
			sp->sa_flags |= SA_ONSTACK;
		if (sigismember(&up->u_sigresethand, sig))
			sp->sa_flags |= SA_RESETHAND;
		if (sigismember(&up->u_sigrestart, sig))
			sp->sa_flags |= SA_RESTART;
		if (sigismember(&p->p_siginfo, sig))
			sp->sa_flags |= SA_SIGINFO;
		if (sigismember(&up->u_signodefer, sig))
			sp->sa_flags |= SA_NODEFER;
		if (sig == SIGCLD) {
			if (p->p_flag & SNOWAIT)
				sp->sa_flags |= SA_NOCLDWAIT;
			if ((p->p_flag & SJCTL) == 0)
				sp->sa_flags |= SA_NOCLDSTOP;
		}
	}
}
#endif	/* _SYSCALL32_IMPL */

/*
 * Count the number of segments in this process's address space.
 */
uint_t
prnsegs(struct as *as, int reserved)
{
	uint_t n = 0;
	struct seg *seg;

	ASSERT(as != &kas && AS_WRITE_HELD(as));

	for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
		caddr_t saddr, naddr;
		void *tmp = NULL;

		if ((seg->s_flags & S_HOLE) != 0) {
			continue;
		}

		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			(void) pr_getprot(seg, reserved, &tmp,
			    &saddr, &naddr, eaddr);
			if (saddr != naddr) {
				n++;
				/*
				 * prnsegs() was formerly designated to return
				 * an 'int' despite having no ability or use
				 * for negative results.  As part of changing
				 * it to 'uint_t', keep the old effective limit
				 * of INT_MAX in place.
				 */
				if (n == INT_MAX) {
					pr_getprot_done(&tmp);
					ASSERT(tmp == NULL);
					return (n);
				}
			}
		}

		ASSERT(tmp == NULL);
	}

	return (n);
}

/*
 * Convert uint32_t to decimal string w/o leading zeros.
 * Add trailing null characters if 'len' is greater than string length.
 * Return the string length.
 */
int
pr_u32tos(uint32_t n, char *s, int len)
{
	char cbuf[11];		/* 32-bit unsigned integer fits in 10 digits */
	char *cp = cbuf;
	char *end = s + len;

	do {
		*cp++ = (char)(n % 10 + '0');
		n /= 10;
	} while (n);

	len = (int)(cp - cbuf);

	do {
		*s++ = *--cp;
	} while (cp > cbuf);

	while (s < end)		/* optional pad */
		*s++ = '\0';

	return (len);
}
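
/*
 * For example, pr_u32tos(1234, buf, 8) writes "1234" followed by four
 * padding NUL bytes into buf and returns 4, the string length.
 */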

/*
 * Convert uint64_t to decimal string w/o leading zeros.
 * Return the string length.
 */
static int
pr_u64tos(uint64_t n, char *s)
{
	char cbuf[21];		/* 64-bit unsigned integer fits in 20 digits */
	char *cp = cbuf;
	int len;

	do {
		*cp++ = (char)(n % 10 + '0');
		n /= 10;
	} while (n);

	len = (int)(cp - cbuf);

	do {
		*s++ = *--cp;
	} while (cp > cbuf);

	return (len);
}

/*
 * Similar to getf() / getf_gen(), but for the specified process.  On success,
 * returns the fp with fp->f_count incremented.  The caller MUST call
 * closef(fp) on the returned fp after completing any actions using that fp.
 * We return a reference-held (fp->f_count bumped) file_t so no other closef()
 * can invoke destructive VOP_CLOSE actions while we're inspecting the
 * process's FD.
 *
 * Returns NULL for errors: either an empty process-table slot post-fi_lock
 * and UF_ENTER, or too many mutex_tryenter() failures on the file_t's f_tlock.
 * Both failure modes have DTrace probes.
 *
 * The current design of the procfs "close" code path uses the following lock
 * order of:
 *
 *	1: (file_t) f_tlock
 *	2: (proc_t) p_lock AND setting p->p_proc_flag's P_PR_LOCK
 *
 * That happens because closef() holds f_tlock while calling fop_close(),
 * which can be prclose(), which currently waits on and sets P_PR_LOCK at its
 * beginning.
 *
 * That lock order creates a challenge for pr_getf, which needs to take those
 * locks in the opposite order when the fd points to a procfs file descriptor.
 * The solution chosen here is to use mutex_tryenter on f_tlock and retry some
 * (limited) number of times, failing if we don't get both locks.
 *
 * The cases where this can fail are rare, and all involve a procfs caller
 * asking for info (eg. FDINFO) on another procfs FD.  In these cases,
 * returning EBADF (which results from a NULL return from pr_getf()) is
 * acceptable.
 *
 * One can increase the number of tries in pr_getf_maxtries if one is worried
 * about the contentious case.
 */

uint64_t pr_getf_tryfails;	/* Bumped for statistical purposes. */
int pr_getf_maxtries = 3;	/* So you can tune it from /etc/system */

file_t *
pr_getf(proc_t *p, uint_t fd, short *flag)
{
	uf_entry_t *ufp;
	uf_info_t *fip;
	file_t *fp;
	int tries = 0;

	ASSERT(MUTEX_HELD(&p->p_lock) && (p->p_proc_flag & P_PR_LOCK));

retry:
	fip = P_FINFO(p);

	if (fd >= fip->fi_nfiles)
		return (NULL);

	mutex_exit(&p->p_lock);
	mutex_enter(&fip->fi_lock);
	UF_ENTER(ufp, fip, fd);
	if ((fp = ufp->uf_file) != NULL && fp->f_count > 0) {
		if (mutex_tryenter(&fp->f_tlock)) {
			ASSERT(fp->f_count > 0);
			fp->f_count++;
			mutex_exit(&fp->f_tlock);
			if (flag != NULL)
				*flag = ufp->uf_flag;
		} else {
			/*
			 * Note the number of mutex_tryenter() attempts.
			 *
			 * The exit path will catch this and try again if we
			 * are below the retry threshold (pr_getf_maxtries).
			 */
			tries++;
			pr_getf_tryfails++;
			/*
			 * If we hit pr_getf_maxtries, we'll return NULL.
			 * DTrace scripts looking for this sort of failure
			 * should check when arg1 is pr_getf_maxtries.
			 */
			DTRACE_PROBE2(pr_getf_tryfail, file_t *, fp, int,
			    tries);
			fp = NULL;
		}
	} else {
		fp = NULL;
		/* If we fail here, someone else closed this FD. */
		DTRACE_PROBE1(pr_getf_emptyslot, int, tries);
		tries = pr_getf_maxtries;	/* Don't bother retrying. */
	}
	UF_EXIT(ufp);
	mutex_exit(&fip->fi_lock);
	mutex_enter(&p->p_lock);

	/* Use goto instead of tail-recursion so we can keep "tries" around. */
	if (fp == NULL) {
		/* "tries" starts at 1. */
		if (tries < pr_getf_maxtries)
			goto retry;
	} else {
		/*
		 * Probes here will detect successes after arg1's number of
		 * mutex_tryenter() calls.
		 */
		DTRACE_PROBE2(pr_getf_trysuccess, file_t *, fp, int, tries + 1);
	}

	return (fp);
}
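
/*
 * A minimal usage sketch (illustrative; assumes the caller has already
 * locked the target process via prlock(), so P_PR_LOCK is set and
 * p->p_lock is held):
 *
 *	file_t *fp;
 *	short flag;
 *
 *	if ((fp = pr_getf(p, fd, &flag)) == NULL)
 *		return (EBADF);
 *	... inspect fp->f_vnode, fp->f_offset, etc. ...
 *	pr_releasef(fp);
 */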


/*
 * Just as pr_getf() is a little unusual in how it goes about making the file_t
 * safe for procfs consumers to access, so too is pr_releasef() for safely
 * releasing that "hold".  The "hold" is unlike normal file descriptor activity
 * -- procfs is just an interloper here, wanting access to the vnode_t without
 * risk of a racing close() disrupting the state.  Just as pr_getf() avoids some
 * of the typical file_t behavior (such as auditing) when establishing its hold,
 * so too should pr_releasef().  It should not go through the motions of
 * closef() (since it is not a true close()) unless racing activity causes it to
 * be the last actor holding the refcount above zero.
 *
 * Under normal circumstances, we expect to find file_t`f_count > 1 after
 * the successful pr_getf() call.  We are, after all, accessing a resource
 * already held by the process in question.  We would also expect to rarely race
 * with a close() of the underlying fd, meaning that file_t`f_count > 1 would
 * still hold at pr_releasef() time.  That would mean we only need to decrement
 * f_count, leaving it to the process to later close the fd (thus triggering
 * VOP_CLOSE(), etc).
 *
 * It is only when that process manages to close() the fd while we have it
 * "held" in procfs that we must make a trip through the traditional closef()
 * logic to ensure proper tear-down of the file_t.
 */
void
pr_releasef(file_t *fp)
{
	mutex_enter(&fp->f_tlock);
	if (fp->f_count > 1) {
		/*
		 * This is the most common case: The file is still held open by
		 * the process, and we simply need to release our hold by
		 * decrementing f_count.
		 */
		fp->f_count--;
		mutex_exit(&fp->f_tlock);
	} else {
		/*
		 * A rare occasion: The process snuck a close() of this file
		 * while we were doing our business in procfs.  Given that
		 * f_count == 1, we are the only one with a reference to the
		 * file_t and need to take a trip through closef() to free it.
		 */
		mutex_exit(&fp->f_tlock);
		(void) closef(fp);
	}
}

void
pr_object_name(char *name, vnode_t *vp, struct vattr *vattr)
{
	char *s = name;
	struct vfs *vfsp;
	struct vfssw *vfsswp;

	if ((vfsp = vp->v_vfsp) != NULL &&
	    ((vfsswp = vfssw + vfsp->vfs_fstype), vfsswp->vsw_name) &&
	    *vfsswp->vsw_name) {
		(void) strcpy(s, vfsswp->vsw_name);
		s += strlen(s);
		*s++ = '.';
	}
	s += pr_u32tos(getmajor(vattr->va_fsid), s, 0);
	*s++ = '.';
	s += pr_u32tos(getminor(vattr->va_fsid), s, 0);
	*s++ = '.';
	s += pr_u64tos(vattr->va_nodeid, s);
	*s++ = '\0';
}
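
/*
 * For example (hypothetical values), a regular file on a ufs filesystem
 * whose fsid decodes to major 136 and minor 3, with node id 12345, is
 * named "ufs.136.3.12345".
 */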

struct seg *
break_seg(proc_t *p)
{
	caddr_t addr = p->p_brkbase;
	struct seg *seg;
	struct vnode *vp;

	if (p->p_brksize != 0)
		addr += p->p_brksize - 1;
	seg = as_segat(p->p_as, addr);
	if (seg != NULL && seg->s_ops == &segvn_ops &&
	    (SEGOP_GETVP(seg, seg->s_base, &vp) != 0 || vp == NULL))
		return (seg);
	return (NULL);
}

/*
 * Implementation of service functions to handle procfs generic chained
 * copyout buffers.
 */
typedef struct pr_iobuf_list {
	list_node_t	piol_link;	/* buffer linkage */
	size_t		piol_size;	/* total size (header + data) */
	size_t		piol_usedsize;	/* amount to copy out from this buf */
} piol_t;

#define	MAPSIZE	(64 * 1024)
#define	PIOL_DATABUF(iol)	((void *)(&(iol)[1]))

void
pr_iol_initlist(list_t *iolhead, size_t itemsize, int n)
{
	piol_t *iol;
	size_t initial_size = MIN(1, n) * itemsize;

	list_create(iolhead, sizeof (piol_t), offsetof(piol_t, piol_link));

	ASSERT(list_head(iolhead) == NULL);
	ASSERT(itemsize < MAPSIZE - sizeof (*iol));
	ASSERT(initial_size > 0);

	/*
	 * Someone creating chained copyout buffers may ask for less than
	 * MAPSIZE if the amount of data to be buffered is known to be
	 * smaller than that.
	 * But in order to prevent involuntary self-denial of service,
	 * the requested input size is clamped at MAPSIZE.
	 */
	initial_size = MIN(MAPSIZE, initial_size + sizeof (*iol));
	iol = kmem_alloc(initial_size, KM_SLEEP);
	list_insert_head(iolhead, iol);
	iol->piol_usedsize = 0;
	iol->piol_size = initial_size;
}

void *
pr_iol_newbuf(list_t *iolhead, size_t itemsize)
{
	piol_t *iol;
	char *new;

	ASSERT(itemsize < MAPSIZE - sizeof (*iol));
	ASSERT(list_head(iolhead) != NULL);

	iol = (piol_t *)list_tail(iolhead);

	if (iol->piol_size <
	    iol->piol_usedsize + sizeof (*iol) + itemsize) {
		/*
		 * Out of space in the current buffer.  Allocate more.
		 */
		piol_t *newiol;

		newiol = kmem_alloc(MAPSIZE, KM_SLEEP);
		newiol->piol_size = MAPSIZE;
		newiol->piol_usedsize = 0;

		list_insert_after(iolhead, iol, newiol);
		iol = list_next(iolhead, iol);
		ASSERT(iol == newiol);
	}
	new = (char *)PIOL_DATABUF(iol) + iol->piol_usedsize;
	iol->piol_usedsize += itemsize;
	bzero(new, itemsize);
	return (new);
}

void
pr_iol_freelist(list_t *iolhead)
{
	piol_t *iol;

	while ((iol = list_head(iolhead)) != NULL) {
		list_remove(iolhead, iol);
		kmem_free(iol, iol->piol_size);
	}
	list_destroy(iolhead);
}

int
pr_iol_copyout_and_free(list_t *iolhead, caddr_t *tgt, int errin)
{
	int error = errin;
	piol_t *iol;

	while ((iol = list_head(iolhead)) != NULL) {
		list_remove(iolhead, iol);
		if (!error) {
			if (copyout(PIOL_DATABUF(iol), *tgt,
			    iol->piol_usedsize))
				error = EFAULT;
			*tgt += iol->piol_usedsize;
		}
		kmem_free(iol, iol->piol_size);
	}
	list_destroy(iolhead);

	return (error);
}

int
pr_iol_uiomove_and_free(list_t *iolhead, uio_t *uiop, int errin)
{
	offset_t off = uiop->uio_offset;
	char *base;
	size_t size;
	piol_t *iol;
	int error = errin;

	while ((iol = list_head(iolhead)) != NULL) {
		list_remove(iolhead, iol);
		base = PIOL_DATABUF(iol);
		size = iol->piol_usedsize;
		if (off <= size && error == 0 && uiop->uio_resid > 0)
			error = uiomove(base + off, size - off,
			    UIO_READ, uiop);
		off = MAX(0, off - (offset_t)size);
		kmem_free(iol, iol->piol_size);
	}
	list_destroy(iolhead);

	return (error);
}
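
/*
 * A sketch of the intended calling sequence (prgetmap() below is a real
 * producer; its caller performs the final copyout): build items into the
 * chained buffers, then drain and free the list in one pass:
 *
 *	list_t head;
 *	prmap_t *mp;
 *
 *	pr_iol_initlist(&head, sizeof (*mp), nitems);
 *	while (...) {
 *		mp = pr_iol_newbuf(&head, sizeof (*mp));
 *		... fill in *mp ...
 *	}
 *	error = pr_iol_copyout_and_free(&head, &tgt, error);
 */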
1834
1835 /*
1836 * Return an array of structures with memory map information.
1837 * We allocate here; the caller must deallocate.
1838 */
1839 int
prgetmap(proc_t * p,int reserved,list_t * iolhead)1840 prgetmap(proc_t *p, int reserved, list_t *iolhead)
1841 {
1842 struct as *as = p->p_as;
1843 prmap_t *mp;
1844 struct seg *seg;
1845 struct seg *brkseg, *stkseg;
1846 struct vnode *vp;
1847 struct vattr vattr;
1848 uint_t prot;
1849
1850 ASSERT(as != &kas && AS_WRITE_HELD(as));
1851
1852 /*
1853 * Request an initial buffer size that doesn't waste memory
1854 * if the address space has only a small number of segments.
1855 */
1856 pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));
1857
1858 if ((seg = AS_SEGFIRST(as)) == NULL)
1859 return (0);
1860
1861 brkseg = break_seg(p);
1862 stkseg = as_segat(as, prgetstackbase(p));
1863
1864 do {
1865 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
1866 caddr_t saddr, naddr;
1867 void *tmp = NULL;
1868
1869 if ((seg->s_flags & S_HOLE) != 0) {
1870 continue;
1871 }
1872
1873 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1874 prot = pr_getprot(seg, reserved, &tmp,
1875 &saddr, &naddr, eaddr);
1876 if (saddr == naddr)
1877 continue;
1878
1879 mp = pr_iol_newbuf(iolhead, sizeof (*mp));
1880
1881 mp->pr_vaddr = (uintptr_t)saddr;
1882 mp->pr_size = naddr - saddr;
1883 mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
1884 mp->pr_mflags = 0;
1885 if (prot & PROT_READ)
1886 mp->pr_mflags |= MA_READ;
1887 if (prot & PROT_WRITE)
1888 mp->pr_mflags |= MA_WRITE;
1889 if (prot & PROT_EXEC)
1890 mp->pr_mflags |= MA_EXEC;
1891 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
1892 mp->pr_mflags |= MA_SHARED;
1893 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
1894 mp->pr_mflags |= MA_NORESERVE;
1895 if (seg->s_ops == &segspt_shmops ||
1896 (seg->s_ops == &segvn_ops &&
1897 (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
1898 mp->pr_mflags |= MA_ANON;
1899 if (seg == brkseg)
1900 mp->pr_mflags |= MA_BREAK;
1901 else if (seg == stkseg) {
1902 mp->pr_mflags |= MA_STACK;
1903 if (reserved) {
1904 size_t maxstack =
1905 ((size_t)p->p_stk_ctl +
1906 PAGEOFFSET) & PAGEMASK;
1907 mp->pr_vaddr =
1908 (uintptr_t)prgetstackbase(p) +
1909 p->p_stksize - maxstack;
1910 mp->pr_size = (uintptr_t)naddr -
1911 mp->pr_vaddr;
1912 }
1913 }
1914 if (seg->s_ops == &segspt_shmops)
1915 mp->pr_mflags |= MA_ISM | MA_SHM;
1916 mp->pr_pagesize = PAGESIZE;
1917
1918 /*
1919 * Manufacture a filename for the "object" directory.
1920 */
1921 vattr.va_mask = AT_FSID|AT_NODEID;
1922 if (seg->s_ops == &segvn_ops &&
1923 SEGOP_GETVP(seg, saddr, &vp) == 0 &&
1924 vp != NULL && vp->v_type == VREG &&
1925 VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
1926 if (vp == p->p_exec)
1927 (void) strcpy(mp->pr_mapname, "a.out");
1928 else
1929 pr_object_name(mp->pr_mapname,
1930 vp, &vattr);
1931 }
1932
1933 /*
1934 * Get the SysV shared memory id, if any.
1935 */
1936 if ((mp->pr_mflags & MA_SHARED) && p->p_segacct &&
1937 (mp->pr_shmid = shmgetid(p, seg->s_base)) !=
1938 SHMID_NONE) {
1939 if (mp->pr_shmid == SHMID_FREE)
1940 mp->pr_shmid = -1;
1941
1942 mp->pr_mflags |= MA_SHM;
1943 } else {
1944 mp->pr_shmid = -1;
1945 }
1946 }
1947 ASSERT(tmp == NULL);
1948 } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
1949
1950 return (0);
1951 }
1952
1953 #ifdef _SYSCALL32_IMPL
1954 int
1955 prgetmap32(proc_t *p, int reserved, list_t *iolhead)
1956 {
1957 struct as *as = p->p_as;
1958 prmap32_t *mp;
1959 struct seg *seg;
1960 struct seg *brkseg, *stkseg;
1961 struct vnode *vp;
1962 struct vattr vattr;
1963 uint_t prot;
1964
1965 ASSERT(as != &kas && AS_WRITE_HELD(as));
1966
1967 /*
1968 * Request an initial buffer size that doesn't waste memory
1969 * if the address space has only a small number of segments.
1970 */
1971 pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));
1972
1973 if ((seg = AS_SEGFIRST(as)) == NULL)
1974 return (0);
1975
1976 brkseg = break_seg(p);
1977 stkseg = as_segat(as, prgetstackbase(p));
1978
1979 do {
1980 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
1981 caddr_t saddr, naddr;
1982 void *tmp = NULL;
1983
1984 if ((seg->s_flags & S_HOLE) != 0) {
1985 continue;
1986 }
1987
1988 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1989 prot = pr_getprot(seg, reserved, &tmp,
1990 &saddr, &naddr, eaddr);
1991 if (saddr == naddr)
1992 continue;
1993
1994 mp = pr_iol_newbuf(iolhead, sizeof (*mp));
1995
1996 mp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
1997 mp->pr_size = (size32_t)(naddr - saddr);
1998 mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
1999 mp->pr_mflags = 0;
2000 if (prot & PROT_READ)
2001 mp->pr_mflags |= MA_READ;
2002 if (prot & PROT_WRITE)
2003 mp->pr_mflags |= MA_WRITE;
2004 if (prot & PROT_EXEC)
2005 mp->pr_mflags |= MA_EXEC;
2006 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
2007 mp->pr_mflags |= MA_SHARED;
2008 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
2009 mp->pr_mflags |= MA_NORESERVE;
2010 if (seg->s_ops == &segspt_shmops ||
2011 (seg->s_ops == &segvn_ops &&
2012 (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
2013 mp->pr_mflags |= MA_ANON;
2014 if (seg == brkseg)
2015 mp->pr_mflags |= MA_BREAK;
2016 else if (seg == stkseg) {
2017 mp->pr_mflags |= MA_STACK;
2018 if (reserved) {
2019 size_t maxstack =
2020 ((size_t)p->p_stk_ctl +
2021 PAGEOFFSET) & PAGEMASK;
2022 uintptr_t vaddr =
2023 (uintptr_t)prgetstackbase(p) +
2024 p->p_stksize - maxstack;
2025 mp->pr_vaddr = (caddr32_t)vaddr;
2026 mp->pr_size = (size32_t)
2027 ((uintptr_t)naddr - vaddr);
2028 }
2029 }
2030 if (seg->s_ops == &segspt_shmops)
2031 mp->pr_mflags |= MA_ISM | MA_SHM;
2032 mp->pr_pagesize = PAGESIZE;
2033
2034 /*
2035 * Manufacture a filename for the "object" directory.
2036 */
2037 vattr.va_mask = AT_FSID|AT_NODEID;
2038 if (seg->s_ops == &segvn_ops &&
2039 SEGOP_GETVP(seg, saddr, &vp) == 0 &&
2040 vp != NULL && vp->v_type == VREG &&
2041 VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
2042 if (vp == p->p_exec)
2043 (void) strcpy(mp->pr_mapname, "a.out");
2044 else
2045 pr_object_name(mp->pr_mapname,
2046 vp, &vattr);
2047 }
2048
2049 /*
2050 * Get the SysV shared memory id, if any.
2051 */
2052 if ((mp->pr_mflags & MA_SHARED) && p->p_segacct &&
2053 (mp->pr_shmid = shmgetid(p, seg->s_base)) !=
2054 SHMID_NONE) {
2055 if (mp->pr_shmid == SHMID_FREE)
2056 mp->pr_shmid = -1;
2057
2058 mp->pr_mflags |= MA_SHM;
2059 } else {
2060 mp->pr_shmid = -1;
2061 }
2062 }
2063 ASSERT(tmp == NULL);
2064 } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
2065
2066 return (0);
2067 }
2068 #endif /* _SYSCALL32_IMPL */
2069
2070 /*
2071 * Return the size of the /proc page data file.
2072 */
2073 size_t
2074 prpdsize(struct as *as)
2075 {
2076 struct seg *seg;
2077 size_t size;
2078
2079 ASSERT(as != &kas && AS_WRITE_HELD(as));
2080
2081 if ((seg = AS_SEGFIRST(as)) == NULL)
2082 return (0);
2083
2084 size = sizeof (prpageheader_t);
2085 do {
2086 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
2087 caddr_t saddr, naddr;
2088 void *tmp = NULL;
2089 size_t npage;
2090
2091 if ((seg->s_flags & S_HOLE) != 0) {
2092 continue;
2093 }
2094
2095 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
2096 (void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
2097 if ((npage = (naddr - saddr) / PAGESIZE) != 0)
2098 size += sizeof (prasmap_t) + round8(npage);
2099 }
2100 ASSERT(tmp == NULL);
2101 } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
2102
2103 return (size);
2104 }
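
/*
 * Sketch of the page data file layout whose size is computed above and
 * whose contents are produced by prpdread() below; every record is
 * 8-byte aligned via round8():
 *
 *	prpageheader_t			timestamp, pr_nmap, pr_npage
 *	prasmap_t (mapping 0)		address, npage, flags, name
 *	uchar_t[round8(npage)]		per-page hat stats for mapping 0
 *	prasmap_t (mapping 1)
 *	...
 */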
2105
2106 #ifdef _SYSCALL32_IMPL
2107 size_t
2108 prpdsize32(struct as *as)
2109 {
2110 struct seg *seg;
2111 size_t size;
2112
2113 ASSERT(as != &kas && AS_WRITE_HELD(as));
2114
2115 if ((seg = AS_SEGFIRST(as)) == NULL)
2116 return (0);
2117
2118 size = sizeof (prpageheader32_t);
2119 do {
2120 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
2121 caddr_t saddr, naddr;
2122 void *tmp = NULL;
2123 size_t npage;
2124
2125 if ((seg->s_flags & S_HOLE) != 0) {
2126 continue;
2127 }
2128
2129 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
2130 (void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
2131 if ((npage = (naddr - saddr) / PAGESIZE) != 0)
2132 size += sizeof (prasmap32_t) + round8(npage);
2133 }
2134 ASSERT(tmp == NULL);
2135 } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
2136
2137 return (size);
2138 }
2139 #endif /* _SYSCALL32_IMPL */
2140
2141 /*
2142 * Read page data information.
2143 */
2144 int
2145 prpdread(proc_t *p, uint_t hatid, struct uio *uiop)
2146 {
2147 struct as *as = p->p_as;
2148 caddr_t buf;
2149 size_t size;
2150 prpageheader_t *php;
2151 prasmap_t *pmp;
2152 struct seg *seg;
2153 int error;
2154
2155 again:
2156 AS_LOCK_ENTER(as, RW_WRITER);
2157
2158 if ((seg = AS_SEGFIRST(as)) == NULL) {
2159 AS_LOCK_EXIT(as);
2160 return (0);
2161 }
2162 size = prpdsize(as);
2163 if (uiop->uio_resid < size) {
2164 AS_LOCK_EXIT(as);
2165 return (E2BIG);
2166 }
2167
2168 buf = kmem_zalloc(size, KM_SLEEP);
2169 php = (prpageheader_t *)buf;
2170 pmp = (prasmap_t *)(buf + sizeof (prpageheader_t));
2171
2172 hrt2ts(gethrtime(), &php->pr_tstamp);
2173 php->pr_nmap = 0;
2174 php->pr_npage = 0;
2175 do {
2176 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
2177 caddr_t saddr, naddr;
2178 void *tmp = NULL;
2179
2180 if ((seg->s_flags & S_HOLE) != 0) {
2181 continue;
2182 }
2183
2184 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
2185 struct vnode *vp;
2186 struct vattr vattr;
2187 size_t len;
2188 size_t npage;
2189 uint_t prot;
2190 uintptr_t next;
2191
2192 prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
2193 if ((len = (size_t)(naddr - saddr)) == 0)
2194 continue;
2195 npage = len / PAGESIZE;
2196 next = (uintptr_t)(pmp + 1) + round8(npage);
2197 /*
2198 * It's possible that the address space can change
2199 			 * subtly even though we're holding as->a_lock
2200 			 * due to the nondeterminism of page_exists() in
2201 			 * the presence of asynchronously flushed pages or
2202 * mapped files whose sizes are changing.
2203 * page_exists() may be called indirectly from
2204 * pr_getprot() by a SEGOP_INCORE() routine.
2205 * If this happens we need to make sure we don't
2206 * overrun the buffer whose size we computed based
2207 * on the initial iteration through the segments.
2208 * Once we've detected an overflow, we need to clean
2209 * up the temporary memory allocated in pr_getprot()
2210 * and retry. If there's a pending signal, we return
2211 * EINTR so that this thread can be dislodged if
2212 * a latent bug causes us to spin indefinitely.
2213 */
2214 if (next > (uintptr_t)buf + size) {
2215 pr_getprot_done(&tmp);
2216 AS_LOCK_EXIT(as);
2217
2218 kmem_free(buf, size);
2219
2220 if (ISSIG(curthread, JUSTLOOKING))
2221 return (EINTR);
2222
2223 goto again;
2224 }
2225
2226 php->pr_nmap++;
2227 php->pr_npage += npage;
2228 pmp->pr_vaddr = (uintptr_t)saddr;
2229 pmp->pr_npage = npage;
2230 pmp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
2231 pmp->pr_mflags = 0;
2232 if (prot & PROT_READ)
2233 pmp->pr_mflags |= MA_READ;
2234 if (prot & PROT_WRITE)
2235 pmp->pr_mflags |= MA_WRITE;
2236 if (prot & PROT_EXEC)
2237 pmp->pr_mflags |= MA_EXEC;
2238 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
2239 pmp->pr_mflags |= MA_SHARED;
2240 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
2241 pmp->pr_mflags |= MA_NORESERVE;
2242 if (seg->s_ops == &segspt_shmops ||
2243 (seg->s_ops == &segvn_ops &&
2244 (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
2245 pmp->pr_mflags |= MA_ANON;
2246 if (seg->s_ops == &segspt_shmops)
2247 pmp->pr_mflags |= MA_ISM | MA_SHM;
2248 pmp->pr_pagesize = PAGESIZE;
2249 /*
2250 * Manufacture a filename for the "object" directory.
2251 */
2252 vattr.va_mask = AT_FSID|AT_NODEID;
2253 if (seg->s_ops == &segvn_ops &&
2254 SEGOP_GETVP(seg, saddr, &vp) == 0 &&
2255 vp != NULL && vp->v_type == VREG &&
2256 VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
2257 if (vp == p->p_exec)
2258 (void) strcpy(pmp->pr_mapname, "a.out");
2259 else
2260 pr_object_name(pmp->pr_mapname,
2261 vp, &vattr);
2262 }
2263
2264 /*
2265 * Get the SysV shared memory id, if any.
2266 */
2267 if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct &&
2268 (pmp->pr_shmid = shmgetid(p, seg->s_base)) !=
2269 SHMID_NONE) {
2270 if (pmp->pr_shmid == SHMID_FREE)
2271 pmp->pr_shmid = -1;
2272
2273 pmp->pr_mflags |= MA_SHM;
2274 } else {
2275 pmp->pr_shmid = -1;
2276 }
2277
2278 hat_getstat(as, saddr, len, hatid,
2279 (char *)(pmp + 1), HAT_SYNC_ZERORM);
2280 pmp = (prasmap_t *)next;
2281 }
2282 ASSERT(tmp == NULL);
2283 } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
2284
2285 AS_LOCK_EXIT(as);
2286
2287 ASSERT((uintptr_t)pmp <= (uintptr_t)buf + size);
2288 error = uiomove(buf, (caddr_t)pmp - buf, UIO_READ, uiop);
2289 kmem_free(buf, size);
2290
2291 return (error);
2292 }
2293
2294 #ifdef _SYSCALL32_IMPL
2295 int
2296 prpdread32(proc_t *p, uint_t hatid, struct uio *uiop)
2297 {
2298 struct as *as = p->p_as;
2299 caddr_t buf;
2300 size_t size;
2301 prpageheader32_t *php;
2302 prasmap32_t *pmp;
2303 struct seg *seg;
2304 int error;
2305
2306 again:
2307 AS_LOCK_ENTER(as, RW_WRITER);
2308
2309 if ((seg = AS_SEGFIRST(as)) == NULL) {
2310 AS_LOCK_EXIT(as);
2311 return (0);
2312 }
2313 size = prpdsize32(as);
2314 if (uiop->uio_resid < size) {
2315 AS_LOCK_EXIT(as);
2316 return (E2BIG);
2317 }
2318
2319 buf = kmem_zalloc(size, KM_SLEEP);
2320 php = (prpageheader32_t *)buf;
2321 pmp = (prasmap32_t *)(buf + sizeof (prpageheader32_t));
2322
2323 hrt2ts32(gethrtime(), &php->pr_tstamp);
2324 php->pr_nmap = 0;
2325 php->pr_npage = 0;
2326 do {
2327 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
2328 caddr_t saddr, naddr;
2329 void *tmp = NULL;
2330
2331 if ((seg->s_flags & S_HOLE) != 0) {
2332 continue;
2333 }
2334
2335 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
2336 struct vnode *vp;
2337 struct vattr vattr;
2338 size_t len;
2339 size_t npage;
2340 uint_t prot;
2341 uintptr_t next;
2342
2343 prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
2344 if ((len = (size_t)(naddr - saddr)) == 0)
2345 continue;
2346 npage = len / PAGESIZE;
2347 next = (uintptr_t)(pmp + 1) + round8(npage);
2348 /*
2349 * It's possible that the address space can change
2350 			 * subtly even though we're holding as->a_lock
2351 			 * due to the nondeterminism of page_exists() in
2352 			 * the presence of asynchronously flushed pages or
2353 * mapped files whose sizes are changing.
2354 * page_exists() may be called indirectly from
2355 * pr_getprot() by a SEGOP_INCORE() routine.
2356 * If this happens we need to make sure we don't
2357 * overrun the buffer whose size we computed based
2358 * on the initial iteration through the segments.
2359 * Once we've detected an overflow, we need to clean
2360 * up the temporary memory allocated in pr_getprot()
2361 * and retry. If there's a pending signal, we return
2362 * EINTR so that this thread can be dislodged if
2363 * a latent bug causes us to spin indefinitely.
2364 */
2365 if (next > (uintptr_t)buf + size) {
2366 pr_getprot_done(&tmp);
2367 AS_LOCK_EXIT(as);
2368
2369 kmem_free(buf, size);
2370
2371 if (ISSIG(curthread, JUSTLOOKING))
2372 return (EINTR);
2373
2374 goto again;
2375 }
2376
2377 php->pr_nmap++;
2378 php->pr_npage += npage;
2379 pmp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
2380 pmp->pr_npage = (size32_t)npage;
2381 pmp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
2382 pmp->pr_mflags = 0;
2383 if (prot & PROT_READ)
2384 pmp->pr_mflags |= MA_READ;
2385 if (prot & PROT_WRITE)
2386 pmp->pr_mflags |= MA_WRITE;
2387 if (prot & PROT_EXEC)
2388 pmp->pr_mflags |= MA_EXEC;
2389 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
2390 pmp->pr_mflags |= MA_SHARED;
2391 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
2392 pmp->pr_mflags |= MA_NORESERVE;
2393 if (seg->s_ops == &segspt_shmops ||
2394 (seg->s_ops == &segvn_ops &&
2395 (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
2396 pmp->pr_mflags |= MA_ANON;
2397 if (seg->s_ops == &segspt_shmops)
2398 pmp->pr_mflags |= MA_ISM | MA_SHM;
2399 pmp->pr_pagesize = PAGESIZE;
2400 /*
2401 * Manufacture a filename for the "object" directory.
2402 */
2403 vattr.va_mask = AT_FSID|AT_NODEID;
2404 if (seg->s_ops == &segvn_ops &&
2405 SEGOP_GETVP(seg, saddr, &vp) == 0 &&
2406 vp != NULL && vp->v_type == VREG &&
2407 VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
2408 if (vp == p->p_exec)
2409 (void) strcpy(pmp->pr_mapname, "a.out");
2410 else
2411 pr_object_name(pmp->pr_mapname,
2412 vp, &vattr);
2413 }
2414
2415 /*
2416 * Get the SysV shared memory id, if any.
2417 */
2418 if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct &&
2419 (pmp->pr_shmid = shmgetid(p, seg->s_base)) !=
2420 SHMID_NONE) {
2421 if (pmp->pr_shmid == SHMID_FREE)
2422 pmp->pr_shmid = -1;
2423
2424 pmp->pr_mflags |= MA_SHM;
2425 } else {
2426 pmp->pr_shmid = -1;
2427 }
2428
2429 hat_getstat(as, saddr, len, hatid,
2430 (char *)(pmp + 1), HAT_SYNC_ZERORM);
2431 pmp = (prasmap32_t *)next;
2432 }
2433 ASSERT(tmp == NULL);
2434 } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
2435
2436 AS_LOCK_EXIT(as);
2437
2438 ASSERT((uintptr_t)pmp <= (uintptr_t)buf + size);
2439 error = uiomove(buf, (caddr_t)pmp - buf, UIO_READ, uiop);
2440 kmem_free(buf, size);
2441
2442 return (error);
2443 }
2444 #endif /* _SYSCALL32_IMPL */
2445
2446 ushort_t
2447 prgetpctcpu(uint64_t pct)
2448 {
2449 /*
2450 * The value returned will be relevant in the zone of the examiner,
2451 * which may not be the same as the zone which performed the procfs
2452 * mount.
2453 */
2454 int nonline = zone_ncpus_online_get(curproc->p_zone);
2455
2456 /*
2457 * Prorate over online cpus so we don't exceed 100%
2458 */
2459 if (nonline > 1)
2460 pct /= nonline;
2461 pct >>= 16; /* convert to 16-bit scaled integer */
2462 if (pct > 0x8000) /* might happen, due to rounding */
2463 pct = 0x8000;
2464 return ((ushort_t)pct);
2465 }
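
/*
 * Worked example for the conversion above: pr_pctcpu is a 16-bit binary
 * fraction with the binary point to the right of the high-order bit, so
 * 0x8000 represents 1.0 (100%).  The incoming sum is the same kind of
 * fraction in 32 bits (1.0 == 0x80000000); a thread using about a quarter
 * of one CPU arrives as roughly 0x20000000, and shifting right by 16
 * yields 0x2000 == 0.25.  The clamp guards against rounding (or the
 * multi-LWP summation in prgetpsinfo()) nudging the result past 1.0.
 */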
2466
2467 /*
2468 * Return information used by ps(1).
2469 */
2470 void
2471 prgetpsinfo(proc_t *p, psinfo_t *psp)
2472 {
2473 kthread_t *t;
2474 struct cred *cred;
2475 hrtime_t hrutime, hrstime;
2476
2477 ASSERT(MUTEX_HELD(&p->p_lock));
2478
2479 if ((t = prchoose(p)) == NULL) /* returns locked thread */
2480 bzero(psp, sizeof (*psp));
2481 else {
2482 thread_unlock(t);
2483 bzero(psp, sizeof (*psp) - sizeof (psp->pr_lwp));
2484 }
2485
2486 /*
2487 * only export SSYS and SMSACCT; everything else is off-limits to
2488 * userland apps.
2489 */
2490 psp->pr_flag = p->p_flag & (SSYS | SMSACCT);
2491 psp->pr_nlwp = p->p_lwpcnt;
2492 psp->pr_nzomb = p->p_zombcnt;
2493 mutex_enter(&p->p_crlock);
2494 cred = p->p_cred;
2495 psp->pr_uid = crgetruid(cred);
2496 psp->pr_euid = crgetuid(cred);
2497 psp->pr_gid = crgetrgid(cred);
2498 psp->pr_egid = crgetgid(cred);
2499 mutex_exit(&p->p_crlock);
2500 psp->pr_pid = p->p_pid;
2501 if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
2502 (p->p_flag & SZONETOP)) {
2503 ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
2504 /*
2505 		 * Inside local zones, fake zsched's pid as the parent pid for
2506 		 * processes which reference processes outside of the zone.
2507 */
2508 psp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
2509 } else {
2510 psp->pr_ppid = p->p_ppid;
2511 }
2512 psp->pr_pgid = p->p_pgrp;
2513 psp->pr_sid = p->p_sessp->s_sid;
2514 psp->pr_taskid = p->p_task->tk_tkid;
2515 psp->pr_projid = p->p_task->tk_proj->kpj_id;
2516 psp->pr_poolid = p->p_pool->pool_id;
2517 psp->pr_zoneid = p->p_zone->zone_id;
2518 if ((psp->pr_contract = PRCTID(p)) == 0)
2519 psp->pr_contract = -1;
2520 psp->pr_addr = (uintptr_t)prgetpsaddr(p);
2521 switch (p->p_model) {
2522 case DATAMODEL_ILP32:
2523 psp->pr_dmodel = PR_MODEL_ILP32;
2524 break;
2525 case DATAMODEL_LP64:
2526 psp->pr_dmodel = PR_MODEL_LP64;
2527 break;
2528 }
2529 hrutime = mstate_aggr_state(p, LMS_USER);
2530 hrstime = mstate_aggr_state(p, LMS_SYSTEM);
2531 hrt2ts((hrutime + hrstime), &psp->pr_time);
2532 TICK_TO_TIMESTRUC(p->p_cutime + p->p_cstime, &psp->pr_ctime);
2533
2534 if (t == NULL) {
2535 int wcode = p->p_wcode; /* must be atomic read */
2536
2537 if (wcode)
2538 psp->pr_wstat = wstat(wcode, p->p_wdata);
2539 psp->pr_ttydev = PRNODEV;
2540 psp->pr_lwp.pr_state = SZOMB;
2541 psp->pr_lwp.pr_sname = 'Z';
2542 psp->pr_lwp.pr_bindpro = PBIND_NONE;
2543 psp->pr_lwp.pr_bindpset = PS_NONE;
2544 } else {
2545 user_t *up = PTOU(p);
2546 struct as *as;
2547 dev_t d;
2548 extern dev_t rwsconsdev, rconsdev, uconsdev;
2549
2550 d = cttydev(p);
2551 /*
2552 * If the controlling terminal is the real
2553 * or workstation console device, map to what the
2554 		 * user thinks is the console device. Handle the case where
2555 * rwsconsdev or rconsdev is set to NODEV for Starfire.
2556 */
2557 if ((d == rwsconsdev || d == rconsdev) && d != NODEV)
2558 d = uconsdev;
2559 psp->pr_ttydev = (d == NODEV) ? PRNODEV : d;
2560 psp->pr_start = up->u_start;
2561 bcopy(up->u_comm, psp->pr_fname,
2562 MIN(sizeof (up->u_comm), sizeof (psp->pr_fname)-1));
2563 bcopy(up->u_psargs, psp->pr_psargs,
2564 MIN(PRARGSZ-1, PSARGSZ));
2565 psp->pr_argc = up->u_argc;
2566 psp->pr_argv = up->u_argv;
2567 psp->pr_envp = up->u_envp;
2568
2569 /* get the chosen lwp's lwpsinfo */
2570 prgetlwpsinfo(t, &psp->pr_lwp);
2571
2572 /* compute %cpu for the process */
2573 if (p->p_lwpcnt == 1)
2574 psp->pr_pctcpu = psp->pr_lwp.pr_pctcpu;
2575 else {
2576 uint64_t pct = 0;
2577 hrtime_t cur_time = gethrtime_unscaled();
2578
2579 t = p->p_tlist;
2580 do {
2581 pct += cpu_update_pct(t, cur_time);
2582 } while ((t = t->t_forw) != p->p_tlist);
2583
2584 psp->pr_pctcpu = prgetpctcpu(pct);
2585 }
2586 if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) {
2587 psp->pr_size = 0;
2588 psp->pr_rssize = 0;
2589 } else {
2590 mutex_exit(&p->p_lock);
2591 AS_LOCK_ENTER(as, RW_READER);
2592 psp->pr_size = btopr(as->a_resvsize) *
2593 (PAGESIZE / 1024);
2594 psp->pr_rssize = rm_asrss(as) * (PAGESIZE / 1024);
2595 psp->pr_pctmem = rm_pctmemory(as);
2596 AS_LOCK_EXIT(as);
2597 mutex_enter(&p->p_lock);
2598 }
2599 }
2600 }
2601
2602 static size_t
2603 prfdinfomisc(list_t *data, uint_t type, const void *val, size_t vlen)
2604 {
2605 pr_misc_header_t *misc;
2606 size_t len;
2607
2608 len = PRFDINFO_ROUNDUP(sizeof (*misc) + vlen);
2609
2610 if (data != NULL) {
2611 misc = pr_iol_newbuf(data, len);
2612 misc->pr_misc_type = type;
2613 misc->pr_misc_size = len;
2614 misc++;
2615 bcopy((char *)val, (char *)misc, vlen);
2616 }
2617
2618 return (len);
2619 }
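
/*
 * Each miscellaneous item appended above has this shape, with
 * pr_misc_size covering the header, the payload and any padding added by
 * PRFDINFO_ROUNDUP():
 *
 *	+------------------+-----------------------+---------+
 *	| pr_misc_header_t | vlen bytes of payload | padding |
 *	+------------------+-----------------------+---------+
 *
 * Passing data == NULL performs a sizing pass only.  Each prfdinfo*
 * routine below is called once that way from prgetfdinfosize() and then
 * again with a real list from prgetfdinfo().
 */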
2620
2621 /*
2622 * There's no elegant way to determine if a character device
2623 * supports TLI, so just check a hardcoded list of known TLI
2624 * devices.
2625 */
2626
2627 static boolean_t
2628 pristli(vnode_t *vp)
2629 {
2630 static const char *tlidevs[] = {
2631 "udp", "udp6", "tcp", "tcp6"
2632 };
2633 char *devname;
2634 uint_t i;
2635
2636 ASSERT(vp != NULL);
2637
2638 if (vp->v_type != VCHR || vp->v_stream == NULL || vp->v_rdev == 0)
2639 return (B_FALSE);
2640
2641 if ((devname = mod_major_to_name(getmajor(vp->v_rdev))) == NULL)
2642 return (B_FALSE);
2643
2644 for (i = 0; i < ARRAY_SIZE(tlidevs); i++) {
2645 if (strcmp(devname, tlidevs[i]) == 0)
2646 return (B_TRUE);
2647 }
2648
2649 return (B_FALSE);
2650 }
2651
2652 static size_t
2653 prfdinfopath(proc_t *p, vnode_t *vp, list_t *data, cred_t *cred)
2654 {
2655 char *pathname;
2656 size_t pathlen;
2657 size_t sz = 0;
2658
2659 /*
2660 * The global zone's path to a file in a non-global zone can exceed
2661 * MAXPATHLEN.
2662 */
2663 pathlen = MAXPATHLEN * 2 + 1;
2664 pathname = kmem_alloc(pathlen, KM_SLEEP);
2665
2666 if (vnodetopath(NULL, vp, pathname, pathlen, cred) == 0) {
2667 sz += prfdinfomisc(data, PR_PATHNAME,
2668 pathname, strlen(pathname) + 1);
2669 }
2670
2671 kmem_free(pathname, pathlen);
2672
2673 return (sz);
2674 }
2675
2676 static size_t
2677 prfdinfotlisockopt(vnode_t *vp, list_t *data, cred_t *cred)
2678 {
2679 strcmd_t strcmd;
2680 int32_t rval;
2681 size_t sz = 0;
2682
2683 strcmd.sc_cmd = TI_GETMYNAME;
2684 strcmd.sc_timeout = 1;
2685 strcmd.sc_len = STRCMDBUFSIZE;
2686
2687 if (VOP_IOCTL(vp, _I_CMD, (intptr_t)&strcmd, FKIOCTL, cred,
2688 &rval, NULL) == 0 && strcmd.sc_len > 0) {
2689 sz += prfdinfomisc(data, PR_SOCKETNAME, strcmd.sc_buf,
2690 strcmd.sc_len);
2691 }
2692
2693 strcmd.sc_cmd = TI_GETPEERNAME;
2694 strcmd.sc_timeout = 1;
2695 strcmd.sc_len = STRCMDBUFSIZE;
2696
2697 if (VOP_IOCTL(vp, _I_CMD, (intptr_t)&strcmd, FKIOCTL, cred,
2698 &rval, NULL) == 0 && strcmd.sc_len > 0) {
2699 sz += prfdinfomisc(data, PR_PEERSOCKNAME, strcmd.sc_buf,
2700 strcmd.sc_len);
2701 }
2702
2703 return (sz);
2704 }
2705
2706 static size_t
2707 prfdinfosockopt(vnode_t *vp, list_t *data, cred_t *cred)
2708 {
2709 sonode_t *so;
2710 socklen_t vlen;
2711 size_t sz = 0;
2712 uint_t i;
2713
2714 if (vp->v_stream != NULL) {
2715 so = VTOSO(vp->v_stream->sd_vnode);
2716
2717 if (so->so_version == SOV_STREAM)
2718 so = NULL;
2719 } else {
2720 so = VTOSO(vp);
2721 }
2722
2723 if (so == NULL)
2724 return (0);
2725
2726 DTRACE_PROBE1(sonode, sonode_t *, so);
2727
2728 /* prmisc - PR_SOCKETNAME */
2729
2730 struct sockaddr_storage buf;
2731 struct sockaddr *name = (struct sockaddr *)&buf;
2732
2733 vlen = sizeof (buf);
2734 if (SOP_GETSOCKNAME(so, name, &vlen, cred) == 0 && vlen > 0)
2735 sz += prfdinfomisc(data, PR_SOCKETNAME, name, vlen);
2736
2737 /* prmisc - PR_PEERSOCKNAME */
2738
2739 vlen = sizeof (buf);
2740 if (SOP_GETPEERNAME(so, name, &vlen, B_FALSE, cred) == 0 && vlen > 0)
2741 sz += prfdinfomisc(data, PR_PEERSOCKNAME, name, vlen);
2742
2743 /* prmisc - PR_SOCKOPTS_BOOL_OPTS */
2744
2745 static struct boolopt {
2746 int level;
2747 int opt;
2748 int bopt;
2749 } boolopts[] = {
2750 { SOL_SOCKET, SO_DEBUG, PR_SO_DEBUG },
2751 { SOL_SOCKET, SO_REUSEADDR, PR_SO_REUSEADDR },
2752 #ifdef SO_REUSEPORT
2753 /* SmartOS and OmniOS have SO_REUSEPORT */
2754 { SOL_SOCKET, SO_REUSEPORT, PR_SO_REUSEPORT },
2755 #endif
2756 { SOL_SOCKET, SO_KEEPALIVE, PR_SO_KEEPALIVE },
2757 { SOL_SOCKET, SO_DONTROUTE, PR_SO_DONTROUTE },
2758 { SOL_SOCKET, SO_BROADCAST, PR_SO_BROADCAST },
2759 { SOL_SOCKET, SO_OOBINLINE, PR_SO_OOBINLINE },
2760 { SOL_SOCKET, SO_DGRAM_ERRIND, PR_SO_DGRAM_ERRIND },
2761 { SOL_SOCKET, SO_ALLZONES, PR_SO_ALLZONES },
2762 { SOL_SOCKET, SO_MAC_EXEMPT, PR_SO_MAC_EXEMPT },
2763 { SOL_SOCKET, SO_MAC_IMPLICIT, PR_SO_MAC_IMPLICIT },
2764 { SOL_SOCKET, SO_EXCLBIND, PR_SO_EXCLBIND },
2765 { SOL_SOCKET, SO_VRRP, PR_SO_VRRP },
2766 { IPPROTO_UDP, UDP_NAT_T_ENDPOINT,
2767 PR_UDP_NAT_T_ENDPOINT }
2768 };
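	/*
	 * Probe each option in the table and pack those that are set into
	 * a single bitmask, so that a consumer (pfiles(1), for instance)
	 * can decode every boolean option from one record instead of
	 * issuing its own getsockopt() calls.
	 */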
2769 prsockopts_bool_opts_t opts;
2770 int val;
2771
2772 if (data != NULL) {
2773 opts.prsock_bool_opts = 0;
2774
2775 for (i = 0; i < ARRAY_SIZE(boolopts); i++) {
2776 vlen = sizeof (val);
2777 if (SOP_GETSOCKOPT(so, boolopts[i].level,
2778 boolopts[i].opt, &val, &vlen, 0, cred) == 0 &&
2779 val != 0) {
2780 opts.prsock_bool_opts |= boolopts[i].bopt;
2781 }
2782 }
2783 }
2784
2785 sz += prfdinfomisc(data, PR_SOCKOPTS_BOOL_OPTS, &opts, sizeof (opts));
2786
2787 /* prmisc - PR_SOCKOPT_LINGER */
2788
2789 struct linger l;
2790
2791 vlen = sizeof (l);
2792 if (SOP_GETSOCKOPT(so, SOL_SOCKET, SO_LINGER, &l, &vlen,
2793 0, cred) == 0 && vlen > 0) {
2794 sz += prfdinfomisc(data, PR_SOCKOPT_LINGER, &l, vlen);
2795 }
2796
2797 /* prmisc - PR_SOCKOPT_* int types */
2798
2799 static struct sopt {
2800 int level;
2801 int opt;
2802 int bopt;
2803 } sopts[] = {
2804 { SOL_SOCKET, SO_TYPE, PR_SOCKOPT_TYPE },
2805 { SOL_SOCKET, SO_SNDBUF, PR_SOCKOPT_SNDBUF },
2806 { SOL_SOCKET, SO_RCVBUF, PR_SOCKOPT_RCVBUF }
2807 };
2808
2809 for (i = 0; i < ARRAY_SIZE(sopts); i++) {
2810 vlen = sizeof (val);
2811 if (SOP_GETSOCKOPT(so, sopts[i].level, sopts[i].opt,
2812 &val, &vlen, 0, cred) == 0 && vlen > 0) {
2813 sz += prfdinfomisc(data, sopts[i].bopt, &val, vlen);
2814 }
2815 }
2816
2817 /* prmisc - PR_SOCKOPT_IP_NEXTHOP */
2818
2819 in_addr_t nexthop_val;
2820
2821 vlen = sizeof (nexthop_val);
2822 if (SOP_GETSOCKOPT(so, IPPROTO_IP, IP_NEXTHOP,
2823 &nexthop_val, &vlen, 0, cred) == 0 && vlen > 0) {
2824 sz += prfdinfomisc(data, PR_SOCKOPT_IP_NEXTHOP,
2825 &nexthop_val, vlen);
2826 }
2827
2828 /* prmisc - PR_SOCKOPT_IPV6_NEXTHOP */
2829
2830 struct sockaddr_in6 nexthop6_val;
2831
2832 vlen = sizeof (nexthop6_val);
2833 if (SOP_GETSOCKOPT(so, IPPROTO_IPV6, IPV6_NEXTHOP,
2834 &nexthop6_val, &vlen, 0, cred) == 0 && vlen > 0) {
2835 sz += prfdinfomisc(data, PR_SOCKOPT_IPV6_NEXTHOP,
2836 &nexthop6_val, vlen);
2837 }
2838
2839 /* prmisc - PR_SOCKOPT_TCP_CONGESTION */
2840
2841 char cong[CC_ALGO_NAME_MAX];
2842
2843 vlen = sizeof (cong);
2844 if (SOP_GETSOCKOPT(so, IPPROTO_TCP, TCP_CONGESTION,
2845 &cong, &vlen, 0, cred) == 0 && vlen > 0) {
2846 sz += prfdinfomisc(data, PR_SOCKOPT_TCP_CONGESTION, cong, vlen);
2847 }
2848
2849 /* prmisc - PR_SOCKFILTERS_PRIV */
2850
2851 struct fil_info fi;
2852
2853 vlen = sizeof (fi);
2854 if (SOP_GETSOCKOPT(so, SOL_FILTER, FIL_LIST,
2855 &fi, &vlen, 0, cred) == 0 && vlen != 0) {
2856 pr_misc_header_t *misc;
2857 size_t len;
2858
2859 /*
2860 * We limit the number of returned filters to 32.
2861 * This is the maximum number that pfiles will print
2862 * anyway.
2863 */
2864 vlen = MIN(32, fi.fi_pos + 1);
2865 vlen *= sizeof (fi);
2866
2867 len = PRFDINFO_ROUNDUP(sizeof (*misc) + vlen);
2868 sz += len;
2869
2870 if (data != NULL) {
2871 /*
2872 * So that the filter list can be built incrementally,
2873 * prfdinfomisc() is not used here. Instead we
2874 * allocate a buffer directly on the copyout list using
2875 * pr_iol_newbuf()
2876 */
2877 misc = pr_iol_newbuf(data, len);
2878 misc->pr_misc_type = PR_SOCKFILTERS_PRIV;
2879 misc->pr_misc_size = len;
2880 misc++;
2881 len = vlen;
2882 if (SOP_GETSOCKOPT(so, SOL_FILTER, FIL_LIST,
2883 misc, &vlen, 0, cred) == 0) {
2884 /*
2885 				 * In case the number of filters has decreased
2886 * since the first call, explicitly zero out
2887 * any unpopulated space.
2888 */
2889 if (vlen < len)
2890 					bzero((char *)misc + vlen, len - vlen);
2891 } else {
2892 /* Something went wrong, zero out the result */
2893 bzero(misc, vlen);
2894 }
2895 }
2896 }
2897
2898 return (sz);
2899 }
2900
2901 typedef struct prfdinfo_nm_path_cbdata {
2902 proc_t *nmp_p;
2903 u_offset_t nmp_sz;
2904 list_t *nmp_data;
2905 } prfdinfo_nm_path_cbdata_t;
2906
2907 static int
2908 prfdinfo_nm_path(const struct namenode *np, cred_t *cred, void *arg)
2909 {
2910 prfdinfo_nm_path_cbdata_t *cb = arg;
2911
2912 cb->nmp_sz += prfdinfopath(cb->nmp_p, np->nm_vnode, cb->nmp_data, cred);
2913
2914 return (0);
2915 }
2916
2917 u_offset_t
2918 prgetfdinfosize(proc_t *p, vnode_t *vp, cred_t *cred)
2919 {
2920 u_offset_t sz;
2921
2922 /*
2923 * All fdinfo files will be at least this big -
2924 * sizeof fdinfo struct + zero length trailer
2925 */
2926 sz = offsetof(prfdinfo_t, pr_misc) + sizeof (pr_misc_header_t);
2927
2928 /* Pathname */
2929 switch (vp->v_type) {
2930 case VDOOR: {
2931 prfdinfo_nm_path_cbdata_t cb = {
2932 .nmp_p = p,
2933 .nmp_data = NULL,
2934 .nmp_sz = 0
2935 };
2936
2937 (void) nm_walk_mounts(vp, prfdinfo_nm_path, cred, &cb);
2938 sz += cb.nmp_sz;
2939 break;
2940 }
2941 case VSOCK:
2942 break;
2943 default:
2944 sz += prfdinfopath(p, vp, NULL, cred);
2945 }
2946
2947 /* Socket options */
2948 if (vp->v_type == VSOCK)
2949 sz += prfdinfosockopt(vp, NULL, cred);
2950
2951 /* TLI/XTI sockets */
2952 if (pristli(vp))
2953 sz += prfdinfotlisockopt(vp, NULL, cred);
2954
2955 return (sz);
2956 }
2957
2958 int
2959 prgetfdinfo(proc_t *p, vnode_t *vp, prfdinfo_t *fdinfo, cred_t *cred,
2960 cred_t *file_cred, list_t *data)
2961 {
2962 vattr_t vattr;
2963 int error;
2964
2965 /*
2966 * The buffer has been initialised to zero by pr_iol_newbuf().
2967 * Initialise defaults for any values that should not default to zero.
2968 */
2969 fdinfo->pr_uid = (uid_t)-1;
2970 fdinfo->pr_gid = (gid_t)-1;
2971 fdinfo->pr_size = -1;
2972 fdinfo->pr_locktype = F_UNLCK;
2973 fdinfo->pr_lockpid = -1;
2974 fdinfo->pr_locksysid = -1;
2975 fdinfo->pr_peerpid = -1;
2976
2977 /* Offset */
2978
2979 /*
2980 * pr_offset has already been set from the underlying file_t.
2981 * Check if it is plausible and reset to -1 if not.
2982 */
2983 if (fdinfo->pr_offset != -1 &&
2984 VOP_SEEK(vp, 0, (offset_t *)&fdinfo->pr_offset, NULL) != 0)
2985 fdinfo->pr_offset = -1;
2986
2987 /*
2988 * Attributes
2989 *
2990 * We have two cred_t structures available here.
2991 * 'cred' is the caller's credential, and 'file_cred' is the credential
2992 * for the file being inspected.
2993 *
2994 * When looking up the file attributes, file_cred is used in order
2995 * that the correct ownership is set for doors and FIFOs. Since the
2996 * caller has permission to read the fdinfo file in proc, this does
2997 * not expose any additional information.
2998 */
2999 vattr.va_mask = AT_STAT;
3000 if (VOP_GETATTR(vp, &vattr, 0, file_cred, NULL) == 0) {
3001 fdinfo->pr_major = getmajor(vattr.va_fsid);
3002 fdinfo->pr_minor = getminor(vattr.va_fsid);
3003 fdinfo->pr_rmajor = getmajor(vattr.va_rdev);
3004 fdinfo->pr_rminor = getminor(vattr.va_rdev);
3005 fdinfo->pr_ino = (ino64_t)vattr.va_nodeid;
3006 fdinfo->pr_size = (off64_t)vattr.va_size;
3007 fdinfo->pr_mode = VTTOIF(vattr.va_type) | vattr.va_mode;
3008 fdinfo->pr_uid = vattr.va_uid;
3009 fdinfo->pr_gid = vattr.va_gid;
3010 if (vp->v_type == VSOCK)
3011 fdinfo->pr_fileflags |= sock_getfasync(vp);
3012 }
3013
3014 /* locks */
3015
3016 flock64_t bf;
3017
3018 bzero(&bf, sizeof (bf));
3019 bf.l_type = F_WRLCK;
3020
3021 if (VOP_FRLOCK(vp, F_GETLK, &bf,
3022 (uint16_t)(fdinfo->pr_fileflags & 0xffff), 0, NULL,
3023 cred, NULL) == 0 && bf.l_type != F_UNLCK) {
3024 fdinfo->pr_locktype = bf.l_type;
3025 fdinfo->pr_lockpid = bf.l_pid;
3026 fdinfo->pr_locksysid = bf.l_sysid;
3027 }
3028
3029 /* peer cred */
3030
3031 k_peercred_t kpc;
3032
3033 switch (vp->v_type) {
3034 case VFIFO:
3035 case VSOCK: {
3036 int32_t rval;
3037
3038 error = VOP_IOCTL(vp, _I_GETPEERCRED, (intptr_t)&kpc,
3039 FKIOCTL, cred, &rval, NULL);
3040 break;
3041 }
3042 case VCHR: {
3043 struct strioctl strioc;
3044 int32_t rval;
3045
3046 if (vp->v_stream == NULL) {
3047 error = ENOTSUP;
3048 break;
3049 }
3050 strioc.ic_cmd = _I_GETPEERCRED;
3051 strioc.ic_timout = INFTIM;
3052 strioc.ic_len = (int)sizeof (k_peercred_t);
3053 strioc.ic_dp = (char *)&kpc;
3054
3055 error = strdoioctl(vp->v_stream, &strioc, FNATIVE | FKIOCTL,
3056 STR_NOSIG | K_TO_K, cred, &rval);
3057 break;
3058 }
3059 default:
3060 error = ENOTSUP;
3061 break;
3062 }
3063
3064 if (error == 0 && kpc.pc_cr != NULL) {
3065 proc_t *peerp;
3066
3067 fdinfo->pr_peerpid = kpc.pc_cpid;
3068
3069 crfree(kpc.pc_cr);
3070
3071 mutex_enter(&pidlock);
3072 if ((peerp = prfind(fdinfo->pr_peerpid)) != NULL) {
3073 user_t *up;
3074
3075 mutex_enter(&peerp->p_lock);
3076 mutex_exit(&pidlock);
3077
3078 up = PTOU(peerp);
3079 bcopy(up->u_comm, fdinfo->pr_peername,
3080 MIN(sizeof (up->u_comm),
3081 sizeof (fdinfo->pr_peername) - 1));
3082
3083 mutex_exit(&peerp->p_lock);
3084 } else {
3085 mutex_exit(&pidlock);
3086 }
3087 }
3088
3089 /* pathname */
3090
3091 switch (vp->v_type) {
3092 case VDOOR: {
3093 prfdinfo_nm_path_cbdata_t cb = {
3094 .nmp_p = p,
3095 .nmp_data = data,
3096 .nmp_sz = 0
3097 };
3098
3099 (void) nm_walk_mounts(vp, prfdinfo_nm_path, cred, &cb);
3100 break;
3101 }
3102 case VSOCK:
3103 /*
3104 * Don't attempt to determine the path for a socket as the
3105 * vnode has no associated v_path. It will cause a linear scan
3106 * of the dnlc table and result in no path being found.
3107 */
3108 break;
3109 default:
3110 (void) prfdinfopath(p, vp, data, cred);
3111 }
3112
3113 /* socket options */
3114 if (vp->v_type == VSOCK)
3115 (void) prfdinfosockopt(vp, data, cred);
3116
3117 /* TLI/XTI stream sockets */
3118 if (pristli(vp))
3119 (void) prfdinfotlisockopt(vp, data, cred);
3120
3121 /*
3122 * Add a terminating header with a zero size.
3123 */
3124 pr_misc_header_t *misc;
3125
3126 misc = pr_iol_newbuf(data, sizeof (*misc));
3127 misc->pr_misc_size = 0;
3128 misc->pr_misc_type = (uint_t)-1;
3129
3130 return (0);
3131 }
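
/*
 * The resulting fdinfo file is thus the fixed prfdinfo_t header followed
 * by a sequence of variable-length miscellaneous records (pathname,
 * socket options, peer names, ...) ending with the zero-size terminator
 * written above; this matches the minimum size computed in
 * prgetfdinfosize().
 */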
3132
3133 #ifdef _SYSCALL32_IMPL
3134 void
3135 prgetpsinfo32(proc_t *p, psinfo32_t *psp)
3136 {
3137 kthread_t *t;
3138 struct cred *cred;
3139 hrtime_t hrutime, hrstime;
3140
3141 ASSERT(MUTEX_HELD(&p->p_lock));
3142
3143 if ((t = prchoose(p)) == NULL) /* returns locked thread */
3144 bzero(psp, sizeof (*psp));
3145 else {
3146 thread_unlock(t);
3147 bzero(psp, sizeof (*psp) - sizeof (psp->pr_lwp));
3148 }
3149
3150 /*
3151 * only export SSYS and SMSACCT; everything else is off-limits to
3152 * userland apps.
3153 */
3154 psp->pr_flag = p->p_flag & (SSYS | SMSACCT);
3155 psp->pr_nlwp = p->p_lwpcnt;
3156 psp->pr_nzomb = p->p_zombcnt;
3157 mutex_enter(&p->p_crlock);
3158 cred = p->p_cred;
3159 psp->pr_uid = crgetruid(cred);
3160 psp->pr_euid = crgetuid(cred);
3161 psp->pr_gid = crgetrgid(cred);
3162 psp->pr_egid = crgetgid(cred);
3163 mutex_exit(&p->p_crlock);
3164 psp->pr_pid = p->p_pid;
3165 if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
3166 (p->p_flag & SZONETOP)) {
3167 ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
3168 /*
3169 		 * Inside local zones, fake zsched's pid as the parent pid for
3170 		 * processes which reference processes outside of the zone.
3171 */
3172 psp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
3173 } else {
3174 psp->pr_ppid = p->p_ppid;
3175 }
3176 psp->pr_pgid = p->p_pgrp;
3177 psp->pr_sid = p->p_sessp->s_sid;
3178 psp->pr_taskid = p->p_task->tk_tkid;
3179 psp->pr_projid = p->p_task->tk_proj->kpj_id;
3180 psp->pr_poolid = p->p_pool->pool_id;
3181 psp->pr_zoneid = p->p_zone->zone_id;
3182 if ((psp->pr_contract = PRCTID(p)) == 0)
3183 psp->pr_contract = -1;
3184 psp->pr_addr = 0; /* cannot represent 64-bit addr in 32 bits */
3185 switch (p->p_model) {
3186 case DATAMODEL_ILP32:
3187 psp->pr_dmodel = PR_MODEL_ILP32;
3188 break;
3189 case DATAMODEL_LP64:
3190 psp->pr_dmodel = PR_MODEL_LP64;
3191 break;
3192 }
3193 hrutime = mstate_aggr_state(p, LMS_USER);
3194 hrstime = mstate_aggr_state(p, LMS_SYSTEM);
3195 hrt2ts32(hrutime + hrstime, &psp->pr_time);
3196 TICK_TO_TIMESTRUC32(p->p_cutime + p->p_cstime, &psp->pr_ctime);
3197
3198 if (t == NULL) {
3199 extern int wstat(int, int); /* needs a header file */
3200 int wcode = p->p_wcode; /* must be atomic read */
3201
3202 if (wcode)
3203 psp->pr_wstat = wstat(wcode, p->p_wdata);
3204 psp->pr_ttydev = PRNODEV32;
3205 psp->pr_lwp.pr_state = SZOMB;
3206 psp->pr_lwp.pr_sname = 'Z';
3207 } else {
3208 user_t *up = PTOU(p);
3209 struct as *as;
3210 dev_t d;
3211 extern dev_t rwsconsdev, rconsdev, uconsdev;
3212
3213 d = cttydev(p);
3214 /*
3215 * If the controlling terminal is the real
3216 * or workstation console device, map to what the
3217 		 * user thinks is the console device. Handle the case where
3218 * rwsconsdev or rconsdev is set to NODEV for Starfire.
3219 */
3220 if ((d == rwsconsdev || d == rconsdev) && d != NODEV)
3221 d = uconsdev;
3222 (void) cmpldev(&psp->pr_ttydev, d);
3223 TIMESPEC_TO_TIMESPEC32(&psp->pr_start, &up->u_start);
3224 bcopy(up->u_comm, psp->pr_fname,
3225 MIN(sizeof (up->u_comm), sizeof (psp->pr_fname)-1));
3226 bcopy(up->u_psargs, psp->pr_psargs,
3227 MIN(PRARGSZ-1, PSARGSZ));
3228 psp->pr_argc = up->u_argc;
3229 psp->pr_argv = (caddr32_t)up->u_argv;
3230 psp->pr_envp = (caddr32_t)up->u_envp;
3231
3232 /* get the chosen lwp's lwpsinfo */
3233 prgetlwpsinfo32(t, &psp->pr_lwp);
3234
3235 /* compute %cpu for the process */
3236 if (p->p_lwpcnt == 1)
3237 psp->pr_pctcpu = psp->pr_lwp.pr_pctcpu;
3238 else {
3239 uint64_t pct = 0;
3240 hrtime_t cur_time;
3241
3242 t = p->p_tlist;
3243 cur_time = gethrtime_unscaled();
3244 do {
3245 pct += cpu_update_pct(t, cur_time);
3246 } while ((t = t->t_forw) != p->p_tlist);
3247
3248 psp->pr_pctcpu = prgetpctcpu(pct);
3249 }
3250 if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) {
3251 psp->pr_size = 0;
3252 psp->pr_rssize = 0;
3253 } else {
3254 mutex_exit(&p->p_lock);
3255 AS_LOCK_ENTER(as, RW_READER);
3256 psp->pr_size = (size32_t)
3257 (btopr(as->a_resvsize) * (PAGESIZE / 1024));
3258 psp->pr_rssize = (size32_t)
3259 (rm_asrss(as) * (PAGESIZE / 1024));
3260 psp->pr_pctmem = rm_pctmemory(as);
3261 AS_LOCK_EXIT(as);
3262 mutex_enter(&p->p_lock);
3263 }
3264 }
3265
3266 /*
3267 * If we are looking at an LP64 process, zero out
3268 * the fields that cannot be represented in ILP32.
3269 */
3270 if (p->p_model != DATAMODEL_ILP32) {
3271 psp->pr_size = 0;
3272 psp->pr_rssize = 0;
3273 psp->pr_argv = 0;
3274 psp->pr_envp = 0;
3275 }
3276 }
3277
3278 #endif /* _SYSCALL32_IMPL */
3279
3280 void
3281 prgetlwpsinfo(kthread_t *t, lwpsinfo_t *psp)
3282 {
3283 klwp_t *lwp = ttolwp(t);
3284 sobj_ops_t *sobj;
3285 char c, state;
3286 uint64_t pct;
3287 int retval, niceval;
3288 hrtime_t hrutime, hrstime;
3289
3290 ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));
3291
3292 bzero(psp, sizeof (*psp));
3293
3294 psp->pr_flag = 0; /* lwpsinfo_t.pr_flag is deprecated */
3295 psp->pr_lwpid = t->t_tid;
3296 psp->pr_addr = (uintptr_t)t;
3297 psp->pr_wchan = (uintptr_t)t->t_wchan;
3298
3299 /* map the thread state enum into a process state enum */
3300 state = VSTOPPED(t) ? TS_STOPPED : t->t_state;
3301 switch (state) {
3302 case TS_SLEEP: state = SSLEEP; c = 'S'; break;
3303 case TS_RUN: state = SRUN; c = 'R'; break;
3304 case TS_ONPROC: state = SONPROC; c = 'O'; break;
3305 case TS_ZOMB: state = SZOMB; c = 'Z'; break;
3306 case TS_STOPPED: state = SSTOP; c = 'T'; break;
3307 case TS_WAIT: state = SWAIT; c = 'W'; break;
3308 default: state = 0; c = '?'; break;
3309 }
3310 psp->pr_state = state;
3311 psp->pr_sname = c;
3312 if ((sobj = t->t_sobj_ops) != NULL)
3313 psp->pr_stype = SOBJ_TYPE(sobj);
3314 retval = CL_DONICE(t, NULL, 0, &niceval);
3315 if (retval == 0) {
3316 psp->pr_oldpri = v.v_maxsyspri - t->t_pri;
3317 psp->pr_nice = niceval + NZERO;
3318 }
3319 psp->pr_syscall = t->t_sysnum;
3320 psp->pr_pri = t->t_pri;
3321 psp->pr_start.tv_sec = t->t_start;
3322 psp->pr_start.tv_nsec = 0L;
3323 hrutime = lwp->lwp_mstate.ms_acct[LMS_USER];
3324 scalehrtime(&hrutime);
3325 hrstime = lwp->lwp_mstate.ms_acct[LMS_SYSTEM] +
3326 lwp->lwp_mstate.ms_acct[LMS_TRAP];
3327 scalehrtime(&hrstime);
3328 hrt2ts(hrutime + hrstime, &psp->pr_time);
3329 /* compute %cpu for the lwp */
3330 pct = cpu_update_pct(t, gethrtime_unscaled());
3331 psp->pr_pctcpu = prgetpctcpu(pct);
3332 psp->pr_cpu = (psp->pr_pctcpu*100 + 0x6000) >> 15; /* [0..99] */
3333 if (psp->pr_cpu > 99)
3334 psp->pr_cpu = 99;
3335
3336 (void) strncpy(psp->pr_clname, sclass[t->t_cid].cl_name,
3337 sizeof (psp->pr_clname) - 1);
3338 bzero(psp->pr_name, sizeof (psp->pr_name)); /* XXX ??? */
3339 psp->pr_onpro = t->t_cpu->cpu_id;
3340 psp->pr_bindpro = t->t_bind_cpu;
3341 psp->pr_bindpset = t->t_bind_pset;
3342 psp->pr_lgrp = t->t_lpl->lpl_lgrpid;
3343 }
3344
3345 #ifdef _SYSCALL32_IMPL
3346 void
3347 prgetlwpsinfo32(kthread_t *t, lwpsinfo32_t *psp)
3348 {
3349 klwp_t *lwp = ttolwp(t);
3350 sobj_ops_t *sobj;
3351 char c, state;
3352 uint64_t pct;
3353 int retval, niceval;
3354 hrtime_t hrutime, hrstime;
3355
3356 ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));
3357
3358 bzero(psp, sizeof (*psp));
3359
3360 psp->pr_flag = 0; /* lwpsinfo_t.pr_flag is deprecated */
3361 psp->pr_lwpid = t->t_tid;
3362 psp->pr_addr = 0; /* cannot represent 64-bit addr in 32 bits */
3363 psp->pr_wchan = 0; /* cannot represent 64-bit addr in 32 bits */
3364
3365 /* map the thread state enum into a process state enum */
3366 state = VSTOPPED(t) ? TS_STOPPED : t->t_state;
3367 switch (state) {
3368 case TS_SLEEP: state = SSLEEP; c = 'S'; break;
3369 case TS_RUN: state = SRUN; c = 'R'; break;
3370 case TS_ONPROC: state = SONPROC; c = 'O'; break;
3371 case TS_ZOMB: state = SZOMB; c = 'Z'; break;
3372 case TS_STOPPED: state = SSTOP; c = 'T'; break;
3373 case TS_WAIT: state = SWAIT; c = 'W'; break;
3374 default: state = 0; c = '?'; break;
3375 }
3376 psp->pr_state = state;
3377 psp->pr_sname = c;
3378 if ((sobj = t->t_sobj_ops) != NULL)
3379 psp->pr_stype = SOBJ_TYPE(sobj);
3380 retval = CL_DONICE(t, NULL, 0, &niceval);
3381 if (retval == 0) {
3382 psp->pr_oldpri = v.v_maxsyspri - t->t_pri;
3383 psp->pr_nice = niceval + NZERO;
3384 } else {
3385 psp->pr_oldpri = 0;
3386 psp->pr_nice = 0;
3387 }
3388 psp->pr_syscall = t->t_sysnum;
3389 psp->pr_pri = t->t_pri;
3390 psp->pr_start.tv_sec = (time32_t)t->t_start;
3391 psp->pr_start.tv_nsec = 0L;
3392 hrutime = lwp->lwp_mstate.ms_acct[LMS_USER];
3393 scalehrtime(&hrutime);
3394 hrstime = lwp->lwp_mstate.ms_acct[LMS_SYSTEM] +
3395 lwp->lwp_mstate.ms_acct[LMS_TRAP];
3396 scalehrtime(&hrstime);
3397 hrt2ts32(hrutime + hrstime, &psp->pr_time);
3398 /* compute %cpu for the lwp */
3399 pct = cpu_update_pct(t, gethrtime_unscaled());
3400 psp->pr_pctcpu = prgetpctcpu(pct);
3401 psp->pr_cpu = (psp->pr_pctcpu*100 + 0x6000) >> 15; /* [0..99] */
3402 if (psp->pr_cpu > 99)
3403 psp->pr_cpu = 99;
3404
3405 (void) strncpy(psp->pr_clname, sclass[t->t_cid].cl_name,
3406 sizeof (psp->pr_clname) - 1);
3407 bzero(psp->pr_name, sizeof (psp->pr_name)); /* XXX ??? */
3408 psp->pr_onpro = t->t_cpu->cpu_id;
3409 psp->pr_bindpro = t->t_bind_cpu;
3410 psp->pr_bindpset = t->t_bind_pset;
3411 psp->pr_lgrp = t->t_lpl->lpl_lgrpid;
3412 }
3413 #endif /* _SYSCALL32_IMPL */
3414
3415 #ifdef _SYSCALL32_IMPL
3416
3417 #define PR_COPY_FIELD(s, d, field) d->field = s->field
3418
3419 #define PR_COPY_FIELD_ILP32(s, d, field) \
3420 if (s->pr_dmodel == PR_MODEL_ILP32) { \
3421 d->field = s->field; \
3422 }
3423
3424 #define PR_COPY_TIMESPEC(s, d, field) \
3425 TIMESPEC_TO_TIMESPEC32(&d->field, &s->field);
3426
3427 #define PR_COPY_BUF(s, d, field) \
3428 bcopy(s->field, d->field, sizeof (d->field));
3429
3430 #define PR_IGNORE_FIELD(s, d, field)
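
/*
 * These macros copy a native psinfo/lwpsinfo into its 32-bit counterpart
 * field by field.  PR_COPY_FIELD_ILP32 is for values such as sizes and
 * user addresses that are representable only when the target process is
 * itself ILP32; for LP64 targets they stay zero from the callers'
 * bzero().  PR_IGNORE_FIELD marks fields (kernel addresses) that
 * deliberately have no 32-bit representation.
 */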
3431
3432 void
3433 lwpsinfo_kto32(const struct lwpsinfo *src, struct lwpsinfo32 *dest)
3434 {
3435 bzero(dest, sizeof (*dest));
3436
3437 PR_COPY_FIELD(src, dest, pr_flag);
3438 PR_COPY_FIELD(src, dest, pr_lwpid);
3439 PR_IGNORE_FIELD(src, dest, pr_addr);
3440 PR_IGNORE_FIELD(src, dest, pr_wchan);
3441 PR_COPY_FIELD(src, dest, pr_stype);
3442 PR_COPY_FIELD(src, dest, pr_state);
3443 PR_COPY_FIELD(src, dest, pr_sname);
3444 PR_COPY_FIELD(src, dest, pr_nice);
3445 PR_COPY_FIELD(src, dest, pr_syscall);
3446 PR_COPY_FIELD(src, dest, pr_oldpri);
3447 PR_COPY_FIELD(src, dest, pr_cpu);
3448 PR_COPY_FIELD(src, dest, pr_pri);
3449 PR_COPY_FIELD(src, dest, pr_pctcpu);
3450 PR_COPY_TIMESPEC(src, dest, pr_start);
3451 PR_COPY_BUF(src, dest, pr_clname);
3452 PR_COPY_BUF(src, dest, pr_name);
3453 PR_COPY_FIELD(src, dest, pr_onpro);
3454 PR_COPY_FIELD(src, dest, pr_bindpro);
3455 PR_COPY_FIELD(src, dest, pr_bindpset);
3456 PR_COPY_FIELD(src, dest, pr_lgrp);
3457 }
3458
3459 void
3460 psinfo_kto32(const struct psinfo *src, struct psinfo32 *dest)
3461 {
3462 bzero(dest, sizeof (*dest));
3463
3464 PR_COPY_FIELD(src, dest, pr_flag);
3465 PR_COPY_FIELD(src, dest, pr_nlwp);
3466 PR_COPY_FIELD(src, dest, pr_pid);
3467 PR_COPY_FIELD(src, dest, pr_ppid);
3468 PR_COPY_FIELD(src, dest, pr_pgid);
3469 PR_COPY_FIELD(src, dest, pr_sid);
3470 PR_COPY_FIELD(src, dest, pr_uid);
3471 PR_COPY_FIELD(src, dest, pr_euid);
3472 PR_COPY_FIELD(src, dest, pr_gid);
3473 PR_COPY_FIELD(src, dest, pr_egid);
3474 PR_IGNORE_FIELD(src, dest, pr_addr);
3475 PR_COPY_FIELD_ILP32(src, dest, pr_size);
3476 PR_COPY_FIELD_ILP32(src, dest, pr_rssize);
3477 PR_COPY_FIELD(src, dest, pr_ttydev);
3478 PR_COPY_FIELD(src, dest, pr_pctcpu);
3479 PR_COPY_FIELD(src, dest, pr_pctmem);
3480 PR_COPY_TIMESPEC(src, dest, pr_start);
3481 PR_COPY_TIMESPEC(src, dest, pr_time);
3482 PR_COPY_TIMESPEC(src, dest, pr_ctime);
3483 PR_COPY_BUF(src, dest, pr_fname);
3484 PR_COPY_BUF(src, dest, pr_psargs);
3485 PR_COPY_FIELD(src, dest, pr_wstat);
3486 PR_COPY_FIELD(src, dest, pr_argc);
3487 PR_COPY_FIELD_ILP32(src, dest, pr_argv);
3488 PR_COPY_FIELD_ILP32(src, dest, pr_envp);
3489 PR_COPY_FIELD(src, dest, pr_dmodel);
3490 PR_COPY_FIELD(src, dest, pr_taskid);
3491 PR_COPY_FIELD(src, dest, pr_projid);
3492 PR_COPY_FIELD(src, dest, pr_nzomb);
3493 PR_COPY_FIELD(src, dest, pr_poolid);
3494 PR_COPY_FIELD(src, dest, pr_contract);
3497
3498 lwpsinfo_kto32(&src->pr_lwp, &dest->pr_lwp);
3499 }
3500
3501 #undef PR_COPY_FIELD
3502 #undef PR_COPY_FIELD_ILP32
3503 #undef PR_COPY_TIMESPEC
3504 #undef PR_COPY_BUF
3505 #undef PR_IGNORE_FIELD
3506
3507 #endif /* _SYSCALL32_IMPL */
3508
3509 /*
3510 * This used to get called when microstate accounting was disabled but
3511 * microstate information was requested. Since microstate accounting is on
3512 * regardless of the proc flags, this simply makes it appear to procfs that
3513 * microstate accounting is on. This is relatively meaningless since you
3514 * can't turn it off, but this is here for the sake of appearances.
3515 */
3516
3517 /*ARGSUSED*/
3518 void
3519 estimate_msacct(kthread_t *t, hrtime_t curtime)
3520 {
3521 proc_t *p;
3522
3523 if (t == NULL)
3524 return;
3525
3526 p = ttoproc(t);
3527 ASSERT(MUTEX_HELD(&p->p_lock));
3528
3529 /*
3530 * A system process (p0) could be referenced if the thread is
3531 * in the process of exiting. Don't turn on microstate accounting
3532 * in that case.
3533 */
3534 if (p->p_flag & SSYS)
3535 return;
3536
3537 /*
3538 * Loop through all the LWPs (kernel threads) in the process.
3539 */
3540 t = p->p_tlist;
3541 do {
3542 t->t_proc_flag |= TP_MSACCT;
3543 } while ((t = t->t_forw) != p->p_tlist);
3544
3545 p->p_flag |= SMSACCT; /* set process-wide MSACCT */
3546 }
3547
3548 /*
3549 * It's not really possible to disable microstate accounting anymore.
3550 * However, this routine simply turns off the ms accounting flags in a process.
3551 * This way procfs can still pretend to turn microstate accounting on and
3552 * off for a process, but it actually doesn't do anything. This is
3553 * a neutered form of preemptive idiot-proofing.
3554 */
3555 void
3556 disable_msacct(proc_t *p)
3557 {
3558 kthread_t *t;
3559
3560 ASSERT(MUTEX_HELD(&p->p_lock));
3561
3562 p->p_flag &= ~SMSACCT; /* clear process-wide MSACCT */
3563 /*
3564 * Loop through all the LWPs (kernel threads) in the process.
3565 */
3566 if ((t = p->p_tlist) != NULL) {
3567 do {
3568 /* clear per-thread flag */
3569 t->t_proc_flag &= ~TP_MSACCT;
3570 } while ((t = t->t_forw) != p->p_tlist);
3571 }
3572 }
3573
3574 /*
3575 * Return resource usage information.
3576 */
3577 void
3578 prgetusage(kthread_t *t, prhusage_t *pup)
3579 {
3580 klwp_t *lwp = ttolwp(t);
3581 hrtime_t *mstimep;
3582 struct mstate *ms = &lwp->lwp_mstate;
3583 int state;
3584 int i;
3585 hrtime_t curtime;
3586 hrtime_t waitrq;
3587 hrtime_t tmp1;
3588
3589 curtime = gethrtime_unscaled();
3590
3591 pup->pr_lwpid = t->t_tid;
3592 pup->pr_count = 1;
3593 pup->pr_create = ms->ms_start;
3594 pup->pr_term = ms->ms_term;
3595 scalehrtime(&pup->pr_create);
3596 scalehrtime(&pup->pr_term);
3597 if (ms->ms_term == 0) {
3598 pup->pr_rtime = curtime - ms->ms_start;
3599 scalehrtime(&pup->pr_rtime);
3600 } else {
3601 pup->pr_rtime = ms->ms_term - ms->ms_start;
3602 scalehrtime(&pup->pr_rtime);
3603 }
3604
3605
3606 pup->pr_utime = ms->ms_acct[LMS_USER];
3607 pup->pr_stime = ms->ms_acct[LMS_SYSTEM];
3608 pup->pr_ttime = ms->ms_acct[LMS_TRAP];
3609 pup->pr_tftime = ms->ms_acct[LMS_TFAULT];
3610 pup->pr_dftime = ms->ms_acct[LMS_DFAULT];
3611 pup->pr_kftime = ms->ms_acct[LMS_KFAULT];
3612 pup->pr_ltime = ms->ms_acct[LMS_USER_LOCK];
3613 pup->pr_slptime = ms->ms_acct[LMS_SLEEP];
3614 pup->pr_wtime = ms->ms_acct[LMS_WAIT_CPU];
3615 pup->pr_stoptime = ms->ms_acct[LMS_STOPPED];
3616
3617 prscaleusage(pup);
3618
3619 /*
3620 * Adjust for time waiting in the dispatcher queue.
3621 */
3622 waitrq = t->t_waitrq; /* hopefully atomic */
3623 if (waitrq != 0) {
3624 if (waitrq > curtime) {
3625 curtime = gethrtime_unscaled();
3626 }
3627 tmp1 = curtime - waitrq;
3628 scalehrtime(&tmp1);
3629 pup->pr_wtime += tmp1;
3630 curtime = waitrq;
3631 }
3632
3633 /*
3634 * Adjust for time spent in current microstate.
3635 */
3636 if (ms->ms_state_start > curtime) {
3637 curtime = gethrtime_unscaled();
3638 }
3639
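	/*
	 * Unscaled hrtime readings taken on different CPUs are not
	 * guaranteed to be mutually monotonic, so the elapsed time in the
	 * current microstate can come out negative.  If it does, re-read
	 * the clock and retry, giving up after MAX_ITERS_SPIN attempts
	 * rather than spinning indefinitely on a misbehaving time source.
	 */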
3640 i = 0;
3641 do {
3642 switch (state = t->t_mstate) {
3643 case LMS_SLEEP:
3644 /*
3645 * Update the timer for the current sleep state.
3646 */
3647 switch (state = ms->ms_prev) {
3648 case LMS_TFAULT:
3649 case LMS_DFAULT:
3650 case LMS_KFAULT:
3651 case LMS_USER_LOCK:
3652 break;
3653 default:
3654 state = LMS_SLEEP;
3655 break;
3656 }
3657 break;
3658 case LMS_TFAULT:
3659 case LMS_DFAULT:
3660 case LMS_KFAULT:
3661 case LMS_USER_LOCK:
3662 state = LMS_SYSTEM;
3663 break;
3664 }
3665 switch (state) {
3666 case LMS_USER: mstimep = &pup->pr_utime; break;
3667 case LMS_SYSTEM: mstimep = &pup->pr_stime; break;
3668 case LMS_TRAP: mstimep = &pup->pr_ttime; break;
3669 case LMS_TFAULT: mstimep = &pup->pr_tftime; break;
3670 case LMS_DFAULT: mstimep = &pup->pr_dftime; break;
3671 case LMS_KFAULT: mstimep = &pup->pr_kftime; break;
3672 case LMS_USER_LOCK: mstimep = &pup->pr_ltime; break;
3673 case LMS_SLEEP: mstimep = &pup->pr_slptime; break;
3674 case LMS_WAIT_CPU: mstimep = &pup->pr_wtime; break;
3675 case LMS_STOPPED: mstimep = &pup->pr_stoptime; break;
3676 default: panic("prgetusage: unknown microstate");
3677 }
3678 tmp1 = curtime - ms->ms_state_start;
3679 if (tmp1 < 0) {
3680 curtime = gethrtime_unscaled();
3681 i++;
3682 continue;
3683 }
3684 scalehrtime(&tmp1);
3685 } while (tmp1 < 0 && i < MAX_ITERS_SPIN);
3686
3687 *mstimep += tmp1;
3688
3689 /* update pup timestamp */
3690 pup->pr_tstamp = curtime;
3691 scalehrtime(&pup->pr_tstamp);
3692
3693 /*
3694 * Resource usage counters.
3695 */
3696 pup->pr_minf = lwp->lwp_ru.minflt;
3697 pup->pr_majf = lwp->lwp_ru.majflt;
3698 pup->pr_nswap = lwp->lwp_ru.nswap;
3699 pup->pr_inblk = lwp->lwp_ru.inblock;
3700 pup->pr_oublk = lwp->lwp_ru.oublock;
3701 pup->pr_msnd = lwp->lwp_ru.msgsnd;
3702 pup->pr_mrcv = lwp->lwp_ru.msgrcv;
3703 pup->pr_sigs = lwp->lwp_ru.nsignals;
3704 pup->pr_vctx = lwp->lwp_ru.nvcsw;
3705 pup->pr_ictx = lwp->lwp_ru.nivcsw;
3706 pup->pr_sysc = lwp->lwp_ru.sysc;
3707 pup->pr_ioch = lwp->lwp_ru.ioch;
3708 }
3709
3710 /*
3711 * Convert ms_acct stats from unscaled high-res time to nanoseconds
3712 */
3713 void
3714 prscaleusage(prhusage_t *usg)
3715 {
3716 scalehrtime(&usg->pr_utime);
3717 scalehrtime(&usg->pr_stime);
3718 scalehrtime(&usg->pr_ttime);
3719 scalehrtime(&usg->pr_tftime);
3720 scalehrtime(&usg->pr_dftime);
3721 scalehrtime(&usg->pr_kftime);
3722 scalehrtime(&usg->pr_ltime);
3723 scalehrtime(&usg->pr_slptime);
3724 scalehrtime(&usg->pr_wtime);
3725 scalehrtime(&usg->pr_stoptime);
3726 }
3727
3728
3729 /*
3730 * Sum resource usage information.
3731 */
3732 void
3733 praddusage(kthread_t *t, prhusage_t *pup)
3734 {
3735 klwp_t *lwp = ttolwp(t);
3736 hrtime_t *mstimep;
3737 struct mstate *ms = &lwp->lwp_mstate;
3738 int state;
3739 int i;
3740 hrtime_t curtime;
3741 hrtime_t waitrq;
3742 hrtime_t tmp;
3743 prhusage_t conv;
3744
3745 curtime = gethrtime_unscaled();
3746
3747 if (ms->ms_term == 0) {
3748 tmp = curtime - ms->ms_start;
3749 scalehrtime(&tmp);
3750 pup->pr_rtime += tmp;
3751 } else {
3752 tmp = ms->ms_term - ms->ms_start;
3753 scalehrtime(&tmp);
3754 pup->pr_rtime += tmp;
3755 }
3756
3757 conv.pr_utime = ms->ms_acct[LMS_USER];
3758 conv.pr_stime = ms->ms_acct[LMS_SYSTEM];
3759 conv.pr_ttime = ms->ms_acct[LMS_TRAP];
3760 conv.pr_tftime = ms->ms_acct[LMS_TFAULT];
3761 conv.pr_dftime = ms->ms_acct[LMS_DFAULT];
3762 conv.pr_kftime = ms->ms_acct[LMS_KFAULT];
3763 conv.pr_ltime = ms->ms_acct[LMS_USER_LOCK];
3764 conv.pr_slptime = ms->ms_acct[LMS_SLEEP];
3765 conv.pr_wtime = ms->ms_acct[LMS_WAIT_CPU];
3766 conv.pr_stoptime = ms->ms_acct[LMS_STOPPED];
3767
3768 prscaleusage(&conv);
3769
3770 pup->pr_utime += conv.pr_utime;
3771 pup->pr_stime += conv.pr_stime;
3772 pup->pr_ttime += conv.pr_ttime;
3773 pup->pr_tftime += conv.pr_tftime;
3774 pup->pr_dftime += conv.pr_dftime;
3775 pup->pr_kftime += conv.pr_kftime;
3776 pup->pr_ltime += conv.pr_ltime;
3777 pup->pr_slptime += conv.pr_slptime;
3778 pup->pr_wtime += conv.pr_wtime;
3779 pup->pr_stoptime += conv.pr_stoptime;
3780
3781 /*
3782 * Adjust for time waiting in the dispatcher queue.
3783 */
3784 waitrq = t->t_waitrq; /* hopefully atomic */
3785 if (waitrq != 0) {
3786 if (waitrq > curtime) {
3787 curtime = gethrtime_unscaled();
3788 }
3789 tmp = curtime - waitrq;
3790 scalehrtime(&tmp);
3791 pup->pr_wtime += tmp;
3792 curtime = waitrq;
3793 }
3794
3795 /*
3796 * Adjust for time spent in current microstate.
3797 */
3798 if (ms->ms_state_start > curtime) {
3799 curtime = gethrtime_unscaled();
3800 }
3801
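	/*
	 * As in prgetusage(), retry if non-monotonic unscaled time makes
	 * the elapsed microstate time appear negative.
	 */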
3802 i = 0;
3803 do {
3804 switch (state = t->t_mstate) {
3805 case LMS_SLEEP:
3806 /*
3807 * Update the timer for the current sleep state.
3808 */
3809 switch (state = ms->ms_prev) {
3810 case LMS_TFAULT:
3811 case LMS_DFAULT:
3812 case LMS_KFAULT:
3813 case LMS_USER_LOCK:
3814 break;
3815 default:
3816 state = LMS_SLEEP;
3817 break;
3818 }
3819 break;
3820 case LMS_TFAULT:
3821 case LMS_DFAULT:
3822 case LMS_KFAULT:
3823 case LMS_USER_LOCK:
3824 state = LMS_SYSTEM;
3825 break;
3826 }
3827 switch (state) {
3828 case LMS_USER: mstimep = &pup->pr_utime; break;
3829 case LMS_SYSTEM: mstimep = &pup->pr_stime; break;
3830 case LMS_TRAP: mstimep = &pup->pr_ttime; break;
3831 case LMS_TFAULT: mstimep = &pup->pr_tftime; break;
3832 case LMS_DFAULT: mstimep = &pup->pr_dftime; break;
3833 case LMS_KFAULT: mstimep = &pup->pr_kftime; break;
3834 case LMS_USER_LOCK: mstimep = &pup->pr_ltime; break;
3835 case LMS_SLEEP: mstimep = &pup->pr_slptime; break;
3836 case LMS_WAIT_CPU: mstimep = &pup->pr_wtime; break;
3837 case LMS_STOPPED: mstimep = &pup->pr_stoptime; break;
3838 default: panic("praddusage: unknown microstate");
3839 }
3840 tmp = curtime - ms->ms_state_start;
3841 if (tmp < 0) {
3842 curtime = gethrtime_unscaled();
3843 i++;
3844 continue;
3845 }
3846 scalehrtime(&tmp);
3847 } while (tmp < 0 && i < MAX_ITERS_SPIN);
3848
3849 *mstimep += tmp;
3850
3851 /* update pup timestamp */
3852 pup->pr_tstamp = curtime;
3853 scalehrtime(&pup->pr_tstamp);
3854
3855 /*
3856 * Resource usage counters.
3857 */
3858 pup->pr_minf += lwp->lwp_ru.minflt;
3859 pup->pr_majf += lwp->lwp_ru.majflt;
3860 pup->pr_nswap += lwp->lwp_ru.nswap;
3861 pup->pr_inblk += lwp->lwp_ru.inblock;
3862 pup->pr_oublk += lwp->lwp_ru.oublock;
3863 pup->pr_msnd += lwp->lwp_ru.msgsnd;
3864 pup->pr_mrcv += lwp->lwp_ru.msgrcv;
3865 pup->pr_sigs += lwp->lwp_ru.nsignals;
3866 pup->pr_vctx += lwp->lwp_ru.nvcsw;
3867 pup->pr_ictx += lwp->lwp_ru.nivcsw;
3868 pup->pr_sysc += lwp->lwp_ru.sysc;
3869 pup->pr_ioch += lwp->lwp_ru.ioch;
3870 }
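
/*
 * A note on the do/while loop above: because the target thread may
 * still be running, ms_state_start can advance underneath us and make
 * the computed delta negative.  When that happens we re-read the
 * unscaled clock and try again, giving up after MAX_ITERS_SPIN passes
 * rather than spinning indefinitely.  A minimal sketch of the pattern:
 *
 *	do {
 *		tmp = curtime - ms->ms_state_start;
 *		if (tmp < 0)
 *			curtime = gethrtime_unscaled();
 *	} while (tmp < 0 && ++i < MAX_ITERS_SPIN);
 */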
3871
3872 /*
3873 * Convert a prhusage_t to a prusage_t.
3874 * This means convert each hrtime_t to a timestruc_t
3875 * and copy the count fields uint64_t => ulong_t.
3876 */
3877 void
3878 prcvtusage(prhusage_t *pup, prusage_t *upup)
3879 {
3880 uint64_t *ullp;
3881 ulong_t *ulp;
3882 int i;
3883
3884 upup->pr_lwpid = pup->pr_lwpid;
3885 upup->pr_count = pup->pr_count;
3886
3887 hrt2ts(pup->pr_tstamp, &upup->pr_tstamp);
3888 hrt2ts(pup->pr_create, &upup->pr_create);
3889 hrt2ts(pup->pr_term, &upup->pr_term);
3890 hrt2ts(pup->pr_rtime, &upup->pr_rtime);
3891 hrt2ts(pup->pr_utime, &upup->pr_utime);
3892 hrt2ts(pup->pr_stime, &upup->pr_stime);
3893 hrt2ts(pup->pr_ttime, &upup->pr_ttime);
3894 hrt2ts(pup->pr_tftime, &upup->pr_tftime);
3895 hrt2ts(pup->pr_dftime, &upup->pr_dftime);
3896 hrt2ts(pup->pr_kftime, &upup->pr_kftime);
3897 hrt2ts(pup->pr_ltime, &upup->pr_ltime);
3898 hrt2ts(pup->pr_slptime, &upup->pr_slptime);
3899 hrt2ts(pup->pr_wtime, &upup->pr_wtime);
3900 hrt2ts(pup->pr_stoptime, &upup->pr_stoptime);
3901 bzero(upup->filltime, sizeof (upup->filltime));
3902
3903 ullp = &pup->pr_minf;
3904 ulp = &upup->pr_minf;
3905 for (i = 0; i < 22; i++)
3906 *ulp++ = (ulong_t)*ullp++;
3907 }
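
/*
 * The bulk copy above assumes that the 12 counters from pr_minf
 * through pr_ioch, together with the pad words that follow them, form
 * 22 consecutive uint64_t fields in prhusage_t mirrored by 22
 * consecutive ulong_t fields in prusage_t; any change to either
 * structure must keep this loop count in sync.
 */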
3908
3909 #ifdef _SYSCALL32_IMPL
3910 void
3911 prcvtusage32(prhusage_t *pup, prusage32_t *upup)
3912 {
3913 uint64_t *ullp;
3914 uint32_t *ulp;
3915 int i;
3916
3917 upup->pr_lwpid = pup->pr_lwpid;
3918 upup->pr_count = pup->pr_count;
3919
3920 hrt2ts32(pup->pr_tstamp, &upup->pr_tstamp);
3921 hrt2ts32(pup->pr_create, &upup->pr_create);
3922 hrt2ts32(pup->pr_term, &upup->pr_term);
3923 hrt2ts32(pup->pr_rtime, &upup->pr_rtime);
3924 hrt2ts32(pup->pr_utime, &upup->pr_utime);
3925 hrt2ts32(pup->pr_stime, &upup->pr_stime);
3926 hrt2ts32(pup->pr_ttime, &upup->pr_ttime);
3927 hrt2ts32(pup->pr_tftime, &upup->pr_tftime);
3928 hrt2ts32(pup->pr_dftime, &upup->pr_dftime);
3929 hrt2ts32(pup->pr_kftime, &upup->pr_kftime);
3930 hrt2ts32(pup->pr_ltime, &upup->pr_ltime);
3931 hrt2ts32(pup->pr_slptime, &upup->pr_slptime);
3932 hrt2ts32(pup->pr_wtime, &upup->pr_wtime);
3933 hrt2ts32(pup->pr_stoptime, &upup->pr_stoptime);
3934 bzero(upup->filltime, sizeof (upup->filltime));
3935
3936 ullp = &pup->pr_minf;
3937 ulp = &upup->pr_minf;
3938 for (i = 0; i < 22; i++)
3939 *ulp++ = (uint32_t)*ullp++;
3940 }
3941 #endif /* _SYSCALL32_IMPL */
3942
3943 /*
3944 * Determine whether a set is empty.
3945 */
3946 int
3947 setisempty(uint32_t *sp, uint_t n)
3948 {
3949 while (n--)
3950 if (*sp++)
3951 return (0);
3952 return (1);
3953 }
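
/*
 * Illustrative use (not a specific caller): deciding whether a signal
 * or fault set carried in a /proc control message names anything at
 * all, e.g.
 *
 *	sigset_t set;
 *
 *	if (setisempty((uint32_t *)&set,
 *	    sizeof (set) / sizeof (uint32_t))) {
 *		... nothing is being traced ...
 *	}
 */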
3954
3955 /*
3956 * Utility routine for establishing a watched area in the process.
3957 * Keep the list of watched areas sorted by virtual address.
3958 */
3959 int
3960 set_watched_area(proc_t *p, struct watched_area *pwa)
3961 {
3962 caddr_t vaddr = pwa->wa_vaddr;
3963 caddr_t eaddr = pwa->wa_eaddr;
3964 ulong_t flags = pwa->wa_flags;
3965 struct watched_area *target;
3966 avl_index_t where;
3967 int error = 0;
3968
3969 /* we must not be holding p->p_lock, but the process must be locked */
3970 ASSERT(MUTEX_NOT_HELD(&p->p_lock));
3971 ASSERT(p->p_proc_flag & P_PR_LOCK);
3972
3973 /*
3974 * If this is our first watchpoint, enable watchpoints for the process.
3975 */
3976 if (!pr_watch_active(p)) {
3977 kthread_t *t;
3978
3979 mutex_enter(&p->p_lock);
3980 if ((t = p->p_tlist) != NULL) {
3981 do {
3982 watch_enable(t);
3983 } while ((t = t->t_forw) != p->p_tlist);
3984 }
3985 mutex_exit(&p->p_lock);
3986 }
3987
3988 target = pr_find_watched_area(p, pwa, &where);
3989 if (target != NULL) {
3990 /*
3991 * We discovered an existing, overlapping watched area.
3992 * Allow it only if it is an exact match.
3993 */
3994 if (target->wa_vaddr != vaddr ||
3995 target->wa_eaddr != eaddr)
3996 error = EINVAL;
3997 else if (target->wa_flags != flags) {
3998 error = set_watched_page(p, vaddr, eaddr,
3999 flags, target->wa_flags);
4000 target->wa_flags = flags;
4001 }
4002 kmem_free(pwa, sizeof (struct watched_area));
4003 } else {
4004 avl_insert(&p->p_warea, pwa, where);
4005 error = set_watched_page(p, vaddr, eaddr, flags, 0);
4006 }
4007
4008 return (error);
4009 }
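
/*
 * The avl_index_t cookie filled in by the lookup inside
 * pr_find_watched_area() is what makes the miss path above cheap:
 * avl_insert() places the new node at the position the failed lookup
 * already discovered instead of searching the tree a second time.
 */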
4010
4011 /*
4012 * Utility routine for clearing a watched area in the process.
4013 * Must be an exact match of the virtual address.
4014 * The size and flags don't matter.
4015 */
4016 int
4017 clear_watched_area(proc_t *p, struct watched_area *pwa)
4018 {
4019 struct watched_area *found;
4020
4021 /* we must not be holding p->p_lock, but the process must be locked */
4022 ASSERT(MUTEX_NOT_HELD(&p->p_lock));
4023 ASSERT(p->p_proc_flag & P_PR_LOCK);
4024
4025
4026 if (!pr_watch_active(p)) {
4027 kmem_free(pwa, sizeof (struct watched_area));
4028 return (0);
4029 }
4030
4031 /*
4032 * Look for a matching address in the watched areas. If a match is
4033 * found, clear the old watched area and adjust the watched page(s). It
4034 * is not an error if there is no match.
4035 */
4036 if ((found = pr_find_watched_area(p, pwa, NULL)) != NULL &&
4037 found->wa_vaddr == pwa->wa_vaddr) {
4038 clear_watched_page(p, found->wa_vaddr, found->wa_eaddr,
4039 found->wa_flags);
4040 avl_remove(&p->p_warea, found);
4041 kmem_free(found, sizeof (struct watched_area));
4042 }
4043
4044 kmem_free(pwa, sizeof (struct watched_area));
4045
4046 /*
4047 * If we removed the last watched area from the process, disable
4048 * watchpoints.
4049 */
4050 if (!pr_watch_active(p)) {
4051 kthread_t *t;
4052
4053 mutex_enter(&p->p_lock);
4054 if ((t = p->p_tlist) != NULL) {
4055 do {
4056 watch_disable(t);
4057 } while ((t = t->t_forw) != p->p_tlist);
4058 }
4059 mutex_exit(&p->p_lock);
4060 }
4061
4062 return (0);
4063 }
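
/*
 * Note that clear_watched_area() consumes its pwa argument on every
 * path: the caller passes in a template that is used only for the
 * lookup, and it is always freed here.
 */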
4064
4065 /*
4066 * Frees all the watched_area structures.
4067 */
4068 void
4069 pr_free_watchpoints(proc_t *p)
4070 {
4071 struct watched_area *delp;
4072 void *cookie;
4073
4074 cookie = NULL;
4075 while ((delp = avl_destroy_nodes(&p->p_warea, &cookie)) != NULL)
4076 kmem_free(delp, sizeof (struct watched_area));
4077
4078 avl_destroy(&p->p_warea);
4079 }
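
/*
 * avl_destroy_nodes() with a cookie is the usual bulk-teardown idiom:
 * it hands back each node in turn without rebalancing the tree, and
 * the final avl_destroy() disposes of the then-empty tree.
 */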
4080
4081 /*
4082 * This one is called by the traced process to unwatch all the
4083 * pages while deallocating the list of watched_page structs.
4084 */
4085 void
4086 pr_free_watched_pages(proc_t *p)
4087 {
4088 struct as *as = p->p_as;
4089 struct watched_page *pwp;
4090 uint_t prot;
4091 int retrycnt, err;
4092 void *cookie;
4093
4094 if (as == NULL || avl_numnodes(&as->a_wpage) == 0)
4095 return;
4096
4097 ASSERT(MUTEX_NOT_HELD(&curproc->p_lock));
4098 AS_LOCK_ENTER(as, RW_WRITER);
4099
4100 pwp = avl_first(&as->a_wpage);
4101
4102 cookie = NULL;
4103 while ((pwp = avl_destroy_nodes(&as->a_wpage, &cookie)) != NULL) {
4104 retrycnt = 0;
4105 if ((prot = pwp->wp_oprot) != 0) {
4106 caddr_t addr = pwp->wp_vaddr;
4107 struct seg *seg;
4108 retry:
4109
4110 if ((pwp->wp_prot != prot ||
4111 (pwp->wp_flags & WP_NOWATCH)) &&
4112 (seg = as_segat(as, addr)) != NULL) {
4113 err = SEGOP_SETPROT(seg, addr, PAGESIZE, prot);
4114 if (err == IE_RETRY) {
4115 ASSERT(retrycnt == 0);
4116 retrycnt++;
4117 goto retry;
4118 }
4119 }
4120 }
4121 kmem_free(pwp, sizeof (struct watched_page));
4122 }
4123
4124 avl_destroy(&as->a_wpage);
4125 p->p_wprot = NULL;
4126
4127 AS_LOCK_EXIT(as);
4128 }
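
/*
 * SEGOP_SETPROT() may return IE_RETRY to ask that the operation be
 * restarted; the ASSERT(retrycnt == 0) above documents the expectation
 * that a single retry is sufficient on this path.
 */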
4129
4130 /*
4131 * Insert a watched area into the list of watched pages.
4132 * If oflags is zero then we are adding a new watched area.
4133 * Otherwise we are changing the flags of an existing watched area.
4134 */
4135 static int
4136 set_watched_page(proc_t *p, caddr_t vaddr, caddr_t eaddr,
4137 ulong_t flags, ulong_t oflags)
4138 {
4139 struct as *as = p->p_as;
4140 avl_tree_t *pwp_tree;
4141 struct watched_page *pwp, *newpwp;
4142 struct watched_page tpw;
4143 avl_index_t where;
4144 struct seg *seg;
4145 uint_t prot;
4146 caddr_t addr;
4147
4148 /*
4149 * We need to pre-allocate a list of structures before we grab the
4150 * address space lock to avoid calling kmem_alloc(KM_SLEEP) with locks
4151 * held.
4152 */
4153 newpwp = NULL;
4154 for (addr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
4155 addr < eaddr; addr += PAGESIZE) {
4156 pwp = kmem_zalloc(sizeof (struct watched_page), KM_SLEEP);
4157 pwp->wp_list = newpwp;
4158 newpwp = pwp;
4159 }
4160
4161 AS_LOCK_ENTER(as, RW_WRITER);
4162
4163 /*
4164 * Search for an existing watched page to contain the watched area.
4165 * If none is found, grab a new one from the available list
4166 * and insert it in the active list, keeping the list sorted
4167 * by user-level virtual address.
4168 */
4169 if (p->p_flag & SVFWAIT)
4170 pwp_tree = &p->p_wpage;
4171 else
4172 pwp_tree = &as->a_wpage;
4173
4174 again:
4175 if (avl_numnodes(pwp_tree) > prnwatch) {
4176 AS_LOCK_EXIT(as);
4177 while (newpwp != NULL) {
4178 pwp = newpwp->wp_list;
4179 kmem_free(newpwp, sizeof (struct watched_page));
4180 newpwp = pwp;
4181 }
4182 return (E2BIG);
4183 }
4184
4185 tpw.wp_vaddr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
4186 if ((pwp = avl_find(pwp_tree, &tpw, &where)) == NULL) {
4187 pwp = newpwp;
4188 newpwp = newpwp->wp_list;
4189 pwp->wp_list = NULL;
4190 pwp->wp_vaddr = (caddr_t)((uintptr_t)vaddr &
4191 (uintptr_t)PAGEMASK);
4192 avl_insert(pwp_tree, pwp, where);
4193 }
4194
4195 ASSERT(vaddr >= pwp->wp_vaddr && vaddr < pwp->wp_vaddr + PAGESIZE);
4196
4197 if (oflags & WA_READ)
4198 pwp->wp_read--;
4199 if (oflags & WA_WRITE)
4200 pwp->wp_write--;
4201 if (oflags & WA_EXEC)
4202 pwp->wp_exec--;
4203
4204 ASSERT(pwp->wp_read >= 0);
4205 ASSERT(pwp->wp_write >= 0);
4206 ASSERT(pwp->wp_exec >= 0);
4207
4208 if (flags & WA_READ)
4209 pwp->wp_read++;
4210 if (flags & WA_WRITE)
4211 pwp->wp_write++;
4212 if (flags & WA_EXEC)
4213 pwp->wp_exec++;
4214
4215 if (!(p->p_flag & SVFWAIT)) {
4216 vaddr = pwp->wp_vaddr;
4217 if (pwp->wp_oprot == 0 &&
4218 (seg = as_segat(as, vaddr)) != NULL) {
4219 SEGOP_GETPROT(seg, vaddr, 0, &prot);
4220 pwp->wp_oprot = (uchar_t)prot;
4221 pwp->wp_prot = (uchar_t)prot;
4222 }
4223 if (pwp->wp_oprot != 0) {
4224 prot = pwp->wp_oprot;
4225 if (pwp->wp_read)
4226 prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
4227 if (pwp->wp_write)
4228 prot &= ~PROT_WRITE;
4229 if (pwp->wp_exec)
4230 prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
4231 if (!(pwp->wp_flags & WP_NOWATCH) &&
4232 pwp->wp_prot != prot &&
4233 (pwp->wp_flags & WP_SETPROT) == 0) {
4234 pwp->wp_flags |= WP_SETPROT;
4235 pwp->wp_list = p->p_wprot;
4236 p->p_wprot = pwp;
4237 }
4238 pwp->wp_prot = (uchar_t)prot;
4239 }
4240 }
4241
4242 /*
4243 * If the watched area extends into the next page then do
4244 * it over again with the virtual address of the next page.
4245 */
4246 if ((vaddr = pwp->wp_vaddr + PAGESIZE) < eaddr)
4247 goto again;
4248
4249 AS_LOCK_EXIT(as);
4250
4251 /*
4252 * Free any pages we may have over-allocated
4253 */
4254 while (newpwp != NULL) {
4255 pwp = newpwp->wp_list;
4256 kmem_free(newpwp, sizeof (struct watched_page));
4257 newpwp = pwp;
4258 }
4259
4260 return (0);
4261 }
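
/*
 * The protection downgrade computed above can be summarized as
 * follows, where wp_oprot is the segment's original protection:
 *
 *	watch type	resulting page protection
 *	----------	-------------------------
 *	WA_READ		all of PROT_READ/WRITE/EXEC removed
 *	WA_WRITE	PROT_WRITE removed
 *	WA_EXEC		all of PROT_READ/WRITE/EXEC removed
 *
 * Read and execute watchpoints revoke everything, presumably because
 * typical MMUs cannot fault on reads or instruction fetches alone
 * while still permitting the remaining access types.
 */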
4262
4263 /*
4264 * Remove a watched area from the list of watched pages.
4265 * A watched area may extend over more than one page.
4266 */
4267 static void
4268 clear_watched_page(proc_t *p, caddr_t vaddr, caddr_t eaddr, ulong_t flags)
4269 {
4270 struct as *as = p->p_as;
4271 struct watched_page *pwp;
4272 struct watched_page tpw;
4273 avl_tree_t *tree;
4274 avl_index_t where;
4275
4276 AS_LOCK_ENTER(as, RW_WRITER);
4277
4278 if (p->p_flag & SVFWAIT)
4279 tree = &p->p_wpage;
4280 else
4281 tree = &as->a_wpage;
4282
4283 tpw.wp_vaddr = vaddr =
4284 (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
4285 pwp = avl_find(tree, &tpw, &where);
4286 if (pwp == NULL)
4287 pwp = avl_nearest(tree, where, AVL_AFTER);
4288
4289 while (pwp != NULL && pwp->wp_vaddr < eaddr) {
4290 ASSERT(vaddr <= pwp->wp_vaddr);
4291
4292 if (flags & WA_READ)
4293 pwp->wp_read--;
4294 if (flags & WA_WRITE)
4295 pwp->wp_write--;
4296 if (flags & WA_EXEC)
4297 pwp->wp_exec--;
4298
4299 if (pwp->wp_read + pwp->wp_write + pwp->wp_exec != 0) {
4300 /*
4301 * Reset the hat layer's protections on this page.
4302 */
4303 if (pwp->wp_oprot != 0) {
4304 uint_t prot = pwp->wp_oprot;
4305
4306 if (pwp->wp_read)
4307 prot &=
4308 ~(PROT_READ|PROT_WRITE|PROT_EXEC);
4309 if (pwp->wp_write)
4310 prot &= ~PROT_WRITE;
4311 if (pwp->wp_exec)
4312 prot &=
4313 ~(PROT_READ|PROT_WRITE|PROT_EXEC);
4314 if (!(pwp->wp_flags & WP_NOWATCH) &&
4315 pwp->wp_prot != prot &&
4316 (pwp->wp_flags & WP_SETPROT) == 0) {
4317 pwp->wp_flags |= WP_SETPROT;
4318 pwp->wp_list = p->p_wprot;
4319 p->p_wprot = pwp;
4320 }
4321 pwp->wp_prot = (uchar_t)prot;
4322 }
4323 } else {
4324 /*
4325 * No watched areas remain in this page.
4326 * Reset everything to normal.
4327 */
4328 if (pwp->wp_oprot != 0) {
4329 pwp->wp_prot = pwp->wp_oprot;
4330 if ((pwp->wp_flags & WP_SETPROT) == 0) {
4331 pwp->wp_flags |= WP_SETPROT;
4332 pwp->wp_list = p->p_wprot;
4333 p->p_wprot = pwp;
4334 }
4335 }
4336 }
4337
4338 pwp = AVL_NEXT(tree, pwp);
4339 }
4340
4341 AS_LOCK_EXIT(as);
4342 }
4343
4344 /*
4345 * Return the original protections for the specified page.
4346 */
4347 static void
4348 getwatchprot(struct as *as, caddr_t addr, uint_t *prot)
4349 {
4350 struct watched_page *pwp;
4351 struct watched_page tpw;
4352
4353 ASSERT(AS_LOCK_HELD(as));
4354
4355 tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
4356 if ((pwp = avl_find(&as->a_wpage, &tpw, NULL)) != NULL)
4357 *prot = pwp->wp_oprot;
4358 }
4359
4360 static prpagev_t *
4361 pr_pagev_create(struct seg *seg, int check_noreserve)
4362 {
4363 prpagev_t *pagev = kmem_alloc(sizeof (prpagev_t), KM_SLEEP);
4364 size_t total_pages = seg_pages(seg);
4365
4366 /*
4367 * Limit the size of our vectors to pagev_lim pages at a time. We need
4368 * 4 or 5 bytes of storage per page, so this means we limit ourselves
4369 * to about a megabyte of kernel heap by default.
4370 */
4371 pagev->pg_npages = MIN(total_pages, pagev_lim);
4372 pagev->pg_pnbase = 0;
4373
4374 pagev->pg_protv =
4375 kmem_alloc(pagev->pg_npages * sizeof (uint_t), KM_SLEEP);
4376
4377 if (check_noreserve)
4378 pagev->pg_incore =
4379 kmem_alloc(pagev->pg_npages * sizeof (char), KM_SLEEP);
4380 else
4381 pagev->pg_incore = NULL;
4382
4383 return (pagev);
4384 }
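
/*
 * Worked example of the sizing comment above: at the default pagev_lim
 * of 256 * 1024 pages, the protection vector costs
 * 256K * sizeof (uint_t) = 1MB and the optional incore vector another
 * 256K * sizeof (char) = 256KB, i.e. roughly a megabyte (1.25MB worst
 * case) of kernel heap per page vector.
 */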
4385
4386 static void
4387 pr_pagev_destroy(prpagev_t *pagev)
4388 {
4389 if (pagev->pg_incore != NULL)
4390 kmem_free(pagev->pg_incore, pagev->pg_npages * sizeof (char));
4391
4392 kmem_free(pagev->pg_protv, pagev->pg_npages * sizeof (uint_t));
4393 kmem_free(pagev, sizeof (prpagev_t));
4394 }
4395
4396 static caddr_t
4397 pr_pagev_fill(prpagev_t *pagev, struct seg *seg, caddr_t addr, caddr_t eaddr)
4398 {
4399 ulong_t lastpg = seg_page(seg, eaddr - 1);
4400 ulong_t pn, pnlim;
4401 caddr_t saddr;
4402 size_t len;
4403
4404 ASSERT(addr >= seg->s_base && addr <= eaddr);
4405
4406 if (addr == eaddr)
4407 return (eaddr);
4408
4409 refill:
4410 ASSERT(addr < eaddr);
4411 pagev->pg_pnbase = seg_page(seg, addr);
4412 pnlim = pagev->pg_pnbase + pagev->pg_npages;
4413 saddr = addr;
4414
4415 if (lastpg < pnlim)
4416 len = (size_t)(eaddr - addr);
4417 else
4418 len = pagev->pg_npages * PAGESIZE;
4419
4420 if (pagev->pg_incore != NULL) {
4421 /*
4422 * INCORE cleverly has different semantics than GETPROT:
4423 * it returns info on pages up to but NOT including addr + len.
4424 */
4425 SEGOP_INCORE(seg, addr, len, pagev->pg_incore);
4426 pn = pagev->pg_pnbase;
4427
4428 do {
4429 /*
4430 * Guilty knowledge here: We know that segvn_incore
4431 * returns more than just the low-order bit that
4432 * indicates the page is actually in memory. If any
4433 * bits are set, then the page has backing store.
4434 */
4435 if (pagev->pg_incore[pn++ - pagev->pg_pnbase])
4436 goto out;
4437
4438 } while ((addr += PAGESIZE) < eaddr && pn < pnlim);
4439
4440 /*
4441 * If we examined all the pages in the vector but we're not
4442 * at the end of the segment, take another lap.
4443 */
4444 if (addr < eaddr)
4445 goto refill;
4446 }
4447
4448 /*
4449 * Need to take len - 1 because addr + len is the address of the
4450 * first byte of the page just past the end of what we want.
4451 */
4452 out:
4453 SEGOP_GETPROT(seg, saddr, len - 1, pagev->pg_protv);
4454 return (addr);
4455 }
4456
4457 static caddr_t
4458 pr_pagev_nextprot(prpagev_t *pagev, struct seg *seg,
4459 caddr_t *saddrp, caddr_t eaddr, uint_t *protp)
4460 {
4461 /*
4462 * Our starting address is either the specified address, or the base
4463 * address from the start of the pagev. If the latter is greater,
4464 * this means a previous call to pr_pagev_fill has already scanned
4465 * further than the end of the previous mapping.
4466 */
4467 caddr_t base = seg->s_base + pagev->pg_pnbase * PAGESIZE;
4468 caddr_t addr = MAX(*saddrp, base);
4469 ulong_t pn = seg_page(seg, addr);
4470 uint_t prot, nprot;
4471
4472 /*
4473 * If we're dealing with noreserve pages, then advance addr to
4474 * the address of the next page which has backing store.
4475 */
4476 if (pagev->pg_incore != NULL) {
4477 while (pagev->pg_incore[pn - pagev->pg_pnbase] == 0) {
4478 if ((addr += PAGESIZE) == eaddr) {
4479 *saddrp = addr;
4480 prot = 0;
4481 goto out;
4482 }
4483 if (++pn == pagev->pg_pnbase + pagev->pg_npages) {
4484 addr = pr_pagev_fill(pagev, seg, addr, eaddr);
4485 if (addr == eaddr) {
4486 *saddrp = addr;
4487 prot = 0;
4488 goto out;
4489 }
4490 pn = seg_page(seg, addr);
4491 }
4492 }
4493 }
4494
4495 /*
4496 * Get the protections on the page corresponding to addr.
4497 */
4498 pn = seg_page(seg, addr);
4499 ASSERT(pn >= pagev->pg_pnbase);
4500 ASSERT(pn < (pagev->pg_pnbase + pagev->pg_npages));
4501
4502 prot = pagev->pg_protv[pn - pagev->pg_pnbase];
4503 getwatchprot(seg->s_as, addr, &prot);
4504 *saddrp = addr;
4505
4506 /*
4507 * Now loop until we find a backed page with different protections
4508 * or we reach the end of this segment.
4509 */
4510 while ((addr += PAGESIZE) < eaddr) {
4511 /*
4512 * If pn has advanced to the page number following what we
4513 * have information on, refill the page vector and reset
4514 * addr and pn. If pr_pagev_fill does not return the
4515 * address of the next page, we have a discontiguity and
4516 * thus have reached the end of the current mapping.
4517 */
4518 if (++pn == pagev->pg_pnbase + pagev->pg_npages) {
4519 caddr_t naddr = pr_pagev_fill(pagev, seg, addr, eaddr);
4520 if (naddr != addr)
4521 goto out;
4522 pn = seg_page(seg, addr);
4523 }
4524
4525 /*
4526 * The previous page's protections are in prot, and it has
4527 * backing. If this page is MAP_NORESERVE and has no backing,
4528 * then end this mapping and return the previous protections.
4529 */
4530 if (pagev->pg_incore != NULL &&
4531 pagev->pg_incore[pn - pagev->pg_pnbase] == 0)
4532 break;
4533
4534 /*
4535 * Otherwise end the mapping if this page's protections (nprot)
4536 * are different than those in the previous page (prot).
4537 */
4538 nprot = pagev->pg_protv[pn - pagev->pg_pnbase];
4539 getwatchprot(seg->s_as, addr, &nprot);
4540
4541 if (nprot != prot)
4542 break;
4543 }
4544
4545 out:
4546 *protp = prot;
4547 return (addr);
4548 }
4549
4550 size_t
4551 pr_getsegsize(struct seg *seg, int reserved)
4552 {
4553 size_t size = seg->s_size;
4554
4555 /*
4556 * If we're interested in the reserved space, return the size of the
4557 * segment itself. Everything else in this function is a special case
4558 * to determine the actual underlying size of various segment types.
4559 */
4560 if (reserved)
4561 return (size);
4562
4563 /*
4564 * If this is a segvn mapping of a regular file, return the smaller
4565 * of the segment size and the remaining size of the file beyond
4566 * the file offset corresponding to seg->s_base.
4567 */
4568 if (seg->s_ops == &segvn_ops) {
4569 vattr_t vattr;
4570 vnode_t *vp;
4571
4572 vattr.va_mask = AT_SIZE;
4573
4574 if (SEGOP_GETVP(seg, seg->s_base, &vp) == 0 &&
4575 vp != NULL && vp->v_type == VREG &&
4576 VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
4577
4578 u_offset_t fsize = vattr.va_size;
4579 u_offset_t offset = SEGOP_GETOFFSET(seg, seg->s_base);
4580
4581 if (fsize < offset)
4582 fsize = 0;
4583 else
4584 fsize -= offset;
4585
4586 fsize = roundup(fsize, (u_offset_t)PAGESIZE);
4587
4588 if (fsize < (u_offset_t)size)
4589 size = (size_t)fsize;
4590 }
4591
4592 return (size);
4593 }
4594
4595 /*
4596 * If this is an ISM shared segment, don't include pages that are
4597 * beyond the real size of the spt segment that backs it.
4598 */
4599 if (seg->s_ops == &segspt_shmops)
4600 return (MIN(spt_realsize(seg), size));
4601
4602 /*
4603 * If this segment is a mapping from /dev/null, then this is a
4604 * reservation of virtual address space and has no actual size.
4605 * Such segments are backed by segdev and have type set to neither
4606 * MAP_SHARED nor MAP_PRIVATE.
4607 */
4608 if (seg->s_ops == &segdev_ops &&
4609 ((SEGOP_GETTYPE(seg, seg->s_base) &
4610 (MAP_SHARED | MAP_PRIVATE)) == 0))
4611 return (0);
4612
4613 /*
4614 * If this segment doesn't match one of the special types we handle,
4615 * just return the size of the segment itself.
4616 */
4617 return (size);
4618 }
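
/*
 * Example of the regular-file clamp above, assuming 4K pages for
 * illustration: a 64K segvn mapping of a file with only 10K remaining
 * beyond the mapped offset reports roundup(10K, 4K) = 12K here, since
 * just three pages of the reservation are actually backed by the file.
 */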
4619
4620 uint_t
4621 pr_getprot(struct seg *seg, int reserved, void **tmp,
4622 caddr_t *saddrp, caddr_t *naddrp, caddr_t eaddr)
4623 {
4624 struct as *as = seg->s_as;
4625
4626 caddr_t saddr = *saddrp;
4627 caddr_t naddr;
4628
4629 int check_noreserve;
4630 uint_t prot;
4631
4632 union {
4633 struct segvn_data *svd;
4634 struct segdev_data *sdp;
4635 void *data;
4636 } s;
4637
4638 s.data = seg->s_data;
4639
4640 ASSERT(AS_WRITE_HELD(as));
4641 ASSERT(saddr >= seg->s_base && saddr < eaddr);
4642 ASSERT(eaddr <= seg->s_base + seg->s_size);
4643
4644 /*
4645 * Don't include MAP_NORESERVE pages in the address range
4646 * unless their mappings have actually materialized.
4647 * We cheat by knowing that segvn is the only segment
4648 * driver that supports MAP_NORESERVE.
4649 */
4650 check_noreserve =
4651 (!reserved && seg->s_ops == &segvn_ops && s.svd != NULL &&
4652 (s.svd->vp == NULL || s.svd->vp->v_type != VREG) &&
4653 (s.svd->flags & MAP_NORESERVE));
4654
4655 /*
4656 * Examine every page only as a last resort. We use guilty knowledge
4657 * of segvn and segdev to avoid this: if there are no per-page
4658 * protections present in the segment and we don't care about
4659 * MAP_NORESERVE, then s_data->prot is the prot for the whole segment.
4660 */
4661 if (!check_noreserve && saddr == seg->s_base &&
4662 seg->s_ops == &segvn_ops && s.svd != NULL && s.svd->pageprot == 0) {
4663 prot = s.svd->prot;
4664 getwatchprot(as, saddr, &prot);
4665 naddr = eaddr;
4666
4667 } else if (saddr == seg->s_base && seg->s_ops == &segdev_ops &&
4668 s.sdp != NULL && s.sdp->pageprot == 0) {
4669 prot = s.sdp->prot;
4670 getwatchprot(as, saddr, &prot);
4671 naddr = eaddr;
4672
4673 } else {
4674 prpagev_t *pagev;
4675
4676 /*
4677 * If addr is sitting at the start of the segment, then
4678 * create a page vector to store protection and incore
4679 * information for pages in the segment, and fill it.
4680 * Otherwise, we expect *tmp to address the prpagev_t
4681 * allocated by a previous call to this function.
4682 */
4683 if (saddr == seg->s_base) {
4684 pagev = pr_pagev_create(seg, check_noreserve);
4685 saddr = pr_pagev_fill(pagev, seg, saddr, eaddr);
4686
4687 ASSERT(*tmp == NULL);
4688 *tmp = pagev;
4689
4690 ASSERT(saddr <= eaddr);
4691 *saddrp = saddr;
4692
4693 if (saddr == eaddr) {
4694 naddr = saddr;
4695 prot = 0;
4696 goto out;
4697 }
4698
4699 } else {
4700 ASSERT(*tmp != NULL);
4701 pagev = (prpagev_t *)*tmp;
4702 }
4703
4704 naddr = pr_pagev_nextprot(pagev, seg, saddrp, eaddr, &prot);
4705 ASSERT(naddr <= eaddr);
4706 }
4707
4708 out:
4709 if (naddr == eaddr)
4710 pr_getprot_done(tmp);
4711 *naddrp = naddr;
4712 return (prot);
4713 }
4714
4715 void
4716 pr_getprot_done(void **tmp)
4717 {
4718 if (*tmp != NULL) {
4719 pr_pagev_destroy((prpagev_t *)*tmp);
4720 *tmp = NULL;
4721 }
4722 }
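
/*
 * pr_getprot() and pr_getprot_done() are meant to be driven by a loop
 * like the one in prgetxmap() below; a minimal sketch:
 *
 *	void *tmp = NULL;
 *
 *	for (saddr = seg->s_base; saddr < eaddr; saddr = naddr)
 *		prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
 *	ASSERT(tmp == NULL);
 *
 * The page vector hung off tmp is freed automatically once the scan
 * reaches eaddr, which is what the final ASSERT checks.
 */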
4723
4724 /*
4725 * Return true iff the vnode is a /proc file from the object directory.
4726 */
4727 int
4728 pr_isobject(vnode_t *vp)
4729 {
4730 return (vn_matchops(vp, prvnodeops) && VTOP(vp)->pr_type == PR_OBJECT);
4731 }
4732
4733 /*
4734 * Return true iff the vnode is a /proc file opened by the process itself.
4735 */
4736 int
4737 pr_isself(vnode_t *vp)
4738 {
4739 /*
4740 * XXX: To retain binary compatibility with the old
4741 * ioctl()-based version of /proc, we exempt self-opens
4742 * of /proc/<pid> from being marked close-on-exec.
4743 */
4744 return (vn_matchops(vp, prvnodeops) &&
4745 (VTOP(vp)->pr_flags & PR_ISSELF) &&
4746 VTOP(vp)->pr_type != PR_PIDDIR);
4747 }
4748
4749 static ssize_t
4750 pr_getpagesize(struct seg *seg, caddr_t saddr, caddr_t *naddrp, caddr_t eaddr)
4751 {
4752 ssize_t pagesize, hatsize;
4753
4754 ASSERT(AS_WRITE_HELD(seg->s_as));
4755 ASSERT(IS_P2ALIGNED(saddr, PAGESIZE));
4756 ASSERT(IS_P2ALIGNED(eaddr, PAGESIZE));
4757 ASSERT(saddr < eaddr);
4758
4759 pagesize = hatsize = hat_getpagesize(seg->s_as->a_hat, saddr);
4760 ASSERT(pagesize == -1 || IS_P2ALIGNED(pagesize, pagesize));
4761 ASSERT(pagesize != 0);
4762
4763 if (pagesize == -1)
4764 pagesize = PAGESIZE;
4765
4766 saddr += P2NPHASE((uintptr_t)saddr, pagesize);
4767
4768 while (saddr < eaddr) {
4769 if (hatsize != hat_getpagesize(seg->s_as->a_hat, saddr))
4770 break;
4771 ASSERT(IS_P2ALIGNED(saddr, pagesize));
4772 saddr += pagesize;
4773 }
4774
4775 *naddrp = ((saddr < eaddr) ? saddr : eaddr);
4776 return (hatsize);
4777 }
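
/*
 * Callers get two things from pr_getpagesize(): the return value is
 * the HAT mapping size at saddr (-1 if nothing is mapped there), and
 * *naddrp is advanced to the end of the run of identically-sized
 * mappings.  prgetxmap() below translates a return of -1 into a
 * pr_hatpagesize of 0.
 */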
4778
4779 /*
4780 * Return an array of structures with extended memory map information.
4781 * We allocate here; the caller must deallocate.
4782 */
4783 int
4784 prgetxmap(proc_t *p, list_t *iolhead)
4785 {
4786 struct as *as = p->p_as;
4787 prxmap_t *mp;
4788 struct seg *seg;
4789 struct seg *brkseg, *stkseg;
4790 struct vnode *vp;
4791 struct vattr vattr;
4792 uint_t prot;
4793
4794 ASSERT(as != &kas && AS_WRITE_HELD(as));
4795
4796 /*
4797 * Request an initial buffer size that doesn't waste memory
4798 * if the address space has only a small number of segments.
4799 */
4800 pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));
4801
4802 if ((seg = AS_SEGFIRST(as)) == NULL)
4803 return (0);
4804
4805 brkseg = break_seg(p);
4806 stkseg = as_segat(as, prgetstackbase(p));
4807
4808 do {
4809 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
4810 caddr_t saddr, naddr, baddr;
4811 void *tmp = NULL;
4812 ssize_t psz;
4813 char *parr;
4814 uint64_t npages;
4815 uint64_t pagenum;
4816
4817 if ((seg->s_flags & S_HOLE) != 0) {
4818 continue;
4819 }
4820 /*
4821 * Segment loop part one: iterate from the base of the segment
4822 * to its end, pausing at each address boundary (baddr) between
4823 * ranges that have different virtual memory protections.
4824 */
4825 for (saddr = seg->s_base; saddr < eaddr; saddr = baddr) {
4826 prot = pr_getprot(seg, 0, &tmp, &saddr, &baddr, eaddr);
4827 ASSERT(baddr >= saddr && baddr <= eaddr);
4828
4829 /*
4830 * Segment loop part two: iterate from the current
4831 * position to the end of the protection boundary,
4832 * pausing at each address boundary (naddr) between
4833 * ranges that have different underlying page sizes.
4834 */
4835 for (; saddr < baddr; saddr = naddr) {
4836 psz = pr_getpagesize(seg, saddr, &naddr, baddr);
4837 ASSERT(naddr >= saddr && naddr <= baddr);
4838
4839 mp = pr_iol_newbuf(iolhead, sizeof (*mp));
4840
4841 mp->pr_vaddr = (uintptr_t)saddr;
4842 mp->pr_size = naddr - saddr;
4843 mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
4844 mp->pr_mflags = 0;
4845 if (prot & PROT_READ)
4846 mp->pr_mflags |= MA_READ;
4847 if (prot & PROT_WRITE)
4848 mp->pr_mflags |= MA_WRITE;
4849 if (prot & PROT_EXEC)
4850 mp->pr_mflags |= MA_EXEC;
4851 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
4852 mp->pr_mflags |= MA_SHARED;
4853 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
4854 mp->pr_mflags |= MA_NORESERVE;
4855 if (seg->s_ops == &segspt_shmops ||
4856 (seg->s_ops == &segvn_ops &&
4857 (SEGOP_GETVP(seg, saddr, &vp) != 0 ||
4858 vp == NULL)))
4859 mp->pr_mflags |= MA_ANON;
4860 if (seg == brkseg)
4861 mp->pr_mflags |= MA_BREAK;
4862 else if (seg == stkseg)
4863 mp->pr_mflags |= MA_STACK;
4864 if (seg->s_ops == &segspt_shmops)
4865 mp->pr_mflags |= MA_ISM | MA_SHM;
4866
4867 mp->pr_pagesize = PAGESIZE;
4868 if (psz == -1) {
4869 mp->pr_hatpagesize = 0;
4870 } else {
4871 mp->pr_hatpagesize = psz;
4872 }
4873
4874 /*
4875 * Manufacture a filename for the "object" dir.
4876 */
4877 mp->pr_dev = PRNODEV;
4878 vattr.va_mask = AT_FSID|AT_NODEID;
4879 if (seg->s_ops == &segvn_ops &&
4880 SEGOP_GETVP(seg, saddr, &vp) == 0 &&
4881 vp != NULL && vp->v_type == VREG &&
4882 VOP_GETATTR(vp, &vattr, 0, CRED(),
4883 NULL) == 0) {
4884 mp->pr_dev = vattr.va_fsid;
4885 mp->pr_ino = vattr.va_nodeid;
4886 if (vp == p->p_exec)
4887 (void) strcpy(mp->pr_mapname,
4888 "a.out");
4889 else
4890 pr_object_name(mp->pr_mapname,
4891 vp, &vattr);
4892 }
4893
4894 /*
4895 * Get the SysV shared memory id, if any.
4896 */
4897 if ((mp->pr_mflags & MA_SHARED) &&
4898 p->p_segacct && (mp->pr_shmid = shmgetid(p,
4899 seg->s_base)) != SHMID_NONE) {
4900 if (mp->pr_shmid == SHMID_FREE)
4901 mp->pr_shmid = -1;
4902
4903 mp->pr_mflags |= MA_SHM;
4904 } else {
4905 mp->pr_shmid = -1;
4906 }
4907
4908 npages = ((uintptr_t)(naddr - saddr)) >>
4909 PAGESHIFT;
4910 parr = kmem_zalloc(npages, KM_SLEEP);
4911
4912 SEGOP_INCORE(seg, saddr, naddr - saddr, parr);
4913
4914 for (pagenum = 0; pagenum < npages; pagenum++) {
4915 if (parr[pagenum] & SEG_PAGE_INCORE)
4916 mp->pr_rss++;
4917 if (parr[pagenum] & SEG_PAGE_ANON)
4918 mp->pr_anon++;
4919 if (parr[pagenum] & SEG_PAGE_LOCKED)
4920 mp->pr_locked++;
4921 }
4922 kmem_free(parr, npages);
4923 }
4924 }
4925 ASSERT(tmp == NULL);
4926 } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
4927
4928 return (0);
4929 }
4930
4931 /*
4932 * Return the process's credentials. We don't need a 32-bit equivalent of
4933 * this function because prcred_t and prcred32_t are actually the same.
4934 */
4935 void
4936 prgetcred(proc_t *p, prcred_t *pcrp)
4937 {
4938 mutex_enter(&p->p_crlock);
4939 cred2prcred(p->p_cred, pcrp);
4940 mutex_exit(&p->p_crlock);
4941 }
4942
4943 void
4944 prgetsecflags(proc_t *p, prsecflags_t *psfp)
4945 {
4946 ASSERT(psfp != NULL);
4947
4948 bzero(psfp, sizeof (*psfp));
4949 psfp->pr_version = PRSECFLAGS_VERSION_CURRENT;
4950 psfp->pr_lower = p->p_secflags.psf_lower;
4951 psfp->pr_upper = p->p_secflags.psf_upper;
4952 psfp->pr_effective = p->p_secflags.psf_effective;
4953 psfp->pr_inherit = p->p_secflags.psf_inherit;
4954 }
4955
4956 /*
4957 * Compute actual size of the prpriv_t structure.
4958 */
4959
4960 size_t
4961 prgetprivsize(void)
4962 {
4963 return (priv_prgetprivsize(NULL));
4964 }
4965
4966 /*
4967 * Return the process's privileges. We don't need a 32-bit equivalent of
4968 * this function because prpriv_t and prpriv32_t are actually the same.
4969 */
4970 void
4971 prgetpriv(proc_t *p, prpriv_t *pprp)
4972 {
4973 mutex_enter(&p->p_crlock);
4974 cred2prpriv(p->p_cred, pprp);
4975 mutex_exit(&p->p_crlock);
4976 }
4977
4978 #ifdef _SYSCALL32_IMPL
4979 /*
4980 * Return an array of structures with HAT memory map information.
4981 * We allocate here; the caller must deallocate.
4982 */
4983 int
4984 prgetxmap32(proc_t *p, list_t *iolhead)
4985 {
4986 struct as *as = p->p_as;
4987 prxmap32_t *mp;
4988 struct seg *seg;
4989 struct seg *brkseg, *stkseg;
4990 struct vnode *vp;
4991 struct vattr vattr;
4992 uint_t prot;
4993
4994 ASSERT(as != &kas && AS_WRITE_HELD(as));
4995
4996 /*
4997 * Request an initial buffer size that doesn't waste memory
4998 * if the address space has only a small number of segments.
4999 */
5000 pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));
5001
5002 if ((seg = AS_SEGFIRST(as)) == NULL)
5003 return (0);
5004
5005 brkseg = break_seg(p);
5006 stkseg = as_segat(as, prgetstackbase(p));
5007
5008 do {
5009 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
5010 caddr_t saddr, naddr, baddr;
5011 void *tmp = NULL;
5012 ssize_t psz;
5013 char *parr;
5014 uint64_t npages;
5015 uint64_t pagenum;
5016
5017 if ((seg->s_flags & S_HOLE) != 0) {
5018 continue;
5019 }
5020
5021 /*
5022 * Segment loop part one: iterate from the base of the segment
5023 * to its end, pausing at each address boundary (baddr) between
5024 * ranges that have different virtual memory protections.
5025 */
5026 for (saddr = seg->s_base; saddr < eaddr; saddr = baddr) {
5027 prot = pr_getprot(seg, 0, &tmp, &saddr, &baddr, eaddr);
5028 ASSERT(baddr >= saddr && baddr <= eaddr);
5029
5030 /*
5031 * Segment loop part two: iterate from the current
5032 * position to the end of the protection boundary,
5033 * pausing at each address boundary (naddr) between
5034 * ranges that have different underlying page sizes.
5035 */
5036 for (; saddr < baddr; saddr = naddr) {
5037 psz = pr_getpagesize(seg, saddr, &naddr, baddr);
5038 ASSERT(naddr >= saddr && naddr <= baddr);
5039
5040 mp = pr_iol_newbuf(iolhead, sizeof (*mp));
5041
5042 mp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
5043 mp->pr_size = (size32_t)(naddr - saddr);
5044 mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
5045 mp->pr_mflags = 0;
5046 if (prot & PROT_READ)
5047 mp->pr_mflags |= MA_READ;
5048 if (prot & PROT_WRITE)
5049 mp->pr_mflags |= MA_WRITE;
5050 if (prot & PROT_EXEC)
5051 mp->pr_mflags |= MA_EXEC;
5052 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
5053 mp->pr_mflags |= MA_SHARED;
5054 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
5055 mp->pr_mflags |= MA_NORESERVE;
5056 if (seg->s_ops == &segspt_shmops ||
5057 (seg->s_ops == &segvn_ops &&
5058 (SEGOP_GETVP(seg, saddr, &vp) != 0 ||
5059 vp == NULL)))
5060 mp->pr_mflags |= MA_ANON;
5061 if (seg == brkseg)
5062 mp->pr_mflags |= MA_BREAK;
5063 else if (seg == stkseg)
5064 mp->pr_mflags |= MA_STACK;
5065 if (seg->s_ops == &segspt_shmops)
5066 mp->pr_mflags |= MA_ISM | MA_SHM;
5067
5068 mp->pr_pagesize = PAGESIZE;
5069 if (psz == -1) {
5070 mp->pr_hatpagesize = 0;
5071 } else {
5072 mp->pr_hatpagesize = psz;
5073 }
5074
5075 /*
5076 * Manufacture a filename for the "object" dir.
5077 */
5078 mp->pr_dev = PRNODEV32;
5079 vattr.va_mask = AT_FSID|AT_NODEID;
5080 if (seg->s_ops == &segvn_ops &&
5081 SEGOP_GETVP(seg, saddr, &vp) == 0 &&
5082 vp != NULL && vp->v_type == VREG &&
5083 VOP_GETATTR(vp, &vattr, 0, CRED(),
5084 NULL) == 0) {
5085 (void) cmpldev(&mp->pr_dev,
5086 vattr.va_fsid);
5087 mp->pr_ino = vattr.va_nodeid;
5088 if (vp == p->p_exec)
5089 (void) strcpy(mp->pr_mapname,
5090 "a.out");
5091 else
5092 pr_object_name(mp->pr_mapname,
5093 vp, &vattr);
5094 }
5095
5096 /*
5097 * Get the SysV shared memory id, if any.
5098 */
5099 if ((mp->pr_mflags & MA_SHARED) &&
5100 p->p_segacct && (mp->pr_shmid = shmgetid(p,
5101 seg->s_base)) != SHMID_NONE) {
5102 if (mp->pr_shmid == SHMID_FREE)
5103 mp->pr_shmid = -1;
5104
5105 mp->pr_mflags |= MA_SHM;
5106 } else {
5107 mp->pr_shmid = -1;
5108 }
5109
5110 npages = ((uintptr_t)(naddr - saddr)) >>
5111 PAGESHIFT;
5112 parr = kmem_zalloc(npages, KM_SLEEP);
5113
5114 SEGOP_INCORE(seg, saddr, naddr - saddr, parr);
5115
5116 for (pagenum = 0; pagenum < npages; pagenum++) {
5117 if (parr[pagenum] & SEG_PAGE_INCORE)
5118 mp->pr_rss++;
5119 if (parr[pagenum] & SEG_PAGE_ANON)
5120 mp->pr_anon++;
5121 if (parr[pagenum] & SEG_PAGE_LOCKED)
5122 mp->pr_locked++;
5123 }
5124 kmem_free(parr, npages);
5125 }
5126 }
5127 ASSERT(tmp == NULL);
5128 } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
5129
5130 return (0);
5131 }
5132 #endif /* _SYSCALL32_IMPL */
5133